diff --git a/.cargo/config b/.cargo/config index 6ae16ea5e..82986228d 100644 --- a/.cargo/config +++ b/.cargo/config @@ -1,2 +1,85 @@ -[alias] -t = "test --bin traceback1 --test enclone_test -- --nocapture" +[target.x86_64-unknown-linux-gnu] +rustflags = [ + "--codegen=link-args=-Wl,--compress-debug-sections=zlib", + "-C", "target-feature=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt", + "-D", "clippy::perf", + "-D", "clippy::style", + "-D", "clippy::suspicious", + "-A", "clippy::comparison_chain", + "-W", "future_incompatible", + "-W", "nonstandard_style", + "-W", "rust_2018_compatibility", + "-W", "rust_2021_compatibility", + "-W", "unused", + "-D", "clippy::enum_glob_use", + "-D", "clippy::needless_lifetimes", + "-D", "clippy::redundant_closure_for_method_calls", + "-D", "clippy::unused_io_amount", + "-D", "clippy::wildcard_imports", + "-D", "clippy::unnecessary_unwrap", + "-D", "clippy::uninlined_format_args", + "-W", "clippy::disallowed_names", + "-W", "clippy::enum_variant_names", + "-W", "clippy::large-enum-variant", + "-W", "clippy::missing_safety_doc", + "-A", "clippy::type_complexity", + "-A", "clippy::too_many_arguments", + "-W", "clippy::result_unit_err", +] + +[target.x86_64-pc-windows-gnu] +linker = "x86_64-w64-mingw32-gcc" +rustflags = [ + "-C", "target-feature=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt", + "-D", "clippy::perf", + "-D", "clippy::style", + "-D", "clippy::suspicious", + "-A", "clippy::comparison_chain", + "-W", "future_incompatible", + "-W", "nonstandard_style", + "-W", "rust_2018_compatibility", + "-W", "rust_2021_compatibility", + "-W", "unused", + "-D", "clippy::enum_glob_use", + "-D", "clippy::needless_lifetimes", + "-D", "clippy::redundant_closure_for_method_calls", + "-D", "clippy::unused_io_amount", + "-D", "clippy::wildcard_imports", + "-D", "clippy::unnecessary_unwrap", + "-D", "clippy::uninlined_format_args", + "-W", "clippy::disallowed_names", + "-W", "clippy::enum_variant_names", + "-W", "clippy::large-enum-variant", + "-W", 
"clippy::missing_safety_doc", + "-A", "clippy::type_complexity", + "-A", "clippy::too_many_arguments", + "-W", "clippy::result_unit_err", +] + +[target.'cfg(target_arch = "x86_64")'] +rustflags = [ + "-C", "target-feature=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt", + "-D", "clippy::perf", + "-D", "clippy::style", + "-D", "clippy::suspicious", + "-A", "clippy::comparison_chain", + "-W", "future_incompatible", + "-W", "nonstandard_style", + "-W", "rust_2018_compatibility", + "-W", "rust_2021_compatibility", + "-W", "unused", + "-D", "clippy::enum_glob_use", + "-D", "clippy::needless_lifetimes", + "-D", "clippy::redundant_closure_for_method_calls", + "-D", "clippy::unused_io_amount", + "-D", "clippy::wildcard_imports", + "-D", "clippy::unnecessary_unwrap", + "-D", "clippy::uninlined_format_args", + "-W", "clippy::disallowed_names", + "-W", "clippy::enum_variant_names", + "-W", "clippy::large-enum-variant", + "-W", "clippy::missing_safety_doc", + "-A", "clippy::type_complexity", + "-A", "clippy::too_many_arguments", + "-W", "clippy::result_unit_err", +] diff --git a/.git-credentials b/.git-credentials deleted file mode 100644 index e69de29bb..000000000 diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..bac221239 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,41 @@ +version: 2 + +updates: +- package-ecosystem: cargo + directory: "/" + schedule: + interval: weekly + open-pull-requests-limit: 10 + reviewers: + - sreenathkrishnan + ignore: + # ignore crates from rust-toolbox repo other than vdj_ann. + # They all get updated together anyway. 
+ - dependency-name: align_tools + - dependency-name: amino + - dependency-name: ansi_escape + - dependency-name: binary_vec_io + - dependency-name: dna + - dependency-name: equiv + - dependency-name: exons + - dependency-name: expr_tools + - dependency-name: fasta_tools + - dependency-name: graph_simple + - dependency-name: hyperbase + - dependency-name: io_utils + - dependency-name: kmer_lookup + - dependency-name: load_feature_bc + - dependency-name: mirror_sparse_matrix + - dependency-name: perf_stats + - dependency-name: pretty_trace + - dependency-name: stats_utils + - dependency-name: stirling_numbers + - dependency-name: string_utils + - dependency-name: tables + - dependency-name: vdj_ann_ref + - dependency-name: vdj_types + - dependency-name: vector_utils +- package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: weekly diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml deleted file mode 100644 index 20d0c852d..000000000 --- a/.github/workflows/release.yaml +++ /dev/null @@ -1,99 +0,0 @@ -on: - push: - # Sequence of patterns matched against refs/tags - tags: - - 'v*' # Push events to matching v*, i.e. 
v1.0, v20.15.10 - -name: Build and Upload Release - -env: - CARGO_INCREMENTAL: 0 - -jobs: - - linux: - runs-on: ubuntu-latest - steps: - - name: Checkout git repository - uses: actions/checkout@master - - name: Login to GitHub Package Registry - run: docker login docker.pkg.github.com -u 10xbuild -p ${{secrets.GH_PAT}} - - name: Make release build - run: > - docker run -v ${{github.workspace}}:/root - -e GITHUB_SHA -e GITHUB_REF - docker.pkg.github.com/10xdev/toolchain-scripts/toolchain:latest - /bin/bash -lec ' - cargo build --release; - target/release/enclone --help | grep -q enclone; - readelf -V target/release/enclone; - '; - mkdir ${{runner.temp}}/artifacts; - cp -a target/release/enclone ${{runner.temp}}/artifacts/enclone-linux - - name: Upload build artifact - uses: actions/upload-artifact@v1 - with: - name: enclone - path: ${{runner.temp}}/artifacts - macos: - runs-on: macos-latest - env: - MACOSX_DEPLOYMENT_TARGET: 10.7 - steps: - - name: Checkout git repository - uses: actions/checkout@master - - name: Make release build - run: | - cargo build --release - target/release/enclone --help | grep -q enclone - mkdir ${{runner.temp}}/artifacts - cp -a target/release/enclone ${{runner.temp}}/artifacts/enclone-macos - - name: Upload build artifact - uses: actions/upload-artifact@v1 - with: - name: enclone - path: ${{runner.temp}}/artifacts - - setup-release: - needs: [linux, macos] - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v1 - with: - name: enclone - path: ${{runner.temp}}/artifacts - - - run: ls ${{runner.temp}}/artifacts - - - name: Create Release - id: create_release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ github.ref }} - release_name: Release ${{ github.ref }} - draft: false - prerelease: false - - - name: Upload Linux - id: upload-linux-release - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ 
steps.create_release.outputs.upload_url }} - asset_path: ${{runner.temp}}/artifacts/enclone-linux - asset_name: enclone_linux - asset_content_type: application/octet-stream - - - name: Upload Mac - id: upload-mac-release - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ${{runner.temp}}/artifacts/enclone-macos - asset_name: enclone_macos - asset_content_type: application/octet-stream \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4c383002f..c5aad16f4 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -4,57 +4,137 @@ on: pull_request: push: branches: - - master + - main env: + DENY_VERSION: "0.13.9" + RUST_VERSION: "1.71.0" CARGO_INCREMENTAL: 0 +permissions: + contents: read + # Write permissions are required in order to produce annotations. + checks: write + jobs: test-mac: # This job runs on MacOS Catalina runs-on: macos-latest steps: - - name: rust version - run: rustup default 1.43.0 - - name: add rustfmt - run: rustup component add rustfmt - - name: install cargo-license - run: cargo install cargo-license + + - name: Install protoc + run: brew install protobuf + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.RUST_VERSION }} + components: rustfmt + + # check out master + - name: Checkout enclone master uses: actions/checkout@master - - name: Check Rust formatting - run: cargo fmt -- --check + + - uses: Swatinem/rust-cache@v2 + + # start the real work + + - name: Remove the Cargo target directory + if: github.ref == 'refs/heads/master' + run: cargo clean - name: build-enclone - run: cargo build --release - - name: download test data - run: git clone --depth=1 https://github.com/10XGenomics/enclone-data.git + run: cargo build - name: unit tests - run: cd enclone_main; cargo test --release --features basic -- 
--nocapture + run: cargo test -- --nocapture test-linux: # This job runs on Linux runs-on: ubuntu-latest steps: - - name: rust version - run: rustup default 1.43.0 - - name: add rustfmt - run: rustup component add rustfmt - - name: install cargo-license - run: cargo install cargo-license - - name: Checkout enclone master - uses: actions/checkout@master + - name: Install protoc + run: sudo apt-get install -y protobuf-compiler + + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.RUST_VERSION }} + components: rustfmt, clippy + + # check out + + - name: Compute required fetch depth + id: fetch_depth + run: > + echo + "depth=$(("${{github.event.pull_request.commits}}" + 2))" + >> "$GITHUB_OUTPUT" + - uses: actions/checkout@v4 with: - fetch-depth: 100 + fetch-depth: ${{ steps.fetch_depth.outputs.depth }} + + # set up caching + + - uses: Swatinem/rust-cache@v2 + + # start the real work + - name: Check for large files run: > git rev-list --objects ${{github.event.pull_request.base.sha}}.. 
| git cat-file --batch-check='%(objectname) %(objecttype) %(objectsize) %(rest)' - | awk '$2 == "blob" && $3 >= 1000000 { print substr($1, 1, 7), $3, $4; fail=1 } END { exit fail }' + | awk '$2 == "blob" && $3 >= 1600000 { print substr($1, 1, 7), $3, $4; fail=1 } END { exit fail }' + - name: Remove the Cargo target directory + if: github.ref == 'refs/heads/master' + run: cargo clean + - name: Install cargo-deny + run: | + wget https://github.com/EmbarkStudios/cargo-deny/releases/download/${DENY_VERSION}/cargo-deny-${DENY_VERSION}-x86_64-unknown-linux-musl.tar.gz + tar -xvf cargo-deny-${DENY_VERSION}-x86_64-unknown-linux-musl.tar.gz + mkdir -p ~/bin/ + cp cargo-deny-${DENY_VERSION}-x86_64-unknown-linux-musl/cargo-deny ~/bin/ + rm -r cargo-deny-${DENY_VERSION}-x86_64-unknown-linux-musl* + echo "$HOME/bin" >> $GITHUB_PATH - name: Check Rust formatting run: cargo fmt -- --check + - name: Deny duplicate dependencies (lib/rust) + run: cargo deny --locked check + - name: Run clippy + uses: 10XGenomics/clippy-check@main + with: + # Github limits the number of annotations it will display on a PR for + # a given action, so we're going to disable (for now!) some of the + # noisier lints so that more important ones are more likely to be + # visible. + # NOTE: keep this list in sync with lib/rust/.cargo/config, or else + # which is ignored when running in this mode but not when + # running locally. + # TODO: re-enable the allowed lints at the end of this list. 
+ args: | + --all-targets --locked -- + -D clippy::perf + -D clippy::style + -D clippy::suspicious + -A clippy::comparison_chain + -W future_incompatible + -W nonstandard_style + -W rust_2018_compatibility + -W rust_2021_compatibility + -W unused + -D clippy::enum_glob_use + -D clippy::needless_lifetimes + -D clippy::redundant_closure_for_method_calls + -D clippy::unused_io_amount + -D clippy::wildcard_imports + -D clippy::unnecessary_unwrap + -D clippy::uninlined_format_args + -W clippy::disallowed_names + -W clippy::enum_variant_names + -W clippy::large-enum-variant + -W clippy::missing_safety_doc + -A clippy::type_complexity + -A clippy::too_many_arguments + -W clippy::result_unit_err - name: build-enclone - run: cargo build --release - - name: download test data - run: git clone --depth=1 https://github.com/10XGenomics/enclone-data.git + run: cargo build - name: unit tests - run: cd enclone_main; cargo test --release --features basic -- --nocapture + run: cargo test -- --nocapture diff --git a/.gitignore b/.gitignore index 3d5a82163..91e182059 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ target -enclone_main/test/inputs/version14/85679/outs/raw_feature_bc_matrix/matrix.bin -enclone_main/testx/outputs -enclone-data +*.o +*.so +*.swp +*.DS_Store diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 000000000..b85de7497 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,5 @@ +{ + "recommendations": [ + "rust-lang.rust-analyzer" + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..779f73f66 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "editor.formatOnSave": true, + "files.insertFinalNewline": true, + "rust-analyzer.checkOnSave.allTargets": true, + "rust-analyzer.checkOnSave.command": "clippy", + "rust-analyzer.imports.merge.glob": false +} diff --git a/Cargo.lock b/Cargo.lock index 8518a52ce..32fbccee0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-1,87 +1,82 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "addr2line" -version = "0.13.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6a2d3371669ab3ca9797670853d61402b03d0b4b9ebf33d677dfa720203072" +checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" dependencies = [ "gimli", ] [[package]] name = "adler" -version = "0.2.3" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" -version = "0.7.13" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86" +checksum = "e5bce8d450891e3b36f85a2230cec441fddd60e0c455b61b15bb3ffba955ca85" dependencies = [ "memchr", ] [[package]] name = "align_tools" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85174db137528c768bc52637a1434d92ea9b96860ccddc0f07feb96c48b615f" +version = "0.1.12" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ - "bio", + "bio_edit", "debruijn", - "itertools 0.9.0", + "itertools", + "string_utils", "vector_utils", ] [[package]] name = "amino" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95097f68674a8108944ed3c1cf342f251cb9702dd212c763826d0aec0280b3e7" +version = "0.1.7" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "debruijn", + "string_utils", ] [[package]] name = "ansi_escape" -version = "0.1.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "327cd633c991e455ef3134c286bdeec05ee44686cb351df09d735696cbad75df" +version = "0.1.3" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "string_utils", "vector_utils", ] +[[package]] +name = "anstyle" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" + [[package]] name = "anyhow" -version = "1.0.31" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f" +checksum = "a26fa4d7e3f2eebadf743988fc8aec9fa9a9e82611acafd77c1462ed6262440a" [[package]] name = "approx" -version = "0.3.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0e60b75072ecd4168020818c0107f2857bb6c4e64252d8d3983f6263b40a5c3" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" dependencies = [ "num-traits", ] -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - -[[package]] -name = "arrayvec" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" - [[package]] name = "ascii" version = "1.0.0" @@ -90,12 +85,13 @@ checksum = "bbf56136a5198c7b01a49e3afcbef6cf84597273d298f54432926024107b0109" [[package]] name = "assert_cmd" -version = "0.12.2" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936fcf2c692b37c696cd0002c57752b2d9478402450c9ca4a463f6afae16d6f5" +checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6" dependencies = [ + "anstyle", + 
"bstr", "doc-comment", - "escargot", "predicates", "predicates-core", "predicates-tree", @@ -104,32 +100,32 @@ dependencies = [ [[package]] name = "attohttpc" -version = "0.12.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de33d017f0add8b019c6d98c3132c82c8815ca96bbed8e8006e7402c840562b3" +checksum = "0f77d243921b0979fbbd728dd2d5162e68ac8252976797c24eb5b3a6af9090dc" dependencies = [ "flate2", "http", "log", "rustls", "url", - "webpki", "webpki-roots", ] [[package]] name = "autocfg" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "backtrace" -version = "0.3.50" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46254cf2fdcdf1badb5934448c1bcbe046a56537b3987d96c51a7afc5d03f293" +checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" dependencies = [ "addr2line", + "cc", "cfg-if", "libc", "miniz_oxide", @@ -137,137 +133,80 @@ dependencies = [ "rustc-demangle", ] -[[package]] -name = "base64" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" -dependencies = [ - "byteorder", -] - -[[package]] -name = "base64" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" - [[package]] name = "binary_vec_io" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03087a34218f73cb3032a37ca2cbe858f81c8e01b177d26fdea8942dacdcfa89" +version = "0.1.12" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" 
dependencies = [ - "failure", + "itertools", ] [[package]] name = "bincode" -version = "1.3.1" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30d3a39baa26f9651f17b375061f3233dde33424a8b72b0dbe93a68a0bc896d" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ - "byteorder", "serde", ] [[package]] -name = "bio" -version = "0.31.0" +name = "bio-types" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ca7a4ba213a7a4b07471edf914d12e5afbadcd274a844ebbfe2df195301a04f" +checksum = "dfa990f40a28735fa598dc3dd58d73e62e6b41458959d623903b927ba7b04c80" dependencies = [ - "approx", - "bio-types", - "bit-set", - "bv", - "bytecount", - "csv", - "custom_derive", - "fnv", - "fxhash", - "getset", - "itertools 0.9.0", - "itertools-num", + "derive-new", "lazy_static", - "multimap 0.6.0", - "ndarray", - "newtype_derive", - "num-integer", - "num-traits", - "ordered-float", - "petgraph 0.4.13", - "quick-error", "regex", - "serde", - "serde_derive", - "snafu", - "statrs", - "strum", "strum_macros", - "vec_map", + "thiserror", ] [[package]] -name = "bio-types" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d4011aaa9f8bfece367ef7d23f23279710eacf7df462d92c5135eabeb82799b" +name = "bio_edit" +version = "0.1.1" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ - "derive-new", - "lazy_static", - "quick-error", - "regex", - "serde", - "serde_derive", + "bio-types", + "bit-set", ] [[package]] name = "bit-set" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ "bit-vec", ] [[package]] name = 
"bit-vec" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0dc55f2d8a1a85650ac47858bb001b4c0dd73d79e3c455a842925e68d29cd3" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] name = "bitflags" -version = "1.2.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" - -[[package]] -name = "blake2b_simd" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a" -dependencies = [ - "arrayref", - "arrayvec", - "constant_time_eq", -] +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "block-buffer" -version = "0.9.0" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" dependencies = [ "generic-array", ] [[package]] name = "boomphf" -version = "0.5.5" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe255794a88d6c4421d75a86240179a78d4fe3b194b956d178923223754c0b2e" +checksum = "4e8fb7f38fef59c32549861151d63a6190865e60cf690340c13e3d7178b42a2f" dependencies = [ "crossbeam-utils", "log", @@ -278,55 +217,52 @@ dependencies = [ [[package]] name = "bstr" -version = "0.2.13" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31accafdb70df7871592c058eca3985b71104e15ac32f64706022c58867da931" +checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05" dependencies = [ - "lazy_static", "memchr", "regex-automata", "serde", ] [[package]] -name = "bumpalo" -version = "3.4.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820" +name = "build_enclone_proto" +version = "0.5.219" +dependencies = [ + "prost-build", + "tempfile", +] [[package]] -name = "bv" -version = "0.11.1" +name = "bumpalo" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8834bb1d8ee5dc048ee3124f2c7c1afcc6bc9aed03f11e9dfd8c69470a5db340" -dependencies = [ - "feature-probe", - "serde", -] +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" [[package]] -name = "bytecount" -version = "0.6.0" +name = "bytemuck" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0017894339f586ccb943b01b9555de56770c11cda818e7e3d8bd93f4ed7f46e" +checksum = "2f5715e491b5a1598fc2bef5a606847b5dc1d48ea625bd3c02c00de8285591da" [[package]] name = "byteorder" -version = "1.3.4" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" -version = "0.5.6" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38" +checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c" [[package]] name = "bzip2" -version = "0.3.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42b7c3cbf0fa9c1b82308d57191728ca0256cb821220f4e2fd410a72ade26e3b" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" dependencies = [ "bzip2-sys", "libc", @@ -334,9 +270,9 @@ dependencies = [ [[package]] name = "bzip2-sys" -version = "0.1.9+1.0.8" +version = "0.1.11+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "ad3b39a260062fca31f7b0b12f207e8f2590a67d32ec7d59c20484b07ea7285e" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" dependencies = [ "cc", "libc", @@ -345,144 +281,93 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.58" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a06fb2e53271d7c279ec1efea6ab691c35a2ae67ec0d91d7acec0caf13b518" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +dependencies = [ + "jobserver", +] [[package]] name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - -[[package]] -name = "chrono" -version = "0.4.13" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c74d84029116787153e02106bf53e66828452a4b325cc8652b788b5967c0a0b6" -dependencies = [ - "num-integer", - "num-traits", - "time", -] +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "cloudabi" -version = "0.0.3" +name = "cpufeatures" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" dependencies = [ - "bitflags", + "libc", ] -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "cpuid-bool" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6763c20301ab0dc67051d1b6f4cc9132ad9e6eddcb1f10c6c53ea6d6ae2183" - [[package]] name = "crc32fast" -version = "1.2.0" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ "cfg-if", ] [[package]] name = "crossbeam-deque" -version = "0.7.3" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" dependencies = [ + "cfg-if", "crossbeam-epoch", "crossbeam-utils", - "maybe-uninit", ] [[package]] name = "crossbeam-epoch" -version = "0.8.2" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" +checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "lazy_static", - "maybe-uninit", "memoffset", + "once_cell", "scopeguard", ] -[[package]] -name = "crossbeam-queue" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "774ba60a54c213d409d5353bda12d49cd68d14e45036a285234c8d6f91f92570" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "maybe-uninit", -] - [[package]] name = "crossbeam-utils" -version = "0.7.2" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" dependencies = [ - "autocfg", "cfg-if", - "lazy_static", -] - -[[package]] -name = "csv" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" -dependencies = [ - "bstr", - "csv-core", - "itoa", - "ryu", - "serde", + "once_cell", ] [[package]] -name = "csv-core" -version = "0.1.10" +name = "crypto-common" 
+version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ - "memchr", + "generic-array", + "typenum", ] -[[package]] -name = "custom_derive" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" - [[package]] name = "debruijn" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e6824051de090778a86d069d8a40672337d9046a2da6a0a6f453e985db74853" +checksum = "346d29ff45270b3642817082a513e55765ef5686b68e6cc0ed595897a64439d5" dependencies = [ "bit-set", "boomphf", - "itertools 0.9.0", + "itertools", "log", - "num", + "num-traits", "serde", "serde_derive", "serde_json", @@ -491,9 +376,9 @@ dependencies = [ [[package]] name = "derive-new" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71f31892cd5c62e414316f2963c5689242c43d8e7bbcaaeca97e5e28c95d91d9" +checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" dependencies = [ "proc-macro2", "quote", @@ -501,39 +386,19 @@ dependencies = [ ] [[package]] -name = "difference" -version = "2.0.0" +name = "difflib" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "digest" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" -dependencies = [ - "generic-array", -] - -[[package]] -name = "dirs" -version = "2.0.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" -dependencies = [ - "cfg-if", - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.3.5" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e93d7f5705de3e49895a2b5e0b8855a1c27f080192ae9c32a6432d50741a57a" +checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" dependencies = [ - "libc", - "redox_users", - "winapi", + "block-buffer", + "crypto-common", ] [[package]] @@ -544,263 +409,249 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.5.3" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" [[package]] name = "enclone" -version = "0.4.48" +version = "0.5.219" dependencies = [ "amino", "ansi_escape", "assert_cmd", - "bio", "debruijn", - "dirs", "enclone_core", + "enclone_proto", "equiv", "graph_simple", "hdf5", "io_utils", - "itertools 0.9.0", - "mirror_sparse_matrix", + "itertools", "pager", "perf_stats", - "petgraph 0.4.13", + "petgraph", "pretty_trace", + "qd", "rayon", - "regex", - "serde", - "serde_derive", - "serde_json", "stats_utils", - "stirling_numbers", "string_utils", - "tilde-expand", "vdj_ann", "vector_utils", ] [[package]] -name = "enclone_core" -version = "0.4.48" +name = "enclone_args" +version = "0.5.219" dependencies = [ - "ansi_escape", - "bio", - "bytes", - "chrono", "debruijn", + "enclone_core", + "enclone_vars", + "evalexpr", + "expr_tools", "hdf5", "io_utils", + "itertools", "mirror_sparse_matrix", - "perf_stats", + "rand", + "rayon", "regex", - "serde", - "serde_derive", "serde_json", "string_utils", + "vdj_ann", "vector_utils", ] [[package]] -name = "enclone_help" 
-version = "0.4.48" -dependencies = [ - "ansi_escape", - "enclone_core", - "io_utils", - "string_utils", - "tables", -] - -[[package]] -name = "enclone_main" -version = "0.4.48" +name = "enclone_core" +version = "0.5.219" dependencies = [ + "amino", "ansi_escape", "attohttpc", + "bio_edit", "debruijn", - "enclone", - "enclone_core", - "enclone_help", - "enclone_print", "enclone_proto", - "enclone_tail", - "equiv", - "failure", - "file-lock", - "flate2", + "evalexpr", "hdf5", "io_utils", - "itertools 0.9.0", + "itertools", + "lazy_static", + "mirror_sparse_matrix", "perf_stats", - "pretty_trace", + "qd", "rayon", "regex", "serde", - "serde_derive", - "serde_json", - "sha2", "stats_utils", "string_utils", + "superslice", + "tables", + "tilde-expand", "vdj_ann", "vector_utils", + "zstd", ] [[package]] name = "enclone_print" -version = "0.4.48" +version = "0.5.219" dependencies = [ "amino", "ansi_escape", - "bio", - "byteorder", - "bytes", + "bio_edit", "debruijn", + "enclone_args", "enclone_core", "enclone_proto", + "enclone_vars", "equiv", - "failure", + "expr_tools", "hdf5", "io_utils", - "itertools 0.9.0", - "mirror_sparse_matrix", + "itertools", "ndarray", "permutation", - "pretty_trace", + "qd", "rayon", - "serde", - "serde_derive", + "regex", "serde_json", "stats_utils", "string_utils", "tables", + "triple_accel", "vdj_ann", "vector_utils", ] [[package]] name = "enclone_proto" -version = "0.4.48" +version = "0.5.219" dependencies = [ - "bio", + "bio_edit", "byteorder", - "bytes", - "enclone_core", - "failure", "prost", - "prost-build", "serde", - "serde_derive", + "thiserror", + "vdj_ann", +] + +[[package]] +name = "enclone_ranger" +version = "0.5.219" +dependencies = [ + "enclone", + "enclone_args", + "enclone_core", + "enclone_print", + "enclone_stuff", + "hdf5", + "rayon", + "string_utils", + "vdj_ann", ] [[package]] -name = "enclone_tail" -version = "0.4.48" +name = "enclone_stuff" +version = "0.5.219" dependencies = [ "amino", - "ansi_escape", "debruijn", + 
"enclone", + "enclone_args", "enclone_core", + "enclone_print", "enclone_proto", "equiv", + "evalexpr", "hdf5", "io_utils", - "itertools 0.9.0", - "mirror_sparse_matrix", + "itertools", "ndarray", - "perf_stats", - "pretty_trace", + "qd", "rayon", - "serde", - "serde_derive", - "serde_json", + "regex", "stats_utils", "string_utils", "tables", - "tar", "vdj_ann", "vector_utils", ] +[[package]] +name = "enclone_vars" +version = "0.5.219" +dependencies = [ + "io_utils", + "itertools", + "pretty_trace", + "string_utils", + "vector_utils", +] + [[package]] name = "equiv" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e6ea08a7aa97d095ac9df295c2b1dddb59c671d636049fcdc762261ee8780a3" +version = "0.1.3" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" [[package]] -name = "errno" -version = "0.2.5" +name = "equivalent" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b480f641ccf0faf324e20c1d3e53d81b7484c698b42ea677f6907ae4db195371" -dependencies = [ - "errno-dragonfly", - "libc", - "winapi", -] +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] -name = "errno-dragonfly" -version = "0.1.1" +name = "errno" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14ca354e36190500e1e1fb267c647932382b54053c50b14970856c0b00a35067" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" dependencies = [ - "gcc", + "errno-dragonfly", "libc", + "winapi", ] [[package]] -name = "escargot" -version = "0.5.0" +name = "errno" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d" +checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" dependencies = [ - "lazy_static", - "log", - "serde", - 
"serde_json", + "libc", + "windows-sys 0.48.0", ] [[package]] -name = "exons" +name = "errno-dragonfly" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6ca2fb297ee157bdc2d7534624dcca939ed44095fa32a8eb3b6c73d7fdc818" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" dependencies = [ - "io_utils", - "string_utils", - "vector_utils", + "cc", + "libc", ] [[package]] -name = "failure" -version = "0.1.8" +name = "evalexpr" +version = "11.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" -dependencies = [ - "backtrace", - "failure_derive", -] +checksum = "1e757e796a66b54d19fa26de38e75c3351eb7a3755c85d7d181a8c61437ff60c" [[package]] -name = "failure_derive" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" +name = "expr_tools" +version = "0.1.3" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", + "evalexpr", + "statrs", + "string_utils", + "vector_utils", ] [[package]] name = "fasta_tools" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7b0783a473eea06105edf2fae500b233ba89a6c8693f2dfe0e6d6ffeec9010f" +version = "0.1.8" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "debruijn", "flate2", @@ -809,56 +660,39 @@ dependencies = [ ] [[package]] -name = "feature-probe" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "835a3dc7d1ec9e75e2b5fb4ba75396837112d2060b03f7d43bc1897c7f7211da" - -[[package]] -name = "file-lock" -version = "1.1.20" +name = "fastrand" +version = 
"1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16486239b3741480cef090b6f9924faf5dd5481022c6f266a51fab1a92971a2" +checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" dependencies = [ - "gcc", - "libc", - "mktemp", - "nix 0.11.1", + "instant", ] [[package]] name = "filetime" -version = "0.2.10" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "affc17579b132fc2461adf7c575cc6e8b134ebca52c51f5411388965227dc695" +checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" dependencies = [ "cfg-if", "libc", "redox_syscall", - "winapi", + "windows-sys 0.42.0", ] [[package]] name = "fixedbitset" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33" - -[[package]] -name = "fixedbitset" -version = "0.2.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.16" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68c90b0fc46cf89d227cc78b40e494ff81287a92dd07631e5af0d06fe3cf885e" +checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" dependencies = [ - "cfg-if", "crc32fast", - "libc", "miniz_oxide", ] @@ -869,31 +703,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] -name = "fuchsia-cprng" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" - -[[package]] -name = "fxhash" -version = "0.2.1" +name = "form_urlencoded" +version 
= "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" dependencies = [ - "byteorder", + "matches", + "percent-encoding", ] -[[package]] -name = "gcc" -version = "0.3.55" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2" - [[package]] name = "generic-array" -version = "0.14.3" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60fb4bb6bba52f78a471264d9a3b7d026cc0af47b22cd2cffbc0b787ca003e63" +checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" dependencies = [ "typenum", "version_check", @@ -901,64 +724,59 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.1.14" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" dependencies = [ "cfg-if", "libc", "wasi", ] -[[package]] -name = "getset" -version = "0.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb3f5b7d8d70c9bd23cf29b2b38094661418fb0ea79f1b0cc2019a11d6f5429" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "gimli" -version = "0.22.0" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaf91faf136cb47367fa430cd46e37a788775e7fa104f8b4bcb3861dc389b724" +checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" [[package]] name = "graph_simple" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29a19baeba1a54526fb193cd3419e9f68795f4132d55241399ff295d903b3d7" +version = "0.1.5" +source = 
"git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ - "petgraph 0.4.13", + "petgraph", "vector_utils", ] +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + [[package]] name = "hdf5" -version = "0.6.0" -source = "git+https://github.com/pmarks/hdf5-rs.git?rev=0c98e57b2af1f4247708c198b324ba3a8bc18dba#0c98e57b2af1f4247708c198b324ba3a8bc18dba" +version = "0.8.1" +source = "git+https://github.com/10XGenomics/hdf5-rust.git?branch=conda_nov2021#2d4a40b7ef75de530bc53fd2eb0fe75047e083ad" dependencies = [ "bitflags", + "cfg-if", "hdf5-derive", "hdf5-sys", "hdf5-types", "lazy_static", "libc", "ndarray", - "num-integer", - "num-traits", "parking_lot", + "paste", ] [[package]] name = "hdf5-derive" -version = "0.6.0" -source = "git+https://github.com/pmarks/hdf5-rs.git?rev=0c98e57b2af1f4247708c198b324ba3a8bc18dba#0c98e57b2af1f4247708c198b324ba3a8bc18dba" +version = "0.8.1" +source = "git+https://github.com/10XGenomics/hdf5-rust.git?branch=conda_nov2021#2d4a40b7ef75de530bc53fd2eb0fe75047e083ad" dependencies = [ + "proc-macro-error", "proc-macro2", "quote", "syn", @@ -966,54 +784,44 @@ dependencies = [ [[package]] name = "hdf5-sys" -version = "0.6.0" -source = "git+https://github.com/pmarks/hdf5-rs.git?rev=0c98e57b2af1f4247708c198b324ba3a8bc18dba#0c98e57b2af1f4247708c198b324ba3a8bc18dba" +version = "0.8.1" +source = "git+https://github.com/10XGenomics/hdf5-rust.git?branch=conda_nov2021#2d4a40b7ef75de530bc53fd2eb0fe75047e083ad" dependencies = [ "attohttpc", "bzip2", "libc", "libloading", - "md5", "pkg-config", "regex", "serde", "serde_derive", + "sha2", "tar", "winreg", ] [[package]] name = "hdf5-types" -version = "0.6.0" -source = "git+https://github.com/pmarks/hdf5-rs.git?rev=0c98e57b2af1f4247708c198b324ba3a8bc18dba#0c98e57b2af1f4247708c198b324ba3a8bc18dba" 
+version = "0.8.1" +source = "git+https://github.com/10XGenomics/hdf5-rust.git?branch=conda_nov2021#2d4a40b7ef75de530bc53fd2eb0fe75047e083ad" dependencies = [ "ascii", + "cfg-if", + "hdf5-sys", "libc", ] [[package]] name = "heck" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "hermit-abi" -version = "0.1.15" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9" -dependencies = [ - "libc", -] +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" [[package]] name = "http" -version = "0.2.1" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d569972648b2c512421b5f2a405ad6ac9666547189d0c5477a3f200f3e02f9" +checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" dependencies = [ "bytes", "fnv", @@ -1022,23 +830,22 @@ dependencies = [ [[package]] name = "hyperbase" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5ce3419d89b5959ccc3a7238e1f76c99c859ca59592aed46ff562a12004537c" +version = "0.1.8" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "debruijn", "equiv", "graph_simple", "kmer_lookup", - "petgraph 0.4.13", + "petgraph", "vector_utils", ] [[package]] name = "idna" -version = "0.2.0" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" dependencies = [ "matches", "unicode-bidi", @@ -1047,74 +854,82 @@ dependencies = [ [[package]] name = "indexmap" -version = 
"1.4.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c398b2b113b55809ceb9ee3e753fcbac793f1956663f3c36549c1346015c2afe" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" dependencies = [ - "autocfg", + "equivalent", + "hashbrown", ] [[package]] -name = "io_utils" -version = "0.2.6" +name = "instant" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cd15063bab0636a45c1fa792a7b5c9843a65a8cbbbce6c8fd90f45299a01269" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" +dependencies = [ + "libc", + "windows-sys 0.42.0", +] + +[[package]] +name = "io_utils" +version = "0.3.2" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "bincode", "flate2", "lz4", "serde", "string_utils", - "vector_utils", ] [[package]] name = "itertools" -version = "0.8.2" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] -name = "itertools" -version = "0.9.0" +name = "itoa" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" -dependencies = [ - "either", -] +checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" [[package]] -name = "itertools-num" -version = "0.1.3" +name = "jobserver" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "a872a22f9e6f7521ca557660adb96dd830e54f0f490fa115bb55dd69d38b27e7" +checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" dependencies = [ - "num-traits", + "libc", ] -[[package]] -name = "itoa" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" - [[package]] name = "js-sys" -version = "0.3.42" +version = "0.3.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52732a3d3ad72c58ad2dc70624f9c17b46ecd0943b9a4f1ee37c4c18c5d983e2" +checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2" dependencies = [ "wasm-bindgen", ] [[package]] name = "kmer_lookup" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4205468f56540dbca189e4f044a364f7275afe2e2d68c7693fe6d0ada2afba9d" +version = "0.1.5" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "debruijn", "rayon", @@ -1129,43 +944,56 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.72" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9f8082297d534141b30c8d39e9b1773713ab50fdbe4ff30f750d063b3bfd701" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libloading" -version = "0.5.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753" +checksum = "efbc0f03f9a775e9f6aed295c6a1ba2253c5757a9e03d55c6caa46a681abcddd" dependencies = [ - "cc", + "cfg-if", "winapi", ] +[[package]] +name = "libm" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"292a948cd991e376cf75541fe5b97a1081d713c618b4f1b9500f8844e49eb565" + +[[package]] +name = "linux-raw-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" + [[package]] name = "lock_api" -version = "0.3.4" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75" +checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390" dependencies = [ + "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.11" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] [[package]] name = "lz4" -version = "1.23.2" +version = "1.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aac20ed6991e01bf6a2e68cc73df2b389707403662a8ba89f68511fb340f724c" +checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" dependencies = [ "libc", "lz4-sys", @@ -1173,9 +1001,9 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.2" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" dependencies = [ "cc", "libc", @@ -1183,157 +1011,135 @@ dependencies = [ [[package]] name = "matches" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "matrixmultiply" -version = "0.2.3" 
+version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4f7ec66360130972f34830bfad9ef05c6610a43938a467bcc9ab9369ab3478f" +checksum = "add85d4dd35074e6fedc608f8c8f513a3548619a9024b751949ef0e8e45a4d84" dependencies = [ "rawpointer", ] -[[package]] -name = "maybe-uninit" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" - -[[package]] -name = "md5" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6bcd6433cff03a4bfc3d9834d504467db1f1cf6d0ea765d37d330249ed629d" - [[package]] name = "memchr" -version = "2.3.3" +version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" [[package]] name = "memoffset" -version = "0.5.5" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c198b026e1bbf08a937e94c6c60f9ec4a2267f5b0d2eec9c1b21b061ce2be55f" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" dependencies = [ "autocfg", ] [[package]] name = "miniz_oxide" -version = "0.4.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be0f75932c1f6cfae3c04000e40114adf955636e19040f9c0a2c380702aa1c7f" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" dependencies = [ "adler", ] [[package]] name = "mirror_sparse_matrix" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79dbdcd84fec978b1ef88e02c17170b9a48a9436a31bfea6a523da8dfe716ea0" +version = "0.1.17" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "binary_vec_io", - "io_utils", - 
"pretty_trace", ] [[package]] -name = "mktemp" -version = "0.3.1" +name = "multimap" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77001ceb9eed65439f3dc2a2543f9ba1417d912686bf224a7738d0966e6dcd69" -dependencies = [ - "uuid", -] +checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" [[package]] -name = "multimap" -version = "0.6.0" +name = "nalgebra" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de234f818d54830a7103b9be18ad0861d75aeb5e3c89759bc3f9a004cc39cfa3" +checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff" dependencies = [ - "serde", + "approx", + "matrixmultiply", + "nalgebra-macros", + "num-complex", + "num-rational", + "num-traits", + "rand", + "rand_distr", + "simba", + "typenum", ] [[package]] -name = "multimap" -version = "0.8.1" +name = "nalgebra-macros" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8883adfde9756c1d30b0f519c9b8c502a94b41ac62f696453c37c7fc0a958ce" +checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "ndarray" -version = "0.13.1" +version = "0.15.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac06db03ec2f46ee0ecdca1a1c34a99c0d188a0d83439b84bf0cb4b386e4ab09" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" dependencies = [ "matrixmultiply", - "num-complex 0.2.4", + "num-complex", "num-integer", "num-traits", "rawpointer", ] -[[package]] -name = "newtype_derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8cd24d9f185bb7223958d8c1ff7a961b74b1953fd05dba7cc568a63b3861ec" -dependencies = [ - "rustc_version", -] - [[package]] name = "nix" -version = "0.11.1" +version = "0.24.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "becb657d662f1cd2ef38c7ad480ec6b8cf9e96b27adb543e594f9cf0f2e6065c" +checksum = "195cdbc1741b8134346d515b3a56a1c94b0912758009cfd53f99ea0f57b065fc" dependencies = [ "bitflags", - "cc", "cfg-if", "libc", - "void", + "memoffset", ] [[package]] -name = "nix" -version = "0.13.1" +name = "num-complex" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dbdc256eaac2e3bd236d93ad999d3479ef775c863dbda3068c4006a92eec51b" +checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" dependencies = [ - "bitflags", - "cc", - "cfg-if", - "libc", - "void", + "num-traits", ] [[package]] -name = "num" -version = "0.3.0" +name = "num-integer" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3e176191bc4faad357e3122c4747aa098ac880e88b168f106386128736cf4a" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ - "num-bigint", - "num-complex 0.3.0", - "num-integer", - "num-iter", - "num-rational", + "autocfg", "num-traits", ] [[package]] -name = "num-bigint" -version = "0.3.0" +name = "num-rational" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f3fc75e3697059fb1bc465e3d8cca6cf92f56854f201158b3f9c77d5a3cfa0" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" dependencies = [ "autocfg", "num-integer", @@ -1341,143 +1147,69 @@ dependencies = [ ] [[package]] -name = "num-complex" -version = "0.2.4" +name = "num-traits" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6b19411a9719e753aff12e5187b74d60d3dc449ec3f4dc21e3989c3f554bc95" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", - "num-traits", + "libm", ] [[package]] -name = "num-complex" -version = "0.3.0" +name = "object" +version = 
"0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05ad05bd8977050b171b3f6b48175fea6e0565b7981059b486075e1026a9fb5" +checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" dependencies = [ - "num-traits", + "memchr", ] [[package]] -name = "num-integer" -version = "0.1.43" +name = "once_cell" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d59457e662d541ba17869cf51cf177c0b5f0cbf476c66bdc90bf1edac4f875b" -dependencies = [ - "autocfg", - "num-traits", -] +checksum = "074864da206b4973b84eb91683020dbefd6a8c3f0f38e054d93954e891935e4e" [[package]] -name = "num-iter" -version = "0.1.41" +name = "pager" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6e6b7c748f995c4c29c5f5ae0248536e04a5739927c74ec0fa564805094b9f" +checksum = "2599211a5c97fbbb1061d3dc751fa15f404927e4846e07c643287d6d1f462880" dependencies = [ - "autocfg", - "num-integer", - "num-traits", + "errno 0.2.8", + "libc", ] [[package]] -name = "num-rational" -version = "0.3.0" +name = "parking_lot" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5b4d7360f362cfb50dde8143501e6940b22f644be75a4cc90b2d81968908138" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", + "lock_api", + "parking_lot_core", ] [[package]] -name = "num-traits" -version = "0.2.12" +name = "parking_lot_core" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" -dependencies = [ - "hermit-abi", - 
"libc", -] - -[[package]] -name = "object" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab52be62400ca80aa00285d25253d7f7c437b7375c4de678f5405d3afe82ca5" - -[[package]] -name = "once_cell" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d" - -[[package]] -name = "opaque-debug" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" - -[[package]] -name = "ordered-float" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3741934be594d77de1c8461ebcbbe866f585ea616a9753aa78f2bdc69f0e4579" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordermap" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063" - -[[package]] -name = "pager" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b69ced2bfa977c4541743a7427b89c94120684791a9629941fe6028dccab6528" -dependencies = [ - "errno", - "libc", -] - -[[package]] -name = "parking_lot" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3a704eb390aafdc107b0e392f56a82b668e3a71366993b5340f5833fd62505e" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d58c7c768d4ba344e3e8d72518ac13e259d7c7ade24167003b8488e10b6740a3" +checksum = "ba1ef8814b5c993410bb3adfad7a5ed269563e4a2f90c41f5d85be7fb47133bf" dependencies = [ "cfg-if", - "cloudabi", "libc", "redox_syscall", "smallvec", - "winapi", + "windows-sys 0.42.0", ] +[[package]] +name = "paste" +version = "1.0.9" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1de2e551fb905ac83f73f7aedf2f0cb4a0da7e35efa24a202a936269f1f18e1" + [[package]] name = "percent-encoding" version = "2.1.0" @@ -1486,9 +1218,8 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "perf_stats" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bc31826c63b47737878cee46aeae502c339163f3b23d437cbe29055c5e90afc" +version = "0.1.8" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "io_utils", "libc", @@ -1497,100 +1228,120 @@ dependencies = [ [[package]] name = "permutation" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9978962f8a4b158e97447a6d09d2d75e206d2994eff056c894019f362b27142" - -[[package]] -name = "petgraph" -version = "0.4.13" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f" -dependencies = [ - "fixedbitset 0.1.9", - "ordermap", -] +checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7" [[package]] name = "petgraph" -version = "0.5.1" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ - "fixedbitset 0.2.0", + "fixedbitset", "indexmap", ] [[package]] name = "pkg-config" -version = "0.3.18" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d36492546b6af1463394d46f0c834346f31548646f6ba10849802c9c9a27ac33" +checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" [[package]] name = "ppv-lite86" -version = "0.2.8" +version = "0.2.16" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "predicates" -version = "1.0.4" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "347a1b6f0b21e636bc9872fb60b83b8e185f6f5516298b8238699f7f9a531030" +checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" dependencies = [ - "difference", + "anstyle", + "difflib", + "itertools", "predicates-core", ] [[package]] name = "predicates-core" -version = "1.0.0" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06075c3a3e92559ff8929e7a280684489ea27fe44805174c3ebd9328dcb37178" +checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" [[package]] name = "predicates-tree" -version = "1.0.0" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e63c4859013b38a76eca2414c64911fba30def9e3202ac461a2d22831220124" +checksum = "4d86de6de25020a36c6d3643a86d9a6a9f552107c0559c60ea03551b5e16c032" dependencies = [ "predicates-core", - "treeline", + "termtree", ] [[package]] name = "pretty_trace" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07adf99fb16816593ee6a9febe3a97f032c6197cc8e8e0fc0e5e1515214ed685" +version = "0.5.24" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "backtrace", - "failure", - "io_utils", "lazy_static", "libc", - "nix 0.13.1", - "rayon", - "stats_utils", + "nix", "string_utils", "vector_utils", ] +[[package]] +name = "prettyplease" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" +dependencies = [ + 
"proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro2" -version = "1.0.18" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] name = "prost" -version = "0.6.1" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce49aefe0a6144a45de32927c77bd2859a5f7677b55f220ae5b744e87389c212" +checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" dependencies = [ "bytes", "prost-derive", @@ -1598,30 +1349,34 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.6.1" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b10678c913ecbd69350e8535c3aef91a8676c0773fc1d7b95cdd196d7f2f26" +checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", "heck", - "itertools 0.8.2", + "itertools", + "lazy_static", "log", - "multimap 0.8.1", - "petgraph 0.5.1", + "multimap", + "petgraph", + "prettyplease", "prost", "prost-types", + "regex", + "syn", "tempfile", "which", ] [[package]] name = "prost-derive" -version = "0.6.1" +version = "0.11.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "537aa19b95acde10a12fec4301466386f757403de4cd4e5b4fa78fb5ecb18f72" +checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", - "itertools 0.8.2", + "itertools", "proc-macro2", "quote", "syn", @@ -1629,106 +1384,65 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.6.1" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1834f67c0697c001304b75be76f67add9c89742eda3a085ad8ee0bb38c3417aa" +checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" dependencies = [ - "bytes", "prost", ] [[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +name = "qd" +version = "0.2.0-alpha" +source = "git+https://github.com/Barandis/qd#0fb276d70346f11f4b2a5b30568d8a26d0dd88df" [[package]] name = "quote" -version = "1.0.7" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" dependencies = [ "proc-macro2", ] [[package]] name = "rand" -version = "0.3.23" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc", - "rand 0.4.6", -] - -[[package]] -name = "rand" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" -dependencies = [ - "fuchsia-cprng", - "libc", - "rand_core 0.3.1", - "rdrand", - "winapi", -] - -[[package]] -name = "rand" -version = 
"0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom", "libc", "rand_chacha", - "rand_core 0.5.1", - "rand_hc", + "rand_core", ] [[package]] name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", -] - -[[package]] -name = "rand_core" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ - "rand_core 0.4.2", + "ppv-lite86", + "rand_core", ] [[package]] name = "rand_core" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" - -[[package]] -name = "rand_core" -version = "0.5.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ "getrandom", ] [[package]] -name = "rand_hc" -version = "0.2.0" +name = "rand_distr" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" dependencies = [ - "rand_core 0.5.1", + "num-traits", + "rand", ] [[package]] @@ -1739,96 +1453,67 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.3.1" +version = "1.8.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f02856753d04e03e26929f820d0a0a337ebe71f849801eea335d464b349080" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" dependencies = [ - "autocfg", - "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" -version = "1.7.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e92e15d89083484e11353891f1af602cc661426deb9564c298b270c726973280" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" dependencies = [ "crossbeam-deque", - "crossbeam-queue", "crossbeam-utils", - "lazy_static", - "num_cpus", -] - -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", ] [[package]] name = "redox_syscall" -version = "0.1.57" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" - -[[package]] -name = "redox_users" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b23093265f8d200fa7b4c2c76297f47e681c655f6f1285a8780d6a022f7431" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "getrandom", - "redox_syscall", - "rust-argon2", + "bitflags", ] [[package]] name = "regex" -version = "1.3.9" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" dependencies = [ "aho-corasick", "memchr", + "regex-automata", "regex-syntax", - "thread_local", ] [[package]] name = "regex-automata" -version = "0.1.9" +version = "0.3.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" dependencies = [ - "byteorder", + "aho-corasick", + "memchr", + "regex-syntax", ] [[package]] name = "regex-syntax" -version = "0.6.18" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" - -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "ring" -version = "0.16.15" +version = "0.16.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "952cd6b98c85bbc30efa1ba5783b8abf12fec8b3287ffa52605b9432313e34e4" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" dependencies = [ "cc", "libc", @@ -1839,57 +1524,68 @@ dependencies = [ "winapi", ] -[[package]] -name = "rust-argon2" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bc8af4bda8e1ff4932523b94d3dd20ee30a87232323eda55903ffd71d2fb017" -dependencies = [ - "base64 0.11.0", - "blake2b_simd", - "constant_time_eq", - "crossbeam-utils", -] - [[package]] name = "rustc-demangle" -version = "0.1.16" +version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" +checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" [[package]] -name = "rustc-serialize" -version = "0.3.24" +name = "rustix" +version = "0.36.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda" - -[[package]] -name = "rustc_version" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084" +checksum = "305efbd14fde4139eb501df5f136994bb520b033fa9fbdce287507dc23b8c7ed" dependencies = [ - "semver", + "bitflags", + "errno 0.3.5", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys 0.45.0", ] [[package]] name = "rustls" -version = "0.16.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b25a18b1bf7387f0145e7f8324e700805aade3842dd3db2e74e4cdeb4677c09e" +checksum = "07180898a28ed6a7f7ba2311594308f595e3dd2e3c3812fa0a80a47b45f17e5d" dependencies = [ - "base64 0.10.1", "log", "ring", + "rustls-webpki", "sct", - "webpki", ] +[[package]] +name = "rustls-webpki" +version = "0.100.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e98ff011474fa39949b7e5c0428f9b4937eda7da7848bbb947786b7be0b27dab" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8" + [[package]] name = "ryu" -version = "1.0.5" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" + +[[package]] +name = "safe_arch" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "794821e4ccb0d9f979512f9c1973480123f9bd62a90d74ab0f9426fcf8f4a529" +dependencies = [ + "bytemuck", +] [[package]] name = "scopeguard" @@ -1899,34 +1595,28 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "sct" 
-version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3042af939fca8c3453b7af0f1c66e533a15a86169e39de2657310ade8f98d3c" +checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" dependencies = [ "ring", "untrusted", ] -[[package]] -name = "semver" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" - [[package]] name = "serde" -version = "1.0.114" +version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3" +checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.114" +version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0be94b04690fbaed37cddffc5c134bf537c8e3329d53e982fe04c374978f8e" +checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" dependencies = [ "proc-macro2", "quote", @@ -1935,9 +1625,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.56" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3433e879a558dde8b5e8feb2a04899cf34fdde1fafb894687e52105fc1162ac3" +checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" dependencies = [ "itoa", "ryu", @@ -1946,43 +1636,33 @@ dependencies = [ [[package]] name = "sha2" -version = "0.9.1" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1" +checksum = "899bf02746a2c92bf1053d9327dadb252b01af1f81f90cdb902411f518bc7215" dependencies = [ - "block-buffer", "cfg-if", - "cpuid-bool", + "cpufeatures", "digest", - "opaque-debug", ] [[package]] -name = "smallvec" 
-version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3757cb9d89161a2f24e1cf78efa0c1fcff485d18e3f55e0aa3480824ddaa0f3f" - -[[package]] -name = "snafu" -version = "0.6.8" +name = "simba" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7f5aed652511f5c9123cf2afbe9c244c29db6effa2abb05c866e965c82405ce" +checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" dependencies = [ - "doc-comment", - "snafu-derive", + "approx", + "num-complex", + "num-traits", + "paste", + "wide", ] [[package]] -name = "snafu-derive" -version = "0.6.8" +name = "smallvec" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebf8f7d5720104a9df0f7076a8682024e958bba0fe9848767bb44f251f3648e9" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] +checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" [[package]] name = "spin" @@ -1992,52 +1672,40 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "statrs" -version = "0.12.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cce16f6de653e88beca7bd13780d08e09d4489dbca1f9210e041bc4852481382" +checksum = "2d08e5e1748192713cc281da8b16924fb46be7b0c2431854eadc785823e5696e" dependencies = [ - "rand 0.7.3", + "approx", + "lazy_static", + "nalgebra", + "num-traits", + "rand", ] [[package]] name = "stats_utils" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f41d546028b75dfcebf71c01c8ed6d1537fa873341f62de41cb5695247c92d" - -[[package]] -name = "stirling_numbers" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0403dcb230e09ab5f3c9b36a7e83dead88a384cf5ae48a8484531d48b594aaba" -dependencies = [ - "num-traits", -] +version = "0.1.3" +source = 
"git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" [[package]] name = "string_utils" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d0a7faf9c52f98f57f5a1be176ee3890628fe65ba509fae6e6e38868632384b" +version = "0.1.4" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "vector_utils", ] -[[package]] -name = "strum" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bd81eb48f4c437cadc685403cad539345bf703d78e63707418431cecd4522b" - [[package]] name = "strum_macros" -version = "0.18.0" +version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87c85aa3f8ea653bfd3ddf25f7ee357ee4d204731f6aa9ad04002306f6e2774c" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ "heck", "proc-macro2", "quote", + "rustversion", "syn", ] @@ -2049,71 +1717,73 @@ checksum = "ab16ced94dbd8a46c82fd81e3ed9a8727dac2977ea869d217bcc4ea1f122e81f" [[package]] name = "syn" -version = "1.0.34" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cae2873c940d92e697597c5eee105fb570cd5689c695806f672883653349b" +checksum = "60b9b43d45702de4c839cb9b51d9f529c5dd26a4aff255b42b1ebc03e88ee908" dependencies = [ "proc-macro2", "quote", - "unicode-xid", -] - -[[package]] -name = "synstructure" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b834f2d66f734cb897113e34aaff2f1ab4719ca946f9a7358dba8f8064148701" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "unicode-xid", + "unicode-ident", ] [[package]] name = "tables" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca5324a796d6617a460da3a2b60243a1361397e66dcea45d6a0c554358462a0a" +version = 
"0.1.5" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "io_utils", - "itertools 0.9.0", + "itertools", "string_utils", ] [[package]] name = "tar" -version = "0.4.29" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8a4c1d0bee3230179544336c15eefb563cf0302955d962e456542323e8c2e8a" +checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6" dependencies = [ "filetime", "libc", - "redox_syscall", "xattr", ] [[package]] name = "tempfile" -version = "3.1.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" +checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" dependencies = [ "cfg-if", - "libc", - "rand 0.7.3", + "fastrand", "redox_syscall", - "remove_dir_all", - "winapi", + "rustix", + "windows-sys 0.42.0", ] [[package]] -name = "thread_local" -version = "1.0.1" +name = "termtree" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b" + +[[package]] +name = "thiserror" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" dependencies = [ - "lazy_static", + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -2126,63 +1796,53 @@ dependencies = [ ] [[package]] -name = "time" -version = "0.1.43" +name = "tinyvec" +version = "1.6.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" dependencies = [ - "libc", - "winapi", + "tinyvec_macros", ] [[package]] -name = "tinyvec" -version = "0.3.3" +name = "tinyvec_macros" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53953d2d3a5ad81d9f844a32f14ebb121f50b650cd59d0ee2a07cf13c617efed" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] -name = "treeline" -version = "0.1.0" +name = "triple_accel" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" +checksum = "22048bc95dfb2ffd05b1ff9a756290a009224b60b2f0e7525faeee7603851e63" [[package]] name = "typenum" -version = "1.12.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" [[package]] name = "unicode-bidi" -version = "0.3.4" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -dependencies = [ - "matches", -] +checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" + +[[package]] +name = "unicode-ident" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" [[package]] name = "unicode-normalization" -version = "0.1.13" +version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb19cf769fa8c6a80a162df694621ebeb4dafb606470b2b2fce0be40a98a977" +checksum = 
"854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6" dependencies = [ "tinyvec", ] -[[package]] -name = "unicode-segmentation" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" - -[[package]] -name = "unicode-xid" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" - [[package]] name = "untrusted" version = "0.7.1" @@ -2191,78 +1851,50 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "url" -version = "2.1.1" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" dependencies = [ + "form_urlencoded", "idna", "matches", "percent-encoding", ] -[[package]] -name = "uuid" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78c590b5bd79ed10aad8fb75f078a59d8db445af6c743e55c4a53227fc01c13f" -dependencies = [ - "rand 0.3.23", - "rustc-serialize", -] - [[package]] name = "vdj_ann" -version = "0.1.2" -source = "git+https://github.com/10XGenomics/rust-toolbox.git?rev=183e2d657e6436494072a32cf8da4f7b753d1e69#183e2d657e6436494072a32cf8da4f7b753d1e69" +version = "0.4.4" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "align_tools", "amino", - "bio", + "bio_edit", "debruijn", - "exons", "fasta_tools", - "flate2", "hyperbase", "io_utils", - "itertools 0.8.2", + "itertools", "kmer_lookup", - "pretty_trace", "serde", "serde_json", - "sha2", "stats_utils", "string_utils", - "strum", - "strum_macros", "vdj_types", "vector_utils", ] [[package]] name = "vdj_types" -version = "0.1.0" 
-source = "git+https://github.com/10XGenomics/rust-toolbox.git?rev=183e2d657e6436494072a32cf8da4f7b753d1e69#183e2d657e6436494072a32cf8da4f7b753d1e69" -dependencies = [ - "serde", - "serde_json", - "strum", - "strum_macros", -] - -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" +version = "0.2.0" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "serde", ] [[package]] name = "vector_utils" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e107021f89b5d21750b11933706dbf1804eb48fa96dd81966e113786d707b5f" +version = "0.1.5" +source = "git+https://github.com/10XGenomics/rust-toolbox.git?branch=master#305edf966100aa6b338014f66e4690a830bca323" dependencies = [ "permutation", "superslice", @@ -2270,15 +1902,9 @@ dependencies = [ [[package]] name = "version_check" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" - -[[package]] -name = "void" -version = "1.0.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wait-timeout" @@ -2291,15 +1917,15 @@ dependencies = [ [[package]] name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" +version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.65" +version = "0.2.82" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3edbcc9536ab7eababcc6d2374a0b7bfe13a2b6d562c5e07f370456b1a8f33d" +checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2307,13 +1933,13 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.65" +version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ed2fb8c84bfad20ea66b26a3743f3e7ba8735a69fe7d95118c33ec8fc1244d" +checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f" dependencies = [ "bumpalo", - "lazy_static", "log", + "once_cell", "proc-macro2", "quote", "syn", @@ -2322,9 +1948,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.65" +version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb071268b031a64d92fc6cf691715ca5a40950694d8f683c5bb43db7c730929e" +checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2332,9 +1958,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.65" +version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf592c807080719d1ff2f245a687cbadb3ed28b2077ed7084b47aba8b691f2c6" +checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da" dependencies = [ "proc-macro2", "quote", @@ -2345,46 +1971,45 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.65" +version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b6c0220ded549d63860c78c38f3bcc558d1ca3f4efa74942c536ddbbb55e87" +checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a" [[package]] name = "web-sys" -version = "0.3.42" +version = "0.3.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8be2398f326b7ba09815d0b403095f34dd708579220d099caae89be0b32137b2" +checksum = "ed055ab27f941423197eb86b2035720b1a3ce40504df082cac2ecc6ed73335a1" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] -name = "webpki" -version = "0.21.3" +name = "webpki-roots" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab146130f5f790d45f82aeeb09e55a256573373ec64409fc19a6fb82fb1032ae" -dependencies = [ - "ring", - "untrusted", -] +checksum = "c9c6eda1c830a36f361e7721c87fd79ea84293b54f8c48c959f85ec636f0f196" [[package]] -name = "webpki-roots" -version = "0.18.0" +name = "which" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91cd5736df7f12a964a5067a12c62fa38e1bd8080aff1f80bc29be7c80d19ab4" +checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b" dependencies = [ - "webpki", + "either", + "libc", + "once_cell", ] [[package]] -name = "which" -version = "3.1.1" +name = "wide" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724" +checksum = "b3aba2d1dac31ac7cae82847ac5b8be822aee8f99a4e100f279605016b185c5f" dependencies = [ - "libc", + "bytemuck", + "safe_arch", ] [[package]] @@ -2409,11 +2034,158 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + 
+[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "winreg" -version = "0.6.2" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2986deb581c4fe11b621998a5e53361efe6b48a151178d0cd9eeffa4dc6acc9" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" dependencies = [ "serde", "winapi", @@ -2421,18 +2193,47 @@ dependencies = [ [[package]] name = "wyhash" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fe26121db27575e4fb30ceded9806fbfe0edb489f170a17506d9ad0b1aca41c" +checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295" dependencies = [ - "rand_core 0.5.1", + "rand_core", ] [[package]] name = "xattr" -version = "0.2.2" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +dependencies = [ + "libc", +] + +[[package]] +name = "zstd" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "6.0.3+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68e4a3f57d13d0ab7e478665c60f35e2a613dcd527851c2c7287ce5c787e134a" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.1+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" +dependencies = [ + "cc", "libc", ] diff --git a/Cargo.toml b/Cargo.toml index b708b392b..12fb99ecd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,28 +1,65 @@ [workspace] members = [ + "build_enclone_proto", "enclone", + "enclone_args", "enclone_core", - "enclone_help", - "enclone_main", "enclone_print", "enclone_proto", - "enclone_tail", + "enclone_ranger", + "enclone_stuff", + "enclone_vars", ] +resolver = "2" + +# ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +# dev mode profile: setting lto = true or lto = "thin" or codegen-units = 1 or even +# codegen-units = 5 all speed up execution (as measured by our cycle count test), by up to +# about 10%, but they all have at least some effect on compile time, and it doesn't seem worth it + [profile.dev] debug = 1 opt-level = 3 +split-debuginfo = "unpacked" + +# ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ [profile.release] debug = 1 lto = true codegen-units = 1 +overflow-checks = true + +# ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ [profile.test] +debug = 1 opt-level = 3 +split-debuginfo = "unpacked" # Cautionary note. We set package versions in enclone_versions, but it's not clear that # this is a sound practice, particularly in the case where a crate outside the enclone repo # depends on a crate in the enclone repo. Such an external crate sees "*" and it's not clear # what is done with that. + +# ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +# We attempted to speed up loading on the Mac using zld, +# https://github.com/michaeleisel/zld. 
First we downloaded the current release at +# https://github.com/michaeleisel/zld/releases/tag/1.3.2, and then added these lines +# (without the #) to .cargo/config: +# [target.x86_64-apple-darwin] +# rustflags = ["-C", "link-arg=-fuse-ld=/Users/david.jaffe/bin/zld"] +# To get this to work, we had to install Xcode 12.4, by going to the apple developers site. +# For the particular version of macOS we have, the later versions of Xcode wouldn't work. +# To complete the installation, we did +# sudo xcode-select -s /path/to/Xcode.app +# and then cargo b worked. This seemed to speed up loading by 5-10%, but that did not seem +# like enough to justify using zld. It might be worth trying again later, as it is being +# improved. + +[workspace.dependencies] +itertools = ">=0.10" diff --git a/GUIDE b/GUIDE index 0acb43588..57a3095a6 100644 --- a/GUIDE +++ b/GUIDE @@ -1,16 +1,27 @@ guide to some of the directories here: -enclone_main this includes the enclone main program -enclone_core core definitions, etc. -enclone_help help menus from the command line -enclone most of the code up to "printing" -enclone_print prints clonotypes in the sense of creating the "pictures" (much of core logic) -enclone_proto stuff for communicating with Loupe, and reused for other purposes -enclone_tail last part of the code +enclone most of the code up to "printing" +enclone_args argument processing +enclone_com inter-process communication (experimental) +enclone_core core definitions, etc.
+enclone_denovo experimental code for making VDJ references +enclone_exec just the actual main program +enclone_help help menus from the command line +enclone_main this includes the enclone main program guts +enclone_paper calculations for enclone paper +enclone_print prints clonotypes in the sense of creating the "pictures" (much of core logic) +enclone_proto stuff for communicating with Loupe, and reused for other purposes +enclone_ranger entry point for cellranger +enclone_stuff things called by enclone_main and enclone_ranger +enclone_tail last part of the code +enclone_tools miscellaneous binaries +enclone_vars variable definitions +enclone_version determine current version string +enclone_visual experimental GUI client -fonts DejaVuSansMono, in case not otherwise available -img images for site -pages pages on site and source code for such -target where rust puts compilation stuff -third_party credits +credits credits +fonts DejaVuSansMono, in case not otherwise available +img images for site +pages pages on site and source code for such +target where rust puts compilation stuff diff --git a/LICENSE.txt b/LICENSE.txt index 03a262ff2..b4e1fe1f6 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2020 10x Genomics +Copyright (c) 2021 10x Genomics Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/PREHISTORY b/PREHISTORY deleted file mode 100644 index 0dee95e05..000000000 --- a/PREHISTORY +++ /dev/null @@ -1,17513 +0,0 @@ - -This repo was moved from the 10x Genomics Cell Ranger repo, branch dj/cr-1577b, on 1/6/2019. -The intent was to keep the history, but that didn't work. This file is a summary of the history, -consisting of manually pruned logs from dj/cr-1577b and the preceding branch dj/cr-1577. 
- -manually pruned history from cellranger branch dj/cr-1577b -(see also "manually pruned history from cellranger branch dj/cr-1577", below) - -commit d0662aa085ba7ab095e3bb0950ae286a56636ca0 -Author: David Jaffe -AuthorDate: Sun Jan 5 10:00:50 2020 -0800 -Commit: David Jaffe -CommitDate: Sun Jan 5 10:00:50 2020 -0800 - - remove unneeded dependency - -M lib/rust/Cargo.lock -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/proc_args2.rs - -commit edc5c0262d10020b783cdd86812ac2a569d4dcd9 -Author: David Jaffe -AuthorDate: Sun Jan 5 08:20:37 2020 -0800 -Commit: David Jaffe -CommitDate: Sun Jan 5 08:20:37 2020 -0800 - - update whitelist for happening - -M lib/rust/enclone/src/proc_args2.rs - -commit d98225bd15a3925443e6f09ed2bf3a1ce91d0d18 -Author: David Jaffe -AuthorDate: Sun Jan 5 15:51:17 2020 +0000 -Commit: David Jaffe -CommitDate: Sun Jan 5 15:51:17 2020 +0000 - - fix crate reference - -M lib/rust/enclone/tests/enclone_test.rs - -commit 33e5815ecd90d1770ea386c451fe66fd6bfd3ce9 -Author: David Jaffe -AuthorDate: Sat Jan 4 20:25:13 2020 +0000 -Commit: David Jaffe -CommitDate: Sat Jan 4 20:25:13 2020 +0000 - - improve timers - -M lib/rust/enclone/src/main_enclone.rs - -commit ae3b49f0b8a745225ec3299cc427481f0635392d -Author: David Jaffe -AuthorDate: Sat Jan 4 16:27:05 2020 +0000 -Commit: David Jaffe -CommitDate: Sat Jan 4 16:27:05 2020 +0000 - - improve timers - -M lib/rust/enclone/src/main_enclone.rs - -commit 23228b4b2ae5aba47e40e8bc07f4af64fe6ee6fb -Author: David Jaffe -AuthorDate: Sat Jan 4 16:12:36 2020 +0000 -Commit: David Jaffe -CommitDate: Sat Jan 4 16:12:36 2020 +0000 - - add comments - -M lib/rust/enclone/src/join.rs - -commit 66c307bdbd56f3848df2fffd104803138f76dfbe -Author: David Jaffe -AuthorDate: Sat Jan 4 07:59:01 2020 -0800 -Commit: David Jaffe -CommitDate: Sat Jan 4 07:59:01 2020 -0800 - - add comments - -M lib/rust/enclone/src/join_core.rs - -commit 2415215c58c6eb26d18cdad7f8b6c5c88746fb5a -Author: David Jaffe 
-AuthorDate: Sat Jan 4 15:02:48 2020 +0000 -Commit: David Jaffe -CommitDate: Sat Jan 4 15:02:48 2020 +0000 - - update list of crates for happening - -M lib/rust/enclone/src/proc_args2.rs - -commit 9592fea0fb21815eafbbd83c2352d3cb1e1e719b -Author: David Jaffe -AuthorDate: Fri Jan 3 14:18:35 2020 -0800 -Commit: David Jaffe -CommitDate: Fri Jan 3 14:18:35 2020 -0800 - - put large files in a subdirectory - -R100 lib/rust/enclone/test/inputs/101287/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/101287/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/123085/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/123085/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/123089/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/123089/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/126106/outs/metrics_summary_json.json lib/rust/enclone/test/inputs/version12/126106/outs/metrics_summary_json.json -R100 lib/rust/enclone/test/inputs/126106/outs/raw_feature_bc_matrix.h5 lib/rust/enclone/test/inputs/version12/126106/outs/raw_feature_bc_matrix.h5 -R100 lib/rust/enclone/test/inputs/126106/outs/raw_feature_bc_matrix/barcodes.tsv.gz lib/rust/enclone/test/inputs/version12/126106/outs/raw_feature_bc_matrix/barcodes.tsv.gz -R100 lib/rust/enclone/test/inputs/126106/outs/raw_feature_bc_matrix/features.tsv.gz lib/rust/enclone/test/inputs/version12/126106/outs/raw_feature_bc_matrix/features.tsv.gz -R100 lib/rust/enclone/test/inputs/163911/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/163911/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/163914/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/163914/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/165807/outs/all_contig_annotations.json.lz4 
lib/rust/enclone/test/inputs/version12/165807/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/165808/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/165808/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/52177/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/52177/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/83808/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/83808/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/85333/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/85333/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/85679/outs/metrics_summary_json.json lib/rust/enclone/test/inputs/version12/85679/outs/metrics_summary_json.json -R100 lib/rust/enclone/test/inputs/85679/outs/raw_feature_bc_matrix/barcodes.tsv.gz lib/rust/enclone/test/inputs/version12/85679/outs/raw_feature_bc_matrix/barcodes.tsv.gz -R100 lib/rust/enclone/test/inputs/85679/outs/raw_feature_bc_matrix/features.tsv.gz lib/rust/enclone/test/inputs/version12/85679/outs/raw_feature_bc_matrix/features.tsv.gz -R100 lib/rust/enclone/test/inputs/85679/outs/raw_gene_bc_matrices_h5.h5 lib/rust/enclone/test/inputs/version12/85679/outs/raw_gene_bc_matrices_h5.h5 -R100 lib/rust/enclone/test/inputs/86233/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/86233/outs/all_contig_annotations.json.lz4 -R100 lib/rust/enclone/test/inputs/86237/outs/all_contig_annotations.json.lz4 lib/rust/enclone/test/inputs/version12/86237/outs/all_contig_annotations.json.lz4 -M lib/rust/enclone/tests/enclone_test.rs - -commit c85b4890ee07a83b4dbba5a72a847d82d16bf89d -Author: David Jaffe -AuthorDate: Fri Jan 3 14:08:57 2020 -0800 -Commit: David Jaffe -CommitDate: Fri Jan 3 14:08:57 2020 -0800 - - explain how we're handling large files - -A lib/rust/enclone/test/inputs/NOTE - -commit 
0ce9c3cf386eabd61e63434a5dd450c4e9c6abfb -Author: David Jaffe -AuthorDate: Fri Jan 3 09:46:02 2020 -0800 -Commit: David Jaffe -CommitDate: Fri Jan 3 09:46:02 2020 -0800 - - add some timers - -M lib/rust/enclone/src/main_enclone.rs - -commit 83b83eb5fa8a700083a1c1c0b5a1becd4d49b299 -Author: David Jaffe -AuthorDate: Fri Jan 3 08:13:03 2020 -0800 -Commit: David Jaffe -CommitDate: Fri Jan 3 08:13:03 2020 -0800 - - improve sample and donor naming convention - -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/proc_args3.rs -M lib/rust/enclone/test/inputs/enclone_test10_output - -commit ea90b16e8799360d71b97781386a74123028abc6 -Author: David Jaffe -AuthorDate: Fri Jan 3 08:01:37 2020 -0800 -Commit: David Jaffe -CommitDate: Fri Jan 3 08:01:37 2020 -0800 - - fix error message - -M lib/rust/enclone/src/proc_args.rs - -commit 33766bd531c59de1b0b0074367023190113d7898 -Author: David Jaffe -AuthorDate: Fri Jan 3 08:00:38 2020 -0800 -Commit: David Jaffe -CommitDate: Fri Jan 3 08:00:38 2020 -0800 - - remove unneeded mut - -M lib/rust/enclone/tests/enclone_test.rs - -commit e95027e38fcef6cf9deeb99dd4290b033c609519 -Author: David Jaffe -AuthorDate: Fri Jan 3 08:00:00 2020 -0800 -Commit: David Jaffe -CommitDate: Fri Jan 3 08:00:00 2020 -0800 - - test descriptions now include quotation marks - -M lib/rust/enclone/tests/enclone_test.rs - -commit abbdf7e872d8908d33150c26098ac31a6c75cd9c -Author: David Jaffe -AuthorDate: Fri Jan 3 06:51:32 2020 -0800 -Commit: David Jaffe -CommitDate: Fri Jan 3 06:51:32 2020 -0800 - - major changes to allele finding to avoid fake alleles - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/proc_args.rs -A lib/rust/enclone/test/inputs/83808/outs/all_contig_annotations.json.lz4 -M lib/rust/enclone/test/inputs/enclone_test10_output -M lib/rust/enclone/test/inputs/enclone_test20_output -A lib/rust/enclone/test/inputs/enclone_test23_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 
15d49f433adc65027cb043e331d5ab26ed4fcee0 -Author: David Jaffe -AuthorDate: Thu Jan 2 16:41:24 2020 -0800 -Commit: David Jaffe -CommitDate: Thu Jan 2 16:41:24 2020 -0800 - - make allele finding work on exact_clonotypes rather than info - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/test/inputs/enclone_test10_output - -commit b6320ec06fdbd0dc75a2259e1a5ee55b413beb79 -Author: David Jaffe -AuthorDate: Thu Jan 2 11:04:45 2020 -0800 -Commit: David Jaffe -CommitDate: Thu Jan 2 11:04:45 2020 -0800 - - note added datasets - -M lib/rust/enclone/src/enclone.testlist.all - -commit 8a0f743378147c252c07f8721c2b75c1e528c922 -Author: David Jaffe -AuthorDate: Thu Jan 2 11:03:03 2020 -0800 -Commit: David Jaffe -CommitDate: Thu Jan 2 11:03:03 2020 -0800 - - add a test - -A lib/rust/enclone/test/inputs/165807/outs/all_contig_annotations.json.lz4 -A lib/rust/enclone/test/inputs/165808/outs/all_contig_annotations.json.lz4 -A lib/rust/enclone/test/inputs/enclone_test22_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 65a3e6daa48c4bee9a814919f16f9acd41b4d8fb -Author: David Jaffe -AuthorDate: Thu Jan 2 10:12:06 2020 -0800 -Commit: David Jaffe -CommitDate: Thu Jan 2 10:12:06 2020 -0800 - - fix issues with DESCRIP option - -M lib/rust/enclone/src/proc_args2.rs - -commit c01119a237ad46749bf5c2fe0c5b8d522bdd6a1a -Author: David Jaffe -AuthorDate: Thu Jan 2 09:56:22 2020 -0800 -Commit: David Jaffe -CommitDate: Thu Jan 2 09:56:22 2020 -0800 - - exclude dataset with contamination issue - -M lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/enclone.testdata -M lib/rust/enclone/src/enclone.testlist.all - -commit a86bc137775a837b94e0ff157c450ebf80b0d613 -Author: David Jaffe -AuthorDate: Thu Jan 2 09:23:28 2020 -0800 -Commit: David Jaffe -CommitDate: Thu Jan 2 09:23:28 2020 -0800 - - kill a FP by adding a dataset - -M lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/enclone.testdata -M 
lib/rust/enclone/src/enclone.testlist.all - -commit f050f2d357afc2408eaa39ae3c773339fbdd2e44 -Author: David Jaffe -AuthorDate: Thu Jan 2 09:15:53 2020 -0800 -Commit: David Jaffe -CommitDate: Thu Jan 2 09:15:53 2020 -0800 - - for less, use -r not -R - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/proc_args2.rs - -commit 15d93df3fc7751f47ef0933a5fcf0de3d15f7c90 -Author: David Jaffe -AuthorDate: Thu Jan 2 06:32:48 2020 -0800 -Commit: David Jaffe -CommitDate: Thu Jan 2 06:32:48 2020 -0800 - - add comments - -M lib/rust/enclone/src/allele.rs - -commit 3669e31ba27ead3bd1707904b21b5d68b4844666 -Author: David Jaffe -AuthorDate: Thu Jan 2 06:10:45 2020 -0800 -Commit: David Jaffe -CommitDate: Thu Jan 2 06:10:45 2020 -0800 - - add to "enclone help quick" - -M lib/rust/enclone/src/help1.rs - -commit ccbda89784b567185fae8e5a91f80aa8d853aa59 -Author: David Jaffe -AuthorDate: Wed Jan 1 09:17:49 2020 -0800 -Commit: David Jaffe -CommitDate: Wed Jan 1 09:17:49 2020 -0800 - - tweak help - -M lib/rust/enclone/src/help1.rs - -commit 7dd6459f9cc428753a909b53bdcf07d9492dc32b -Author: David Jaffe -AuthorDate: Wed Jan 1 09:11:22 2020 -0800 -Commit: David Jaffe -CommitDate: Wed Jan 1 09:11:22 2020 -0800 - - page output to less -R -F - -M lib/rust/Cargo.lock -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help5.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/proc_args2.rs - -commit fd7a613b058ac8549063218bc8041225fc9cddb5 -Author: David Jaffe -AuthorDate: Tue Dec 31 07:41:24 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 31 07:41:24 2019 -0800 - - add test for MIN_CHAINS_EXACT - -A lib/rust/enclone/test/inputs/enclone_test21_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 7b56b26891ee47b7b3b76fccaf363b6eb44f05c2 -Author: David Jaffe -AuthorDate: Tue Dec 31 07:30:24 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 31 07:30:24 2019 -0800 - - delete NMERGE_ONESIES and add MIN_CHAINS_EXACT - -M 
lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/print_utils5.rs -M lib/rust/enclone/src/proc_args.rs - -commit 75c88cd0b6396880df342480155ab1e7cc2fcd54 -Author: David Jaffe -AuthorDate: Tue Dec 31 07:12:05 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 31 07:12:05 2019 -0800 - - improve variable name - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_utils5.rs -M lib/rust/enclone/src/proc_args.rs - -commit 65ccfcb9d6b272ce7d1bde04a1e25fd2591031ba -Author: David Jaffe -AuthorDate: Mon Dec 30 08:40:10 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 30 08:40:10 2019 -0800 - - fix handling of DESCRIP option - -M lib/rust/enclone/src/proc_args2.rs - -commit 57158f447156bc4d9287f02eb3fd33bec9c118bb -Author: David Jaffe -AuthorDate: Mon Dec 30 08:25:31 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 30 08:25:31 2019 -0800 - - exclude a dataset as contaminated - -M lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/enclone.testdata - -commit 9a8c36883b738ada8715a64ba98be3e32863948f -Author: David Jaffe -AuthorDate: Mon Dec 30 07:12:34 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 30 07:12:34 2019 -0800 - - merge agora datasets - -M lib/rust/enclone/src/enclone.testdata - -commit 947da2e3a237293053b003d5045a6318cbed83a8 -Author: David Jaffe -AuthorDate: Mon Dec 30 06:28:09 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 30 06:28:09 2019 -0800 - - move MIN_CELLS_EXACT imposition to better location - -M lib/rust/enclone/src/misc2.rs -M lib/rust/enclone/src/print_utils5.rs - -commit 086cf7625f64504fb02b5910ec9a0c659ffa7486 -Author: David Jaffe -AuthorDate: Sun Dec 29 09:30:26 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 29 09:30:26 2019 -0800 - - add exact subclonotype grouping - -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/proc_args2.rs - -commit aaf40598b92dab2c1bd82c2a3ccbd71226abf81e 
-Author: David Jaffe -AuthorDate: Sun Dec 29 07:28:45 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 29 07:28:45 2019 -0800 - - add option TOY - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit e861491ad0b39b939c6476a80b2f4114f3ec719f -Author: David Jaffe -AuthorDate: Sun Dec 29 06:10:51 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 29 06:10:51 2019 -0800 - - code simplification - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 9d845db76195868ad5dc602e8a012e87b22bc7da -Author: David Jaffe -AuthorDate: Sun Dec 29 06:07:14 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 29 06:07:14 2019 -0800 - - clarify doc - -M lib/rust/enclone/src/help4.rs - -commit 9a0e0f811a9f97b929f16f55a1001cfc7581a7be -Author: David Jaffe -AuthorDate: Sat Dec 28 09:17:22 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 09:17:22 2019 -0800 - - add option NMERGE_ONESIES - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/proc_args.rs - -commit 27980dea0a4289c1d499ee91624526724c970004 -Author: David Jaffe -AuthorDate: Sat Dec 28 09:10:08 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 09:10:08 2019 -0800 - - document ndiff - -M lib/rust/enclone/src/help4.rs - -commit 31abeed03045fb4335cb5cc2e93cdbd92abfed4e -Author: David Jaffe -AuthorDate: Sat Dec 28 09:06:26 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 09:06:26 2019 -0800 - - generalize ndiff to ndiff1 etc. 
- -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/proc_args.rs - -commit 86665eac46023f5b3d58645a90dd5789f3d090c2 -Author: David Jaffe -AuthorDate: Sat Dec 28 08:39:28 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 08:39:28 2019 -0800 - - split out "enclone help amino" - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help4.rs - -commit 9d2a8eadb4548946b48ac241452ba07cae83edca -Author: David Jaffe -AuthorDate: Sat Dec 28 08:14:50 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 08:14:50 2019 -0800 - - add CVAR ndiff - -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/proc_args.rs - -commit 4a472b7db3bba18771cde369fdbc718d943ff28d -Author: David Jaffe -AuthorDate: Sat Dec 28 07:54:46 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 07:54:46 2019 -0800 - - simplify - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d3386b1d756d56bf747243c6ae74f063c333e118 -Author: David Jaffe -AuthorDate: Sat Dec 28 07:50:24 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 07:50:24 2019 -0800 - - neuter rord - -M lib/rust/enclone/src/print_clonotypes.rs - -commit f6826454561415cce788eaa821edc2f7c3c35ebe -Author: David Jaffe -AuthorDate: Sat Dec 28 07:48:13 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 07:48:13 2019 -0800 - - remove sort of sr (no longer needed) - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d2dec822f3f0468e97d4dc447b8d24a12ee63b57 -Author: David Jaffe -AuthorDate: Sat Dec 28 07:46:38 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 07:46:38 2019 -0800 - - presort exact subclonotypes in a clonotype - -M lib/rust/Cargo.lock -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit f49b54a1be6e37e046b3c05808e8ffa1013d5d94 -Author: David Jaffe -AuthorDate: Sat Dec 28 06:05:31 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 06:05:31 2019 -0800 - - move dataset to tcr list because it is tcr - -M 
lib/rust/enclone/src/enclone.testdata -M lib/rust/enclone/src/enclone.testdata.tcr - -commit deb969c1ca2baab61ea3472d5ad1c3f53c4c2fcb -Author: David Jaffe -AuthorDate: Sat Dec 28 06:03:29 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 06:03:29 2019 -0800 - - fail if no data provided - -M lib/rust/enclone/src/proc_args.rs - -commit eee717de0e3cc47ca50ba48f10dd33430a0bf5d8 -Author: David Jaffe -AuthorDate: Sat Dec 28 05:54:14 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 28 05:54:14 2019 -0800 - - improve some error messages - -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/proc_args3.rs - -commit 5255c30e033f5b2eb53d22b2e121ccb8b792ee79 -Author: David Jaffe -AuthorDate: Fri Dec 27 09:19:34 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 27 09:19:34 2019 -0800 - - add to ideas - -M lib/rust/enclone/src/help5.rs - -commit 45a79716f08161896eed5dfe4e903cbb1a867464 -Author: David Jaffe -AuthorDate: Fri Dec 27 08:41:00 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 27 08:41:00 2019 -0800 - - clarify error message - -M lib/rust/enclone/src/proc_args3.rs - -commit b28714b94ec7834af1a06cf33ae674c1e993bf41 -Author: David Jaffe -AuthorDate: Fri Dec 27 08:38:31 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 27 08:38:31 2019 -0800 - - doc simplification - -M lib/rust/enclone/src/help1.rs - -commit 09f06283cd611f4570db93dac9a20345a21d4835 -Author: David Jaffe -AuthorDate: Fri Dec 27 08:23:45 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 27 08:23:45 2019 -0800 - - add quick guide - -M lib/rust/enclone/src/help1.rs - -commit 55bbec79b040b09b899bae0733462ac79a5e403b -Author: David Jaffe -AuthorDate: Fri Dec 27 07:48:21 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 27 07:48:21 2019 -0800 - - doc tweak - -M lib/rust/enclone/src/help2.rs - -commit a4233e51cdeabefc1eb5519c713fdaa25293562b -Author: David Jaffe -AuthorDate: Fri Dec 27 03:41:41 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 27 03:41:41 2019 -0800 - - reverse 
sort clonotypes by size - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6ca5d1eadda4b6029b9978752f5b4ee58881e1c6 -Author: David Jaffe -AuthorDate: Thu Dec 26 12:25:56 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 12:25:56 2019 -0800 - - clarify documentation - -M lib/rust/enclone/src/help3.rs - -commit 4cdcbb1e99e00c4723f75555758cf7107cfc0b8d -Author: David Jaffe -AuthorDate: Thu Dec 26 12:05:50 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 12:05:50 2019 -0800 - - improve some error messages - -M lib/rust/enclone/src/proc_args.rs - -commit f12ae61bed93db6f1e2587131f23522dc5ce8bce -Author: David Jaffe -AuthorDate: Thu Dec 26 11:30:11 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 11:30:11 2019 -0800 - - delete unused variable - -M lib/rust/enclone/src/print_clonotypes.rs - -commit e29068649f428db1edb581182942a3835b2ae04f -Author: David Jaffe -AuthorDate: Thu Dec 26 11:24:43 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 11:24:43 2019 -0800 - - improve quality score filtering - -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/print_utils5.rs -M lib/rust/enclone/test/inputs/enclone_test16_output - -commit b326fa2d29e84083a808a6eb39b8ffb398c4257f -Author: David Jaffe -AuthorDate: Thu Dec 26 08:20:17 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 08:20:17 2019 -0800 - - clarify enclone help parseable - -M lib/rust/enclone/src/help3.rs - -commit 496402082c8c639c32a5890de85a05b1831e8cc8 -Author: David Jaffe -AuthorDate: Thu Dec 26 08:03:20 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 08:03:20 2019 -0800 - - add mechanism to see qual scores - -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils2.rs - -commit 68794c240bc02af21c502871614427472439d49b -Author: David Jaffe -AuthorDate: Thu Dec 26 07:29:15 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 07:29:15 2019 -0800 - - partial implementation of quality score printing option - -M 
lib/rust/enclone/src/help3.rs - -commit c466283cdc70079b44bb7c0356066e4cfa7d8802 -Author: David Jaffe -AuthorDate: Thu Dec 26 06:52:20 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 06:52:20 2019 -0800 - - document SEQC and FULL_SEQC - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help4.rs - -commit d272c13569210f5143ca35ac935976e0d762c288 -Author: David Jaffe -AuthorDate: Thu Dec 26 06:39:47 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 06:39:47 2019 -0800 - - clarify what BARCODES does - -M lib/rust/enclone/src/help4.rs - -commit bd83d7ec466a95b16441da865e6b3320f8f5f6a3 -Author: David Jaffe -AuthorDate: Thu Dec 26 06:34:55 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 06:34:55 2019 -0800 - - make a help page more logical - -M lib/rust/enclone/src/help4.rs - -commit 00b8ec60ccd48b1989a0c905b64cd520ae732b70 -Author: David Jaffe -AuthorDate: Thu Dec 26 06:32:51 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 06:32:51 2019 -0800 - - bold a couple of things - -M lib/rust/enclone/src/help4.rs - -commit f9e89e5745bd618151738a26cecb3b7b6037b83f -Author: David Jaffe -AuthorDate: Thu Dec 26 06:27:42 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 26 06:27:42 2019 -0800 - - kill SEQCS, FULL_SEQCS; also fix a test - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_utils3.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/test/inputs/enclone_test10_output -M lib/rust/enclone/test/inputs/enclone_test4_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 1790dbb2cfcb9b0f6d137e7eddce7ca04514bcfe -Author: David Jaffe -AuthorDate: Wed Dec 25 12:07:02 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 12:07:02 2019 -0800 - - correct terminology - -M lib/rust/enclone/src/print_utils1.rs - -commit 36106c52ed67eaee4a2ff12cfeac84abe11b23d2 -Author: David Jaffe -AuthorDate: Wed Dec 25 09:38:58 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 09:38:58 2019 -0800 - - 
split file to reduce file sizes - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils4.rs -A lib/rust/enclone/src/print_utils5.rs - -commit 85d428f697e675e249eb0c45001e043b5a39eece -Author: David Jaffe -AuthorDate: Wed Dec 25 09:05:57 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 09:05:57 2019 -0800 - - some reorg to reduce file sizes - -M lib/rust/enclone/src/proc_args2.rs -M lib/rust/enclone/src/proc_args3.rs - -commit 90ae534b3d57f8750925a24d01d9cb9ae68ab4d5 -Author: David Jaffe -AuthorDate: Wed Dec 25 09:01:31 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 09:01:31 2019 -0800 - - some splitting to reduce file sizes - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/help4.rs -A lib/rust/enclone/src/help5.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/proc_args2.rs - -commit 086202697d4a00d16f3ec013e932fd8990871471 -Author: David Jaffe -AuthorDate: Wed Dec 25 08:49:30 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 08:49:30 2019 -0800 - - for TCR, do not allow diffs in CDR3 - -M lib/rust/enclone/src/join_core.rs - -commit 2de77fc18c6112c0a31b3310e0103c5f1a96d8f1 -Author: David Jaffe -AuthorDate: Wed Dec 25 07:37:27 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 07:37:27 2019 -0800 - - add TCR test - -A lib/rust/enclone/src/enclone.test.tcr -M lib/rust/enclone/src/enclone.testdata.tcr - -commit 3271f857f0be40a98303f876c0012995006cdaac -Author: David Jaffe -AuthorDate: Wed Dec 25 07:23:34 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 07:23:34 2019 -0800 - - add examples - -M lib/rust/enclone/src/bin/split_by_tags.rs - -commit 06abd7ef0711066f052280cd4565dfcee6b331a9 -Author: David Jaffe -AuthorDate: Wed Dec 25 06:56:18 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 06:56:18 2019 -0800 - - clarify faq item - -M lib/rust/enclone/src/help4.rs - -commit 
08b1ed434cc18462c40a4355180248ff911543e0 -Author: David Jaffe -AuthorDate: Wed Dec 25 06:38:26 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 06:38:26 2019 -0800 - - tuning of "enclone help special" page - -M lib/rust/enclone/src/help3.rs - -commit 1c2e8558c7d0f66c12f9cbb6ce691c98d92a7c60 -Author: David Jaffe -AuthorDate: Wed Dec 25 06:27:44 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 06:27:44 2019 -0800 - - rename option to KEEP_IMPROPER and document. - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/proc_args.rs - -commit 2caeb0d7d9ec5dd11402b3b2bbff8146cd889fa3 -Author: David Jaffe -AuthorDate: Wed Dec 25 06:16:51 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 06:16:51 2019 -0800 - - document whitelist contamination filtering - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help3.rs - -commit fdec3a704b4d00d119f68b7e564a899c186ba0bf -Author: David Jaffe -AuthorDate: Wed Dec 25 06:09:11 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 25 06:09:11 2019 -0800 - - add whitelist contamination filtering test - -A lib/rust/enclone/test/inputs/52177/outs/all_contig_annotations.json -A lib/rust/enclone/test/inputs/52177/outs/all_contig_annotations.json.lz4 -A lib/rust/enclone/test/inputs/enclone_test20_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 9516b3af070a73c680c53feeb762aa4554df73ab -Author: David Jaffe -AuthorDate: Tue Dec 24 16:03:57 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 24 16:03:57 2019 -0800 - - improve test - -M lib/rust/enclone/tests/enclone_test.rs - -commit e0fe9c124d8bf7fedadc563f5f6bfdc35fc24347 -Author: David Jaffe -AuthorDate: Tue Dec 24 15:59:32 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 24 15:59:32 2019 -0800 - - formatting fix - -M lib/rust/enclone/src/help3.rs - -commit 5bfa145cf482e6fb38b5a639d9a9bc535174ba78 -Author: David Jaffe -AuthorDate: Tue Dec 24 15:56:32 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 24 15:56:32 2019 
-0800 - - add filter for certain stupid foursies - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/proc_args.rs - -commit 4b8fc8227aa1c435177ae1873fdaa1b2b4728499 -Author: David Jaffe -AuthorDate: Tue Dec 24 07:26:23 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 24 07:26:23 2019 -0800 - - add a switch for a filtering option - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/print_utils4.rs -M lib/rust/enclone/src/proc_args.rs - -commit 9a2fb6e1abda905ee554519c14d0ebf707a04b38 -Author: David Jaffe -AuthorDate: Tue Dec 24 07:09:09 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 24 07:09:09 2019 -0800 - - add example corresponding to last bugfix - -A lib/rust/enclone/test/inputs/enclone_test19_output -M lib/rust/enclone/tests/enclone_test.rs - -commit e214e728fa2559e5b26d1a531eb4df3d80e90081 -Author: David Jaffe -AuthorDate: Tue Dec 24 06:58:48 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 24 06:58:48 2019 -0800 - - fix very nasty subtle bug in graph_filter - -M lib/rust/enclone/src/graph_filter.rs - -commit 744b3b0339742c28bad883f4515b676a12300f98 -Author: David Jaffe -AuthorDate: Tue Dec 24 06:25:45 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 24 06:25:45 2019 -0800 - - add logging if GRAPH on - -M lib/rust/enclone/src/graph_filter.rs - -commit cd85aa6335745da1333ebfcd0d85a551e08664a3 -Author: David Jaffe -AuthorDate: Mon Dec 23 13:11:46 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 13:11:46 2019 -0800 - - CellRanger ==> Cell Ranger - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/read_json.rs - -commit ba17855ca1e61dd3b3a6f1a2d75ceeb42135fa7f -Author: David Jaffe -AuthorDate: Mon Dec 23 10:23:46 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 10:23:46 2019 -0800 - - doc fix - -M lib/rust/enclone/src/help4.rs - -commit 
046ee3959f3d708723010d7116133e353c27d146 -Author: David Jaffe -AuthorDate: Mon Dec 23 09:00:35 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 09:00:35 2019 -0800 - - nicify error message - -M lib/rust/enclone/src/read_json.rs - -commit 9a132e1a542ac25202d3cd6b63ddeb6c63be1113 -Author: David Jaffe -AuthorDate: Mon Dec 23 08:06:23 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 08:06:23 2019 -0800 - - delete more extern crate statements - -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/join_core.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs - -commit 7d3a6a7090a0d5d66536828317f8c9033bb9d0a5 -Author: David Jaffe -AuthorDate: Mon Dec 23 08:00:09 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 08:00:09 2019 -0800 - - kill more unneeded extern statements - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/graph_filter.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/join_core.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/proc_args2.rs -M lib/rust/enclone/src/read_json.rs -M lib/rust/enclone/src/types.rs - -commit e3e4b72fa4267e6a6b252d392fa0013defbe70f0 -Author: David Jaffe -AuthorDate: Mon Dec 23 07:38:14 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 07:38:14 2019 -0800 - - delete more unneeded extern statements - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/join2.rs -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/main_build_immcantation_inputs.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/proc_args.rs - -commit 25b2be1a710a771a4d662af947921329108840c9 -Author: David Jaffe -AuthorDate: Mon Dec 23 06:47:11 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 06:47:11 2019 -0800 
- - remove more unneeded extern crate lines - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/join2.rs -M lib/rust/enclone/src/join_core.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/print_utils4.rs - -commit db7eff510abe4aa811e3623421c8fd92f18c58dd -Author: David Jaffe -AuthorDate: Mon Dec 23 06:37:01 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 06:37:01 2019 -0800 - - delete more unneeded extern crate lines - -M lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/graph_filter.rs -M lib/rust/enclone/src/group.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/print_utils3.rs -M lib/rust/enclone/src/print_utils4.rs -M lib/rust/enclone/src/proc_args2.rs -M lib/rust/enclone/src/proc_args3.rs -M lib/rust/enclone/src/read_json.rs -M lib/rust/enclone/src/subset_json.rs - -commit 5a0c50412ea9e905582c2992e9b5dcc5f9eabb3f -Author: David Jaffe -AuthorDate: Mon Dec 23 06:28:59 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 06:28:59 2019 -0800 - - delete more unneeded extern crate lines - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/graph_filter.rs -M lib/rust/enclone/src/group.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/join2.rs -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/print_utils3.rs -M lib/rust/enclone/src/print_utils4.rs -M lib/rust/enclone/src/proc_args2.rs -M lib/rust/enclone/src/read_json.rs - -commit 4bc0d4bc8622827a964820854545c145589c3e31 -Author: David Jaffe -AuthorDate: Mon Dec 23 06:20:12 2019 -0800 
-Commit: David Jaffe -CommitDate: Mon Dec 23 06:20:12 2019 -0800 - - remove some unneeded extern crate lines - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/graph_filter.rs -M lib/rust/enclone/src/group.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/join2.rs -M lib/rust/enclone/src/join_core.rs -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/proc_args2.rs -M lib/rust/enclone/src/proc_args3.rs -M lib/rust/enclone/src/read_json.rs -M lib/rust/enclone/src/subset_json.rs - -commit dfb7af5458e0e908c4c067e7bc03e543a3c5ad9e -Author: David Jaffe -AuthorDate: Mon Dec 23 06:12:49 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 06:12:49 2019 -0800 - - delete some unneeded extern crate lines - -M lib/rust/enclone/src/explore.rs -M lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/graph_filter.rs -M lib/rust/enclone/src/group.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/join2.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/print_utils3.rs -M lib/rust/enclone/src/print_utils4.rs -M lib/rust/enclone/src/proc_args2.rs -M lib/rust/enclone/src/proc_args3.rs - -commit de35d52f8b75af927fd1eb27548e451980408a57 -Author: David Jaffe -AuthorDate: Mon Dec 23 06:05:08 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 06:05:08 2019 -0800 - - kill certain onesie mergers - -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/join2.rs - -commit 46a21c0d92a41545a6202024ef5aa6cfa95b50df -Author: David Jaffe -AuthorDate: Mon Dec 23 04:53:41 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 04:53:41 2019 -0800 - - add disclaimer - -M lib/rust/enclone/src/README - -commit 0e0d61640ded37562a66504cded93e76b395f383 -Author: David Jaffe -AuthorDate: Mon Dec 23 04:47:31 2019 -0800 -Commit: David 
Jaffe -CommitDate: Mon Dec 23 04:47:31 2019 -0800 - - add to faq - -M lib/rust/enclone/src/help4.rs - -commit a33dcdc8e509c9533b17be9afba2070f407e137e -Author: David Jaffe -AuthorDate: Mon Dec 23 04:22:23 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 04:22:23 2019 -0800 - - forgot to add file - -A lib/rust/enclone/test/inputs/enclone_test18_output - -commit e477aab00bf0dc7c9d0b1245d7ef8801723d639f -Author: David Jaffe -AuthorDate: Mon Dec 23 04:20:59 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 04:20:59 2019 -0800 - - fix bug: cross filtering was applied to two different samples from same donor - -M lib/rust/enclone/src/misc1.rs -M lib/rust/enclone/tests/enclone_test.rs - -commit 57183b99601eab3f06ba4997cafb16374e7e110b -Author: David Jaffe -AuthorDate: Mon Dec 23 04:10:50 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 23 04:10:50 2019 -0800 - - add n() and comments to SampleInfo - -M lib/rust/enclone/src/defs.rs - -commit 7b26f2f51619b76ee5ae0992334abb807bab9b2f -Author: David Jaffe -AuthorDate: Sun Dec 22 10:55:36 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 22 10:55:36 2019 -0800 - - comments and logging - -M lib/rust/enclone/src/bin/split_by_tags.rs - -commit c0546c6ce62c5a4bd5d6801a12b44e88145afb19 -Author: David Jaffe -AuthorDate: Sun Dec 22 09:42:47 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 22 09:42:47 2019 -0800 - - split a vdj annotations file using tags - -A lib/rust/enclone/src/bin/split_by_tags.rs - -commit bead8ef33df6386cc85c54b5c4eb1fe669017a01 -Author: David Jaffe -AuthorDate: Sun Dec 22 08:04:34 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 22 08:04:34 2019 -0800 - - add fn to extract part of all_contigs_annotations.json - -M lib/rust/enclone/src/lib.rs -A lib/rust/enclone/src/subset_json.rs - -commit 5e64987b7639b8b9bcc57119be4593945a856f2a -Author: David Jaffe -AuthorDate: Sat Dec 21 08:42:51 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 21 08:42:51 2019 -0800 - - add filter for certain 
onesie clonotypes - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils4.rs - -commit 13b164a98285cb98bba57d17af0b80e7da163297 -Author: David Jaffe -AuthorDate: Sat Dec 21 07:54:29 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 21 07:54:29 2019 -0800 - - normalize antibody counts; update test results accordingly - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/test/inputs/enclone_test17_output -M lib/rust/enclone/test/inputs/enclone_test4_output - -commit 31263401f4deecbc1051d9f4044760e332c9b25b -Author: David Jaffe -AuthorDate: Sat Dec 21 07:52:00 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 21 07:52:00 2019 -0800 - - insert missing newline - -M lib/rust/enclone/tests/enclone_test.rs - -commit 30de73ed094629c171325df58edadc216ced7ba3 -Author: David Jaffe -AuthorDate: Sat Dec 21 07:48:52 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 21 07:48:52 2019 -0800 - - if a subtest fails, fail the test - -M lib/rust/enclone/tests/enclone_test.rs - -commit af4853cf5128a5b27d9eefca7b283abcc69471fb -Author: David Jaffe -AuthorDate: Sat Dec 21 04:53:27 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 21 04:53:27 2019 -0800 - - fix error message - -M lib/rust/enclone/src/proc_args3.rs - -commit 17dbcf432060bb87684880e41916f3967c6d5509 -Author: David Jaffe -AuthorDate: Sat Dec 21 03:57:06 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 21 03:57:06 2019 -0800 - - update - -M lib/rust/enclone/src/enclone.out - -commit 5243d09d6c5421f2869bb554273b55d0b6b4730e -Author: David Jaffe -AuthorDate: Sat Dec 21 03:54:26 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 21 03:54:26 2019 -0800 - - lower MAX_PARTNERS from 100 to 50 - -M lib/rust/enclone/src/graph_filter.rs - -commit cb27854c466930864ab7c1da83a5bad6c4319b43 -Author: David Jaffe -AuthorDate: Fri Dec 20 11:14:42 2019 -0800 -Commit: David Jaffe -CommitDate: 
Fri Dec 20 11:14:42 2019 -0800 - - faq beautification - -M lib/rust/enclone/src/help4.rs - -commit df504a20eaa2bcc8cdf59800c72eba5fe63dce8f -Author: David Jaffe -AuthorDate: Fri Dec 20 07:36:06 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 20 07:36:06 2019 -0800 - - include build.rs - -M lib/rust/enclone/BUILD.bazel - -commit 3a94d46d1dfdab63603fc75a84973e9ddb2300c4 -Author: David Jaffe -AuthorDate: Fri Dec 20 07:26:44 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 20 07:26:44 2019 -0800 - - add tests for cvars stuff and nicify test output - -M lib/rust/enclone/test/inputs/enclone_test6_output -M lib/rust/enclone/test/inputs/enclone_test7_output -M lib/rust/enclone/test/inputs/enclone_test8_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 7abe81e916d2e23c4e0704c3c61ba11af447c42c -Author: David Jaffe -AuthorDate: Fri Dec 20 06:48:52 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 20 06:48:52 2019 -0800 - - add to faq - -M lib/rust/enclone/src/help4.rs - -commit 0af6f4e6fc9c7f8e32407adf438de43f01008590 -Author: David Jaffe -AuthorDate: Fri Dec 20 06:39:36 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 20 06:39:36 2019 -0800 - - OCD changes to remove a couple of spurious newlines - -M lib/rust/enclone/tests/enclone_test.rs - -commit 7af7f9b9d544c5c817adefc192498b2180f82935 -Author: David Jaffe -AuthorDate: Fri Dec 20 06:38:04 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 20 06:38:04 2019 -0800 - - now everything in "enclone help lvars" is tested - -M lib/rust/enclone/test/inputs/enclone_test10_output -M lib/rust/enclone/test/inputs/enclone_test17_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 2aaa0675fc18b390ed2ae7423e1203268e8e47a0 -Author: David Jaffe -AuthorDate: Fri Dec 20 06:25:29 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 20 06:25:29 2019 -0800 - - clean up handling of LVARS for PER_BC, fix bug, add test for it - -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/print_clonotypes.rs -M 
lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/print_utils3.rs -A lib/rust/enclone/test/inputs/enclone_test17_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 0611a35f3f8d68a24fac0339605ce4a00d395672 -Author: David Jaffe -AuthorDate: Fri Dec 20 05:38:25 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 20 05:38:25 2019 -0800 - - add test - -A lib/rust/enclone/test/inputs/enclone_test16_output -M lib/rust/enclone/tests/enclone_test.rs - -commit dd5caed6058e64e2634f09e8cd01ed0dc9b0724e -Author: David Jaffe -AuthorDate: Fri Dec 20 05:30:58 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 20 05:30:58 2019 -0800 - - add ability to display cell counts for subsets of datasets - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/proc_args2.rs - -commit 11cad7e7c67638e2eced7b172987f91be5e97401 -Author: David Jaffe -AuthorDate: Thu Dec 19 08:31:12 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 19 08:31:12 2019 -0800 - - allow specification of arbitrary reference sequence file - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/proc_args2.rs -M lib/rust/enclone/src/read_json.rs - -commit 84eaa14b6519244b2315306f9c4e0faf26b28a83 -Author: David Jaffe -AuthorDate: Thu Dec 19 07:34:50 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 19 07:34:50 2019 -0800 - - improve error message - -M lib/rust/enclone/src/read_json.rs - -commit 5cd8a538cbe664d7735d48b6d927507490e052b2 -Author: David Jaffe -AuthorDate: Thu Dec 19 07:25:37 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 19 07:25:37 2019 -0800 - - add mechanism to declare mousiness of data - -M lib/rust/enclone/src/defs.rs -M 
lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/proc_args.rs - -commit 082554f48c9eb2e0bd42617734864d96c5fa1fcf -Author: David Jaffe -AuthorDate: Thu Dec 19 07:03:53 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 19 07:03:53 2019 -0800 - - add comment - -M lib/rust/enclone/tests/enclone_test.rs - -commit afb8cf066ac390b8f4b99f17f2aa1e3348da7630 -Author: David Jaffe -AuthorDate: Thu Dec 19 04:04:16 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 19 04:04:16 2019 -0800 - - print nice exit message if there's a stack trace - -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/proc_args2.rs - -commit 41988d4be57bdd4c6a5c65fd0604594f7ae45b54 -Author: David Jaffe -AuthorDate: Thu Dec 19 02:45:14 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 19 02:45:14 2019 -0800 - - make sure that json file read will not fail - -M lib/rust/enclone/src/read_json.rs - -commit b0128e5924ef751b76214613fa32fa7b84ed6bd9 -Author: David Jaffe -AuthorDate: Thu Dec 19 02:24:43 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 19 02:24:43 2019 -0800 - - fail nicely if read for META won't work - -M lib/rust/enclone/src/proc_args2.rs - -commit 7caa01833527b579b21da880f028ac795c3eae83 -Author: David Jaffe -AuthorDate: Thu Dec 19 02:17:25 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 19 02:17:25 2019 -0800 - - delete some redundant documentation - -M lib/rust/enclone/src/README - -commit b2cb48c9fdc68aa7c7aa5b3ccdb9a955fe57614e -Author: David Jaffe -AuthorDate: Thu Dec 19 02:15:06 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 19 02:15:06 2019 -0800 - - add test to make sure file is writeable - -M lib/rust/enclone/src/main_enclone.rs - -commit 2f54ae9763319b8d12c371abc6e8621a09ce5afe -Author: David Jaffe -AuthorDate: Wed Dec 18 13:22:55 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 18 13:22:55 2019 -0800 - - add test for insertions - -A lib/rust/enclone/test/inputs/86233/outs/all_contig_annotations.json.lz4 -A 
lib/rust/enclone/test/inputs/enclone_test15_output -M lib/rust/enclone/tests/enclone_test.rs - -commit ea0d83149c2b1fae6aba166f15cca934702dcf2d -Author: David Jaffe -AuthorDate: Wed Dec 18 11:29:48 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 18 11:29:48 2019 -0800 - - add joke - -M lib/rust/enclone/src/help1.rs - -commit 008bd0fe227891595e19ab70eedaed38d81deebe -Author: David Jaffe -AuthorDate: Wed Dec 18 10:19:53 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 18 10:19:53 2019 -0800 - - now insertions are handled - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/info.rs - -commit e4ce106a6c659d6518ccf03b4418532068560845 -Author: David Jaffe -AuthorDate: Wed Dec 18 07:19:50 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 18 07:19:50 2019 -0800 - - delete some dead code - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/misc2.rs -M lib/rust/enclone/src/print_utils3.rs - -commit c583a636b17f92e6f8bf196c84a4f94ba7d61220 -Author: David Jaffe -AuthorDate: Wed Dec 18 06:36:43 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 18 06:36:43 2019 -0800 - - delete some dead code - -M lib/rust/enclone/src/info.rs - -commit 203123b81da83c8d8611d3cee8f68153147a8e77 -Author: David Jaffe -AuthorDate: Wed Dec 18 06:35:11 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 18 06:35:11 2019 -0800 - - insertions are now shown in a "notes" column - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/misc2.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/print_utils3.rs -M lib/rust/enclone/src/proc_args.rs - -commit a792ee422c4e9db3bc05842142b05892396bd202 -Author: David Jaffe -AuthorDate: Wed Dec 18 04:21:05 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 18 04:21:05 2019 -0800 - - various changes involving 
external clonotyping (non public option) - -M lib/rust/enclone/src/bin/assess_clonotyping.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/print_utils2.rs - -commit ddb372237a3a3297d49da9104df5ca05caeffad5 -Author: David Jaffe -AuthorDate: Tue Dec 17 18:20:50 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 18:20:50 2019 -0800 - - grammatical tweak - -M lib/rust/enclone/src/help1.rs - -commit 1d45ebf506db907c6e48c65270ef0188339ec4f1 -Author: David Jaffe -AuthorDate: Tue Dec 17 17:12:54 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 17:12:54 2019 -0800 - - clarify definition of MIN_DATASETS - -M lib/rust/enclone/src/help3.rs - -commit f331cfe56a7d2d29d8a1f11ebab5e57c6e487687 -Author: David Jaffe -AuthorDate: Tue Dec 17 15:39:03 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 15:39:03 2019 -0800 - - grammatical clarification - -M lib/rust/enclone/src/help4.rs - -commit 97188eed4751b9b0c5779930782dc4d124e73172 -Author: David Jaffe -AuthorDate: Tue Dec 17 15:37:38 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 15:37:38 2019 -0800 - - add option to write donor reference sequence - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/proc_args.rs - -commit 8b0423bf5a3fe515cb28a119d345fa7cf4dfa85c -Author: David Jaffe -AuthorDate: Tue Dec 17 15:09:20 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 15:09:20 2019 -0800 - - remove spurious semicolon - -M lib/rust/enclone/src/print_utils3.rs - -commit 6e42d48a7c1dd330ce430a0dd242cd0d2ba8069b -Author: David Jaffe -AuthorDate: Tue Dec 17 14:38:41 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 14:38:41 2019 -0800 - - clarify support statement - -M lib/rust/enclone/src/help1.rs - -commit 721ca20998350394edf8fe77f11ad6186728e31e -Author: David Jaffe -AuthorDate: Tue Dec 17 14:24:53 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 14:24:53 2019 -0800 - 
- strengthen tests - -M lib/rust/enclone/test/inputs/enclone_test10_output -M lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/tests/enclone_test.rs - -commit be6e1be4e3a3604d8d5bdae794c4ce13e0ed2782 -Author: David Jaffe -AuthorDate: Tue Dec 17 14:02:47 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 14:02:47 2019 -0800 - - add AMINO option donorn - -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils4.rs -M lib/rust/enclone/src/proc_args.rs - -commit 42b8c64c5e87cad6a5dc4368d44d8bbbc2b9487c -Author: David Jaffe -AuthorDate: Tue Dec 17 10:31:41 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 10:31:41 2019 -0800 - - describe clonotype grouping options - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help3.rs - -commit 639adaebd24acdc7f7ba7bf6a0b98e0e28e0ea91 -Author: David Jaffe -AuthorDate: Tue Dec 17 07:36:32 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 07:36:32 2019 -0800 - - clarify insertion logging - -M lib/rust/enclone/src/info.rs - -commit 18a3f8059c5f313c127dfc869fceee2b3d5adca0 -Author: David Jaffe -AuthorDate: Tue Dec 17 05:06:48 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 05:06:48 2019 -0800 - - allow value of CVARS to be null - -M lib/rust/enclone/src/proc_args.rs - -commit 9060285585533f760a5f60b99b8cf3186a82d048 -Author: David Jaffe -AuthorDate: Tue Dec 17 03:22:01 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 03:22:01 2019 -0800 - - tighten joins for TCR - -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/join_core.rs -M lib/rust/enclone/src/main_enclone.rs - -commit f185a7a87354b9248da7ef5b7519448d95687397 -Author: David Jaffe -AuthorDate: Tue Dec 17 03:00:25 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 17 03:00:25 2019 -0800 - - enclone help support: now shows full version information - -M lib/rust/enclone/Cargo.toml -A lib/rust/enclone/build.rs -M lib/rust/enclone/src/help1.rs - -commit 
d4fb3216b7ae06c01f6d0ae28e004f6c23c0dc80 -Author: David Jaffe -AuthorDate: Mon Dec 16 17:13:14 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 16 17:13:14 2019 -0800 - - clarify that amino acid 0 is the start codon - -M lib/rust/enclone/src/help1.rs - -commit a21b4a96271f6dc52efd21d4966af038af154c77 -Author: David Jaffe -AuthorDate: Mon Dec 16 01:48:09 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 16 01:48:09 2019 -0800 - - add to faq - -M lib/rust/enclone/src/help4.rs - -commit a89c06b22850f19bba789acf7fe008e89c0f18b7 -Author: David Jaffe -AuthorDate: Mon Dec 16 01:40:15 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 16 01:40:15 2019 -0800 - - allow weak chain filter to be turned off - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/print_utils4.rs -M lib/rust/enclone/src/proc_args.rs - -commit 4382c9c3eb9a869b8c17b70f379ffeb7c169597c -Author: David Jaffe -AuthorDate: Mon Dec 16 01:32:12 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 16 01:32:12 2019 -0800 - - tidy - -M lib/rust/enclone/src/proc_args.rs - -commit cebb8b1ffe81f7b99d727e7df48153737ff75e90 -Author: David Jaffe -AuthorDate: Sun Dec 15 10:06:03 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 15 10:06:03 2019 -0800 - - add to a test, but also fix several issues with the testing machine - -M lib/rust/enclone/test/inputs/enclone_test11_output -M lib/rust/enclone/test/inputs/enclone_test12_output -M lib/rust/enclone/test/inputs/enclone_test13_output -M lib/rust/enclone/test/inputs/enclone_test14_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 39d7a97ba9e424e039838386b3d7bbda9f479f22 -Author: David Jaffe -AuthorDate: Sun Dec 15 09:14:02 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 15 09:14:02 2019 -0800 - - various doc tweaks - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/help_utils.rs - -commit 8879a7ec002540d9d40e0f7819b589c20b6b280a -Author: David Jaffe -AuthorDate: Sun 
Dec 15 08:30:36 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 15 08:30:36 2019 -0800 - - fix major bug in definition of bads and add test for it - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils4.rs -A lib/rust/enclone/test/inputs/enclone_test14_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 1645950e6aa0c2a7bdaa700600f453c3044a4c28 -Author: David Jaffe -AuthorDate: Sun Dec 15 07:41:28 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 15 07:41:28 2019 -0800 - - simplify a bit - -M lib/rust/enclone/src/print_utils4.rs - -commit c0ffd60a82e7c21a2434878565fe1faf575d4869 -Author: David Jaffe -AuthorDate: Sun Dec 15 07:38:26 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 15 07:38:26 2019 -0800 - - only execute delete_weaks on pass 1 - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 11f64efbf074208008d415522b94ab272e1e70a6 -Author: David Jaffe -AuthorDate: Sun Dec 15 06:33:25 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 15 06:33:25 2019 -0800 - - explain that CR ≥ 3.1 is required - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs - -commit a9184a2b3d002937e37e41f88f4ebe32c35a8fa4 -Author: David Jaffe -AuthorDate: Sat Dec 14 16:51:31 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 14 16:51:31 2019 -0800 - - only set PRE for internal runs - -M lib/rust/enclone/src/proc_args.rs - -commit ee8a003a86d43071be0aa44027066ecc975cdacf -Author: David Jaffe -AuthorDate: Sat Dec 14 04:09:31 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 14 04:09:31 2019 -0800 - - raise MAX_KILL_CELLS to 2 and add a test that benefits from that - -M lib/rust/enclone/src/graph_filter.rs -A lib/rust/enclone/test/inputs/enclone_test13_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 2a50c502a6d2561915fdaa013e0cb86e9514ee45 -Author: David Jaffe -AuthorDate: Sat Dec 14 03:56:12 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 14 03:56:12 2019 -0800 - - tidy code - -M 
lib/rust/enclone/src/graph_filter.rs - -commit 4d8c3e84812d9f7a63901033e4196a5684034994 -Author: David Jaffe -AuthorDate: Fri Dec 13 15:38:58 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 15:38:58 2019 -0800 - - parallelize tests - -M lib/rust/enclone/tests/enclone_test.rs - -commit fe3e50848039ac7f8bbf9095e5eed856b4515286 -Author: David Jaffe -AuthorDate: Fri Dec 13 14:39:00 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 14:39:00 2019 -0800 - - forgot to add this in last commit - -A lib/rust/enclone/test/inputs/enclone_test12_output - -commit c15fc3c5a03017c0000fa55f8ca5994824ad8bf7 -Author: David Jaffe -AuthorDate: Fri Dec 13 14:37:29 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 14:37:29 2019 -0800 - - Fix a noise filtering problem and add a dataset/test that is made better by the change. - -M lib/rust/enclone/src/graph_filter.rs -A lib/rust/enclone/test/inputs/163914/outs/all_contig_annotations.json.lz4 -M lib/rust/enclone/tests/enclone_test.rs - -commit 5297234e9151b8dd7bed5ac4a1663c9a793fa934 -Author: David Jaffe -AuthorDate: Fri Dec 13 10:40:09 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 10:40:09 2019 -0800 - - clarify - -M lib/rust/enclone/README.md - -commit 60b0df0cad7e54cd108c9a4f984fb9ece20e3ffc -Author: David Jaffe -AuthorDate: Fri Dec 13 10:36:41 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 10:36:41 2019 -0800 - - don't require _invocation - -M lib/rust/enclone/src/proc_args2.rs -D lib/rust/enclone/test/inputs/101287/_invocation -D lib/rust/enclone/test/inputs/123085/_invocation -D lib/rust/enclone/test/inputs/123089/_invocation -D lib/rust/enclone/test/inputs/163911/_invocation -D lib/rust/enclone/test/inputs/85333/_invocation -D lib/rust/enclone/test/inputs/86237/_invocation - -commit b161460770c985e0833d31a15c96cac594af06aa -Author: David Jaffe -AuthorDate: Fri Dec 13 10:28:03 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 10:28:03 2019 -0800 - - now there is "enclone help example2" - -A 
lib/rust/enclone/src/example2 -M lib/rust/enclone/src/help1.rs -A lib/rust/enclone/test/inputs/126106/outs/metrics_summary_json.json -A lib/rust/enclone/test/inputs/126106/outs/raw_feature_bc_matrix.h5 -A lib/rust/enclone/test/inputs/126106/outs/raw_feature_bc_matrix/barcodes.tsv.gz -A lib/rust/enclone/test/inputs/126106/outs/raw_feature_bc_matrix/features.tsv.gz - -commit c7e04544e863c7900803d8b4cf85c2104915b80b -Author: David Jaffe -AuthorDate: Fri Dec 13 07:29:23 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 07:29:23 2019 -0800 - - save a little space - -M lib/rust/enclone/src/help1.rs - -commit 9e41d43095036591e25fb3ca636e2bcf9aa5d584 -Author: David Jaffe -AuthorDate: Fri Dec 13 07:24:54 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 07:24:54 2019 -0800 - - put test instructions in README.md - -D lib/rust/enclone/README -M lib/rust/enclone/README.md - -commit 4772f2504385953be59988a19ca293f1ec91b1fc -Author: David Jaffe -AuthorDate: Fri Dec 13 07:20:19 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 07:20:19 2019 -0800 - - fix a test - -M lib/rust/enclone/tests/enclone_test.rs - -commit 1e60341e33f4ed98b3919b0fd2c547390584357d -Author: David Jaffe -AuthorDate: Fri Dec 13 07:17:43 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 07:17:43 2019 -0800 - - add enclone help display - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/proc_args.rs - -commit 31e54b5d312ec545fd5f49e72e0903a88fa80875 -Author: David Jaffe -AuthorDate: Fri Dec 13 07:01:01 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 07:01:01 2019 -0800 - - add explanatory text - -M lib/rust/enclone/README.md - -commit a933f58a6c6ce911507b9eaca7fb81c5a1875272 -Author: David Jaffe -AuthorDate: Fri Dec 13 06:28:11 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 06:28:11 2019 -0800 - - add to enclone help support - -M lib/rust/enclone/src/help1.rs - -commit 
3e6b94b1ce04800a62ccd963e8f98ddbacac52e5 -Author: David Jaffe -AuthorDate: Fri Dec 13 06:11:23 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 06:11:23 2019 -0800 - - add "enclone help support" - -M lib/rust/enclone/src/help1.rs - -commit 2b48c741bca04888b072486df470e812119ad1e9 -Author: David Jaffe -AuthorDate: Fri Dec 13 05:29:37 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 05:29:37 2019 -0800 - - add to faq - -M lib/rust/enclone/src/help4.rs - -commit 818a18c53250b48399633bd650af154d2ca3e9ee -Author: David Jaffe -AuthorDate: Fri Dec 13 05:15:06 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 05:15:06 2019 -0800 - - moderate comments about speed - -M lib/rust/enclone/src/help4.rs - -commit dbf69532636c4ac01de11a86e210f65922e0e0b3 -Author: David Jaffe -AuthorDate: Fri Dec 13 04:38:51 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 04:38:51 2019 -0800 - - add to EXT option - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/print_utils2.rs - -commit fe02f975649e65f8eb4958596a8a05c8d9f8dcae -Author: David Jaffe -AuthorDate: Fri Dec 13 04:10:41 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 13 04:10:41 2019 -0800 - - add ability to compare to external clonotypes - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/print_utils3.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/proc_args2.rs - -commit 3d09dabdadc3ad64e46c0f93e26d8cbe99455a9f -Author: David Jaffe -AuthorDate: Thu Dec 12 20:10:16 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 20:10:16 2019 -0800 - - add comment - -M lib/rust/enclone/src/bin/assess_clonotyping.rs - -commit bf5bcb36c15c558b0b586896bd9619dc90737b39 -Author: David Jaffe -AuthorDate: Thu Dec 12 20:05:37 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 20:05:37 2019 -0800 - - tool to assess clonotyping - -A 
lib/rust/enclone/src/bin/assess_clonotyping.rs - -commit dec2a7cf741d540a8d5db90503643fde47519f75 -Author: David Jaffe -AuthorDate: Thu Dec 12 13:53:25 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 13:53:25 2019 -0800 - - update - -M lib/rust/enclone/README.md - -commit 190d1617fdc2ed2db38062044664424b19d5e7e3 -Author: David Jaffe -AuthorDate: Thu Dec 12 13:50:31 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 13:50:31 2019 -0800 - - update README.md - -M lib/rust/enclone/README.md - -commit e9086978ecfe675792498cfb940982d886a924d8 -Author: David Jaffe -AuthorDate: Thu Dec 12 13:48:16 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 13:48:16 2019 -0800 - - update README.md - -M lib/rust/enclone/README.md - -commit 118673f97dc3eb1394c33733bb38fb46155d5353 -Author: David Jaffe -AuthorDate: Thu Dec 12 13:45:59 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 13:45:59 2019 -0800 - - update README.md - -M lib/rust/enclone/README.md - -commit 918ac08f34a3ff9f54a1b7968b6e8b926a4042bc -Author: David Jaffe -AuthorDate: Thu Dec 12 13:42:44 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 13:42:44 2019 -0800 - - update README.md - -M lib/rust/enclone/README.md - -commit 824c60f828d8bfa1efe6b5cb613c4a12ee43e5f5 -Author: David Jaffe -AuthorDate: Thu Dec 12 13:40:01 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 13:40:01 2019 -0800 - - update README.md - -M lib/rust/enclone/README.md - -commit 067e295c9d1eb6ea2749fbfcd82487bfca7fffb3 -Author: David Jaffe -AuthorDate: Thu Dec 12 13:35:32 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 13:35:32 2019 -0800 - - start of README.md - -A lib/rust/enclone/README.md - -commit 9a4638eaefe7c70b7c76f1c4b49a75334ed0ad91 -Author: David Jaffe -AuthorDate: Thu Dec 12 13:16:17 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 13:16:17 2019 -0800 - - reduce file size - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/proc_args.rs -A lib/rust/enclone/src/proc_args3.rs - -commit 
232dc8e8b72655d4d8f13d8114879c5075bcf772 -Author: David Jaffe -AuthorDate: Thu Dec 12 13:01:54 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 13:01:54 2019 -0800 - - reduce file size - -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/proc_args2.rs - -commit 8f052db3557888e0a409bf76d57ad0313438f7c1 -Author: David Jaffe -AuthorDate: Thu Dec 12 11:35:51 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 11:35:51 2019 -0800 - - don't assert if command line is garbage - -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/proc_args2.rs - -commit 3a91fceb8e571076e2caaaae3cfede52a258c98a -Author: David Jaffe -AuthorDate: Thu Dec 12 11:27:18 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 11:27:18 2019 -0800 - - forgot to add this - -A lib/rust/enclone/test/inputs/meta_test11 - -commit 4e15a815ad1faff08bf9a5993f7bdef5ab6703ac -Author: David Jaffe -AuthorDate: Thu Dec 12 11:24:17 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 11:24:17 2019 -0800 - - add a test and fix a small problem with another one - -M lib/rust/enclone/test/inputs/enclone_test10_output -A lib/rust/enclone/test/inputs/enclone_test11_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 0d452fbd655555e0d6b45800554c2a32881910b1 -Author: David Jaffe -AuthorDate: Thu Dec 12 08:29:04 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 08:29:04 2019 -0800 - - remove experimental link - -D lib/rust/enclone/alink - -commit 165d9d89b369c5ab50fc9066ffc5ce7c31a0827c -Author: David Jaffe -AuthorDate: Thu Dec 12 08:26:10 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 08:26:10 2019 -0800 - - testing link - -A lib/rust/enclone/alink - -commit 4010c56f66c2fcabd7c50638bcb67fb858230bf2 -Author: David Jaffe -AuthorDate: Thu Dec 12 07:39:49 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 07:39:49 2019 -0800 - - fix bug in printing of donors - -M lib/rust/enclone/src/print_utils2.rs - -commit 527c1029078208dce0a9c54a3a9dc03be826e099 -Author: David 
Jaffe -AuthorDate: Thu Dec 12 07:34:14 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 07:34:14 2019 -0800 - - fix another META bug - -M lib/rust/enclone/src/proc_args.rs - -commit 4402d77dbfc09851b43e3dc62e0e6417b430d9e8 -Author: David Jaffe -AuthorDate: Thu Dec 12 07:28:08 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 07:28:08 2019 -0800 - - fix bug - -M lib/rust/enclone/src/proc_args.rs - -commit 34a95b11bf618e1a089d0e54b2e30647f61e8bbc -Author: David Jaffe -AuthorDate: Thu Dec 12 07:22:45 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 07:22:45 2019 -0800 - - nicifications for META - -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/proc_args.rs - -commit c3d66d5cfc82ad88c26a816dde2d9b5d5a575fa5 -Author: David Jaffe -AuthorDate: Thu Dec 12 07:15:14 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 07:15:14 2019 -0800 - - fix bugs in META - -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/proc_args2.rs - -commit eb847a923e009895a09d92f430d4a5958d6b331c -Author: David Jaffe -AuthorDate: Thu Dec 12 06:53:09 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 06:53:09 2019 -0800 - - clarify error message - -M lib/rust/enclone/src/proc_args2.rs - -commit a28e7693f002c3a2adb1265197b6b28683bd1ec1 -Author: David Jaffe -AuthorDate: Thu Dec 12 06:48:08 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 06:48:08 2019 -0800 - - fix bug in reading input - -M lib/rust/enclone/src/proc_args.rs - -commit 844f719201de49580f2c73eaa767363dbf628e4c -Author: David Jaffe -AuthorDate: Thu Dec 12 06:42:00 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 06:42:00 2019 -0800 - - cosmetic change to error messages - -M lib/rust/enclone/src/proc_args.rs - -commit b87e55905940a8e8d052405b1ccd77ac01db590c -Author: David Jaffe -AuthorDate: Thu Dec 12 05:56:48 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 05:56:48 2019 -0800 - - bunch of fixes related to gex - -M lib/rust/enclone/src/defs.rs -M 
lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/print_utils3.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/proc_args2.rs - -commit 0804eea9cb292bd12b1506c36fd6c7348653aff2 -Author: David Jaffe -AuthorDate: Thu Dec 12 04:46:41 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 04:46:41 2019 -0800 - - reduce file size - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/proc_args2.rs - -commit 40080c854b75a5bcd06502ae74a4ce894f432f98 -Author: David Jaffe -AuthorDate: Thu Dec 12 04:33:50 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 04:33:50 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/proc_args.rs - -commit 20ca20c4546372e4c339e8c01ae2e02736ee5183 -Author: David Jaffe -AuthorDate: Thu Dec 12 04:29:39 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 04:29:39 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/proc_args.rs - -commit 8f5bb43493a92888cc05ab766403de07e6e5feca -Author: David Jaffe -AuthorDate: Thu Dec 12 04:24:45 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 04:24:45 2019 -0800 - - git rid of some junk - -M lib/rust/enclone/src/proc_args.rs - -commit 3a100c6e1ee5ac1a187125836378e2dd51a25112 -Author: David Jaffe -AuthorDate: Thu Dec 12 04:11:12 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 04:11:12 2019 -0800 - - delete unused variable - -M lib/rust/enclone/src/proc_args.rs - -commit e0302e9adfb9b4927dab8f9baa4fe8511bea4bbb -Author: David Jaffe -AuthorDate: Thu Dec 12 04:04:48 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 04:04:48 2019 -0800 - - test for internal run - -M lib/rust/enclone/src/proc_args.rs - -commit ffb5d9dd1cbba1db3448245a0bfab68a852362c8 -Author: David Jaffe -AuthorDate: Thu Dec 12 03:53:44 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 12 03:53:44 2019 -0800 - - fix bug in input parsing - -M 
lib/rust/enclone/src/README -M lib/rust/enclone/src/proc_args.rs - -commit 61f70acc443680328f0a9f76b5baa3961cb4e6a0 -Author: David Jaffe -AuthorDate: Wed Dec 11 20:19:37 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 20:19:37 2019 -0800 - - tweaks to "enclone help input" - -M lib/rust/enclone/src/help2.rs - -commit 4295a6e9e1761fb8382609af2a1edce6dc173385 -Author: David Jaffe -AuthorDate: Wed Dec 11 20:08:40 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 20:08:40 2019 -0800 - - fix bug - -M lib/rust/enclone/src/proc_args.rs - -commit 00d815737ecc4628a9ad4fa944329bc5c69a2c3b -Author: David Jaffe -AuthorDate: Wed Dec 11 19:27:34 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 19:27:34 2019 -0800 - - improvements for enclone help example1 - -M lib/rust/enclone/src/help1.rs - -commit 8b24b5780ea2803416a10ee72faebf76b9955d8a -Author: David Jaffe -AuthorDate: Wed Dec 11 19:08:10 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 19:08:10 2019 -0800 - - display version number - -M lib/rust/enclone/src/help1.rs - -commit 89636b45eaea1306a1fba84221064771df7625be -Author: David Jaffe -AuthorDate: Wed Dec 11 17:54:10 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 17:54:10 2019 -0800 - - add emphasis - -M lib/rust/enclone/src/help1.rs - -commit 71529084a78d0d73aa388ec196f511f4645a8f78 -Author: David Jaffe -AuthorDate: Wed Dec 11 15:53:52 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 15:53:52 2019 -0800 - - parse GEX in parallel to TCR or BCR - -M lib/rust/enclone/src/proc_args.rs - -commit 827dff7fe8712a7f274177dd672b8c32508df073 -Author: David Jaffe -AuthorDate: Wed Dec 11 15:12:41 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 15:12:41 2019 -0800 - - add an inconsistency check - -M lib/rust/enclone/src/main_build_immcantation_inputs.rs - -commit 588780d26de1688819c87b3e489c9707f97cba0e -Author: David Jaffe -AuthorDate: Wed Dec 11 14:04:28 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 14:04:28 2019 -0800 - - 
META: track gene expression path - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/proc_args.rs - -commit 2fce10074294d1d70bc9ea673dc0ad71c45595e1 -Author: David Jaffe -AuthorDate: Wed Dec 11 13:43:13 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 13:43:13 2019 -0800 - - add some argument sanity checks - -M lib/rust/enclone/src/proc_args.rs - -commit a50c328c31415d634a82840702bbd5423dba5804 -Author: David Jaffe -AuthorDate: Wed Dec 11 13:30:33 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 13:30:33 2019 -0800 - - delete some dead code - -M lib/rust/enclone/src/proc_args.rs - -commit ea3d7a51226d34a88533581383986a154342edeb -Author: David Jaffe -AuthorDate: Wed Dec 11 13:13:31 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 13:13:31 2019 -0800 - - mention connection to CellRanger and Loupe - -M lib/rust/enclone/src/help1.rs - -commit 9d7d79927d427dc3f0bd066a7f07cc87886bc1e6 -Author: David Jaffe -AuthorDate: Wed Dec 11 07:25:54 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 07:25:54 2019 -0800 - - set defaults for sample and donor under META - -M lib/rust/enclone/src/proc_args.rs - -commit 2bed2c1a07af92d1435c8741a6f25881bf86fb8d -Author: David Jaffe -AuthorDate: Wed Dec 11 07:17:12 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 11 07:17:12 2019 -0800 - - extensive changes for META under "enclone help input" - -M lib/rust/enclone/src/help2.rs - -commit ff0c723c890bdcda1f983ffbb50f8cafb7237f43 -Author: David Jaffe -AuthorDate: Tue Dec 10 19:18:19 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 10 19:18:19 2019 -0800 - - more help text clarifications - -M lib/rust/enclone/src/help1.rs - -commit fe525a0b8fb629aa0ebe1d56c8c6cd2998075585 -Author: David Jaffe -AuthorDate: Tue Dec 10 19:15:42 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 10 19:15:42 2019 -0800 - - clarify one thing - -M lib/rust/enclone/src/help1.rs - -commit 2a317dfd2e260958bc9f454304594d2fe8185b80 -Author: David Jaffe -AuthorDate: Tue Dec 10 
19:08:17 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 10 19:08:17 2019 -0800 - - add more context for example1 - -M lib/rust/enclone/src/help1.rs - -commit 85d556f32e3d8dd2cb1973007dcd98518134430e -Author: David Jaffe -AuthorDate: Tue Dec 10 16:34:22 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 10 16:34:22 2019 -0800 - - partial implementation of META - -M lib/rust/enclone/src/proc_args.rs - -commit aa095143d25052897efa4bd996b6a7f50d9d665c -Author: David Jaffe -AuthorDate: Tue Dec 10 10:29:23 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 10 10:29:23 2019 -0800 - - implementation of META in progress, com'ed out - -M lib/rust/enclone/src/proc_args.rs - -commit 1e3fac08b95077a0bb539686d4f712961efb0cf4 -Author: David Jaffe -AuthorDate: Tue Dec 10 07:24:33 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 10 07:24:33 2019 -0800 - - add clarification to "enclone help input" - -M lib/rust/enclone/src/help2.rs - -commit 20a03682c22db4ca983e61acd51c962abae82e13 -Author: David Jaffe -AuthorDate: Tue Dec 10 07:22:41 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 10 07:22:41 2019 -0800 - - detect unrecognized arguments - -M lib/rust/enclone/src/proc_args.rs - -commit bf1329750580bdc3218171bd99b05edd98e9c541 -Author: David Jaffe -AuthorDate: Tue Dec 10 07:13:48 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 10 07:13:48 2019 -0800 - - clarification for "enclone help input" - -M lib/rust/enclone/src/help2.rs - -commit 48076e5f8dd835c19219b50d1bcb7850afe4f71b -Author: David Jaffe -AuthorDate: Tue Dec 10 07:12:19 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 10 07:12:19 2019 -0800 - - simplify "enclone help input" - -M lib/rust/enclone/src/help2.rs - -commit 7b7eb101d8ca1f60f6a74583589cf655ce011209 -Author: David Jaffe -AuthorDate: Mon Dec 9 14:55:56 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 14:55:56 2019 -0800 - - correct test result - -M lib/rust/enclone/test/inputs/enclone_test2_output - -commit 
46550d698e8f11a3cfb5be46044316bc19b153c7 -Author: David Jaffe -AuthorDate: Mon Dec 9 14:15:28 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 14:15:28 2019 -0800 - - add comment to error message - -M lib/rust/enclone/tests/enclone_test.rs - -commit 70158c5af62f0eb4dc9328470b80c61e9083bcc8 -Author: David Jaffe -AuthorDate: Mon Dec 9 14:12:49 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 14:12:49 2019 -0800 - - fix a test - -M lib/rust/enclone/tests/enclone_test.rs - -commit 6f597998d4a2f8ff3d57541f3c526e2670a87304 -Author: David Jaffe -AuthorDate: Mon Dec 9 12:49:15 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 12:49:15 2019 -0800 - - logging correction - -M lib/rust/enclone/src/proc_args.rs - -commit 4b87a84ba40c613acce8a5d0bc2e558bf9168059 -Author: David Jaffe -AuthorDate: Mon Dec 9 12:46:44 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 12:46:44 2019 -0800 - - comments - -M lib/rust/enclone/src/print_utils2.rs - -commit fbb514bfcf3207498e51828842def25b0bf172af -Author: David Jaffe -AuthorDate: Mon Dec 9 10:39:24 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 10:39:24 2019 -0800 - - improve fail messaging - -M lib/rust/enclone/tests/enclone_test.rs - -commit d80f2758be2c83fbefaec018b708510df59b1322 -Author: David Jaffe -AuthorDate: Mon Dec 9 10:18:27 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 10:18:27 2019 -0800 - - correction to testing code - -M lib/rust/enclone/tests/enclone_test.rs - -commit da73107e4c3473a3d7256656b376a8701c4494d2 -Author: David Jaffe -AuthorDate: Mon Dec 9 07:51:04 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 07:51:04 2019 -0800 - - fix invocation of example1 in help - -M lib/rust/enclone/src/help1.rs - -commit 08a702c7acd991627e3470ac169d2f136f121c94 -Author: David Jaffe -AuthorDate: Mon Dec 9 07:11:03 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 07:11:03 2019 -0800 - - fix bug - -M lib/rust/enclone/src/proc_args.rs - -commit cc7136839bce64d2b36378e736fc93157391cb8c 
-Author: David Jaffe -AuthorDate: Mon Dec 9 07:02:05 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 07:02:05 2019 -0800 - - delete dead code - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/proc_args.rs - -commit bc75b176704f8258beb7c6bd2de7649da01783a9 -Author: David Jaffe -AuthorDate: Mon Dec 9 06:58:41 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 06:58:41 2019 -0800 - - implement first input scheme - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/enclone.test -M lib/rust/enclone/src/enclone.test2 -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/tests/enclone_test.rs - -commit e4a6c29f91ef2217d98a5c34845763d7bd215cd6 -Author: David Jaffe -AuthorDate: Mon Dec 9 05:46:39 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 05:46:39 2019 -0800 - - more changes to new input scheme, com'ed out - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/proc_args.rs - -commit 5c7f93374d00ba28d8c1ef3f1baf5a4a324ba606 -Author: David Jaffe -AuthorDate: Mon Dec 9 05:29:50 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 9 05:29:50 2019 -0800 - - further work on new input scheme, still com'ed out - -M lib/rust/enclone/src/proc_args.rs - -commit 75e75b2531b4088e8b0150857c1edc87a0b90fb8 -Author: David Jaffe -AuthorDate: Sun Dec 8 08:44:53 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 08:44:53 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/proc_args.rs - -commit 2fdc10e5ae11df52df2b712dc701f868f38ed4d8 -Author: David Jaffe -AuthorDate: Sun Dec 8 07:20:03 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 07:20:03 2019 -0800 - - describe dependencies - -M lib/rust/enclone/src/proc_args.rs - -commit d8371d363bf2e21d9eb8e6b5b6d306c151dc4702 -Author: David Jaffe -AuthorDate: Sun Dec 8 07:08:30 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 07:08:30 2019 -0800 - - characterize dependencies - -M lib/rust/enclone/src/proc_args.rs - -commit 
194142aa54c226beb6e283322ede4cb9c74fec1b -Author: David Jaffe -AuthorDate: Sun Dec 8 07:03:17 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 07:03:17 2019 -0800 - - add to enclone help example1 - -M lib/rust/enclone/src/help1.rs - -commit a6539c80e65b2c64ca0ada809e451ad03e8a197a -Author: David Jaffe -AuthorDate: Sun Dec 8 06:46:53 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 06:46:53 2019 -0800 - - update test results - -M lib/rust/enclone/test/inputs/enclone_test10_output -M lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/test/inputs/enclone_test3_output -M lib/rust/enclone/test/inputs/enclone_test7_output -M lib/rust/enclone/test/inputs/enclone_test9_output - -commit 1ea8b76d25bba48565a4b172d971fde5ba2dcf17 -Author: David Jaffe -AuthorDate: Sun Dec 8 06:42:00 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 06:42:00 2019 -0800 - - update example1 - -M lib/rust/enclone/src/example1 -M lib/rust/enclone/src/help1.rs - -commit 8277d5156ea65e4814c7940d336bc703fab0ebaf -Author: David Jaffe -AuthorDate: Sun Dec 8 06:30:37 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 06:30:37 2019 -0800 - - fix to logging in CON option - -M lib/rust/enclone/src/allele.rs - -commit 11ce61fc9bcca3e1c6264748c95227ce3675006b -Author: David Jaffe -AuthorDate: Sun Dec 8 06:24:48 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 06:24:48 2019 -0800 - - display alt allele - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/misc2.rs -M lib/rust/enclone/src/print_utils3.rs - -commit 32e951ff3d22699c14055feaf531ada8250185eb -Author: David Jaffe -AuthorDate: Sun Dec 8 06:00:46 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 06:00:46 2019 -0800 - - sort alt_refs - -M lib/rust/enclone/src/allele.rs - -commit d4f8dec50a688703006d118c7b6107003acbca0f -Author: David Jaffe -AuthorDate: Sun Dec 8 05:25:51 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 05:25:51 2019 -0800 - - more on new input 
approach but still com'ed out - -M lib/rust/enclone/src/proc_args.rs - -commit ac2f8bb9fd47f988bf9dd34e22a08fa0fd0ab1b6 -Author: David Jaffe -AuthorDate: Sun Dec 8 05:22:18 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 05:22:18 2019 -0800 - - add to faq - -M lib/rust/enclone/src/help4.rs - -commit 5466784da748fd16a85926c540c43e1eabf9bb29 -Author: David Jaffe -AuthorDate: Sun Dec 8 04:48:19 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 8 04:48:19 2019 -0800 - - more on new input syntax but still com'ed out - -M lib/rust/enclone/src/proc_args.rs - -commit 84e992cc8dd3833a43f89acdaccbff080d2517b3 -Author: David Jaffe -AuthorDate: Sat Dec 7 08:53:56 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 7 08:53:56 2019 -0800 - - add and use boxing capability - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help_utils.rs - -commit c3ce04893d3bdecc76fe3caaf7c1ddc25608f1ed -Author: David Jaffe -AuthorDate: Sat Dec 7 08:27:10 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 7 08:27:10 2019 -0800 - - add to enclone help how - -M lib/rust/enclone/src/help1.rs - -commit f43fc431a1cfeb7bb01069d4dc98c5859e00a948 -Author: David Jaffe -AuthorDate: Sat Dec 7 07:13:44 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 7 07:13:44 2019 -0800 - - now there is a "how" help page - -M lib/rust/enclone/src/help1.rs - -commit f3e80a7740445f25326a3863826a22e3b7885523 -Author: David Jaffe -AuthorDate: Sat Dec 7 06:09:39 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 7 06:09:39 2019 -0800 - - improve enclone help command - -M lib/rust/enclone/src/help1.rs - -commit a78bfb0c9aa5c1c46c8ae1f557f776b3e90983fb -Author: David Jaffe -AuthorDate: Sat Dec 7 06:03:38 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 7 06:03:38 2019 -0800 - - clarify enclone help command - -M lib/rust/enclone/src/help1.rs - -commit 1a6a8d81ba7b849d45393c32b7d95d8a8dcc990e -Author: David Jaffe -AuthorDate: Sat Dec 7 05:50:44 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Dec 7 
05:50:44 2019 -0800 - - new input scheme in progress, com'ed out - -M lib/rust/enclone/src/proc_args.rs - -commit e85f446fa449ab4ed051c6818a2da3255fd262da -Author: David Jaffe -AuthorDate: Fri Dec 6 17:06:34 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 17:06:34 2019 -0800 - - fix some help titles - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/help4.rs - -commit c6ce3264f457a6a8d3b4a23e31f0eede63643bcc -Author: David Jaffe -AuthorDate: Fri Dec 6 17:01:11 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 17:01:11 2019 -0800 - - a little refactoring - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help_utils.rs - -commit 8a2584f4d8ffa6764d3c8e1c97148b1fb1573e82 -Author: David Jaffe -AuthorDate: Fri Dec 6 16:49:39 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 16:49:39 2019 -0800 - - split file - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/misc1.rs -M lib/rust/enclone/src/misc2.rs -A lib/rust/enclone/src/misc3.rs - -commit ad264f390a6999c47ea4fd2a6c4479f06e516a8f -Author: David Jaffe -AuthorDate: Fri Dec 6 16:36:20 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 16:36:20 2019 -0800 - - nomenclature fix for CSV, column ==> field - -M lib/rust/enclone/src/help2.rs - -commit 2a9dd19cdb4eee90d47151a8a7246195cbbcf5c0 -Author: David Jaffe -AuthorDate: Fri Dec 6 16:26:44 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 16:26:44 2019 -0800 - - nicify enclone help input_tech - -M lib/rust/enclone/src/help2.rs - -commit fba1d14d72eb5848677ef2d24a9b6c97803182ec -Author: David Jaffe -AuthorDate: Fri Dec 6 14:08:15 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 14:08:15 2019 -0800 - - small fix - -M lib/rust/enclone/src/help1.rs - -commit ba41a4388b8deb0e9c48f6ef3ac359fdcee6bf75 -Author: David Jaffe -AuthorDate: Fri Dec 6 13:12:16 2019 -0800 -Commit: David Jaffe 
-CommitDate: Fri Dec 6 13:12:16 2019 -0800 - - a bunch of help tweaks - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help3.rs - -commit d7d3f5908fe0dc8e69b6e17aa0c1f62599450427 -Author: David Jaffe -AuthorDate: Fri Dec 6 11:11:14 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 11:11:14 2019 -0800 - - make enclone help example1 work with PLAIN - -M lib/rust/enclone/src/help1.rs - -commit 683ae00a486e05a705aec272cead8f5c6453df42 -Author: David Jaffe -AuthorDate: Fri Dec 6 11:01:07 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 11:01:07 2019 -0800 - - make "enclone" the same as "enclone help main" - -M lib/rust/enclone/src/help1.rs - -commit 7e25904c33e03298f2c8567e3aa26118542968eb -Author: David Jaffe -AuthorDate: Fri Dec 6 10:31:53 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 10:31:53 2019 -0800 - - fix a problem with respecting PLAIN - -M lib/rust/enclone/src/help4.rs - -commit ae5b08cece73872407988bfeaf09c0c96bf3ef3f -Author: David Jaffe -AuthorDate: Fri Dec 6 10:10:49 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 10:10:49 2019 -0800 - - fix some cases where we ignored PLAIN - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/help_utils.rs - -commit d67ac75ebba2980d479c1aa45e91fbc9db0941dd -Author: David Jaffe -AuthorDate: Fri Dec 6 08:29:03 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 08:29:03 2019 -0800 - - fix to last commit - -M lib/rust/enclone/src/help4.rs - -commit 304989118e5e46059ded2d88673d0fd58e52d125 -Author: David Jaffe -AuthorDate: Fri Dec 6 08:22:35 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 08:22:35 2019 -0800 - - bold a header - -M lib/rust/enclone/src/help4.rs - -commit 3adca488fe380f89d55532b303737a919621571a -Author: David Jaffe -AuthorDate: Fri Dec 6 08:16:13 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 08:16:13 2019 -0800 - - 
turn on pretty tracing for help - -M lib/rust/enclone/src/help1.rs - -commit 5bcaf198f92b9bf5ab9092365153ff18d0e993b6 -Author: David Jaffe -AuthorDate: Fri Dec 6 07:34:05 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 07:34:05 2019 -0800 - - tweak setup page - -M lib/rust/enclone/src/help1.rs - -commit ee3b75926830a995d18c57a8f2b6997ebacb88e6 -Author: David Jaffe -AuthorDate: Fri Dec 6 07:31:36 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 07:31:36 2019 -0800 - - add to setup page - -M lib/rust/enclone/src/help1.rs - -commit e6ab3e122ce0e7a41c61e88ae9f1ca71401fad54 -Author: David Jaffe -AuthorDate: Fri Dec 6 07:13:50 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 07:13:50 2019 -0800 - - cosmetic - -M lib/rust/enclone/src/help1.rs - -commit e69af606fe1fe42bf33d68910ff2d4a1b728af8e -Author: David Jaffe -AuthorDate: Fri Dec 6 07:11:16 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 07:11:16 2019 -0800 - - add setup help - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help_utils.rs - -commit 5fb4bee88801ff2b262866ea93cafdc91de3cbb2 -Author: David Jaffe -AuthorDate: Fri Dec 6 05:43:22 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 05:43:22 2019 -0800 - - fix a couple doc typos - -M lib/rust/enclone/src/help1.rs - -commit 9b089b4a98592be4ce86354839b0866d12048871 -Author: David Jaffe -AuthorDate: Fri Dec 6 04:58:43 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Dec 6 04:58:43 2019 -0800 - - add to faq - -M lib/rust/enclone/src/help4.rs - -commit 6fce2e7878c33c6a34186c25b3f82404d002885a -Author: David Jaffe -AuthorDate: Thu Dec 5 15:15:16 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 15:15:16 2019 -0800 - - put enclone helps in correct order - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/help4.rs - -commit 25a2f04501074294e9fe91d01a3a9de4a4435f56 -Author: David Jaffe -AuthorDate: Thu Dec 5 14:06:00 2019 -0800 -Commit: David Jaffe 
-CommitDate: Thu Dec 5 14:06:00 2019 -0800 - - tweaks to input help - -M lib/rust/enclone/src/help1.rs - -commit ddcdc3f13aeee7324fb1eba7b5b888811aa67262 -Author: David Jaffe -AuthorDate: Thu Dec 5 13:25:34 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 13:25:34 2019 -0800 - - metainfo ==> metadata - -M lib/rust/enclone/src/help1.rs - -commit bade6a4e65b909609a00152070de7c9ed5a1bbef -Author: David Jaffe -AuthorDate: Thu Dec 5 13:24:28 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 13:24:28 2019 -0800 - - reorder some help - -M lib/rust/enclone/src/help1.rs - -commit 49dfdab42100ab26906ecdf1e09987d0c4bc1b99 -Author: David Jaffe -AuthorDate: Thu Dec 5 11:55:36 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 11:55:36 2019 -0800 - - explain how to search - -M lib/rust/enclone/src/help1.rs - -commit caf55f72c13365d8622c81a2ff3b1acacb190917 -Author: David Jaffe -AuthorDate: Thu Dec 5 11:53:15 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 11:53:15 2019 -0800 - - improve enclone help all - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/help_utils.rs - -commit 84e03bdcd45eed9cdb6cdd50cdb9469162b5f0f9 -Author: David Jaffe -AuthorDate: Thu Dec 5 11:13:47 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 11:13:47 2019 -0800 - - first stab at enclone help all - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/help_utils.rs - -commit 068484b6f02efd7c0e267e8d2bfd8796b36d3efd -Author: David Jaffe -AuthorDate: Thu Dec 5 10:45:46 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 10:45:46 2019 -0800 - - rows in help is now Vec> and not of &str - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/help4.rs - -commit 3077fcbeb294a716845236ef52e972400afb570f 
-Author: David Jaffe -AuthorDate: Thu Dec 5 06:26:02 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 06:26:02 2019 -0800 - - split file - -M lib/rust/enclone/src/help3.rs -A lib/rust/enclone/src/help4.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/misc1.rs -M lib/rust/enclone/src/misc2.rs -M lib/rust/enclone/src/proc_args2.rs - -commit 3481943f71bd4d8bcb9588828e97f33a130f0585 -Author: David Jaffe -AuthorDate: Thu Dec 5 06:14:36 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 06:14:36 2019 -0800 - - factor out join_core - -M lib/rust/enclone/src/join.rs -A lib/rust/enclone/src/join_core.rs -A lib/rust/enclone/src/join_utils.rs -M lib/rust/enclone/src/lib.rs - -commit 0235c900e00c363f2fe25d5dec877117769b8039 -Author: David Jaffe -AuthorDate: Thu Dec 5 05:59:59 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 05:59:59 2019 -0800 - - enumerate dependencies - -M lib/rust/enclone/src/join.rs - -commit 98344b726f7c7383776ba05cad0e508ef46046b1 -Author: David Jaffe -AuthorDate: Thu Dec 5 05:49:06 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 05:49:06 2019 -0800 - - make qual filtering optional and document - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils4.rs -M lib/rust/enclone/src/proc_args.rs - -commit d918bd43136c337dfeb20da515fab9af3f3aaf1d -Author: David Jaffe -AuthorDate: Thu Dec 5 05:30:58 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 05:30:58 2019 -0800 - - factor some code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils4.rs - -commit 9e4366ec46592db31c1807b5a23069762ad60038 -Author: David Jaffe -AuthorDate: Thu Dec 5 05:15:57 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 05:15:57 2019 -0800 - - delete an unneeded cautionary test - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 77a0a073a2a908530311490b3d9a54e54e9cf5c5 -Author: David Jaffe 
-AuthorDate: Thu Dec 5 05:10:31 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 05:10:31 2019 -0800 - - delete dead code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit cbeab476cab57f61eb64f486a44b509b1238b957 -Author: David Jaffe -AuthorDate: Thu Dec 5 05:08:36 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 05:08:36 2019 -0800 - - fix the buggy low quality deletion code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 26b976ca9c1ec19137820a7d8da1b629031d979d -Author: David Jaffe -AuthorDate: Thu Dec 5 04:22:42 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Dec 5 04:22:42 2019 -0800 - - characterize dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 7b6182cafccd47ffe13a2d393cb084b8085b10ed -Author: David Jaffe -AuthorDate: Wed Dec 4 16:28:04 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 16:28:04 2019 -0800 - - fix bug, although I'm not sure it changes anything - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 339326b895149ba7b2266976ed8e907ec9953a2d -Author: David Jaffe -AuthorDate: Wed Dec 4 16:18:23 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 16:18:23 2019 -0800 - - move some code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit b7eb87755df45f608cd5bee0ee5739df26fe8895 -Author: David Jaffe -AuthorDate: Wed Dec 4 16:14:06 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 16:14:06 2019 -0800 - - move some code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit a7755be1d7521649d63c082e5534bbb5982b429e -Author: David Jaffe -AuthorDate: Wed Dec 4 16:09:57 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 16:09:57 2019 -0800 - - move code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit bd5d01e1caa4bc592bdea1997fb42ac07fb66b2d -Author: David Jaffe -AuthorDate: Wed Dec 4 16:08:07 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 16:08:07 2019 -0800 - - a little refactoring to simplify logic - -M lib/rust/enclone/src/print_clonotypes.rs -M 
lib/rust/enclone/src/print_utils2.rs - -commit e0ab69db0f6a9b6678cc18679ea96703af54ebd7 -Author: David Jaffe -AuthorDate: Wed Dec 4 15:59:06 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 15:59:06 2019 -0800 - - move chunk of code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit e923a0b4b9c711da5007841b6eee428476329dd4 -Author: David Jaffe -AuthorDate: Wed Dec 4 15:43:20 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 15:43:20 2019 -0800 - - split file - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -A lib/rust/enclone/src/help3.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/proc_args2.rs - -commit fccdd343b8a45585a003a7ba9e5420329384c4ce -Author: David Jaffe -AuthorDate: Wed Dec 4 15:35:54 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 15:35:54 2019 -0800 - - factor code chunk out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils4.rs - -commit 8f99134c69f9b07d14db9dd7635ad11c81d7edcc -Author: David Jaffe -AuthorDate: Wed Dec 4 15:07:21 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 15:07:21 2019 -0800 - - lower top file sizes - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils3.rs -A lib/rust/enclone/src/print_utils4.rs - -commit f03f2d77bcf0d5091070a83079a0bec77d3d0c3b -Author: David Jaffe -AuthorDate: Wed Dec 4 14:47:19 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 14:47:19 2019 -0800 - - split large file - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/join.rs -A lib/rust/enclone/src/join2.rs -M lib/rust/enclone/src/lib.rs - -commit 1cb35d0c90fd9accbcfe7b8fcff49502f00e5647 -Author: David Jaffe -AuthorDate: Wed Dec 4 13:15:30 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 13:15:30 2019 -0800 - - add sample_id field to SampleInfo - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/proc_args.rs - -commit 
84772be91a2d9cf554906f4818dc898a0e5779e0 -Author: David Jaffe -AuthorDate: Wed Dec 4 11:45:52 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 11:45:52 2019 -0800 - - SampleInfo: add donor_id and rev doc - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/proc_args.rs - -commit 38bd66a515b3e09397517a2d38b2b8e076d802aa -Author: David Jaffe -AuthorDate: Wed Dec 4 11:18:59 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 11:18:59 2019 -0800 - - mark bug - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8e2db636461833e6491ae22f57d67a7996663746 -Author: David Jaffe -AuthorDate: Wed Dec 4 11:12:59 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 11:12:59 2019 -0800 - - split large file - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/main_enclone.rs -M lib/rust/enclone/src/proc_args.rs -A lib/rust/enclone/src/proc_args2.rs - -commit 1b2e1265061436eeba81643f9075a2a4cb7b1c71 -Author: David Jaffe -AuthorDate: Wed Dec 4 11:05:08 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 11:05:08 2019 -0800 - - nomenclature improvement - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/misc1.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/read_json.rs - -commit b46e27417fb120952a889a319954eff958554022 -Author: David Jaffe -AuthorDate: Wed Dec 4 11:00:44 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 11:00:44 2019 -0800 - - nomenclature correction - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/misc1.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils2.rs -M lib/rust/enclone/src/proc_args.rs - -commit 40fadc43d020b074f1aadef85cfcd2f547cd39e3 -Author: David Jaffe -AuthorDate: Wed Dec 4 10:56:02 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 10:56:02 2019 -0800 - - correction to doc - -M 
lib/rust/enclone/src/defs.rs - -commit 221edf1376930e03599c6d2014d7ae2317a4eb3d -Author: David Jaffe -AuthorDate: Wed Dec 4 10:53:35 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 10:53:35 2019 -0800 - - upgrade documentation of SampleInfo - -M lib/rust/enclone/src/defs.rs - -commit f9564f95405c7619f2a699680862fc170e6ada19 -Author: David Jaffe -AuthorDate: Wed Dec 4 09:12:05 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 09:12:05 2019 -0800 - - move chunk of code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils3.rs - -commit 0f61631695d5378b66b53f73c1d19d745423b76a -Author: David Jaffe -AuthorDate: Wed Dec 4 08:50:37 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 08:50:37 2019 -0800 - - remove unneeded variables - -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs - -commit c015c72378292292cd41ddd6bee4114f75bc2c9b -Author: David Jaffe -AuthorDate: Wed Dec 4 08:46:38 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 08:46:38 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils3.rs - -commit fb42da2c7cc9b50872fb02370b00d2dbe88517a6 -Author: David Jaffe -AuthorDate: Wed Dec 4 08:38:44 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 08:38:44 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils3.rs - -commit 86be3c46cf64b648bab08f9dfd809874545648a5 -Author: David Jaffe -AuthorDate: Wed Dec 4 08:29:52 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 08:29:52 2019 -0800 - - a little reorg - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 13e77923b724be40ef085d4898d15616fde11a19 -Author: David Jaffe -AuthorDate: Wed Dec 4 08:11:57 2019 -0800 -Commit: David Jaffe -CommitDate: 
Wed Dec 4 08:11:57 2019 -0800 - - kill unneeded variable - -M lib/rust/enclone/src/print_clonotypes.rs - -commit f23f91dbc3c359d3de9ff7d9bfc3dfd344c06eb7 -Author: David Jaffe -AuthorDate: Wed Dec 4 08:09:02 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 08:09:02 2019 -0800 - - kill unneeded variable - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs - -commit 25f47a5d144d29f061fc0e690d773a62ce76002f -Author: David Jaffe -AuthorDate: Wed Dec 4 08:06:40 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 08:06:40 2019 -0800 - - move some code out of print_clonotypes.rs - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils3.rs - -commit 4e6933c111de1738fccc82ebc25cbaaf358cdbb8 -Author: David Jaffe -AuthorDate: Wed Dec 4 08:00:40 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 08:00:40 2019 -0800 - - move some code, preparatory to another move - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8d41f6222e4c8f6ed4f50cb92a53733e0a8540d0 -Author: David Jaffe -AuthorDate: Wed Dec 4 07:53:31 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 07:53:31 2019 -0800 - - remove some unused variables - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d91bd2b01edc90305d5b8785d3ec9ce3d0be8cd1 -Author: David Jaffe -AuthorDate: Wed Dec 4 05:32:18 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 05:32:18 2019 -0800 - - tweak command line help - -M lib/rust/enclone/src/help1.rs - -commit 3122104a6e1610daee0381f2793bbcb6b793baa9 -Author: David Jaffe -AuthorDate: Wed Dec 4 05:25:07 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 05:25:07 2019 -0800 - - more help tweaks - -M lib/rust/enclone/src/help1.rs - -commit e49607b123ec0f00b26d0921c72faf87c5f78b29 -Author: David Jaffe -AuthorDate: Wed Dec 4 05:08:06 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 05:08:06 2019 -0800 - - tweak main help page - -M lib/rust/enclone/src/help1.rs - -commit 
b44f3a6211e3006d52dc810709fe7829bc2f8338 -Author: David Jaffe -AuthorDate: Wed Dec 4 05:05:11 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 05:05:11 2019 -0800 - - tweak main help page - -M lib/rust/enclone/src/help1.rs - -commit 129bb49671377615f9d9f7d12a8d8b41e7d27caa -Author: David Jaffe -AuthorDate: Wed Dec 4 05:02:41 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 05:02:41 2019 -0800 - - tweak mission statement - -M lib/rust/enclone/src/help1.rs - -commit ca621fd17eebf308640e8d49e0562f41f07d2d3a -Author: David Jaffe -AuthorDate: Wed Dec 4 04:52:34 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 04:52:34 2019 -0800 - - tweak main help page - -M lib/rust/enclone/src/help1.rs - -commit 84461ddf00c1fc564cea46f97b9aa76da49e3320 -Author: David Jaffe -AuthorDate: Wed Dec 4 04:41:58 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 04:41:58 2019 -0800 - - tweak main help page - -M lib/rust/enclone/src/help1.rs - -commit fcf4554efb926924a6e97b24e990f7ac4b17b6f8 -Author: David Jaffe -AuthorDate: Wed Dec 4 04:38:28 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 04:38:28 2019 -0800 - - tweak main help page - -M lib/rust/enclone/src/help1.rs - -commit a0b68690b4c4d74996baa45e1d277e43d8faa9ed -Author: David Jaffe -AuthorDate: Wed Dec 4 04:36:26 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 04:36:26 2019 -0800 - - tweak main help page - -M lib/rust/enclone/src/help1.rs - -commit 9066263378b206abec372503ef038253360d6d60 -Author: David Jaffe -AuthorDate: Wed Dec 4 04:28:23 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 04:28:23 2019 -0800 - - another tweak to main page - -M lib/rust/enclone/src/help1.rs - -commit 87b163508de32d616599240cd3a7cc248b1061c5 -Author: David Jaffe -AuthorDate: Wed Dec 4 04:24:31 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 04:24:31 2019 -0800 - - tweak main help page - -M lib/rust/enclone/src/help1.rs - -commit e8623cb685e7e93f10c702a9f86e9344b7c51e2d -Author: David Jaffe -AuthorDate: Wed 
Dec 4 03:35:43 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 03:35:43 2019 -0800 - - revamp input help, plus changes to glossary - -M lib/rust/enclone/src/help1.rs - -commit 75d1849946359f7884130620191086b7330e9313 -Author: David Jaffe -AuthorDate: Wed Dec 4 02:42:34 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Dec 4 02:42:34 2019 -0800 - - add to glossary - -M lib/rust/enclone/src/help1.rs - -commit 6a935d7795c858b7f3d76512b4b611ce552b02bd -Author: David Jaffe -AuthorDate: Tue Dec 3 15:41:28 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 15:41:28 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5704ba033ac1263214c7dab7b8beaaf4152701ea -Author: David Jaffe -AuthorDate: Tue Dec 3 15:39:14 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 15:39:14 2019 -0800 - - describe dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 53daadf4f4afc1be0234f1bee86ffaeff2d0951f -Author: David Jaffe -AuthorDate: Tue Dec 3 15:26:14 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 15:26:14 2019 -0800 - - purge some unneeded variables - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs - -commit cb84493805b567c9128ed1c4c8615ab2f6f4a044 -Author: David Jaffe -AuthorDate: Tue Dec 3 15:14:17 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 15:14:17 2019 -0800 - - add help for some previously undocumented options - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/proc_args.rs - -commit 60dbcb9401ddc0c7272959312a50529ec4300f51 -Author: David Jaffe -AuthorDate: Tue Dec 3 14:08:23 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 14:08:23 2019 -0800 - - formatting fix - -M lib/rust/enclone/src/help1.rs - -commit 2884bb723ba7b4ee45ac307610726dc36a03b98a -Author: David Jaffe -AuthorDate: Tue Dec 3 13:58:03 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 13:58:03 2019 -0800 - - split 
files to reduce sizes - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils2.rs -A lib/rust/enclone/src/print_utils3.rs - -commit c0f46c2dde4e9e540e7b6c85a2d24f74e118c7e7 -Author: David Jaffe -AuthorDate: Tue Dec 3 13:44:48 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 13:44:48 2019 -0800 - - factor more code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs - -commit 47effa2f2fded91ffc299cd136d858d1f0e53385 -Author: David Jaffe -AuthorDate: Tue Dec 3 10:14:38 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 10:14:38 2019 -0800 - - shrink the other main to one line - -M lib/rust/enclone/src/bin/build_immcantation_inputs.rs -M lib/rust/enclone/src/lib.rs -A lib/rust/enclone/src/main_build_immcantation_inputs.rs - -commit 3008dcc14cf12b62d78f054889a8cb8a9e7586ef -Author: David Jaffe -AuthorDate: Tue Dec 3 10:06:26 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 10:06:26 2019 -0800 - - move contents of main into a function - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/lib.rs -A lib/rust/enclone/src/main_enclone.rs - -commit ea240a0e3cbbef09eb1ce9b25a6db05006911feb -Author: David Jaffe -AuthorDate: Tue Dec 3 07:12:29 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 07:12:29 2019 -0800 - - describe dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 2ce995802d0bbc63e56ff366e20d8855e27ec11c -Author: David Jaffe -AuthorDate: Tue Dec 3 07:07:31 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 07:07:31 2019 -0800 - - make print(...) 
respect PLAIN - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help_utils.rs - -commit 0386e621a0b9fb3c088f8cd72bf7c74e4533a4e3 -Author: David Jaffe -AuthorDate: Tue Dec 3 05:29:16 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 05:29:16 2019 -0800 - - tweak main help menu - -M lib/rust/enclone/src/help1.rs - -commit 50975503d8d6fe82c1a6810f330d81f4cdc40818 -Author: David Jaffe -AuthorDate: Tue Dec 3 05:27:24 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 05:27:24 2019 -0800 - - add note to main help page - -M lib/rust/enclone/src/help1.rs - -commit 665143c06b68798a1d11405663d406990da62c4b -Author: David Jaffe -AuthorDate: Tue Dec 3 05:24:12 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 05:24:12 2019 -0800 - - add/use \boldred{...} for print(...) - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help_utils.rs - -commit 39ea5f71fc95141142807a0a8a5435c60e35f7e2 -Author: David Jaffe -AuthorDate: Tue Dec 3 05:12:13 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 05:12:13 2019 -0800 - - move more code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs - -commit bf734773209ac3bde78022d0909870472e8c5e6b -Author: David Jaffe -AuthorDate: Tue Dec 3 05:01:25 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 05:01:25 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 461b9fb391b03b46cba1ae7013d1c6efce083ac8 -Author: David Jaffe -AuthorDate: Tue Dec 3 04:58:37 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 04:58:37 2019 -0800 - - factor more code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs - -commit aa7f6330e358eeb39e59d9db7c8a75946dc46cac -Author: David Jaffe -AuthorDate: Tue Dec 3 04:50:40 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 04:50:40 2019 -0800 - - update dependency lists - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 
03114cc09bd098f306ac1f66dc7faea81456dc80 -Author: David Jaffe -AuthorDate: Tue Dec 3 04:49:32 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 04:49:32 2019 -0800 - - reorg to move more info into ColInfo data structure - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs - -commit 838e9601032311d86309bcd4f1fa6b5c601f40b8 -Author: David Jaffe -AuthorDate: Tue Dec 3 04:39:00 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 04:39:00 2019 -0800 - - move block of code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit fe4a519555b471cf029a6452a0cb6e9c4183c984 -Author: David Jaffe -AuthorDate: Tue Dec 3 04:33:35 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 04:33:35 2019 -0800 - - rename a data structure - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/print_utils2.rs - -commit 198d102b8118948da10a07f52b35a35752c42a38 -Author: David Jaffe -AuthorDate: Tue Dec 3 04:30:43 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 04:30:43 2019 -0800 - - update dependency list - -M lib/rust/enclone/src/print_clonotypes.rs - -commit b6ecc5a0c4bf2f7a4b9080f37ae011d8cde14514 -Author: David Jaffe -AuthorDate: Tue Dec 3 04:28:37 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 04:28:37 2019 -0800 - - simplify reference row printing - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 91cf7762d5f005baeea90a6ebf88d42fc805d75a -Author: David Jaffe -AuthorDate: Tue Dec 3 03:54:39 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 03:54:39 2019 -0800 - - slightly reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 618620ec6a8f5282fabd0748107c2f8bb8265f6e -Author: David Jaffe -AuthorDate: Tue Dec 3 03:51:55 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 03:51:55 2019 -0800 - - describe dependencies - -M 
lib/rust/enclone/src/print_clonotypes.rs - -commit 298a22a9c51920e1493df37c6f8c60d294104725 -Author: David Jaffe -AuthorDate: Tue Dec 3 03:44:55 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 03:44:55 2019 -0800 - - more on enclone help input - -M lib/rust/enclone/src/help1.rs - -commit 6a9e6651b97adb76709cbc43e4292fbd49fd9f33 -Author: David Jaffe -AuthorDate: Tue Dec 3 03:24:25 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 03:24:25 2019 -0800 - - fix bug in print(...) - -M lib/rust/enclone/src/help_utils.rs - -commit a1e4bb54ba97b29857c447158e20e5f121b0b1f0 -Author: David Jaffe -AuthorDate: Tue Dec 3 03:15:14 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 03:15:14 2019 -0800 - - separate out technical notes on input - -M lib/rust/enclone/src/help1.rs - -commit 02147326d2076bead788681756dc2087e7ab82d4 -Author: David Jaffe -AuthorDate: Tue Dec 3 03:09:59 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 03:09:59 2019 -0800 - - more on enclone help input - -M lib/rust/enclone/src/help1.rs - -commit 0329cf22c9a2b3629eb7474bb58ad9eadda1261d -Author: David Jaffe -AuthorDate: Tue Dec 3 03:09:30 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 03:09:30 2019 -0800 - - fix bug in print(...) 
- -M lib/rust/enclone/src/help_utils.rs - -commit 5ee7f4250d6bc327698c96d071683fb57dc148b6 -Author: David Jaffe -AuthorDate: Tue Dec 3 02:51:47 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Dec 3 02:51:47 2019 -0800 - - more on dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d706767bbd2817f8a04ddd8b4c3293751a530f50 -Author: David Jaffe -AuthorDate: Mon Dec 2 18:30:14 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 18:30:14 2019 -0800 - - describe dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1d0b5ca6e9d1247cc2b9d1ac64dd141db21c4d23 -Author: David Jaffe -AuthorDate: Mon Dec 2 18:21:12 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 18:21:12 2019 -0800 - - split a large-ish file - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/lib.rs -R061 lib/rust/enclone/src/misc.rs lib/rust/enclone/src/misc1.rs -A lib/rust/enclone/src/misc2.rs - -commit 9a7fbd42a1250ffc353aa51c2889b7deb68372cb -Author: David Jaffe -AuthorDate: Mon Dec 2 15:42:11 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 15:42:11 2019 -0800 - - now print(...) 
folds long lines - -M lib/rust/enclone/src/help1.rs -M lib/rust/enclone/src/help_utils.rs - -commit b402f4711981f11d0a359ce444effd417a42546d -Author: David Jaffe -AuthorDate: Mon Dec 2 14:17:33 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 14:17:33 2019 -0800 - - put help in two smaller files - -R054 lib/rust/enclone/src/help.rs lib/rust/enclone/src/help1.rs -A lib/rust/enclone/src/help2.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/proc_args.rs - -commit 0dc331b5f34a5a05e427b1b7e3d068751390d7b5 -Author: David Jaffe -AuthorDate: Mon Dec 2 14:12:42 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 14:12:42 2019 -0800 - - split some stuff out of help.rs - -M lib/rust/enclone/src/help.rs -A lib/rust/enclone/src/help_utils.rs -M lib/rust/enclone/src/lib.rs - -commit e6f200eb1a60c0857dc6055be423127a2bb44ebb -Author: David Jaffe -AuthorDate: Mon Dec 2 13:17:18 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 13:17:18 2019 -0800 - - add input doc - -M lib/rust/enclone/src/help.rs - -commit e342c19bb35c0502c0715d7f1024dd35e5fc5f49 -Author: David Jaffe -AuthorDate: Mon Dec 2 12:50:03 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 12:50:03 2019 -0800 - - tweak help - -M lib/rust/enclone/src/help.rs - -commit 1882510218173b5339100364e8f8dd3e1429235c -Author: David Jaffe -AuthorDate: Mon Dec 2 12:47:32 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 12:47:32 2019 -0800 - - tweak help - -M lib/rust/enclone/src/help.rs - -commit 99d67d481ba6ee3064279b02f706f91a27827a5c -Author: David Jaffe -AuthorDate: Mon Dec 2 12:38:39 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 12:38:39 2019 -0800 - - factor chunk of code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs - -commit 0cf00d45b90119f9a8467387c08f44fa1e1009b1 -Author: David Jaffe -AuthorDate: Mon Dec 2 12:01:25 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 12:01:25 2019 -0800 - - another little bit of 
simplification - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d5b50b9dc421bdc019c953cea0f7b84554d64ad9 -Author: David Jaffe -AuthorDate: Mon Dec 2 11:33:34 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 11:33:34 2019 -0800 - - remove some unused variables - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5b62d5cfed45013b0b24b2d08123e7b41bd2b362 -Author: David Jaffe -AuthorDate: Mon Dec 2 11:30:38 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 11:30:38 2019 -0800 - - reorg to reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8e66e564a98bb684f503e5cf3e4e01fcb5e4918f -Author: David Jaffe -AuthorDate: Mon Dec 2 11:16:48 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 11:16:48 2019 -0800 - - simplify a bit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 47477a31c9c04f8d63325ec8a0a8402ad536699d -Author: David Jaffe -AuthorDate: Mon Dec 2 11:13:58 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 11:13:58 2019 -0800 - - reorg to reduce dependency - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 80d37252859c3a4b26a203bb4b80a737d7ad5690 -Author: David Jaffe -AuthorDate: Mon Dec 2 10:57:13 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 10:57:13 2019 -0800 - - enumerate files that are used - -M lib/rust/enclone/src/help.rs - -commit f6d47161352f026fc4549f2610cfa70380417227 -Author: David Jaffe -AuthorDate: Mon Dec 2 10:45:00 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 10:45:00 2019 -0800 - - more bolding stuff - -M lib/rust/enclone/src/help.rs - -commit 73fd11c412d8b708103dfcfa450eb9cccc85c41c -Author: David Jaffe -AuthorDate: Mon Dec 2 10:37:33 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 10:37:33 2019 -0800 - - tweak help - -M lib/rust/enclone/src/help.rs - -commit ac8fc107db127e0a08b11ebbc6c024b3854dd584 -Author: David Jaffe -AuthorDate: Mon Dec 2 10:34:41 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 10:34:41 2019 -0800 - - add print function - -M 
lib/rust/enclone/src/help.rs - -commit b43e7607a8c76913acd9e3b927ba7e1f86561dd8 -Author: David Jaffe -AuthorDate: Mon Dec 2 10:14:24 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 10:14:24 2019 -0800 - - factor more code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/load_gex.rs - -commit a33c59614773bbcaa215a0b7b5b92543f5bcf3ad -Author: David Jaffe -AuthorDate: Mon Dec 2 07:17:51 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 07:17:51 2019 -0800 - - describe dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 18ea54a22d4a0cd7483ced93183a742ff8b10695 -Author: David Jaffe -AuthorDate: Mon Dec 2 06:42:42 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 06:42:42 2019 -0800 - - cosmetic - -M lib/rust/enclone/src/help.rs - -commit ca072c7ea1f27500f113539100429ad1120053af -Author: David Jaffe -AuthorDate: Mon Dec 2 06:18:30 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 06:18:30 2019 -0800 - - expand enclone help input - -M lib/rust/enclone/src/help.rs - -commit 57b7beb38dfe57dc4e5e21d415022d238db1a031 -Author: David Jaffe -AuthorDate: Mon Dec 2 05:48:40 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 05:48:40 2019 -0800 - - factor more code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs - -commit cd4a1b85bec9d5ab50fd6dd1d1b490d7ac221cef -Author: David Jaffe -AuthorDate: Mon Dec 2 05:27:20 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 05:27:20 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 886511c6750dfcded22b284a5be880b2ab631ad7 -Author: David Jaffe -AuthorDate: Mon Dec 2 05:22:49 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 05:22:49 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit a4d2e24e5d89dfd65dbbfa53dedaca23ae48e5af -Author: David Jaffe -AuthorDate: Mon Dec 2 05:17:02 2019 -0800 -Commit: David Jaffe -CommitDate: 
Mon Dec 2 05:17:02 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 43555b2d0fd2e7a4b9d0dca5c394f068a8850734 -Author: David Jaffe -AuthorDate: Mon Dec 2 05:13:23 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 05:13:23 2019 -0800 - - various tweaks to help - -M lib/rust/enclone/src/help.rs - -commit 4b2b8017ec841896698d9496ee28e377ce6868e1 -Author: David Jaffe -AuthorDate: Mon Dec 2 04:57:15 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 04:57:15 2019 -0800 - - start of enclone help input - -M lib/rust/enclone/src/help.rs - -commit 0759f7c04e958f4db521dc2f49190470e1324b1d -Author: David Jaffe -AuthorDate: Mon Dec 2 04:38:14 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 04:38:14 2019 -0800 - - add test - -A lib/rust/enclone/test/inputs/enclone_test10_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 4d9e05bd3c5b1433f69600d4902622a7cf3c96d0 -Author: David Jaffe -AuthorDate: Mon Dec 2 04:35:14 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 04:35:14 2019 -0800 - - default is now not to show failed joins - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/enclone.test -M lib/rust/enclone/src/enclone.test2 -M lib/rust/enclone/src/proc_args.rs - -commit 6aee3ea11c3fc56d44a527bd1491e383aed5143d -Author: David Jaffe -AuthorDate: Mon Dec 2 04:21:37 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 04:21:37 2019 -0800 - - enumerate dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit b4cc56cd57a219d7faea90201377144b9685c263 -Author: David Jaffe -AuthorDate: Mon Dec 2 03:59:55 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Dec 2 03:59:55 2019 -0800 - - rename NCELLS to MIN_NCELLS_EXACT - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/proc_args.rs - -commit da57325180490b7f41282979b7c9fc6847c15a33 -Author: David Jaffe -AuthorDate: Sun Dec 1 14:04:04 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 1 14:04:04 2019 -0800 - - start to enumerate dependencies - 
-M lib/rust/enclone/src/print_clonotypes.rs - -commit c0b064362971864c03899bd3fc29eba820f44cf8 -Author: David Jaffe -AuthorDate: Sun Dec 1 14:00:36 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 1 14:00:36 2019 -0800 - - update - -M lib/rust/enclone/src/README - -commit ed3d7c3c7c05d6eb544d158360d5e57e673450d7 -Author: David Jaffe -AuthorDate: Sun Dec 1 13:58:27 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 1 13:58:27 2019 -0800 - - simplify a bit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit f041eb3136714b0378b9e608069bf8367e0db8f3 -Author: David Jaffe -AuthorDate: Sun Dec 1 13:55:31 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 1 13:55:31 2019 -0800 - - remove NCHAIN_SPLIT option - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/src/proc_args.rs - -commit f8b96279bcbce50943d0ae64e32647638a9f99e8 -Author: David Jaffe -AuthorDate: Sun Dec 1 12:39:13 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 1 12:39:13 2019 -0800 - - simplify code of last commit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit ccaea3bbc11e66373eacac0f15be2b1cde4692f6 -Author: David Jaffe -AuthorDate: Sun Dec 1 12:34:35 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 1 12:34:35 2019 -0800 - - make show_aa calc for donor ref diffs nicer - -M lib/rust/enclone/src/print_clonotypes.rs - -commit cd4c58b2c65f103dc809245985f79db9e5efa354 -Author: David Jaffe -AuthorDate: Sun Dec 1 07:16:09 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 1 07:16:09 2019 -0800 - - move PALETTE into enclone help color - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/proc_args.rs - -commit 7032cc8b4fc3095b245062b285957802755402f6 -Author: David Jaffe -AuthorDate: Sun Dec 1 07:09:45 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 1 07:09:45 2019 -0800 - - improve PALETTE option - -M 
lib/rust/enclone/src/proc_args.rs - -commit c9a77c67f31b31c4269a69d6d6cac0ab32e084f0 -Author: David Jaffe -AuthorDate: Sun Dec 1 06:46:49 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Dec 1 06:46:49 2019 -0800 - - factor some code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs - -commit 21a1ca63a1b0c919b73bcb3e0eeeba026b1d2f4e -Author: David Jaffe -AuthorDate: Sat Nov 30 11:05:08 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 11:05:08 2019 -0800 - - add to enclone help color - -M lib/rust/enclone/src/help.rs - -commit 07aae1c8b82d5ec3d9f33baa071d73523b44cd38 -Author: David Jaffe -AuthorDate: Sat Nov 30 10:48:58 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 10:48:58 2019 -0800 - - more on enclone help color - -M lib/rust/enclone/src/help.rs - -commit 5418e9b522a9a974a94f2adbb08c169c2a7cf6be -Author: David Jaffe -AuthorDate: Sat Nov 30 10:36:19 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 10:36:19 2019 -0800 - - oops serine codon AGT was colored incorrectly - -M lib/rust/enclone/src/print_utils1.rs -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/test/inputs/enclone_test3_output -M lib/rust/enclone/test/inputs/enclone_test7_output -M lib/rust/enclone/test/inputs/enclone_test9_output - -commit 43d93e2c759c1369afe22456c36e4f3ef2cefbdf -Author: David Jaffe -AuthorDate: Sat Nov 30 10:24:59 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 10:24:59 2019 -0800 - - complete enclone help color - -M lib/rust/enclone/src/help.rs - -commit 1b70eb83786915c5e1be340414378c06a904125d -Author: David Jaffe -AuthorDate: Sat Nov 30 08:16:24 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 08:16:24 2019 -0800 - - start of enclone help color - -M lib/rust/enclone/src/help.rs - -commit cf4269943014df9889754222df4aecb83c2e47e6 -Author: David Jaffe -AuthorDate: Sat Nov 30 08:07:53 2019 -0800 -Commit: 
David Jaffe -CommitDate: Sat Nov 30 08:07:53 2019 -0800 - - improve error message - -M lib/rust/enclone/src/print_utils1.rs - -commit d459115db1f14fc869544fd8c6b1932844f5d4d4 -Author: David Jaffe -AuthorDate: Sat Nov 30 07:03:21 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 07:03:21 2019 -0800 - - add fn to generate colored codon table - -M lib/rust/enclone/src/help.rs - -commit f4bec2c331f7555079789dca30b0886413d7f128 -Author: David Jaffe -AuthorDate: Sat Nov 30 06:29:28 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 06:29:28 2019 -0800 - - help tweaks - -M lib/rust/enclone/src/help.rs - -commit de795c962e5a43c6693b2d8fe862d62924c49cad -Author: David Jaffe -AuthorDate: Sat Nov 30 05:53:56 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 05:53:56 2019 -0800 - - start of example1 help - -A lib/rust/enclone/src/example1 -M lib/rust/enclone/src/help.rs - -commit 8933741e40dfaf23b88615fbca04f00f4908de46 -Author: David Jaffe -AuthorDate: Sat Nov 30 04:50:01 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 04:50:01 2019 -0800 - - tweak main doc page - -M lib/rust/enclone/src/help.rs - -commit 0750ee57f73a99c14d956c309a972ea4a5757357 -Author: David Jaffe -AuthorDate: Sat Nov 30 04:24:51 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 04:24:51 2019 -0800 - - factor fn through RefSeqIds - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils2.rs - -commit b741822a41f03981a1dbf6ffe137822a92d21867 -Author: David Jaffe -AuthorDate: Sat Nov 30 04:22:28 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 04:22:28 2019 -0800 - - factor fn through RefSeqIds - -M lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit cbbc63b47f5ac90e15f121f103b88d435128b4d9 -Author: David Jaffe -AuthorDate: Sat Nov 30 04:19:35 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 04:19:35 2019 -0800 - - factor fn through RefSeqIds - -M lib/rust/enclone/src/loupe.rs -M 
lib/rust/enclone/src/print_clonotypes.rs - -commit 27bdd8b8c6ff885811b4ce6a83e0e3167ff32aae -Author: David Jaffe -AuthorDate: Sat Nov 30 04:16:12 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 30 04:16:12 2019 -0800 - - refactor to use new struct RefSeqIds - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils1.rs - -commit 839aa1c26879637eb59fa80dce81c4d2ad34c6df -Author: David Jaffe -AuthorDate: Fri Nov 29 14:28:07 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 14:28:07 2019 -0800 - - split large file - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/print_clonotypes.rs -A lib/rust/enclone/src/print_utils1.rs -R060 lib/rust/enclone/src/print_utils.rs lib/rust/enclone/src/print_utils2.rs - -commit ee82daea50b716e7228371a220c5b56ff5d61b58 -Author: David Jaffe -AuthorDate: Fri Nov 29 14:04:54 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 14:04:54 2019 -0800 - - factor big chunk of code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils.rs - -commit a13c58b60cc2256f8f685777bcdb46ec8da71d7f -Author: David Jaffe -AuthorDate: Fri Nov 29 13:47:17 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 13:47:17 2019 -0800 - - hide some stuff, preparatory to refactoring - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3ea4fca72c265dae08b8c00175f133146bfa27d5 -Author: David Jaffe -AuthorDate: Fri Nov 29 13:41:40 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 13:41:40 2019 -0800 - - another little bit of simplification - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 2343ae67334d6fa5d94cb2cbb7352af17efd8705 -Author: David Jaffe -AuthorDate: Fri Nov 29 13:33:19 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 13:33:19 2019 -0800 - - simplify calculation of varmat - -M lib/rust/enclone/src/print_clonotypes.rs - -commit facaf76f2a8b4fde386a1c912ddcf3f6b4591db7 -Author: David Jaffe -AuthorDate: Fri Nov 
29 13:17:47 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 13:17:47 2019 -0800 - - kill lowercase stuff - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 74b43f6e7922c8f2332d4d6c6566ae19db0b4b57 -Author: David Jaffe -AuthorDate: Fri Nov 29 09:22:44 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 09:22:44 2019 -0800 - - document dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 2188098de9f2cff7e73b2893af76d9e7e6b9d3ba -Author: David Jaffe -AuthorDate: Fri Nov 29 07:56:28 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 07:56:28 2019 -0800 - - factor chunk of code out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils.rs - -commit c70fa3dcf23a290b918c1c2982843bc4a43e8f54 -Author: David Jaffe -AuthorDate: Fri Nov 29 07:50:30 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 07:50:30 2019 -0800 - - describe dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit a7e77e058812eb4b847b13c8417722be2d886b61 -Author: David Jaffe -AuthorDate: Fri Nov 29 07:03:58 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 07:03:58 2019 -0800 - - simplify to remove slobber - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 67258f7af75dd9476671d8a4df010434827b43e1 -Author: David Jaffe -AuthorDate: Fri Nov 29 06:46:22 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 06:46:22 2019 -0800 - - further simplify position row insertion code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit cf5eb6c2948cba76118e740741ef9dee7fa84d17 -Author: David Jaffe -AuthorDate: Fri Nov 29 06:40:46 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 06:40:46 2019 -0800 - - cleanup of last commit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit fe09db61611895e61ab3b2f6940ff5b651646cc1 -Author: David Jaffe -AuthorDate: Fri Nov 29 06:38:24 2019 -0800 -Commit: 
David Jaffe -CommitDate: Fri Nov 29 06:38:24 2019 -0800 - - substantially simplify - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 4e7b6eafa8af95568531ea5ff427d2c61bbc0f1a -Author: David Jaffe -AuthorDate: Fri Nov 29 06:24:59 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 06:24:59 2019 -0800 - - slightly reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit ea39ddac0d556a29eaac86e80ef11cc232f26d36 -Author: David Jaffe -AuthorDate: Fri Nov 29 06:20:24 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 06:20:24 2019 -0800 - - slightly reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 4f3fc38356a87a8a85c6e446b147045cebe2ebc6 -Author: David Jaffe -AuthorDate: Fri Nov 29 06:15:30 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 06:15:30 2019 -0800 - - characterize dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit f5d440caeeb1cdd5458ef3c80872400600f0c720 -Author: David Jaffe -AuthorDate: Fri Nov 29 06:02:21 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 29 06:02:21 2019 -0800 - - add a few comments - -M lib/rust/enclone/src/print_utils.rs - -commit 3e16ac8ca7ea5aef5e1a2117124678cc0fa54dc3 -Author: David Jaffe -AuthorDate: Thu Nov 28 12:10:41 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 12:10:41 2019 -0800 - - move a bit more out of print_clonotypes.rs - -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit f686e165dc79bad5fc12b6c37c039dbcbd7a2e2e -Author: David Jaffe -AuthorDate: Thu Nov 28 11:58:50 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 11:58:50 2019 -0800 - - simplify a bit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3907bd851087181f89e6e0c88c06296e79e4f484 -Author: David Jaffe -AuthorDate: Thu Nov 28 11:55:26 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 11:55:26 2019 -0800 - - improve error handling - -M lib/rust/enclone/tests/enclone_test.rs - -commit 
926877d7bcee4ea9ae8d8c7eafcf4ea110f00609 -Author: David Jaffe -AuthorDate: Thu Nov 28 11:38:09 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 11:38:09 2019 -0800 - - simplify a bit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit fc7f627384a30249ff47195e97ba4da14aaa8ff6 -Author: David Jaffe -AuthorDate: Thu Nov 28 11:30:05 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 11:30:05 2019 -0800 - - simplify a bit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 181585ed37f3c4e8a74461ba6443ca9286c309ef -Author: David Jaffe -AuthorDate: Thu Nov 28 10:53:53 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 10:53:53 2019 -0800 - - simplify a bit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1d3ef6eb0ef8e31bf101c8b9b5e5e508f85afdd3 -Author: David Jaffe -AuthorDate: Thu Nov 28 10:48:26 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 10:48:26 2019 -0800 - - simplify a bit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 2759c47214f2dcb5762ff432b79c45d9355fa12f -Author: David Jaffe -AuthorDate: Thu Nov 28 09:55:23 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 09:55:23 2019 -0800 - - cosmetic - -M lib/rust/enclone/src/print_utils.rs - -commit dc65f3c1f285708b6ae89fdd47e788625ec25183 -Author: David Jaffe -AuthorDate: Thu Nov 28 09:52:56 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 09:52:56 2019 -0800 - - fix typo in help message - -M lib/rust/enclone/src/help.rs - -commit 0563139f7d9d4d42529c3241f9e0f80d7d01986b -Author: David Jaffe -AuthorDate: Thu Nov 28 09:22:22 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 09:22:22 2019 -0800 - - fix bugs in parseable output - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 53c384cf06c2b77b4c08375cde758749fe8c8119 -Author: David Jaffe -AuthorDate: Thu Nov 28 09:05:43 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 09:05:43 2019 -0800 - - fix bug in parseable output - -M lib/rust/enclone/src/defs.rs -M 
lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils.rs -M lib/rust/enclone/src/proc_args.rs - -commit fed2131ef9667066c8b05f1d2690facfd3a67313 -Author: David Jaffe -AuthorDate: Thu Nov 28 06:36:49 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 06:36:49 2019 -0800 - - add to test - -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 2c824ddc78f1b6134909013fbb1aed5d6d0ba4d6 -Author: David Jaffe -AuthorDate: Thu Nov 28 06:30:43 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 06:30:43 2019 -0800 - - fix bugs in parseable output - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit eb1dd1d2f4987a111103baa02f5345fa1b091f8b -Author: David Jaffe -AuthorDate: Thu Nov 28 05:42:29 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 05:42:29 2019 -0800 - - env vars are treated as *first* args - -M lib/rust/enclone/src/proc_args.rs - -commit 15ecf4de58276de109375ab2882e7fbfea165dbe -Author: David Jaffe -AuthorDate: Thu Nov 28 05:42:12 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 05:42:12 2019 -0800 - - fixbug in parseable output - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1f869c6789ccf2601455d235c337c2af108fff95 -Author: David Jaffe -AuthorDate: Thu Nov 28 04:01:54 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 04:01:54 2019 -0800 - - fix bugs in parseable output - -M lib/rust/enclone/src/group.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit e92e66021b0940ed50a5fccb7268da7dfb36e4fa -Author: David Jaffe -AuthorDate: Thu Nov 28 03:29:17 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 03:29:17 2019 -0800 - - respect order in PCOLS - -M lib/rust/enclone/src/group.rs - -commit ca2929f30d17d0eddbeeee1c2039c6dc540f2ebf -Author: David Jaffe -AuthorDate: Thu Nov 28 03:21:06 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 03:21:06 2019 -0800 - - integrate 
parseable output with grouping - -M lib/rust/enclone/src/group.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit b7240e9ebbbaf4b68d9d4fad6268c515a9f8fcc2 -Author: David Jaffe -AuthorDate: Thu Nov 28 02:49:29 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 28 02:49:29 2019 -0800 - - separate definition of writer from writing - -M lib/rust/enclone/src/group.rs - -commit 97a1caff64e9a862e30993113e9655ed99615e32 -Author: David Jaffe -AuthorDate: Wed Nov 27 15:52:33 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 15:52:33 2019 -0800 - - factor major chunk from print_clonotypes.rs, but ugly - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils.rs - -commit 9ccb4ebb4c75c14ef4f2b2c7fe17a89cbb21513d -Author: David Jaffe -AuthorDate: Wed Nov 27 15:10:35 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 15:10:35 2019 -0800 - - fill in putative fn signature, com'ed out - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3ca65f5fc4aa10399c8a3853fb1035cf1fd857ac -Author: David Jaffe -AuthorDate: Wed Nov 27 14:55:04 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 14:55:04 2019 -0800 - - dependency reduction - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8ace854384151686fc9b9e88d350b972ea9f5f25 -Author: David Jaffe -AuthorDate: Wed Nov 27 14:49:25 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 14:49:25 2019 -0800 - - dependency reduction - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5665b8e43592fc2958bcb4960d2e2c58665e01fd -Author: David Jaffe -AuthorDate: Wed Nov 27 14:45:16 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 14:45:16 2019 -0800 - - dependency reduction - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 21971bf8a511677b0623cd61a19aa2202fab3f32 -Author: David Jaffe -AuthorDate: Wed Nov 27 14:42:50 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 14:42:50 2019 -0800 - - dependency reduction - -M 
lib/rust/enclone/src/print_clonotypes.rs - -commit bea24c6ccc10f921a743fa95ceb4cf955b157ebc -Author: David Jaffe -AuthorDate: Wed Nov 27 14:37:57 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 14:37:57 2019 -0800 - - reduce dependency - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 04fc315a94b47dea4ab1fd8c70fe8d2d5701aa56 -Author: David Jaffe -AuthorDate: Wed Nov 27 14:32:22 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 14:32:22 2019 -0800 - - dependency reduction - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6f3d1e119416b1e502060957eeef49f33fdf24ef -Author: David Jaffe -AuthorDate: Wed Nov 27 13:13:08 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 13:13:08 2019 -0800 - - dependency reduction - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 2d2b78df14df1e52f6af77db85fb708493ae4b9f -Author: David Jaffe -AuthorDate: Wed Nov 27 13:04:08 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 13:04:08 2019 -0800 - - dependency reduction - -M lib/rust/enclone/src/print_clonotypes.rs - -commit ec64da4c6b51378ce677827e91bc56c4b841aaae -Author: David Jaffe -AuthorDate: Wed Nov 27 12:59:50 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 12:59:50 2019 -0800 - - dependency reduction - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 9683f37d0dbc34884f37cbbc16259f40aebdcaf3 -Author: David Jaffe -AuthorDate: Wed Nov 27 10:22:04 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 10:22:04 2019 -0800 - - kill a dependency - -M lib/rust/enclone/src/print_clonotypes.rs - -commit a8a512817094ad27a84dfa603e8e83e011eb1f47 -Author: David Jaffe -AuthorDate: Wed Nov 27 09:26:45 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 09:26:45 2019 -0800 - - reduce dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit e44c29d99718d422df954e5e7616bce8adb40411 -Author: David Jaffe -AuthorDate: Wed Nov 27 09:16:26 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 09:16:26 2019 -0800 - - print_clonotypes 
is now passed a GexInfo - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 594b367d5c77aa7c38349ce1d14b4b25460661f3 -Author: David Jaffe -AuthorDate: Wed Nov 27 09:06:02 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 09:06:02 2019 -0800 - - factor through a GexInfo object - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/defs.rs - -commit 5b2b95d8cc195f827906fe9610200d08e46661e2 -Author: David Jaffe -AuthorDate: Wed Nov 27 08:38:22 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 08:38:22 2019 -0800 - - enumerate dependencies - -M lib/rust/enclone/src/print_clonotypes.rs - -commit cf2a4af680d0dbe1639b0f9ee44a385e215d6aa3 -Author: David Jaffe -AuthorDate: Wed Nov 27 08:12:41 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 08:12:41 2019 -0800 - - parseable output closer to working - -M lib/rust/enclone/src/group.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8d684c5377321a3febd4ad72c4ab22a6d2037dd4 -Author: David Jaffe -AuthorDate: Wed Nov 27 07:31:18 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 07:31:18 2019 -0800 - - parseable output now closer to reality - -M lib/rust/enclone/src/group.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit a59b384d46a8f5ffff3cb8776b92e482935d4e19 -Author: David Jaffe -AuthorDate: Wed Nov 27 07:03:28 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 07:03:28 2019 -0800 - - gather out_datas - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d1aaf401810cd172ab4ac6cecc66007c0b88a81d -Author: David Jaffe -AuthorDate: Wed Nov 27 06:48:26 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 06:48:26 2019 -0800 - - respect PCOLS - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 358dfb74b0fc2837cfbd8300bf8ea9d4069f8b2e -Author: David Jaffe -AuthorDate: Wed Nov 27 06:38:45 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 06:38:45 2019 -0800 - 
- speak var_aa - -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5df376fbea98b247200b9a70bb171778981d0a6a -Author: David Jaffe -AuthorDate: Wed Nov 27 06:20:30 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 06:20:30 2019 -0800 - - simplification at speakc! - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8e6ca2f73b19587cf997eff665bc1334e01ed374 -Author: David Jaffe -AuthorDate: Wed Nov 27 06:16:02 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 06:16:02 2019 -0800 - - speak more - -M lib/rust/enclone/src/print_clonotypes.rs - -commit fc4e7cd1089dc0801a4a81d6874c1ae702db3229 -Author: David Jaffe -AuthorDate: Wed Nov 27 05:55:41 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 05:55:41 2019 -0800 - - speak barcodes - -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 33c39a8df9a8b3e106736a47b5d732e97321fd77 -Author: David Jaffe -AuthorDate: Wed Nov 27 05:48:09 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 05:48:09 2019 -0800 - - fill in exact_subclonotype_id - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8469bcee1a1d4175e05c9e22b40af84a64a274c8 -Author: David Jaffe -AuthorDate: Wed Nov 27 05:34:28 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 05:34:28 2019 -0800 - - move some stuff out of print_clonotypes.rs - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/print_utils.rs - -commit 34e4da025341ccf0dc3057662683823481fc9a6f -Author: David Jaffe -AuthorDate: Wed Nov 27 05:23:38 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 05:23:38 2019 -0800 - - add test for BU - -A lib/rust/enclone/test/inputs/enclone_test9_output -M lib/rust/enclone/tests/enclone_test.rs - -commit dd62f110e08be79aed6b91023d9d4583cc90ad2d -Author: David Jaffe -AuthorDate: Wed Nov 27 05:00:29 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 05:00:29 2019 -0800 - - reorg to compute near and far in place - -M 
lib/rust/enclone/src/print_clonotypes.rs - -commit db13abf172a88095da3bd1c795b6418d87e644ea -Author: David Jaffe -AuthorDate: Wed Nov 27 04:46:33 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 27 04:46:33 2019 -0800 - - delete some accidentally duplicated code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit f1cfdc7d9fe8c92cb2bf9a94ce187b707637ddbd -Author: David Jaffe -AuthorDate: Tue Nov 26 20:58:08 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 20:58:08 2019 -0800 - - factor a bit more code out of main program - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/misc.rs - -commit 5a57f82d688c84b1cbc3428764bfc2e07055c790 -Author: David Jaffe -AuthorDate: Tue Nov 26 20:52:06 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 20:52:06 2019 -0800 - - factor big function make_loupe_clonotype out of print_clonotypes.rs - -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3974720856634e18a9baf8f99fa1aabace9d4999 -Author: David Jaffe -AuthorDate: Tue Nov 26 20:43:35 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 20:43:35 2019 -0800 - - define tentative call signature (comments) - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 0704728e950f735aec4160da5e4964d8a60e26ea -Author: David Jaffe -AuthorDate: Tue Nov 26 20:35:47 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 20:35:47 2019 -0800 - - simplify some stuff - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 9b964bdfc7021a517911990e6c3bb0b4227d34a7 -Author: David Jaffe -AuthorDate: Tue Nov 26 20:21:20 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 20:21:20 2019 -0800 - - more colocation of loupe code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 28223fb0b0a20292bff6ffaf235bda61858fa8ca -Author: David Jaffe -AuthorDate: Tue Nov 26 20:14:38 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 20:14:38 2019 -0800 - - colocate loupe code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 
b9a6ba315fc8b5e626b7eb93baf05ca2e58b7332 -Author: David Jaffe -AuthorDate: Tue Nov 26 20:04:20 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 20:04:20 2019 -0800 - - comments - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 9720650cd03f3b26168a03a070e224c2c4ce729d -Author: David Jaffe -AuthorDate: Tue Nov 26 19:59:00 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 19:59:00 2019 -0800 - - move big block of code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit aa71442e26ced350b365144d95976aabd1708872 -Author: David Jaffe -AuthorDate: Tue Nov 26 19:54:19 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 19:54:19 2019 -0800 - - move filtering - -M lib/rust/enclone/src/print_clonotypes.rs - -commit c860de9c781d4649fb0b5f57b0c663eeeb66c8d9 -Author: David Jaffe -AuthorDate: Tue Nov 26 19:48:48 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 19:48:48 2019 -0800 - - move block of code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6d3be65b09c3c248249af5b9dfe93a649a7679af -Author: David Jaffe -AuthorDate: Tue Nov 26 19:44:53 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 19:44:53 2019 -0800 - - comments - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 59b1d6c17e76e831efbaddfbf1f56fcb200ba4f4 -Author: David Jaffe -AuthorDate: Tue Nov 26 19:29:13 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 19:29:13 2019 -0800 - - delete some duplicated code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 76af1b04065fe7dd5493284111118ec92ec4fe1c -Author: David Jaffe -AuthorDate: Tue Nov 26 19:24:01 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 19:24:01 2019 -0800 - - separate out varmat calculation - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 9f85c265383ec20d42cd7e213e21fee55c33b895 -Author: David Jaffe -AuthorDate: Tue Nov 26 19:11:09 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 19:11:09 2019 -0800 - - delete some unused stuff - -M lib/rust/enclone/src/print_clonotypes.rs - 
-commit 87c718b43ce4cca249fa407b4f6fd795e7dd6650 -Author: David Jaffe -AuthorDate: Tue Nov 26 19:05:44 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 19:05:44 2019 -0800 - - suck more code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/misc.rs - -commit 8bd6a2b393a00cea35857ba5b1762bca14ad0c23 -Author: David Jaffe -AuthorDate: Tue Nov 26 19:01:13 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 19:01:13 2019 -0800 - - suck more code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/misc.rs - -commit 8c62f4dcfc459164e00f86ee223a09f79e30d2b4 -Author: David Jaffe -AuthorDate: Tue Nov 26 18:36:50 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 18:36:50 2019 -0800 - - add to parseable output - -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8ddd21f7abd5d5dded871fbdd991160f1f4acf6e -Author: David Jaffe -AuthorDate: Tue Nov 26 11:04:35 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 11:04:35 2019 -0800 - - move more out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/proc_args.rs - -commit c4d012be0a043e20c725b32f778f763378851f3c -Author: David Jaffe -AuthorDate: Tue Nov 26 10:35:37 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 10:35:37 2019 -0800 - - move more code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/proc_args.rs - -commit 3a7614326f623df41a293d840ae3c7f8b4a68a51 -Author: David Jaffe -AuthorDate: Tue Nov 26 10:30:04 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 10:30:04 2019 -0800 - - move some code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/proc_args.rs - -commit 647818a4d3b08f0720ca81eb28592dc8dc7ae0f7 -Author: David Jaffe -AuthorDate: Tue Nov 26 10:22:40 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 10:22:40 2019 -0800 - - factor more code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M 
lib/rust/enclone/src/explore.rs - -commit a3038fc26141cde9db136ae18e4defe43fa11a4e -Author: David Jaffe -AuthorDate: Tue Nov 26 10:15:33 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 10:15:33 2019 -0800 - - clarify definitions of near and far - -M lib/rust/enclone/src/help.rs - -commit 56000516354862ad224170ab4eba43c4be337cae -Author: David Jaffe -AuthorDate: Tue Nov 26 10:14:03 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 10:14:03 2019 -0800 - - add to doc of near and far - -M lib/rust/enclone/src/help.rs - -commit 6c2f9c9ed414b9ff4c15cb32f839b9b8909414ee -Author: David Jaffe -AuthorDate: Tue Nov 26 10:09:48 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 10:09:48 2019 -0800 - - add near and far to a test - -M lib/rust/enclone/test/inputs/enclone_test7_output -M lib/rust/enclone/tests/enclone_test.rs - -commit dec7f8e77ab8f70b8d673c59f16f41d4f003bbe8 -Author: David Jaffe -AuthorDate: Tue Nov 26 08:24:52 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 08:24:52 2019 -0800 - - fix bugs in near and far - -M lib/rust/enclone/src/print_clonotypes.rs - -commit bf1a6f87836fe6d7bdd7421644e2ff0de4169364 -Author: David Jaffe -AuthorDate: Tue Nov 26 07:17:40 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 07:17:40 2019 -0800 - - add comment - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8ac5b322c0754b7e7cb384a3573e67f094eb6d0d -Author: David Jaffe -AuthorDate: Tue Nov 26 07:14:45 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 07:14:45 2019 -0800 - - add/use macros speaker! and speakerc! - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8071aa7e1dcb77afaf94fec4a4f4df2b2a8e8092 -Author: David Jaffe -AuthorDate: Tue Nov 26 07:05:33 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 07:05:33 2019 -0800 - - simplify speak! and speakc! 
invocation - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1b81453370a0cca74ef85c94e86038ff66fd8c4f -Author: David Jaffe -AuthorDate: Tue Nov 26 07:00:25 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 07:00:25 2019 -0800 - - factor through new macro speakc! - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6e3178dea890fdf9b4e617a5626835806f1753b1 -Author: David Jaffe -AuthorDate: Tue Nov 26 06:52:24 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 06:52:24 2019 -0800 - - factor through macro speak! - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3d364f7c359b20511dbbb0fcf63637efacc9e535 -Author: David Jaffe -AuthorDate: Tue Nov 26 04:44:17 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 04:44:17 2019 -0800 - - move more code out of main program - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/misc.rs - -commit 0c8f0eef556d50c5ebbaa34f43754245bb2be0ae -Author: David Jaffe -AuthorDate: Tue Nov 26 04:29:42 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 04:29:42 2019 -0800 - - yet more in direction of parseable output - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 29fcc66000e91d426c966b99938f0d19ddba9b1e -Author: David Jaffe -AuthorDate: Tue Nov 26 04:10:27 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 04:10:27 2019 -0800 - - inch towards parseable output - -M lib/rust/enclone/src/print_clonotypes.rs - -commit a059422836406450364fee8f45724874bcb08cd0 -Author: David Jaffe -AuthorDate: Tue Nov 26 03:49:50 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 03:49:50 2019 -0800 - - moving in direction of parseable output - -M lib/rust/enclone/src/print_clonotypes.rs - -commit b6a03e07eb6a63e9d7e6b9e6c6fc0a4d8ecc789f -Author: David Jaffe -AuthorDate: Tue Nov 26 03:10:45 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 03:10:45 2019 -0800 - - moving towards parseable output, long way to go - -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 
c5f6abdcd919df065704d1dff738b373adc2655f -Author: David Jaffe -AuthorDate: Tue Nov 26 02:55:45 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 02:55:45 2019 -0800 - - fix typo - -M lib/rust/enclone/src/help.rs - -commit 1e1d334b7f990a80c2ae2ead1ab020cf78ad3edd -Author: David Jaffe -AuthorDate: Tue Nov 26 02:44:47 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 02:44:47 2019 -0800 - - fix bug - -M lib/rust/enclone/src/filter.rs - -commit 768a8bb5daea8a335b73979972352b046c5ad33f -Author: David Jaffe -AuthorDate: Tue Nov 26 02:41:56 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 02:41:56 2019 -0800 - - delete some unneeded stuff - -M lib/rust/enclone/src/proc_args.rs - -commit 59d8eaa0b35d61a5e39f3fd4ac479335ddc458f3 -Author: David Jaffe -AuthorDate: Tue Nov 26 02:30:56 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 02:30:56 2019 -0800 - - add and use is_usize_arg - -M lib/rust/enclone/src/proc_args.rs - -commit 405a7216e4d97f6c29e56819d024737c634397b8 -Author: David Jaffe -AuthorDate: Tue Nov 26 01:58:49 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 26 01:58:49 2019 -0800 - - add parseable output command line args (but doing nothing with them) - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/proc_args.rs - -commit bf6165b4c187a741c2075c4205f83649d061042f -Author: David Jaffe -AuthorDate: Mon Nov 25 15:14:39 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 15:14:39 2019 -0800 - - move more code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/misc.rs - -commit 8615a4c0c97e06e4c87c33743f3b935d09582140 -Author: David Jaffe -AuthorDate: Mon Nov 25 14:46:36 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 14:46:36 2019 -0800 - - add test - -A lib/rust/enclone/test/inputs/enclone_test8_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 2f34a2ab0d65b58513d24210abc38efa6795442f -Author: David Jaffe -AuthorDate: Mon Nov 25 14:43:54 2019 -0800 -Commit: David Jaffe -CommitDate: 
Mon Nov 25 14:43:54 2019 -0800 - - add dataset - -A lib/rust/enclone/test/inputs/163911/_invocation -A lib/rust/enclone/test/inputs/163911/outs/all_contig_annotations.json.lz4 - -commit d71dde3713d7cf505532e3a60997a32300382707 -Author: David Jaffe -AuthorDate: Mon Nov 25 14:41:36 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 14:41:36 2019 -0800 - - complex of changes that improve specificity - -M lib/rust/enclone/src/graph_filter.rs - -commit 41e0de15c813ceabdb8bffbd4cc08230c96af8c9 -Author: David Jaffe -AuthorDate: Mon Nov 25 07:42:01 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 07:42:01 2019 -0800 - - add new filter, but off for now - -M lib/rust/enclone/src/graph_filter.rs - -commit 393a874a154c88e647dea974a70adef2b557373b -Author: David Jaffe -AuthorDate: Mon Nov 25 07:29:00 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 07:29:00 2019 -0800 - - logging and comments - -M lib/rust/enclone/src/graph_filter.rs - -commit d19b589ec1551ee5286571dbb6855c2372efef6a -Author: David Jaffe -AuthorDate: Mon Nov 25 07:16:15 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 07:16:15 2019 -0800 - - add samples - -M lib/rust/enclone/src/enclone.testdata.tcr -M lib/rust/enclone/src/enclone.testlist.all - -commit 520923a283cf0d308c7fb97740733b5d6866c6f5 -Author: David Jaffe -AuthorDate: Mon Nov 25 05:39:08 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 05:39:08 2019 -0800 - - yet more sentence clarification - -M lib/rust/enclone/src/help.rs - -commit 617f53d92f5235efdd6e5ebf6737a38a80576c65 -Author: David Jaffe -AuthorDate: Mon Nov 25 05:35:38 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 05:35:38 2019 -0800 - - more sentence clarification - -M lib/rust/enclone/src/help.rs - -commit c7cb343247c8d0efd42eddadb19ac6ddb632818e -Author: David Jaffe -AuthorDate: Mon Nov 25 05:32:54 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 05:32:54 2019 -0800 - - clarify sentence - -M lib/rust/enclone/src/help.rs - -commit 
23cf317ab5b7f4e7361de83a51b292ca3416208b -Author: David Jaffe -AuthorDate: Mon Nov 25 05:32:10 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 05:32:10 2019 -0800 - - add command-line argument help - -M lib/rust/enclone/src/help.rs - -commit a81876e6568656ae37f3ce84c45d2983c207ed17 -Author: David Jaffe -AuthorDate: Mon Nov 25 04:55:11 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 04:55:11 2019 -0800 - - fix some long lines in help - -M lib/rust/enclone/src/help.rs - -commit 93c332e4df315713861bbb74fb5886660c56286d -Author: David Jaffe -AuthorDate: Mon Nov 25 04:02:03 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 04:02:03 2019 -0800 - - factor through a new macro cvar! - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 051461903c858b389be80456d444e9f64151742e -Author: David Jaffe -AuthorDate: Mon Nov 25 03:43:59 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 03:43:59 2019 -0800 - - factor some code out of print_clonotypes.rs - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/print_clonotypes.rs -A lib/rust/enclone/src/print_utils.rs - -commit 139f8b9ac7401ee24fa60914d2a7dfa736b83472 -Author: David Jaffe -AuthorDate: Mon Nov 25 03:24:32 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 03:24:32 2019 -0800 - - factor through a macro lvar! 
- -M lib/rust/enclone/src/print_clonotypes.rs - -commit 258b8b5896f3f8e12ab0909b0852c1da5a55391b -Author: David Jaffe -AuthorDate: Mon Nov 25 02:53:13 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 25 02:53:13 2019 -0800 - - move some code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/misc.rs - -commit 1143bfc1189fb24c66eb3def8a07717d779300ae -Author: David Jaffe -AuthorDate: Sun Nov 24 08:54:14 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 08:54:14 2019 -0800 - - fix bug - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/test/inputs/enclone_test7_output - -commit 0192802527b41a39d462d2904dc02576be0a148e -Author: David Jaffe -AuthorDate: Sun Nov 24 08:26:18 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 08:26:18 2019 -0800 - - oops, don't need this - -D lib/rust/enclone/test/inputs/47680/_invocation -D lib/rust/enclone/test/inputs/47680/outs/all_contig_annotations.json.lz4 - -commit c24c31218530fd59999215d7581f80257ce99dd7 -Author: David Jaffe -AuthorDate: Sun Nov 24 08:24:54 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 08:24:54 2019 -0800 - - add dataset - -A lib/rust/enclone/test/inputs/47680/_invocation -A lib/rust/enclone/test/inputs/47680/outs/all_contig_annotations.json.lz4 - -commit 0c24711116e57450c3e1cbb48563cf4fbc4065cb -Author: David Jaffe -AuthorDate: Sun Nov 24 08:16:48 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 08:16:48 2019 -0800 - - forgot to add this - -A lib/rust/enclone/test/inputs/enclone_test7_output - -commit da7aadb9e4ea9450cf59a26af07744991fe2a87a -Author: David Jaffe -AuthorDate: Sun Nov 24 07:45:10 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 07:45:10 2019 -0800 - - now actually add test - -A lib/rust/enclone/test/inputs/123085/_invocation -A lib/rust/enclone/test/inputs/123085/outs/all_contig_annotations.json.lz4 -M lib/rust/enclone/tests/enclone_test.rs - -commit 14b0c730227295b8ee6914f914dda8cb9e2ee505 -Author: David Jaffe 
-AuthorDate: Sun Nov 24 07:36:06 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 07:36:06 2019 -0800 - - add filter DEL - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/proc_args.rs - -commit fd27f9229260ab7c8388a3f990d0c6a7a1d7c463 -Author: David Jaffe -AuthorDate: Sun Nov 24 07:27:48 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 07:27:48 2019 -0800 - - add test, but com out for the moment - -M lib/rust/enclone/tests/enclone_test.rs - -commit d215b0db0d6cd37af91af2e90d5b880d2580310c -Author: David Jaffe -AuthorDate: Sun Nov 24 07:22:08 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 07:22:08 2019 -0800 - - partial solution to the indel problem - -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit fc53a9e58a5bde84cfcaad2fb889e7fc8b36966f -Author: David Jaffe -AuthorDate: Sun Nov 24 05:23:02 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 05:23:02 2019 -0800 - - test var field - -M lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 855ce750f0af874425ac210b600df9c6a24efd9a -Author: David Jaffe -AuthorDate: Sun Nov 24 04:44:20 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 04:44:20 2019 -0800 - - midway towards handling amino acid indels - -M lib/rust/enclone/src/print_clonotypes.rs - -commit cd52bb052d004a3433d8f29cd47d13ddb96667e5 -Author: David Jaffe -AuthorDate: Sun Nov 24 04:27:27 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 04:27:27 2019 -0800 - - complex of changes to simplify finding of var pos's in print_clonotypes.rs - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 435ae0ac723bf19c663ac0e46c91cd27cc2f5a61 -Author: David Jaffe -AuthorDate: Sun Nov 24 03:59:47 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 24 03:59:47 2019 -0800 - - 
complex of changes to allow simplification of some code in print_clonotypes.rs - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit db268519bfcafe6a5debe9699e92ef50930cfa2e -Author: David Jaffe -AuthorDate: Sat Nov 23 09:59:04 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 23 09:59:04 2019 -0800 - - initial completion of "enclone help parseable" - -M lib/rust/enclone/src/help.rs - -commit 28af4c69fb1f63fa2e491195aa29222f5dba901c -Author: David Jaffe -AuthorDate: Sat Nov 23 09:23:38 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 23 09:23:38 2019 -0800 - - move more code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/misc.rs - -commit 28af0949ed2d9c48fa7d12d37d1c9c6dbcb9e64b -Author: David Jaffe -AuthorDate: Sat Nov 23 07:34:33 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 23 07:34:33 2019 -0800 - - provide more info if test fails - -M lib/rust/enclone/tests/enclone_test.rs - -commit de95878a838283ddfff7c06b6723a5b84572ef8c -Author: David Jaffe -AuthorDate: Sat Nov 23 07:27:39 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 23 07:27:39 2019 -0800 - - provide more info on test failure - -M lib/rust/enclone/tests/enclone_test.rs - -commit 0137071f006f634e91dac244f6ea143d7add0d75 -Author: David Jaffe -AuthorDate: Sat Nov 23 06:41:17 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 23 06:41:17 2019 -0800 - - move fn package_characters_with_escapes - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 0f66b50439314fb0142e95516b9af5d259ce871a -Author: David Jaffe -AuthorDate: Sat Nov 23 06:36:39 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 23 06:36:39 2019 -0800 - - use package_characters_with_escapes - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d740d04555836edff801c0a9136b54a35453cf6e -Author: David Jaffe -AuthorDate: Sat Nov 23 06:33:06 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 
23 06:33:06 2019 -0800 - - add fn package_characters_with_escapes - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 184e91ed1746ef0210bdf0f6cedd00878f2b6b1f -Author: David Jaffe -AuthorDate: Sat Nov 23 05:43:49 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 23 05:43:49 2019 -0800 - - separate out table functions in tenkit2/src/tables.rs - -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit ddf54f8451b0ca5b6928616243c5d5b46504919d -Author: David Jaffe -AuthorDate: Sat Nov 23 05:28:22 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 23 05:28:22 2019 -0800 - - WIP on "enclone help parseable" - -M lib/rust/enclone/src/help.rs - -commit 2774308a6884a76dac0d7c78c8a4543afb46d858 -Author: David Jaffe -AuthorDate: Fri Nov 22 15:35:04 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 22 15:35:04 2019 -0800 - - beautifications for enclone help parseable - -M lib/rust/enclone/src/help.rs - -commit 069af436e7cf46bdad32e9be16b89e1e7335fcd1 -Author: David Jaffe -AuthorDate: Fri Nov 22 15:12:18 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 22 15:12:18 2019 -0800 - - add to enclone help parseable - -M lib/rust/enclone/src/help.rs - -commit f6353c997b82af189a8f6d933c566258bc47901f -Author: David Jaffe -AuthorDate: Fri Nov 22 14:58:39 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 22 14:58:39 2019 -0800 - - factor more code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/read_json.rs - -commit fc5585c7294eddd5c817eb93d6237eabe0b8cc89 -Author: David Jaffe -AuthorDate: Fri Nov 22 14:41:46 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 22 14:41:46 2019 -0800 - - factor some code out of main - -M lib/rust/enclone/src/bin/enclone.rs -M lib/rust/enclone/src/misc.rs - -commit 715eb59ac361cf31ff05630657b8f09443f294a8 -Author: David Jaffe -AuthorDate: Fri Nov 22 14:25:17 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 22 14:25:17 2019 -0800 - - add to "enclone help parseable" - -M 
lib/rust/enclone/src/help.rs - -commit e1a40dfc7c5ddf4394d2bf4a333d0da20be53424 -Author: David Jaffe -AuthorDate: Fri Nov 22 12:48:52 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 22 12:48:52 2019 -0800 - - tweak consensus to mitigate junk tails - -M lib/rust/enclone/src/bin/enclone.rs - -commit 7fc016793611207f1cd71467f9d8dac67df3cdc3 -Author: David Jaffe -AuthorDate: Fri Nov 22 11:24:06 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 22 11:24:06 2019 -0800 - - fix overflow error in consensus calculation - -M lib/rust/enclone/src/bin/enclone.rs - -commit c212cbfd3fab4fee359fdde9b235d1423a586979 -Author: David Jaffe -AuthorDate: Fri Nov 22 11:19:32 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 22 11:19:32 2019 -0800 - - fix at con_con - -M lib/rust/enclone/src/bin/enclone.rs - -commit 546c285be333f31230d24438ee5275118fa09c16 -Author: David Jaffe -AuthorDate: Fri Nov 22 07:01:59 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 22 07:01:59 2019 -0800 - - allow filtering for constant regions diffs - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/proc_args.rs - -commit 23c1f6a1532b9e8acb7cc32c527548f51438706d -Author: David Jaffe -AuthorDate: Thu Nov 21 16:42:15 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 16:42:15 2019 -0800 - - speed up by moving main to bin - -R099 lib/rust/enclone/src/main.rs lib/rust/enclone/src/bin/enclone.rs - -commit b1b28c2434299933449e9a7da2a83617024ad389 -Author: David Jaffe -AuthorDate: Thu Nov 21 16:28:53 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 16:28:53 2019 -0800 - - more parseable output doc and some abbreviation throughout - -M lib/rust/enclone/src/help.rs - -commit 6b8762a0afe9a613d40cb77b746f67a3666d8320 -Author: David Jaffe -AuthorDate: Thu Nov 21 15:56:38 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 15:56:38 2019 -0800 - - start of parseable output documentation - -M lib/rust/enclone/src/help.rs - 
-commit 8b3e299973c580c0d03e4188af064c3e1bf313b3 -Author: David Jaffe -AuthorDate: Thu Nov 21 13:29:53 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 13:29:53 2019 -0800 - - add to test - -M lib/rust/enclone/test/inputs/enclone_test4_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 41dd89209a90f4f2c5fe99859e3734a37f5effbe -Author: David Jaffe -AuthorDate: Thu Nov 21 13:16:49 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 13:16:49 2019 -0800 - - finally, fix nest of bugs around SEQC etc. - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 71715ee9f7c18ad0868f7ef88ac31842173acbc4 -Author: David Jaffe -AuthorDate: Thu Nov 21 13:01:39 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 13:01:39 2019 -0800 - - still isolating the bane of all creation - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3a68aa12b38244b83c57eab6fac2a25f9aa2a145 -Author: David Jaffe -AuthorDate: Thu Nov 21 12:52:19 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 12:52:19 2019 -0800 - - move very annoying chunk of code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1c19ad1be4930b85ac23250fafd382dff39c9ec1 -Author: David Jaffe -AuthorDate: Thu Nov 21 12:47:45 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 12:47:45 2019 -0800 - - isolate very annoying chunk of code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 036ae0cf1056a8ff076b451aa253a312f5d6d319 -Author: David Jaffe -AuthorDate: Thu Nov 21 12:40:42 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 12:40:42 2019 -0800 - - oye, more setting up - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 92f31af5ca137f73b76f8dad7ee2553947a4aabe -Author: David Jaffe -AuthorDate: Thu Nov 21 12:35:13 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 12:35:13 2019 -0800 - - setting up for some refactorization - -M lib/rust/enclone/src/print_clonotypes.rs - -commit e7bffc74646e24de03609e632ba5cffa866c4322 -Author: David Jaffe -AuthorDate: Thu Nov 21 12:00:29 2019 
-0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 12:00:29 2019 -0800 - - another code move - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 119041145f5e9094c3e8d4aa91174686029c46c2 -Author: David Jaffe -AuthorDate: Thu Nov 21 11:55:51 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 11:55:51 2019 -0800 - - move another block of code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 0293dfe242669ae711eebc5302d5b66e73a7da40 -Author: David Jaffe -AuthorDate: Thu Nov 21 11:45:28 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 11:45:28 2019 -0800 - - move ginormous block of code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 9b458b105eae50f5a17b177372e7b3a2a2f6cd07 -Author: David Jaffe -AuthorDate: Thu Nov 21 11:16:32 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 11:16:32 2019 -0800 - - start of FAQ - -M lib/rust/enclone/src/help.rs - -commit 5279feeecb2ae9e4b66a6e1e1315c16d2b36fb10 -Author: David Jaffe -AuthorDate: Thu Nov 21 10:34:56 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 10:34:56 2019 -0800 - - now can write rust file for Loupe - -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/types.rs - -commit ebc45f25a35d9de673d4c3a936661925b0425b7d -Author: David Jaffe -AuthorDate: Thu Nov 21 09:34:34 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 09:34:34 2019 -0800 - - now loupe output uses bona fide exact subclonotypes consensuses - -M lib/rust/enclone/src/print_clonotypes.rs - -commit b7d60a5e0bd2708d9b0f6b3c3675b9d3527e7734 -Author: David Jaffe -AuthorDate: Thu Nov 21 09:24:19 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 09:24:19 2019 -0800 - - move some code out of main.rs - -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/misc.rs - -commit 
3587f945f477a6decaf5e9bd7923a09f343ba5e2 -Author: David Jaffe -AuthorDate: Thu Nov 21 09:11:39 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 09:11:39 2019 -0800 - - tweak help menu - -M lib/rust/enclone/src/help.rs - -commit df2a4f8156c893b9583101059758a3938d2fa16f -Author: David Jaffe -AuthorDate: Thu Nov 21 09:08:16 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 09:08:16 2019 -0800 - - implement udiff - -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 14a110c5285b461c35917fba604d81caa6bdf697 -Author: David Jaffe -AuthorDate: Thu Nov 21 08:37:32 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 08:37:32 2019 -0800 - - fix bug in computation of j_stop - -M lib/rust/enclone/src/main.rs - -commit 47e82475e706712bd1600b4acb77307b847dd72b -Author: David Jaffe -AuthorDate: Thu Nov 21 08:20:31 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 08:20:31 2019 -0800 - - add options SEQCS and FULL_SEQCS - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit a7775b20af41824a11a0ed02dfa93964bdefe22b -Author: David Jaffe -AuthorDate: Thu Nov 21 08:11:30 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 08:11:30 2019 -0800 - - track reordering of rows - -M lib/rust/enclone/src/print_clonotypes.rs - -commit e880506a5eeed416c5d772795b781b27d93c2dbe -Author: David Jaffe -AuthorDate: Thu Nov 21 07:21:38 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 07:21:38 2019 -0800 - - add cdiff and FULL_SEQC - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 467b1e77d43f926139867d10c597a20938d9e9ac -Author: David Jaffe -AuthorDate: Thu Nov 21 04:20:54 2019 -0800 -Commit: David 
Jaffe -CommitDate: Thu Nov 21 04:20:54 2019 -0800 - - first stab at consensus - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/read_json.rs - -commit 2e86e467cf7302d1308f4c6681eed053c022767d -Author: David Jaffe -AuthorDate: Thu Nov 21 03:00:48 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 03:00:48 2019 -0800 - - add comments - -M lib/rust/enclone/src/defs.rs - -commit 7a09eccba3531055e0cf0cca300b35a2a8b27218 -Author: David Jaffe -AuthorDate: Thu Nov 21 02:57:08 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 02:57:08 2019 -0800 - - tidy - -M lib/rust/enclone/src/main.rs - -commit 2149b32e3a42cda0cf3436339583fb885f6df07f -Author: David Jaffe -AuthorDate: Thu Nov 21 02:45:45 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 02:45:45 2019 -0800 - - fix bug in UTR_CON - -M lib/rust/enclone/src/main.rs - -commit f17885ed2acb66a2315bf47eb9e01f13040da20c -Author: David Jaffe -AuthorDate: Thu Nov 21 02:45:04 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 21 02:45:04 2019 -0800 - - clarify comments - -M lib/rust/enclone/src/defs.rs - -commit c6a03ba93ff704f07f6d3a2b2b863567af70dd6d -Author: David Jaffe -AuthorDate: Wed Nov 20 15:46:01 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 15:46:01 2019 -0800 - - now actually make an EncloneOutput - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/types.rs - -commit b6533631777e23f6dad9a6869a3ee20d13aff610 -Author: David Jaffe -AuthorDate: Wed Nov 20 14:54:01 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 14:54:01 2019 -0800 - - now create all_loupe_clonotypes - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 38bd791f1429f3c626a80596898fee2e5080f778 -Author: David Jaffe -AuthorDate: Wed Nov 20 14:37:48 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 14:37:48 2019 -0800 - - now make an actual "Loupe" 
Clonotype - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/types.rs - -commit dd003452e48234461ed68c91182a38f2c2a17e62 -Author: David Jaffe -AuthorDate: Wed Nov 20 14:17:23 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 14:17:23 2019 -0800 - - now create ClonotypeChains - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 05cbfa59e55cdb0d83aa182ef5b62c3a51c66e16 -Author: David Jaffe -AuthorDate: Wed Nov 20 13:26:13 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 13:26:13 2019 -0800 - - now beta comes before alpha, rather than the other way - -M lib/rust/enclone/src/types.rs - -commit 89c6ce3bdc17941ee233f3272531729f96de0ec8 -Author: David Jaffe -AuthorDate: Wed Nov 20 11:56:03 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 11:56:03 2019 -0800 - - expand test - -M lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/tests/enclone_test.rs - -commit c5c3706040da376bff0a98666902320702409eb3 -Author: David Jaffe -AuthorDate: Wed Nov 20 11:03:45 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 11:03:45 2019 -0800 - - add cdr3_dna as CVARS field - -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 33a0d37fffe12a722a9251b5359f6499185b0fe8 -Author: David Jaffe -AuthorDate: Wed Nov 20 10:51:24 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 10:51:24 2019 -0800 - - add measure of CDR3 complexity - -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 88081c5b4db70490d29e8927ff80e311b13a0bae -Author: David Jaffe -AuthorDate: Wed Nov 20 05:10:42 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 05:10:42 2019 -0800 - - build Immcantation inputs - -A lib/rust/enclone/src/bin/build_immcantation_inputs.rs - -commit 20e13f60abe41bc8d1acb7f7c7add48f09641909 -Author: David Jaffe -AuthorDate: Wed 
Nov 20 04:08:42 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 04:08:42 2019 -0800 - - fix bug and add test for it - -M lib/rust/enclone/src/print_clonotypes.rs -A lib/rust/enclone/test/inputs/enclone_test6_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 5f4732a241ed7509dbc76ef14172ba4747a611f5 -Author: David Jaffe -AuthorDate: Wed Nov 20 03:57:01 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 03:57:01 2019 -0800 - - various changes to comments - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6ffb2199bcc704c8d707a1ee4eac38f5acd0947f -Author: David Jaffe -AuthorDate: Wed Nov 20 03:43:21 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 03:43:21 2019 -0800 - - factor some code out of main.rs - -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/misc.rs - -commit f64cd3bfd84ef05ca70df98092ec8205a6e92235 -Author: David Jaffe -AuthorDate: Wed Nov 20 03:30:57 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 20 03:30:57 2019 -0800 - - make an EClonoType - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/types.rs - -commit 7658d22c2e1fcff458bec74b3d5e6317e8b6b994 -Author: David Jaffe -AuthorDate: Tue Nov 19 15:58:37 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 15:58:37 2019 -0800 - - factor out test for barcode reuse - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -A lib/rust/enclone/src/misc.rs - -commit a3000e72ebf66e08bfa0d4c0aae18a855011e876 -Author: David Jaffe -AuthorDate: Tue Nov 19 15:47:28 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 15:47:28 2019 -0800 - - tighten arg checking - -M lib/rust/enclone/src/proc_args.rs - -commit b61b3d0e4b3d325a63968533e95faed16d853fd1 -Author: David Jaffe -AuthorDate: Tue Nov 19 15:33:36 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 15:33:36 2019 -0800 - - factor out code to substitute in alleles - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/main.rs - -commit 2268413018ccd968877ca1d2595dfc6977b6cf95 
-Author: David Jaffe -AuthorDate: Tue Nov 19 15:16:41 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 15:16:41 2019 -0800 - - add special option to dump lena ids - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/proc_args.rs - -commit 5630f11d747f897f63ce1ed8f7297347d9b5f235 -Author: David Jaffe -AuthorDate: Tue Nov 19 14:42:27 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 14:42:27 2019 -0800 - - factor out some loupe stuff - -M lib/rust/enclone/src/lib.rs -A lib/rust/enclone/src/loupe.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 245872fe6653c11b71c6288ef41a7e9ca7054296 -Author: David Jaffe -AuthorDate: Tue Nov 19 14:12:52 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 14:12:52 2019 -0800 - - remove some dead code allowances - -M lib/rust/enclone/src/types.rs - -commit eab60de1c99aab9d6dc88047bb1770283708d768 -Author: David Jaffe -AuthorDate: Tue Nov 19 13:58:44 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 13:58:44 2019 -0800 - - finally, define ExactClonotypeChain - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 54e0788b8665e58172cabceb7d05064c4e541a44 -Author: David Jaffe -AuthorDate: Tue Nov 19 13:44:14 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 13:44:14 2019 -0800 - - define temp consensus alignment - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 4bd8fd749778bfdda4e626211baf377833448ebd -Author: David Jaffe -AuthorDate: Tue Nov 19 13:33:11 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 13:33:11 2019 -0800 - - factor out some duplicated code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit cd9261eb1e22270423b40b27121edd0ea004a497 -Author: David Jaffe -AuthorDate: Tue Nov 19 11:05:43 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 19 11:05:43 2019 -0800 - - allow X= as arg, as alternative to X - -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/proc_args.rs - -commit 
eb2d1f96ddc46cb400f46b887e2d5f1018fede69 -Author: David Jaffe -AuthorDate: Mon Nov 18 14:36:20 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 18 14:36:20 2019 -0800 - - mine environment variables - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/proc_args.rs - -commit 1b402ab0608e4a78e045b1e49c26d2ad580ae403 -Author: David Jaffe -AuthorDate: Mon Nov 18 14:13:07 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 18 14:13:07 2019 -0800 - - update - -M lib/rust/enclone/src/tour - -commit 6b3ae75fce1f1bfd6d57e154c776cab81ae930a2 -Author: David Jaffe -AuthorDate: Mon Nov 18 12:03:03 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 18 12:03:03 2019 -0800 - - add to the tour - -M lib/rust/enclone/src/tour - -commit b436592bdd4d7a1842e7c7a581fd4f6ac2f16dad -Author: David Jaffe -AuthorDate: Mon Nov 18 08:27:30 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 18 08:27:30 2019 -0800 - - reorg - -M lib/rust/enclone/src/tour - -commit ce5aa11641bbc230753532e960f80915a36762ed -Author: David Jaffe -AuthorDate: Sun Nov 17 09:59:36 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 09:59:36 2019 -0800 - - fix bug in last commit - -M lib/rust/enclone/src/explore.rs - -commit f9b9335394b8cd9f218a5d85093741b87c901fa5 -Author: David Jaffe -AuthorDate: Sun Nov 17 09:55:15 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 09:55:15 2019 -0800 - - factor exploratory code out of main - -A lib/rust/enclone/src/explore.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs - -commit c1d0b35e2e0ab02aa6a108ae1e0ab3b054b49a11 -Author: David Jaffe -AuthorDate: Sun Nov 17 09:10:51 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 09:10:51 2019 -0800 - - expand doc for foursies etc. 
- -M lib/rust/enclone/src/help.rs - -commit 684d1a7199e38d9af9a01c756015bb1151406f5d -Author: David Jaffe -AuthorDate: Sun Nov 17 09:00:40 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 09:00:40 2019 -0800 - - catch unknown help requests - -M lib/rust/enclone/src/help.rs - -commit 5076375d4f55fad82ed3c684eceef88b44a8acad -Author: David Jaffe -AuthorDate: Sun Nov 17 07:31:11 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 07:31:11 2019 -0800 - - update - -M lib/rust/enclone/src/tour - -commit ce01bd27072808bb60c2acd562e05e2b7bfa3107 -Author: David Jaffe -AuthorDate: Sun Nov 17 07:25:34 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 07:25:34 2019 -0800 - - another garden - -A lib/rust/enclone/src/tour - -commit 2847fe306853a33b0dd24a14d00ca29e37cae418 -Author: David Jaffe -AuthorDate: Sun Nov 17 06:25:30 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 06:25:30 2019 -0800 - - define donor reference alignment - -M lib/rust/enclone/src/print_clonotypes.rs - -commit b34573aa54e0ed42a2a17f646cf05122be7b3076 -Author: David Jaffe -AuthorDate: Sun Nov 17 05:47:13 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 05:47:13 2019 -0800 - - update test output - -M lib/rust/enclone/src/enclone.out - -commit e32448feb6a1bf9230066e3a831dc45715f2c032 -Author: David Jaffe -AuthorDate: Sun Nov 17 05:38:51 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 05:38:51 2019 -0800 - - clarify doc - -M lib/rust/enclone/src/README - -commit a766c645c1a7f288f43639eb5ea5db5681f16a90 -Author: David Jaffe -AuthorDate: Sun Nov 17 05:24:53 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 05:24:53 2019 -0800 - - tidy - -M lib/rust/enclone/src/print_clonotypes.rs - -commit bdd904dd903f182079f54713d07388c256a1fe18 -Author: David Jaffe -AuthorDate: Sun Nov 17 05:19:56 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 05:19:56 2019 -0800 - - make concatenated donor reference sequences - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 
f9e941329a86172ba366814af74322baaaadc459 -Author: David Jaffe -AuthorDate: Sun Nov 17 05:09:49 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 05:09:49 2019 -0800 - - factor out group_and_print_clonotypes - -A lib/rust/enclone/src/group.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit fa818b817fb40c50eb049b3f759b31dc3cc417da -Author: David Jaffe -AuthorDate: Sun Nov 17 04:48:01 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 17 04:48:01 2019 -0800 - - define vpids - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 9da79255fc02b9cda51d44b3ec50d4042050ff02 -Author: David Jaffe -AuthorDate: Sat Nov 16 16:41:17 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 16:41:17 2019 -0800 - - oops move these from bcr list to tcr list - -M lib/rust/enclone/src/enclone.testdata -M lib/rust/enclone/src/enclone.testdata.tcr - -commit d82058c7dc64c9ee1ed0c433a7fa731710899cf8 -Author: David Jaffe -AuthorDate: Sat Nov 16 14:35:07 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 14:35:07 2019 -0800 - - add to help ideas - -M lib/rust/enclone/src/help.rs - -commit a762d41f0f8a9ec121555d2dafb7c25cf94e073b -Author: David Jaffe -AuthorDate: Sat Nov 16 07:39:06 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 07:39:06 2019 -0800 - - tweak color scheme - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/test/inputs/enclone_test3_output - -commit 572c7f9c8de10408e12b4c0624b9885aa9b43f44 -Author: David Jaffe -AuthorDate: Sat Nov 16 07:32:01 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 07:32:01 2019 -0800 - - beautify and unify group printing - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/test/inputs/enclone_test3_output - -commit ecd420b26bc995814d1373811bf838e999acbf13 -Author: David Jaffe -AuthorDate: Sat Nov 16 06:51:40 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 06:51:40 2019 -0800 - - solve the onesie FP problem - -M 
lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/graph_filter.rs - -commit 1c4e03c7e57a446476939d96dc8da4e8a4fd31c8 -Author: David Jaffe -AuthorDate: Sat Nov 16 05:47:25 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 05:47:25 2019 -0800 - - fix bug in HAVE_ONESIE - -M lib/rust/enclone/src/filter.rs - -commit 76444ab88a6b6a33d0854141dcfd9d5532ada6e1 -Author: David Jaffe -AuthorDate: Sat Nov 16 05:34:05 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 05:34:05 2019 -0800 - - doc onesies etc. in glossary - -M lib/rust/enclone/src/help.rs - -commit 8abda1e6e35410c3a8533b5870b370ea005b7de3 -Author: David Jaffe -AuthorDate: Sat Nov 16 05:29:48 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 05:29:48 2019 -0800 - - add option HAVE_ONESIE - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/proc_args.rs - -commit fb0ab2e9f2ba6ad9deca7cd89333d7b19fd61ffc -Author: David Jaffe -AuthorDate: Sat Nov 16 04:52:38 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 04:52:38 2019 -0800 - - correct nomenclature - -M lib/rust/enclone/src/README - -commit 64e88fea560a9ee4c8e6214d1829714b423407ea -Author: David Jaffe -AuthorDate: Sat Nov 16 04:49:30 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 04:49:30 2019 -0800 - - update - -M lib/rust/enclone/src/enclone.out - -commit a3e42678d9339f472660898bff19fa420318a242 -Author: David Jaffe -AuthorDate: Sat Nov 16 04:46:03 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 04:46:03 2019 -0800 - - 35884 is TCR - -M lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/enclone.testdata -M lib/rust/enclone/src/enclone.testdata.tcr - -commit fa807d0e8d444d7ad107e4a6ef6a927db0b0e634 -Author: David Jaffe -AuthorDate: Sat Nov 16 04:44:28 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 04:44:28 2019 -0800 - - remove instances of barcode reuse - -M lib/rust/enclone/src/enclone.testdata -M 
lib/rust/enclone/src/enclone.testlist.all - -commit 1695322cec577fbe65c6ee31ed71dd64e198c632 -Author: David Jaffe -AuthorDate: Sat Nov 16 04:34:51 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 16 04:34:51 2019 -0800 - - new option NO_REUSE - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/enclone.test -M lib/rust/enclone/src/enclone.test2 -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/proc_args.rs - -commit b53bf9de52a66e2b85f229251098998b5a398a31 -Author: David Jaffe -AuthorDate: Fri Nov 15 15:52:22 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 15:52:22 2019 -0800 - - populate vp - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d5546b2b6edf8b6bb25758cd954805417eb38fbb -Author: David Jaffe -AuthorDate: Fri Nov 15 15:40:32 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 15:40:32 2019 -0800 - - add v_ref_id_donor in ExactClonotype and populate - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit ab8a9b194f1bcfe0ad2381d443b6a9ea57540841 -Author: David Jaffe -AuthorDate: Fri Nov 15 15:19:37 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 15:19:37 2019 -0800 - - update test results - -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/test/inputs/enclone_test3_output -M lib/rust/enclone/test/inputs/enclone_test4_output -M lib/rust/enclone/test/inputs/enclone_test5_output - -commit a5427b9630d77f8cdf30f0fb67b60e87720e5ea9 -Author: David Jaffe -AuthorDate: Fri Nov 15 15:18:34 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 15:18:34 2019 -0800 - - change labeling - -M lib/rust/enclone/src/print_clonotypes.rs - -commit b303f53811c2050bec685fddfffe7bd4181a5b50 -Author: David Jaffe -AuthorDate: Fri Nov 15 14:29:51 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 14:29:51 2019 -0800 - - setting up for donor reference changes - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 
f26e900ebdbcf6318a2f5cc13f298e6356892e1c -Author: David Jaffe -AuthorDate: Fri Nov 15 13:32:48 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 13:32:48 2019 -0800 - - add arg NPLAIN - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/proc_args.rs - -commit 976748b5a6b9727c595b8a678aea0b7ff570aaf1 -Author: David Jaffe -AuthorDate: Fri Nov 15 12:59:22 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 12:59:22 2019 -0800 - - define universal reference alignment - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 49a12c9cd031787a1c15673d9f9624e5284689b6 -Author: David Jaffe -AuthorDate: Fri Nov 15 11:36:32 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 11:36:32 2019 -0800 - - update - -M lib/rust/enclone/src/enclone.out - -commit dfc7ec05c36f3c60cedd6347fa6e557f86cb77c1 -Author: David Jaffe -AuthorDate: Fri Nov 15 11:32:58 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 11:32:58 2019 -0800 - - update test outputs - -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/test/inputs/enclone_test3_output -M lib/rust/enclone/test/inputs/enclone_test4_output -M lib/rust/enclone/test/inputs/enclone_test5_output - -commit d253ed25aa722b5259bebf913d387cfc3273f8f9 -Author: David Jaffe -AuthorDate: Fri Nov 15 11:27:31 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 11:27:31 2019 -0800 - - add a little color - -M lib/rust/enclone/src/print_clonotypes.rs - -commit fe25059101cb9b55f2c5938ce893be53abdd84ac -Author: David Jaffe -AuthorDate: Fri Nov 15 10:47:06 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 10:47:06 2019 -0800 - - some donor reference plumbing - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit af1de1133c766b84a5547c8cf84a5fc783911bbb -Author: David Jaffe -AuthorDate: Fri Nov 15 10:28:11 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 
10:28:11 2019 -0800 - - temporarily turn of a bunch of warnings in types.rs - -M lib/rust/enclone/src/types.rs - -commit 31760126df2cb447dc9e3a8e42ae1d036c53a173 -Author: David Jaffe -AuthorDate: Fri Nov 15 10:17:27 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 10:17:27 2019 -0800 - - construct donor reference for Loupe - -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/types.rs - -commit df3620953265b45fa78ef957b2436f0eb14d2ddc -Author: David Jaffe -AuthorDate: Fri Nov 15 09:38:06 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 09:38:06 2019 -0800 - - exact clonotype ==> exact subclonotype - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/types.rs - -commit fbc7949f8b82a2963555a53d432d3ff7c8faacab -Author: David Jaffe -AuthorDate: Fri Nov 15 08:04:56 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 08:04:56 2019 -0800 - - add arg MIN_GROUP - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 218be90343553defa1c58b9f5d99b84da87d9ca8 -Author: David Jaffe -AuthorDate: Fri Nov 15 07:52:56 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 07:52:56 2019 -0800 - - add first grouping mechanism - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 3ec3cea391afbb54a1a9ec8d296c6044a29159b4 -Author: David Jaffe -AuthorDate: Fri Nov 15 07:10:07 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 07:10:07 2019 -0800 - - update test results - -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/test/inputs/enclone_test3_output -M lib/rust/enclone/test/inputs/enclone_test4_output -M 
lib/rust/enclone/test/inputs/enclone_test5_output - -commit 688482fc43ef4dbc5b74dea052b44d9b15cba04d -Author: David Jaffe -AuthorDate: Fri Nov 15 07:04:35 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 07:04:35 2019 -0800 - - emit clonotypes groups (trivial for now) - -M lib/rust/enclone/src/print_clonotypes.rs - -commit abc081b60e2cd004a5e79b74f3b6a8ff1ec14db2 -Author: David Jaffe -AuthorDate: Fri Nov 15 05:43:18 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 05:43:18 2019 -0800 - - factor filter.rs from clonotypes.rs - -A lib/rust/enclone/src/filter.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6221bf64410e0aa8db5659dcb273042bf092cfd5 -Author: David Jaffe -AuthorDate: Fri Nov 15 05:17:46 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 05:17:46 2019 -0800 - - a little prep for refactoring - -M lib/rust/enclone/src/print_clonotypes.rs - -commit f558cf9446ecd7b491911240805a7b5a764766fc -Author: David Jaffe -AuthorDate: Fri Nov 15 04:47:34 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 04:47:34 2019 -0800 - - work in progress to instantiate ExactClonotypeChain - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1c2557415a9161ff39d6b0663b20a137fc93e808 -Author: David Jaffe -AuthorDate: Fri Nov 15 04:38:28 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 04:38:28 2019 -0800 - - work in progress to instantiate ExactClonotypeChain - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 7b6c39a02eba18699d78f0d0ec05d83291d93139 -Author: David Jaffe -AuthorDate: Fri Nov 15 04:24:44 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 04:24:44 2019 -0800 - - work in progress to instantiate ExactClonotypeChain - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/types.rs - -commit ca91ac4cbb228bdefaff7b6d965136ef2dc92354 -Author: David Jaffe -AuthorDate: Fri Nov 15 03:55:29 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 15 03:55:29 
2019 -0800 - - add alignment to concat univ ref - -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit c99b59609ef4a70a042b56cd27ff4fc1a0befaed -Author: David Jaffe -AuthorDate: Thu Nov 14 16:19:05 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 16:19:05 2019 -0800 - - make use of h5 file the default - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/test/inputs/.gitattributes -D lib/rust/enclone/test/inputs/85679/outs/raw_feature_bc_matrix/matrix.bin -A lib/rust/enclone/test/inputs/85679/outs/raw_gene_bc_matrices_h5.h5 - -commit 6957a5f84534a20b9d9047db13c6c4be333f7afe -Author: David Jaffe -AuthorDate: Thu Nov 14 15:53:26 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 15:53:26 2019 -0800 - - slight correction - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6c7820e5cdb4bf814dff55ce4cd5b364dbe0d1d2 -Author: David Jaffe -AuthorDate: Thu Nov 14 15:50:27 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 15:50:27 2019 -0800 - - make a bit more efficient - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 20792fb30789c22021feb4434dfd0bc6db1a3de5 -Author: David Jaffe -AuthorDate: Thu Nov 14 15:36:10 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 15:36:10 2019 -0800 - - reduce redundant computation - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 79cd8a6284b8f4309f625bbe65c87c2a26a526d9 -Author: David Jaffe -AuthorDate: Thu Nov 14 14:52:23 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 14:52:23 2019 -0800 - - omg still trying to fix bug - -M lib/rust/enclone/src/proc_args.rs - -commit 9b7e398eb55cc9d0efbedbc144cc5f85451f777f -Author: David Jaffe -AuthorDate: Thu Nov 14 14:43:11 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 14:43:11 2019 -0800 - - again maybe fix a bug - -M lib/rust/enclone/src/proc_args.rs - -commit 
927d84063135acaae9c2db1364351d4921a668d3 -Author: David Jaffe -AuthorDate: Thu Nov 14 14:20:05 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 14:20:05 2019 -0800 - - fix bug, maybe - -M lib/rust/enclone/src/proc_args.rs - -commit 69caef5bddd4439d553ff4b74d329134f5d122b8 -Author: David Jaffe -AuthorDate: Thu Nov 14 13:52:50 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 13:52:50 2019 -0800 - - option to use h5 file - -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 27b05748c9604002bb3672aeaf65a705c60b6686 -Author: David Jaffe -AuthorDate: Thu Nov 14 06:59:17 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 06:59:17 2019 -0800 - - update - -M lib/rust/enclone/src/enclone.testlist.all - -commit d5bfb8afaf58679bfabacc361125a78dfc494671 -Author: David Jaffe -AuthorDate: Thu Nov 14 06:42:22 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 06:42:22 2019 -0800 - - update - -M lib/rust/enclone/src/enclone.testdata2 - -commit c56fd917f4dbfc70c736125dd0713c67dff5d1c4 -Author: David Jaffe -AuthorDate: Thu Nov 14 06:22:57 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 06:22:57 2019 -0800 - - add lenas - -M lib/rust/enclone/src/enclone.testdata2 - -commit beae7d9edcfe8c78f6f29b5906f1e13c62b47780 -Author: David Jaffe -AuthorDate: Thu Nov 14 06:08:11 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 06:08:11 2019 -0800 - - fix bug in concat ref code - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 602d4fc0f47429a27a04a2ad0640a9c00e51f636 -Author: David Jaffe -AuthorDate: Thu Nov 14 05:26:20 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 05:26:20 2019 -0800 - - define concatenated reference sequences - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 64846dcd0e44267ac19ed54c51b4cadc901596c4 -Author: David Jaffe -AuthorDate: Thu Nov 14 05:05:20 2019 
-0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 05:05:20 2019 -0800 - - delete old code to compute ref sequence identifiers - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3fa535a58d1aeadb5e357c2b35be2be7d9891e51 -Author: David Jaffe -AuthorDate: Thu Nov 14 04:51:54 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 14 04:51:54 2019 -0800 - - new computation of reference sequence identifiers - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5ef519def412f16fba72c2585223db9c3c17e574 -Author: David Jaffe -AuthorDate: Wed Nov 13 14:37:22 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 14:37:22 2019 -0800 - - lock versions on rust-toolbox crates - -M lib/rust/enclone/Cargo.toml - -commit 91f2019fbd36b7d83b40379c7d8e536da59c5557 -Author: David Jaffe -AuthorDate: Wed Nov 13 12:47:19 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 12:47:19 2019 -0800 - - oops forgot json - -M lib/rust/enclone/test/inputs/.gitattributes - -commit 5414778d105bdcc3ad28a0244de9137400085de9 -Author: David Jaffe -AuthorDate: Wed Nov 13 10:26:38 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 10:26:38 2019 -0800 - - fix bug - -M lib/rust/enclone/src/proc_args.rs - -commit 94b7dd5fe05441bfa9dd36a789c332564c56fe77 -Author: David Jaffe -AuthorDate: Wed Nov 13 09:36:41 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 09:36:41 2019 -0800 - - add help on feature ideas - -M lib/rust/enclone/src/help.rs - -commit 912c37db57373a534c2b4b73181803396153ba52 -Author: David Jaffe -AuthorDate: Wed Nov 13 07:29:32 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 07:29:32 2019 -0800 - - fix bug in setting of pre - -M lib/rust/enclone/src/proc_args.rs - -commit 3a0d7444ebc96c23576273777f2aa46fe7c77db6 -Author: David Jaffe -AuthorDate: Wed Nov 13 05:22:54 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 05:22:54 2019 -0800 - - move u_ref_id from TigData0 to TigData1 - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M 
lib/rust/enclone/src/read_json.rs - -commit 203dc6965dcd05dc372051a80bd02affa9520a80 -Author: David Jaffe -AuthorDate: Wed Nov 13 05:11:57 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 05:11:57 2019 -0800 - - move c_ref_id from TigData0 to TigData1 - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1e24d4d357a136bdcd7d5c4707de382548f47a6c -Author: David Jaffe -AuthorDate: Wed Nov 13 05:01:41 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 05:01:41 2019 -0800 - - c_ref_is is now Option, not isize - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/read_json.rs - -commit 010e14bcc0831e9dccab454fa56da6c5abfd9a53 -Author: David Jaffe -AuthorDate: Wed Nov 13 04:44:47 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 04:44:47 2019 -0800 - - utr_id ==> u_ref_id to normalize notation - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/read_json.rs - -commit 274e86159dd7cfcf75277bc39225086be0ff5628 -Author: David Jaffe -AuthorDate: Wed Nov 13 04:39:40 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 04:39:40 2019 -0800 - - define ref identifiers, com'ed out for now - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 28decfba47f6ed84c5ab7cf01fdfd8eb7939a0de -Author: David Jaffe -AuthorDate: Wed Nov 13 04:22:56 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 04:22:56 2019 -0800 - - define mat - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 13803cc3f854b5a1f8067295b08aa900d525e672 -Author: David Jaffe -AuthorDate: Wed Nov 13 04:05:16 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 13 04:05:16 2019 -0800 - - set up for building better ref seq identifers - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/proc_args.rs - -commit 57de9051c6c0b1863ed769f6c4d3597cf360b146 -Author: David Jaffe -AuthorDate: Tue Nov 12 14:53:56 
2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 14:53:56 2019 -0800 - - fix doc typo - -M lib/rust/enclone/src/README - -commit 157e015f383f99ae0c43c36234759d4362d15028 -Author: David Jaffe -AuthorDate: Tue Nov 12 14:43:39 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 14:43:39 2019 -0800 - - add "enclone help cvars" - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help.rs - -commit a64e79a106eb3e4209ec42cd284c69c7dd192bca -Author: David Jaffe -AuthorDate: Tue Nov 12 14:01:57 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 14:01:57 2019 -0800 - - fix CDR3 option - -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/proc_args.rs - -commit 5a825a13020101bda3894a38fd606672b12d46ef -Author: David Jaffe -AuthorDate: Tue Nov 12 13:48:27 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 13:48:27 2019 -0800 - - miceo baby steps towards loupe output - -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/proc_args.rs -M lib/rust/enclone/src/types.rs - -commit 59e36f44ef36c786a25a908a30a11e02231735de -Author: David Jaffe -AuthorDate: Tue Nov 12 13:20:21 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 13:20:21 2019 -0800 - - improve proc_args interface - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/proc_args.rs - -commit 32244ed520e80117df0885dd74475adda1765582 -Author: David Jaffe -AuthorDate: Tue Nov 12 13:15:40 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 13:15:40 2019 -0800 - - factor out argument processing - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -A lib/rust/enclone/src/proc_args.rs - -commit 30190844a43fcf7b97c00c2cc2b73c1d1656038c -Author: David Jaffe -AuthorDate: Tue Nov 12 11:25:27 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 11:25:27 2019 -0800 - - tidying - -M lib/rust/enclone/src/main.rs - -commit 
e0dbec97a57acb01d71e85efdc8e8d7612a6ec09 -Author: David Jaffe -AuthorDate: Tue Nov 12 11:20:43 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 11:20:43 2019 -0800 - - refactor exact - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit 39201a97c76965cf3b2373b4d08c9cbb78cb905b -Author: David Jaffe -AuthorDate: Tue Nov 12 11:09:30 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 11:10:53 2019 -0800 - - refactor ncells_min - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit f6b71bd204a2a75681c5e8e59a626f912cd7ccff -Author: David Jaffe -AuthorDate: Tue Nov 12 10:59:59 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 11:10:53 2019 -0800 - - more arg processing refactoring - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit 79d7975979730b8349606da729f8e6bbdcf56379 -Author: David Jaffe -AuthorDate: Tue Nov 12 10:42:15 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 11:10:53 2019 -0800 - - some arg processing refactoring - -M lib/rust/enclone/src/main.rs - -commit 1560e282f1cdfb45445eca685d9021784b20b811 -Author: sreenathkrishnan -AuthorDate: Tue Nov 12 10:14:41 2019 -0800 -Commit: GitHub -CommitDate: Tue Nov 12 10:14:41 2019 -0800 - - First draft of enclone output structure (#2235) - - * First draft of enclone output structure - - * Fix mistakes in comments - - * Update types.rs - -A lib/rust/enclone/src/types.rs - -commit 9bad04416f2be363e2cde9b796746cbc8fc7dd44 -Author: David Jaffe -AuthorDate: Tue Nov 12 05:16:57 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 05:16:57 2019 -0800 - - more argument tidying - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit 1432c6ed42784015d5657ab3a85bcc756c4362a4 -Author: David Jaffe -AuthorDate: Tue Nov 12 05:05:48 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 05:05:48 2019 -0800 - - some argument tidying - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit 
d9a801ef1cc0bc43fbdab68d25330ef33aa229dc -Author: David Jaffe -AuthorDate: Tue Nov 12 04:51:53 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 04:51:53 2019 -0800 - - update notes - -M lib/rust/enclone/src/enclone.testdata2 - -commit 8a66164fa327337547c1b2dcf388661099d23776 -Author: David Jaffe -AuthorDate: Tue Nov 12 04:32:51 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 04:32:51 2019 -0800 - - left justify const and fix some issues with the tests - -M lib/rust/enclone/src/print_clonotypes.rs -D lib/rust/enclone/test/inputs/86237/outs/all_contig_annotations.json -A lib/rust/enclone/test/inputs/86237/outs/all_contig_annotations.json.lz4 -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/test/inputs/enclone_test3_output -M lib/rust/enclone/test/inputs/enclone_test4_output -M lib/rust/enclone/test/inputs/enclone_test5_output -M lib/rust/enclone/tests/enclone_test.rs - -commit ec86aafa6a5b6fad674726187eda8cfe53804900 -Author: David Jaffe -AuthorDate: Tue Nov 12 03:22:14 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 12 03:22:14 2019 -0800 - - full list of test lenas - -A lib/rust/enclone/src/enclone.testlist.all - -commit 14b691553a8d54d974bac86d4fb22924550a1362 -Author: David Jaffe -AuthorDate: Mon Nov 11 13:59:06 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 11 13:59:06 2019 -0800 - - add option VDUP - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 10c08bc7da7f43181d38e09cfc918ea5b58715ca -Author: David Jaffe -AuthorDate: Mon Nov 11 11:00:07 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 11 11:00:07 2019 -0800 - - replace TWO_LENAS by MIN_DATASETS - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 
6aa5870bf5f724fc866235e256429f72c351314a -Author: David Jaffe -AuthorDate: Sun Nov 10 07:58:10 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 10 07:58:10 2019 -0800 - - a bunch of tidying - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit dddacd450b4f5066901fc2c5d180645b239032b5 -Author: David Jaffe -AuthorDate: Sun Nov 10 07:42:54 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 10 07:42:54 2019 -0800 - - fix error check - -M lib/rust/enclone/src/main.rs - -commit 0fe299de1b7a31151c2fb9cc9dd7027cb7c94023 -Author: David Jaffe -AuthorDate: Sun Nov 10 07:38:09 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 10 07:38:09 2019 -0800 - - encapsulate argument processing - -M lib/rust/enclone/src/main.rs - -commit 1e965e7167eca65989859fd13a2e30d8a9853017 -Author: David Jaffe -AuthorDate: Sun Nov 10 06:37:19 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 10 06:37:19 2019 -0800 - - add option BARCODES - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit dff8e1a0a090cac8e3dd50250c32d4153807cf13 -Author: David Jaffe -AuthorDate: Sun Nov 10 06:09:20 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 10 06:09:20 2019 -0800 - - track read counts - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/read_json.rs - -commit e5f2bf4f43bfb5c271729b366b081767f9fdb25d -Author: David Jaffe -AuthorDate: Sun Nov 10 05:46:50 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 10 05:46:50 2019 -0800 - - fix bug in BCJOIN - -M lib/rust/enclone/src/join.rs - -commit ec51de280f85caaf945cbf3804587819313985a5 -Author: David Jaffe -AuthorDate: Sun Nov 10 05:41:51 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 10 05:41:51 2019 -0800 - - add option BCJOIN - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M 
lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/main.rs - -commit 45b94cea5a8e5e0d602a53bf32390d49230220e2 -Author: David Jaffe -AuthorDate: Sun Nov 10 04:49:42 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 10 04:49:42 2019 -0800 - - improve error behavior - -M lib/rust/enclone/src/main.rs - -commit e18466b22ba57d3c59f6bc724f0c79b58f490841 -Author: David Jaffe -AuthorDate: Sat Nov 9 17:45:34 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 17:45:34 2019 -0800 - - simplify table justification - -M lib/rust/enclone/src/help.rs - -commit 07876fe599fac10129a132d37c284ada56b5e2c6 -Author: David Jaffe -AuthorDate: Sat Nov 9 17:41:15 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 17:41:15 2019 -0800 - - add doc for lead variables - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help.rs - -commit e08442967802e0b38d8151f0d23a23a4a6a47b28 -Author: David Jaffe -AuthorDate: Sat Nov 9 16:38:06 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 16:38:06 2019 -0800 - - remove some redundant doc - -M lib/rust/enclone/src/README - -commit e7f1d4f70e43406a55cdd5e6f1a17b60f381fbc9 -Author: David Jaffe -AuthorDate: Sat Nov 9 16:19:44 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 16:19:44 2019 -0800 - - add argument SEGN - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 45c574762dc177fe8c13eafda4c333f5ba9e3b43 -Author: David Jaffe -AuthorDate: Sat Nov 9 16:05:12 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 16:05:12 2019 -0800 - - implement PLAIN for help - -M lib/rust/enclone/src/help.rs - -commit fdbca5b0307c6f976011c4897090b78e39b93849 -Author: David Jaffe -AuthorDate: Sat Nov 9 15:54:13 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 15:54:13 2019 -0800 - - change color - -M lib/rust/enclone/src/help.rs - -commit 85ca04a7cf7314e112e4b189af82c0a9a7561a95 -Author: David Jaffe -AuthorDate: 
Sat Nov 9 15:17:18 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 15:17:18 2019 -0800 - - add "Hate the colors" message - -M lib/rust/enclone/src/help.rs - -commit f94aeb6682fe5bfae0959b98e2f24cf6d91c1854 -Author: David Jaffe -AuthorDate: Sat Nov 9 12:01:24 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 12:01:24 2019 -0800 - - beautify - -M lib/rust/enclone/src/help.rs - -commit 42737bb10d32609a60a4619a1808ef7c2fdfbc7b -Author: David Jaffe -AuthorDate: Sat Nov 9 11:37:06 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 11:37:06 2019 -0800 - - add mission statement - -M lib/rust/enclone/src/help.rs - -commit 6af363a822ef220401d661bb4cd1580b790697f9 -Author: David Jaffe -AuthorDate: Sat Nov 9 10:55:27 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 10:55:27 2019 -0800 - - start of top level doc - -M lib/rust/enclone/src/help.rs - -commit f864ae34da340726e78084a4cc970f173d4a555a -Author: David Jaffe -AuthorDate: Sat Nov 9 10:48:31 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 10:48:31 2019 -0800 - - expand glossary - -M lib/rust/enclone/src/help.rs - -commit 7201dc1993ba58a4824acacbecceade312518ec0 -Author: David Jaffe -AuthorDate: Sat Nov 9 10:41:31 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 10:41:31 2019 -0800 - - nicify using macros - -M lib/rust/enclone/src/help.rs - -commit 0720ef3734761e874ff2c5ef780a23c9801429a4 -Author: David Jaffe -AuthorDate: Sat Nov 9 10:15:59 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 10:15:59 2019 -0800 - - start of glossary - -M lib/rust/enclone/src/help.rs - -commit 3a22875c268feb2070865360d9baad2ba313738e -Author: David Jaffe -AuthorDate: Sat Nov 9 07:47:58 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 07:47:58 2019 -0800 - - update - -M lib/rust/enclone/src/enclone.out - -commit d44129bc3b27bfc870911170a514816bebe0fb47 -Author: David Jaffe -AuthorDate: Sat Nov 9 07:36:27 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 07:36:27 2019 -0800 - - comment out 
putative cell lines - -M lib/rust/enclone/src/enclone.testdata2 - -commit a8ce6e3e3b4bbe0d1cf0008c22c0c437b2571246 -Author: David Jaffe -AuthorDate: Sat Nov 9 07:31:31 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 07:31:31 2019 -0800 - - set NSILENT - -M lib/rust/enclone/src/enclone.test -M lib/rust/enclone/src/enclone.test2 - -commit fddaedd285d9955ba6a9f5fe647f7ce2be633543 -Author: David Jaffe -AuthorDate: Sat Nov 9 07:23:15 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 07:23:15 2019 -0800 - - add notes - -M lib/rust/enclone/src/enclone.testdata2 - -commit 5ad7a72970cc4ec429005e56bbe94322cf2672ea -Author: David Jaffe -AuthorDate: Sat Nov 9 06:24:50 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 06:24:50 2019 -0800 - - tweak enclone help filter - -M lib/rust/enclone/src/help.rs - -commit 5d9c6ea8cef334563fd47e907884f667ba54885a -Author: David Jaffe -AuthorDate: Sat Nov 9 06:13:33 2019 -0800 -Commit: David Jaffe -CommitDate: Sat Nov 9 06:13:33 2019 -0800 - - add option V..J - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8544ec2b5062e4975855f2f74df535f56425cefe -Author: David Jaffe -AuthorDate: Fri Nov 8 16:38:26 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 16:38:26 2019 -0800 - - improve error handling - -M lib/rust/enclone/src/main.rs - -commit 9ba9989fc443e470cd26457355d4a96106896657 -Author: David Jaffe -AuthorDate: Fri Nov 8 15:27:56 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 15:27:56 2019 -0800 - - fix chain bolding for TCR - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/test/inputs/enclone_test5_output - -commit e6655f765de69c02d005a83a16c226ef8f9f91a7 -Author: David Jaffe -AuthorDate: Fri Nov 8 15:22:22 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 15:22:22 2019 -0800 - - add test and clean up .gitattributes files - -M 
lib/rust/enclone/test/inputs/.gitattributes -A lib/rust/enclone/test/inputs/101287/_invocation -A lib/rust/enclone/test/inputs/101287/outs/all_contig_annotations.json.lz4 -D lib/rust/enclone/test/inputs/123089/outs/.gitattributes -D lib/rust/enclone/test/inputs/85333/.gitattributes -A lib/rust/enclone/test/inputs/README -A lib/rust/enclone/test/inputs/enclone_test5_output -M lib/rust/enclone/tests/enclone_test.rs - -commit c2ce63df1f9060d64572418de4b444596bf56775 -Author: David Jaffe -AuthorDate: Fri Nov 8 14:31:04 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 14:31:04 2019 -0800 - - slight beautification - -M lib/rust/enclone/src/help.rs - -commit c61b4e2b04a2c5692cfac3d43bcec98b0f277e86 -Author: David Jaffe -AuthorDate: Fri Nov 8 14:17:57 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 14:17:57 2019 -0800 - - add arg MIN_EXACTS - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 7099e11273cae2d5f2da50853abb68fd46ec07e6 -Author: David Jaffe -AuthorDate: Fri Nov 8 12:02:06 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 12:02:06 2019 -0800 - - fix bug in last commit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1848567a3702d9896a3f6e68b50b65dacba76b4f -Author: David Jaffe -AuthorDate: Fri Nov 8 11:58:06 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 11:58:06 2019 -0800 - - add option NOTE_SIMPLE - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 96ee6e3813ef8c004d13eb70b898daa89ba42251 -Author: David Jaffe -AuthorDate: Fri Nov 8 10:56:12 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 10:56:12 2019 -0800 - - update - -M lib/rust/enclone/src/enclone.testdata2 - -commit 40af334be9dc59313cffc92af815eba7b300301e -Author: David Jaffe -AuthorDate: Fri Nov 8 10:47:48 2019 -0800 -Commit: David Jaffe 
-CommitDate: Fri Nov 8 10:47:48 2019 -0800 - - switch to PLAIN - -M lib/rust/enclone/src/enclone.test -M lib/rust/enclone/src/enclone.test2 - -commit e493415253300628d19078c43bf2d245e72b79cb -Author: David Jaffe -AuthorDate: Fri Nov 8 06:10:23 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 06:10:23 2019 -0800 - - add test - -A lib/rust/enclone/test/inputs/enclone_test4_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 524e813d7a99bf4b59b9b19b92eaf7fc250a817c -Author: David Jaffe -AuthorDate: Fri Nov 8 05:56:26 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 05:56:26 2019 -0800 - - add test datasets - -A lib/rust/enclone/test/inputs/.gitattributes -A lib/rust/enclone/test/inputs/85679/outs/metrics_summary_json.json -A lib/rust/enclone/test/inputs/85679/outs/raw_feature_bc_matrix/barcodes.tsv.gz -A lib/rust/enclone/test/inputs/85679/outs/raw_feature_bc_matrix/features.tsv.gz -A lib/rust/enclone/test/inputs/85679/outs/raw_feature_bc_matrix/matrix.bin -A lib/rust/enclone/test/inputs/86237/_invocation -A lib/rust/enclone/test/inputs/86237/outs/all_contig_annotations.json - -commit 47d48f7d19fe04346bca510e1395e0353c3ceb46 -Author: David Jaffe -AuthorDate: Fri Nov 8 05:41:47 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 05:41:47 2019 -0800 - - forgot to copy files - -M lib/rust/enclone/src/load_gex.rs - -commit f3ffde94638487643911f5f91be02f3ac9c8f510 -Author: David Jaffe -AuthorDate: Fri Nov 8 05:27:52 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 05:27:52 2019 -0800 - - fix buggy writing of matrix.bin - -M lib/rust/enclone/src/load_gex.rs - -commit 6acca5890255bbb8aeb14f2a81d090ace51abeda -Author: David Jaffe -AuthorDate: Fri Nov 8 05:05:16 2019 -0800 -Commit: David Jaffe -CommitDate: Fri Nov 8 05:05:16 2019 -0800 - - improve failure mode - -M lib/rust/enclone/src/load_gex.rs - -commit eaa2f37cae64a3643bb09db41928255ed69c42b2 -Author: David Jaffe -AuthorDate: Thu Nov 7 15:36:41 2019 -0800 -Commit: David Jaffe -CommitDate: Thu 
Nov 7 15:36:41 2019 -0800 - - add test - -M lib/rust/enclone/test/inputs/85333/.gitattributes -D lib/rust/enclone/test/inputs/85333/outs/all_contig_annotations.json -A lib/rust/enclone/test/inputs/85333/outs/all_contig_annotations.json.lz4 -A lib/rust/enclone/test/inputs/enclone_test3_output -M lib/rust/enclone/tests/enclone_test.rs - -commit f68959909ba17d1e1b2c8358548db8b169774a24 -Author: David Jaffe -AuthorDate: Thu Nov 7 13:57:18 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 13:57:18 2019 -0800 - - add a smallish dataset - -A lib/rust/enclone/test/inputs/85333/.gitattributes -A lib/rust/enclone/test/inputs/85333/_invocation -A lib/rust/enclone/test/inputs/85333/outs/all_contig_annotations.json - -commit 086e61c066ed24574732fd5642cd20f1ec86a5c3 -Author: David Jaffe -AuthorDate: Thu Nov 7 13:08:27 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 13:08:27 2019 -0800 - - update - -M lib/rust/enclone/src/enclone.testdata2 - -commit 42a8147d085563d30fa6205c94b118e1973a45b0 -Author: David Jaffe -AuthorDate: Thu Nov 7 10:51:20 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 10:51:20 2019 -0800 - - create tigs_amino - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/info.rs - -commit d3cda48602afc0b7e42eaa70302a264b1b83ada4 -Author: David Jaffe -AuthorDate: Thu Nov 7 05:38:57 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 05:38:57 2019 -0800 - - support CDR3=pattern - -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 00bb44bce54456784d677ddb27b94c5905f34fcb -Author: David Jaffe -AuthorDate: Thu Nov 7 04:43:57 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 04:43:57 2019 -0800 - - add command-line arg CHAINS - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/main.rs - -commit 0a94377fcd90333d2f37b699054d9fb30ffd556e -Author: David Jaffe -AuthorDate: Thu Nov 7 
04:36:24 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 04:36:24 2019 -0800 - - factor out loading of gene expression data - -M lib/rust/enclone/src/lib.rs -A lib/rust/enclone/src/load_gex.rs -M lib/rust/enclone/src/main.rs - -commit cfd027c881c1afa237504316345b09b4c4a8b5bb -Author: David Jaffe -AuthorDate: Thu Nov 7 04:13:35 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 04:13:35 2019 -0800 - - tidy - -M lib/rust/enclone/src/help.rs - -commit c860c2d3931eba7f97e6f4dd244701314c249d24 -Author: David Jaffe -AuthorDate: Thu Nov 7 04:11:45 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 04:11:45 2019 -0800 - - simplify - -M lib/rust/enclone/src/help.rs - -commit e18eb706b3c0d8c50f7ab291682e9faf4d42c2c3 -Author: David Jaffe -AuthorDate: Thu Nov 7 04:05:38 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 04:05:38 2019 -0800 - - move help to separate file - -A lib/rust/enclone/src/help.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs - -commit da010b7054b6c58e00209799467d9a002cb3fcce -Author: David Jaffe -AuthorDate: Thu Nov 7 03:55:48 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 03:55:48 2019 -0800 - - put hlines in help page - -M lib/rust/enclone/src/main.rs - -commit b681f46908170d40a3510273c1b3d49ca245e7df -Author: David Jaffe -AuthorDate: Thu Nov 7 03:47:28 2019 -0800 -Commit: David Jaffe -CommitDate: Thu Nov 7 03:47:28 2019 -0800 - - use \hline in print_tabular_vbox - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d8e594f90c70156327ff1b01d18d7a1be6ea4766 -Author: David Jaffe -AuthorDate: Wed Nov 6 14:19:23 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 6 14:19:23 2019 -0800 - - update - -M lib/rust/enclone/src/enclone.testdata2 - -commit c598a89507188594e61480538c47d4d7cc05c7da -Author: David Jaffe -AuthorDate: Wed Nov 6 13:35:29 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 6 13:35:29 2019 -0800 - - lenas from jo - -A lib/rust/enclone/src/enclone.test2 -A 
lib/rust/enclone/src/enclone.testdata2 - -commit d66a5d1bd27b4c386b80bf8b57929365cdafb441 -Author: David Jaffe -AuthorDate: Wed Nov 6 05:43:46 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 6 05:43:46 2019 -0800 - - bugfix to last commit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit e3e28ea26581004c10387413d87cdc1fc1b0db69 -Author: David Jaffe -AuthorDate: Wed Nov 6 05:33:41 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 6 05:33:41 2019 -0800 - - major change: now can show gene counts - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1827f6df8d5cf7b8190f120bffd9dab409152be5 -Author: David Jaffe -AuthorDate: Wed Nov 6 03:33:15 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 6 03:33:15 2019 -0800 - - update test outputs - -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/test/inputs/enclone_test2_output - -commit 98e494494415291ddf3ff40c912184469555f17b -Author: David Jaffe -AuthorDate: Wed Nov 6 03:28:36 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 6 03:28:36 2019 -0800 - - make chain splitting the default - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit 3ed253a259170d5fec1e16ab8fc1d8ae2a2a1399 -Author: David Jaffe -AuthorDate: Wed Nov 6 03:04:26 2019 -0800 -Commit: David Jaffe -CommitDate: Wed Nov 6 03:04:26 2019 -0800 - - add notes - -M lib/rust/enclone/src/main.rs - -commit 9b396f3940cbf2e8cf8fbe3e992e99e95b52395b -Author: David Jaffe -AuthorDate: Tue Nov 5 18:53:16 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 18:53:16 2019 -0800 - - add comments - -M lib/rust/enclone/tests/enclone_test.rs - -commit 5dbc9b5348feb34c5d0c54684dde507358300efc -Author: David Jaffe -AuthorDate: Tue Nov 5 15:42:38 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 15:42:38 2019 -0800 - - add option to compute heavy chain reuse - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit 
8f6502b5204c5058600f3694af99f9bbc0e80d97 -Author: David Jaffe -AuthorDate: Tue Nov 5 11:28:15 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 11:28:15 2019 -0800 - - start of online help menus - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit ba278f0dfc6fde0a200ceefa35d25df8c9e0f337 -Author: David Jaffe -AuthorDate: Tue Nov 5 10:48:52 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 10:48:52 2019 -0800 - - allow multiple segments for SEG - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 78e60234a892ebe8df117abbd90ab7cb5617b270 -Author: David Jaffe -AuthorDate: Tue Nov 5 10:29:04 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 10:29:04 2019 -0800 - - update test outputs - -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/test/inputs/enclone_test2_output - -commit 3c44a87a197b3231f4395b29ce52c2d6bf492e61 -Author: David Jaffe -AuthorDate: Tue Nov 5 10:22:31 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 10:22:31 2019 -0800 - - suppress datasets column if only one dataset - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit 45fddb0db56b9d8fb1684cc87f4a75444e4582a4 -Author: David Jaffe -AuthorDate: Tue Nov 5 10:11:40 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 10:11:40 2019 -0800 - - write "ref" instead of "reference" - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 7b57c26a82c3846a459b825835f223d51f199399 -Author: David Jaffe -AuthorDate: Tue Nov 5 07:20:57 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 07:20:57 2019 -0800 - - add filtering by segment names - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 28a49d0feeec74b9f9441d4e94dd2b9d183d304d -Author: David Jaffe -AuthorDate: Tue Nov 5 06:19:27 2019 -0800 -Commit: David Jaffe -CommitDate: Tue 
Nov 5 06:19:27 2019 -0800 - - update test outputs - -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/test/inputs/enclone_test2_output - -commit 66b6f2c21036374260ff184da7fb8b4565902f4e -Author: David Jaffe -AuthorDate: Tue Nov 5 06:16:21 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 06:16:21 2019 -0800 - - explain how to test - -A lib/rust/enclone/README - -commit 78c5ed8acae4e6d8c5294a2797e6d11ffc00c563 -Author: David Jaffe -AuthorDate: Tue Nov 5 06:13:45 2019 -0800 -Commit: David Jaffe -CommitDate: Tue Nov 5 06:13:45 2019 -0800 - - use empty circle symbol for reference holes - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 692d383a03dd1a9b482a57e18aa44d1a414ddb3c -Author: David Jaffe -AuthorDate: Mon Nov 4 17:32:16 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 4 17:32:16 2019 -0800 - - fix bug - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d71ed2c52f618b38025ff0873be5fb4f50d2797c -Author: David Jaffe -AuthorDate: Mon Nov 4 15:13:37 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 4 15:13:37 2019 -0800 - - delete debugging lines - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 13e0248318018d96407050ff51ad8f076b25acc7 -Author: David Jaffe -AuthorDate: Mon Nov 4 13:51:52 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 4 13:51:52 2019 -0800 - - explain the exclusion of the bridging codon - -M lib/rust/enclone/src/README - -commit 1ebcec09702772b354f1c824e6698fb4d5f3850d -Author: David Jaffe -AuthorDate: Mon Nov 4 13:22:30 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 4 13:22:30 2019 -0800 - - more dealing with the last codon problem - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 0ea434e85b7117292665093352f6afc0a6e41305 -Author: David Jaffe -AuthorDate: Mon Nov 4 13:02:34 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 4 13:02:34 2019 -0800 - - first step in dealing with the last codon problem - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 
97a4654ba12a24530fd1d6860f22ed5657bf6c7b -Author: David Jaffe -AuthorDate: Mon Nov 4 04:56:07 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 4 04:56:07 2019 -0800 - - simmplify - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 46c7c9325bce87a7394e0621253eafbeba3dab05 -Author: David Jaffe -AuthorDate: Mon Nov 4 04:53:26 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 4 04:53:26 2019 -0800 - - fix diff line in plain mode - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 36b6952d748bc5fa2a8abf43040e6179b5504c28 -Author: David Jaffe -AuthorDate: Mon Nov 4 01:55:51 2019 -0800 -Commit: David Jaffe -CommitDate: Mon Nov 4 01:55:51 2019 -0800 - - prospective documentation change - -M lib/rust/enclone/src/README - -commit 2d9d6594023fbd75319a3819c9f380afda830207 -Author: David Jaffe -AuthorDate: Sun Nov 3 12:19:58 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 3 12:19:58 2019 -0800 - - add comments and logging - -M lib/rust/enclone/tests/enclone_test.rs - -commit 8dc4c7fc4d2bd2cb782a8f39a84bb5372a00a40b -Author: David Jaffe -AuthorDate: Sun Nov 3 07:46:37 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 3 07:46:37 2019 -0800 - - nicify error messages - -M lib/rust/enclone/src/main.rs - -commit c4aec30f6a2543f98ac9060abdc1107bc4dd8b30 -Author: David Jaffe -AuthorDate: Sun Nov 3 05:47:13 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 3 05:47:13 2019 -0800 - - update test outputs - -M lib/rust/enclone/test/inputs/enclone_test1_output -M lib/rust/enclone/test/inputs/enclone_test2_output - -commit 68f52e13c782c5bd59136f53f3b1b06352d8e448 -Author: David Jaffe -AuthorDate: Sun Nov 3 05:44:44 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 3 05:44:44 2019 -0800 - - revamp the enclone tests - -M lib/rust/enclone/tests/enclone_test.rs - -commit 8372c5b602c77ae1ed53d519bb900d73ceb20338 -Author: David Jaffe -AuthorDate: Sun Nov 3 05:27:06 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 3 05:27:06 2019 -0800 - - add instructions for test 
failure - -M lib/rust/enclone/tests/enclone_test.rs - -commit 6642719d31832fa0843d2a441195b1b93a375ca0 -Author: David Jaffe -AuthorDate: Sun Nov 3 05:16:12 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 3 05:16:12 2019 -0800 - - fix bug - -M lib/rust/enclone/src/info.rs - -commit 5a6f880be83bf071035cd0a82ad0a90218e95cb0 -Author: David Jaffe -AuthorDate: Sun Nov 3 05:05:13 2019 -0800 -Commit: David Jaffe -CommitDate: Sun Nov 3 05:05:13 2019 -0800 - - add NOPRETTY option - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit f78a88ab69b76a533af640a6c9b9114928a6ddaa -Author: David Jaffe -AuthorDate: Sat Nov 2 06:20:07 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Nov 2 06:20:07 2019 -0700 - - move color escape sequences to tenkit2 - -D lib/rust/enclone/src/color.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8b2c29963799abbef1bf5d3c121188127d6b5a9d -Author: David Jaffe -AuthorDate: Sat Nov 2 06:11:19 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Nov 2 06:11:19 2019 -0700 - - tweak palette by bolding one color - -M lib/rust/enclone/src/color.rs - -commit f54d226f45eb52163eaafcd989a4f95f3ac02669 -Author: David Jaffe -AuthorDate: Sat Nov 2 06:07:27 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Nov 2 06:07:27 2019 -0700 - - refactor a bit and add PALETTE mode - -M lib/rust/enclone/src/README -A lib/rust/enclone/src/color.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6c5870998fbafa9a79992e510b751079b77fc060 -Author: David Jaffe -AuthorDate: Sat Nov 2 05:14:58 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Nov 2 05:14:58 2019 -0700 - - tweak handling of pre gex file locs - -M lib/rust/enclone/src/main.rs - -commit df09f35d59586dfb207b6b3c05238a36523e3510 -Author: David Jaffe -AuthorDate: Sat Nov 2 03:59:36 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Nov 2 03:59:36 2019 -0700 - - avoid 
string copies - -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 98298dfd62e26c2106ca73e4cda609ddc8901851 -Author: David Jaffe -AuthorDate: Sat Nov 2 03:47:54 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Nov 2 03:47:54 2019 -0700 - - midway towards avoiding string copies - -M lib/rust/enclone/src/main.rs - -commit 7548f8057ccdb02beb948ffa6a9a66f038a7ec21 -Author: David Jaffe -AuthorDate: Sat Nov 2 03:40:49 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Nov 2 03:40:49 2019 -0700 - - add clock - -M lib/rust/enclone/src/main.rs - -commit 11aee2e44574ee0778105e5ee5c401499aced3c4 -Author: David Jaffe -AuthorDate: Fri Nov 1 07:47:43 2019 -0700 -Commit: David Jaffe -CommitDate: Fri Nov 1 07:47:43 2019 -0700 - - move stuff from crate enclone to crate tenkit2 - -D lib/rust/enclone/src/binary_vec_io.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -D lib/rust/enclone/src/mirror_sparse_matrix.rs - -commit 1ca6db1ce80efa5695800f313a7f188060126a68 -Author: David Jaffe -AuthorDate: Fri Nov 1 07:36:02 2019 -0700 -Commit: David Jaffe -CommitDate: Fri Nov 1 07:36:02 2019 -0700 - - major rev to allow larger column numbers - -M lib/rust/enclone/src/mirror_sparse_matrix.rs - -commit c543cc8cf967d014557cf3829014a2c9deb33f33 -Author: David Jaffe -AuthorDate: Thu Oct 31 06:57:19 2019 -0700 -Commit: David Jaffe -CommitDate: Thu Oct 31 06:57:19 2019 -0700 - - getting ready to add second storage version - -M lib/rust/enclone/src/mirror_sparse_matrix.rs - -commit da0701520f467f8b44505d6d47bb5f5d3c6a8bf5 -Author: David Jaffe -AuthorDate: Thu Oct 31 05:52:03 2019 -0700 -Commit: David Jaffe -CommitDate: Thu Oct 31 05:52:03 2019 -0700 - - add code and storage version functions - -M lib/rust/enclone/src/mirror_sparse_matrix.rs - -commit 6057f66754418dd6ec8103cedb3432c740075e0f -Author: David Jaffe -AuthorDate: Thu Oct 31 05:38:52 2019 -0700 -Commit: David Jaffe -CommitDate: Thu Oct 31 05:38:52 2019 -0700 - - redesign file header - -M 
lib/rust/enclone/src/mirror_sparse_matrix.rs - -commit 57799f3d6e6b64704c1abff4264a91f004a9a1cf -Author: David Jaffe -AuthorDate: Thu Oct 31 05:13:25 2019 -0700 -Commit: David Jaffe -CommitDate: Thu Oct 31 05:13:25 2019 -0700 - - SpecialSparseMatrix ==> MirrorSparseMatrix - -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -R095 lib/rust/enclone/src/special_sparse_matrix.rs lib/rust/enclone/src/mirror_sparse_matrix.rs - -commit e3207523e89c413384a19c0f7db1dc83fbf958bc -Author: David Jaffe -AuthorDate: Wed Oct 30 15:41:08 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 15:41:08 2019 -0700 - - separate feature extraction - -M lib/rust/enclone/src/main.rs - -commit f5459c54df968d2b50425cde7ef1b1d683dbc28c -Author: David Jaffe -AuthorDate: Wed Oct 30 15:15:56 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 15:15:56 2019 -0700 - - contract clocks - -M lib/rust/enclone/src/main.rs - -commit 696f18cbb10bd777247dfe5ba3a207a3a1dd1174 -Author: David Jaffe -AuthorDate: Wed Oct 30 15:09:38 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 15:09:38 2019 -0700 - - speed up a stupid slow spot - -M lib/rust/enclone/src/main.rs - -commit ad31e442fa009f1fd7f66c79d1bcde5ca43ec164 -Author: David Jaffe -AuthorDate: Wed Oct 30 14:52:27 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 14:52:27 2019 -0700 - - add many timers - -M lib/rust/enclone/src/main.rs - -commit 3d25d7bc7499527e08a710df0bdafc692a8ffd3f -Author: David Jaffe -AuthorDate: Wed Oct 30 14:15:02 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 14:15:02 2019 -0700 - - use new sparse matrix representation - -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/special_sparse_matrix.rs - -commit fbefef642c06978d0a15ca56333271064f74ae57 -Author: David Jaffe -AuthorDate: Wed Oct 30 11:30:03 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 11:30:03 2019 -0700 - - fix warnings - -M lib/rust/enclone/src/binary_vec_io.rs -M lib/rust/enclone/src/main.rs -M 
lib/rust/enclone/src/special_sparse_matrix.rs - -commit ceab32ac7d2ad6bb3115398ca075790496182872 -Author: David Jaffe -AuthorDate: Wed Oct 30 10:49:22 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 10:49:22 2019 -0700 - - create special sparse matrix - -M lib/rust/enclone/src/main.rs - -commit 4c50991d8d89103ba77d13efbcb77a030e31cd3a -Author: David Jaffe -AuthorDate: Wed Oct 30 10:23:13 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 10:23:13 2019 -0700 - - remove debugging - -M lib/rust/enclone/src/special_sparse_matrix.rs - -commit aa02f3c1efcfba3106719e7485dd57d2b9f5ecc9 -Author: David Jaffe -AuthorDate: Wed Oct 30 10:20:31 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 10:20:31 2019 -0700 - - fix bugs - -M lib/rust/enclone/src/special_sparse_matrix.rs - -commit a328a768b2d93edaf948898fc910847a6245eb4d -Author: David Jaffe -AuthorDate: Wed Oct 30 09:46:25 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 09:46:25 2019 -0700 - - add test, add doc, tweak representation - -M lib/rust/enclone/src/special_sparse_matrix.rs - -commit 7e6575d6fecdb7e4b1a885a0418580f639764716 -Author: David Jaffe -AuthorDate: Wed Oct 30 07:05:02 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 07:05:02 2019 -0700 - - add notes - -M lib/rust/enclone/src/special_sparse_matrix.rs - -commit 04fab99ce5b26d6db4ddb9c3161755ef56025b5e -Author: David Jaffe -AuthorDate: Wed Oct 30 06:50:20 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 06:50:20 2019 -0700 - - rustfmt - -M lib/rust/enclone/src/binary_vec_io.rs -M lib/rust/enclone/src/special_sparse_matrix.rs - -commit eae2d0119f0991340d167e00ede2293002b65a89 -Author: David Jaffe -AuthorDate: Wed Oct 30 06:49:38 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 30 06:49:38 2019 -0700 - - sparse matrix stuff, pass 0 - -M lib/rust/enclone/Cargo.toml -A lib/rust/enclone/src/binary_vec_io.rs -M lib/rust/enclone/src/lib.rs -A lib/rust/enclone/src/special_sparse_matrix.rs - -commit 
6a320e7d30ee347b461cfc426161a12ac5ba5d08 -Author: David Jaffe -AuthorDate: Tue Oct 29 14:22:38 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 14:22:38 2019 -0700 - - normalize antibody counts - -M lib/rust/enclone/src/main.rs - -commit 249153a5876252446f73504c282a23747185fb5c -Author: David Jaffe -AuthorDate: Tue Oct 29 14:20:03 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 14:20:03 2019 -0700 - - cloak runtime message - -M lib/rust/enclone/src/main.rs - -commit df20d6291a4286a7209999303286c41f65b5f48d -Author: David Jaffe -AuthorDate: Tue Oct 29 14:19:22 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 14:19:22 2019 -0700 - - normalize gex values - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit e71a1c7ebef413db0b24ea39cf150d02c4c34b6a -Author: David Jaffe -AuthorDate: Tue Oct 29 13:42:04 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 13:42:04 2019 -0700 - - add missing timers - -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/main.rs - -commit 97fe15754873189987bac19aa9f6e9d5d0762f0b -Author: David Jaffe -AuthorDate: Tue Oct 29 07:52:29 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 07:52:29 2019 -0700 - - update - -M lib/rust/enclone/src/enclone.out - -commit 82f34a1b8bb93d5d87ff1baa40b256eacceeac6d -Author: David Jaffe -AuthorDate: Tue Oct 29 07:50:40 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 07:50:40 2019 -0700 - - add TCR and BCR options - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/enclone.test -M lib/rust/enclone/src/main.rs - -commit 27794a94ade772d2458279d1875004758cd1761d -Author: David Jaffe -AuthorDate: Tue Oct 29 06:51:15 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 06:51:15 2019 -0700 - - add second test of enclone - -A lib/rust/enclone/test/inputs/enclone_test2_output -M lib/rust/enclone/tests/enclone_test.rs - -commit 1eabbe91a2e537a5c8f20c3a2c83f703cb0caee4 -Author: David Jaffe -AuthorDate: Tue Oct 29 06:47:25 2019 -0700 -Commit: David 
Jaffe -CommitDate: Tue Oct 29 06:47:25 2019 -0700 - - update test results - -M lib/rust/enclone/test/inputs/enclone_test1_output - -commit 0f8687b64912c7bb1560b04a47b247c66e90c5d4 -Author: David Jaffe -AuthorDate: Tue Oct 29 06:26:33 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 06:26:33 2019 -0700 - - work around some indel issues - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 627241723c386d9696eceb17f49ff98b68ff9db6 -Author: David Jaffe -AuthorDate: Tue Oct 29 06:01:31 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 06:01:31 2019 -0700 - - show allelic difference columns - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit b8d7c32404fe2becb1854e461b6a4c39f5a3b133 -Author: David Jaffe -AuthorDate: Tue Oct 29 05:28:13 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 05:28:13 2019 -0700 - - show universal reference - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1b3f50d6724f202a965ba56aa7b89081a47a2375 -Author: David Jaffe -AuthorDate: Tue Oct 29 05:15:18 2019 -0700 -Commit: David Jaffe -CommitDate: Tue Oct 29 05:15:18 2019 -0700 - - add donor reference line - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5393241828354dfd490258ad37a290a0d461a230 -Author: David Jaffe -AuthorDate: Mon Oct 28 16:47:05 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Oct 28 16:47:05 2019 -0700 - - put spacers around the CDR3 - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/test/inputs/enclone_test1_output - -commit 5e801faf3849248ef78b1a48fc7d1056993b7667 -Author: David Jaffe -AuthorDate: Mon Oct 28 15:47:27 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Oct 28 15:47:27 2019 -0700 - - integrate display of shared positions - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 871f240e34653f80eccce49829104c4d6fa6f1e2 -Author: 
David Jaffe -AuthorDate: Mon Oct 28 15:12:01 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Oct 28 15:12:01 2019 -0700 - - add gitattributes - -A lib/rust/enclone/test/inputs/123089/outs/.gitattributes - -commit 41c13854ef30fd853629f8eda5f23b7802750948 -Author: David Jaffe -AuthorDate: Mon Oct 28 14:55:40 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Oct 28 14:55:40 2019 -0700 - - add a test for enclone - -A lib/rust/enclone/test/inputs/enclone_test1_output -A lib/rust/enclone/tests/enclone_test.rs - -commit 7cf9c9e3a3d430f706c3acac78d36087255decfe -Author: David Jaffe -AuthorDate: Mon Oct 28 14:30:50 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Oct 28 14:30:50 2019 -0700 - - add some enclone test data - -A lib/rust/enclone/test/inputs/123089/_invocation -A lib/rust/enclone/test/inputs/123089/outs/all_contig_annotations.json.lz4 - -commit 34cca55fad111a56497113f38c41af5ca2843cfd -Author: David Jaffe -AuthorDate: Mon Oct 28 13:01:33 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Oct 28 13:01:33 2019 -0700 - - fix bug - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1ea0ee80aa59b97288b7d8d61c602aeb2efe5910 -Author: David Jaffe -AuthorDate: Mon Oct 28 10:43:50 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Oct 28 10:43:50 2019 -0700 - - allow compressed json file as input - -M lib/rust/enclone/src/read_json.rs - -commit 9c3188305886583066f5f06adbdf15dfb154a5e6 -Author: David Jaffe -AuthorDate: Mon Oct 28 05:25:02 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Oct 28 05:25:02 2019 -0700 - - switch from vec_utils to vector_utils - -M lib/rust/enclone/Cargo.toml -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/graph_filter.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 64c6b1e49cfa39d56f108425381fc37c5dcf9215 -Author: David Jaffe -AuthorDate: Sat Oct 26 06:36:15 2019 -0700 
-Commit: David Jaffe -CommitDate: Sat Oct 26 06:36:15 2019 -0700 - - fix bug in bolding - -M lib/rust/enclone/src/print_clonotypes.rs - -commit f55871cb97778a6ca97382d46bb2eceb9c54e779 -Author: DJ -AuthorDate: Fri Oct 25 14:51:21 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 25 14:51:21 2019 -0700 - - report antibody counts - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 18793a08eb43277d1ad59591c55f0579eb26e1b3 -Author: DJ -AuthorDate: Fri Oct 25 05:02:17 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 25 05:02:17 2019 -0700 - - fix bug - -M lib/rust/enclone/src/print_clonotypes.rs - -commit e918010d747ed88ed4055b622e06226a2916c703 -Author: DJ -AuthorDate: Fri Oct 25 04:14:23 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 25 04:14:23 2019 -0700 - - document argument - -M lib/rust/enclone/src/README - -commit 880302cf75de28861e86b8bf870c9214172c9d74 -Author: DJ -AuthorDate: Thu Oct 24 13:01:25 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 24 13:01:25 2019 -0700 - - simplify - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 50c725150d0a9dcd947ded46753ecf5cce24848e -Author: DJ -AuthorDate: Thu Oct 24 08:52:03 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 24 08:52:03 2019 -0700 - - rustfmt + tidy - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 19dde48ba293f4745d37cff706a653c8c4f011fc -Author: DJ -AuthorDate: Thu Oct 24 08:44:43 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 24 08:44:43 2019 -0700 - - fix buggy handling of case where there is no dots row - -M lib/rust/enclone/src/print_clonotypes.rs - -commit bae392299846ac1d14cb12463ac6bd316051c05d -Author: DJ -AuthorDate: Thu Oct 24 08:23:21 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 24 08:23:21 2019 -0700 - - bug fixes and optional debugging - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 596ce93db5fbebb113b7698393ebc971c3999107 -Author: DJ -AuthorDate: Thu Oct 24 05:16:22 2019 
-0700 -Commit: DJ -CommitDate: Thu Oct 24 05:16:22 2019 -0700 - - revamp handling of CDR3 and variable positions - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs -A lib/rust/enclone/src/things_to_test - -commit e982f802030636f7c16f984e8644c01c37fbf683 -Author: DJ -AuthorDate: Thu Oct 24 04:17:42 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 24 04:17:42 2019 -0700 - - control some logging - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3eeddb44f511d3d776dd190e4490d9b129d87fa4 -Author: DJ -AuthorDate: Wed Oct 23 16:17:54 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 16:17:54 2019 -0700 - - add to do item - -M lib/rust/enclone/src/print_clonotypes.rs - -commit e8f466d61b86d3b1e4c70382ac8060d78f91090e -Author: DJ -AuthorDate: Wed Oct 23 16:08:02 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 16:08:02 2019 -0700 - - fix bug - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1e14b9f38c295d4b513d9c56e721fba18abdf539 -Author: DJ -AuthorDate: Wed Oct 23 15:19:18 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 15:19:18 2019 -0700 - - tweak sort order - -M lib/rust/enclone/src/print_clonotypes.rs - -commit ef339ca998bf9f683ca5616303131adef50903f7 -Author: DJ -AuthorDate: Wed Oct 23 14:56:12 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 14:56:12 2019 -0700 - - improve doc - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3530b16043db277f38a5f5be08c4a97628ea0961 -Author: DJ -AuthorDate: Wed Oct 23 14:52:06 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 14:52:06 2019 -0700 - - tweak color scheme - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8613eef4bcc3b4ddcb835e86b86587f8aff05a06 -Author: DJ -AuthorDate: Wed Oct 23 14:31:08 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 14:31:08 2019 -0700 - - fix bug - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6d9a0924d5763d12439349f0fd260ea5f546938d -Author: DJ -AuthorDate: Wed Oct 
23 14:06:13 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 14:06:13 2019 -0700 - - improve color assignment - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5e1d0a9e07b739db52d3f16246318f18659c1fe6 -Author: DJ -AuthorDate: Wed Oct 23 13:11:04 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 13:11:04 2019 -0700 - - fix dots line - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 581180dcc08a5ad5ace11370e682b60b36cff8aa -Author: DJ -AuthorDate: Wed Oct 23 12:47:06 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 12:47:06 2019 -0700 - - improve ordering of exact clonotype rows - -M lib/rust/enclone/src/print_clonotypes.rs - -commit f3d58daba813d00434b8e49f18e6b6a3703264bc -Author: DJ -AuthorDate: Wed Oct 23 08:12:46 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 08:12:46 2019 -0700 - - the new view, WITH SEVERAL BUGS - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3a15c8c064503be5eaa2c9352de8c8ac78f14da5 -Author: DJ -AuthorDate: Wed Oct 23 05:58:42 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 05:58:42 2019 -0700 - - define amino acid positions to show - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 4550211544c0e2187f3a72dc2df9b25019c0afd3 -Author: DJ -AuthorDate: Wed Oct 23 05:46:25 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 05:46:25 2019 -0700 - - show vert positions for cdr3_aa - -M lib/rust/enclone/src/print_clonotypes.rs -M lib/rust/enclone/src/read_json.rs - -commit ea2396a4965c876b08cce09c2107a3d75f0ea8ce -Author: DJ -AuthorDate: Wed Oct 23 04:56:05 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 04:56:05 2019 -0700 - - factor out ndigits - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 93db36af968a29ec3d64fee4f3d9bfe27e77f498 -Author: DJ -AuthorDate: Wed Oct 23 04:50:14 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 04:50:14 2019 -0700 - - slight refactoring - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 98257a6f797492a676893d11b87e282e1ac317ce -Author: DJ -AuthorDate: Wed Oct 23 04:47:35 2019 -0700 -Commit: DJ 
-CommitDate: Wed Oct 23 04:47:35 2019 -0700 - - factor out print_digit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit a3674f125bca5cadb8bf05930ff5d3786fb31d3f -Author: DJ -AuthorDate: Wed Oct 23 04:27:50 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 04:27:50 2019 -0700 - - don't show variant positions twice - -M lib/rust/enclone/src/print_clonotypes.rs - -commit dec3dddc1708ad204e697a1c1fa8a1dddc420447 -Author: DJ -AuthorDate: Wed Oct 23 04:25:31 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 04:25:31 2019 -0700 - - indentation fix - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 30a1c50f10eecf88cfc2f67c2c0f69a9f05c72f8 -Author: DJ -AuthorDate: Wed Oct 23 04:23:32 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 04:23:32 2019 -0700 - - show var positions - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 2724b6b5332888eb6d7adc81ddec0ab17297d935 -Author: DJ -AuthorDate: Wed Oct 23 03:38:33 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 23 03:38:33 2019 -0700 - - add comments - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5bdb4e05faa5b7afd4d17cb5bee0c8b917f3a09a -Author: DJ -AuthorDate: Tue Oct 22 19:18:22 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 22 19:18:22 2019 -0700 - - improve CDR3 column header - -M lib/rust/enclone/src/print_clonotypes.rs - -commit d61e6d9cf170ec6512a2b046438d1cf10dd83f70 -Author: DJ -AuthorDate: Tue Oct 22 16:10:25 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 22 16:10:25 2019 -0700 - - fix codon coloring - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 9fc951d27a72662e805b93c5894f9f947784a219 -Author: DJ -AuthorDate: Tue Oct 22 15:29:37 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 22 15:29:37 2019 -0700 - - track CDR3 start position - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/read_json.rs - -commit 862d259741a2f064c2181d0e00de997183c48ad8 -Author: DJ -AuthorDate: Tue Oct 22 14:58:52 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 22 14:58:52 2019 -0700 - - improve arg 
syntax for number of cells - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit 5ffd5423b9c9a20700e53bebeef1dd4a8c7529e6 -Author: DJ -AuthorDate: Tue Oct 22 13:42:19 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 22 13:42:19 2019 -0700 - - replace "none" by "?" for constant regions - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 62ea6f5a75003e69499f1285cacee353bb3f44df -Author: DJ -AuthorDate: Tue Oct 22 13:38:44 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 22 13:38:44 2019 -0700 - - pretty printing is now the default - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit 3977ced32cd8f2474c35a2659845b3c7195ad43a -Author: DJ -AuthorDate: Tue Oct 22 10:46:57 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 22 10:46:57 2019 -0700 - - make ugly orange the least frequent color - -M lib/rust/enclone/src/print_clonotypes.rs - -commit ed2de0dd5b946b1b5e18205daefe57ac341b0d1e -Author: DJ -AuthorDate: Tue Oct 22 07:26:29 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 22 07:26:29 2019 -0700 - - color code amino acids - -M lib/rust/enclone/src/print_clonotypes.rs - -commit ffa412f050034501f8e7cfb112d0a27bd9435329 -Author: DJ -AuthorDate: Mon Oct 21 15:24:57 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 15:24:57 2019 -0700 - - SILENT is now the default - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit faff013cbad2333896b3fa07fbb7c148e4bfabc3 -Author: DJ -AuthorDate: Mon Oct 21 12:29:11 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 12:29:11 2019 -0700 - - add option CHAIN_SPLIT - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit e13fb79c6f41dfad3362f45eb06a12ced8937d61 -Author: DJ -AuthorDate: Mon Oct 21 11:23:09 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 11:23:09 2019 -0700 - - major change to rationalize handling of initial columns - -M lib/rust/enclone/src/README -M lib/rust/enclone/src/defs.rs -M 
lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 55b196bf078417d469cd86ffaf747b0c062c1ea7 -Author: DJ -AuthorDate: Mon Oct 21 05:48:23 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 05:48:23 2019 -0700 - - move doc into README - -A lib/rust/enclone/src/README -M lib/rust/enclone/src/main.rs - -commit 48f0e58058e9953c8c788709c5f39f5e3a2fc736 -Author: DJ -AuthorDate: Mon Oct 21 05:35:06 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 05:35:06 2019 -0700 - - tidy a bit - -M lib/rust/enclone/src/print_clonotypes.rs - -commit fac6f504c93743f8bf24f82b19aebaebabab6793 -Author: DJ -AuthorDate: Mon Oct 21 05:29:54 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 05:29:54 2019 -0700 - - a little refactoring - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8f153ffe33eadc5534506221b31a77b193d52348 -Author: DJ -AuthorDate: Mon Oct 21 05:19:08 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 05:19:08 2019 -0700 - - a little tidying - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5ec83c8d12945ce9ef45e2a3e27f417f79a55d61 -Author: DJ -AuthorDate: Mon Oct 21 05:16:44 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 05:16:44 2019 -0700 - - a little refactoring - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 422f688e0752360e3b163b585adcac95d2798dbd -Author: DJ -AuthorDate: Mon Oct 21 04:57:45 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 04:57:45 2019 -0700 - - correct handling of HIGH - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 47df0c00e1031cfe4f589e59b5cecd49441bd90c -Author: DJ -AuthorDate: Mon Oct 21 04:13:34 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 21 04:13:34 2019 -0700 - - make FAIL_ONLY reversable - -M lib/rust/enclone/src/enclone.test -M lib/rust/enclone/src/main.rs - -commit ca56a82741ca04edcbf6862dcd6e52af82ebd8ab -Author: DJ -AuthorDate: Sun Oct 20 09:14:46 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 20 09:14:46 2019 -0700 - - handle SHM 
deletions - -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 49865a57ddb7deca58a3df9b98dfb3acfd0d116e -Author: DJ -AuthorDate: Sun Oct 20 07:37:49 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 20 07:37:49 2019 -0700 - - convert CloneInfo tigs to Vec - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 0d1c669c51cf935f714ee6c83a6365428af64696 -Author: DJ -AuthorDate: Sat Oct 19 08:11:42 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 08:11:42 2019 -0700 - - start to handle deletions better - -M lib/rust/enclone/src/info.rs - -commit 3f77a6232730aa8c8c25de75f72017672ce43135 -Author: DJ -AuthorDate: Sat Oct 19 07:41:24 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 07:41:24 2019 -0700 - - update to current12 - -M lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/enclone.test -M lib/rust/enclone/src/main.rs - -commit fc27fcd7f5390f84b7ec81710661c0001bef526f -Author: DJ -AuthorDate: Sat Oct 19 07:31:57 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 07:31:57 2019 -0700 - - update - -M lib/rust/enclone/src/enclone.testdata.tcr - -commit beb6cf2a2011b40edabfc55fc7b2e1bd577cdf3f -Author: DJ -AuthorDate: Sat Oct 19 06:30:12 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 06:30:12 2019 -0700 - - correct "has insertion" notes - -M lib/rust/enclone/src/info.rs - -commit cda48722c70c40913e35ebd4039ab28195968f52 -Author: DJ -AuthorDate: Sat Oct 19 06:27:49 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 06:27:49 2019 -0700 - - add code to find SHM insertions - -M lib/rust/enclone/src/main.rs - -commit 1083e9aecc131857238c065dd5f8a59bf03a70ee -Author: DJ -AuthorDate: Sat Oct 19 05:28:20 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 05:28:20 2019 -0700 - - temporary tracking of SHM indels via notes - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/info.rs -M 
lib/rust/enclone/src/print_clonotypes.rs - -commit 2acdd17388acc80fae98a08273225be9f14fe9ef -Author: DJ -AuthorDate: Sat Oct 19 04:50:22 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 04:50:22 2019 -0700 - - rustfmt + comments - -M lib/rust/enclone/src/defs.rs - -commit 4869774bc0a688f75c5670055b417b4ae18c8b4b -Author: DJ -AuthorDate: Sat Oct 19 04:47:17 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 04:47:17 2019 -0700 - - comments - -M lib/rust/enclone/src/main.rs - -commit e553789e3713a3a11947bb1b2f6377cc9eb06664 -Author: DJ -AuthorDate: Sat Oct 19 04:30:03 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 04:30:03 2019 -0700 - - fix problem with MERGE_ALL_IMPROPERS - -M lib/rust/enclone/src/join.rs - -commit feeb66d527cb02a02b01ec8e8649ad6800fea785 -Author: DJ -AuthorDate: Sat Oct 19 04:20:51 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 19 04:20:51 2019 -0700 - - use ◼ instead of + in clonotype table - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 0d31b890cca3d34fafc2f56bf3ef072e09561494 -Author: DJ -AuthorDate: Fri Oct 18 16:44:55 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 16:44:55 2019 -0700 - - add sample - -M lib/rust/enclone/src/enclone.testdata.tcr - -commit b9a1eab31300c113ca64cbffce03c98c544bb2ce -Author: DJ -AuthorDate: Fri Oct 18 16:41:41 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 16:41:41 2019 -0700 - - fix bug in reannotate mode - -M lib/rust/enclone/src/read_json.rs - -commit 6127db9c1a487ec08003e404499f1e0f98e3f5d5 -Author: DJ -AuthorDate: Fri Oct 18 15:09:03 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 15:09:03 2019 -0700 - - update test and results - -M lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/enclone.testdata - -commit b02f946b11741c1ea9cd2a9091a9431b46932ae0 -Author: DJ -AuthorDate: Fri Oct 18 15:02:56 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 15:02:56 2019 -0700 - - add some data - -M lib/rust/enclone/src/enclone.testdata.tcr - -commit 3143aa659d8d9e4dc43457770560f8c66af02142 -Author: DJ -AuthorDate: Fri Oct 
18 14:57:47 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 14:57:47 2019 -0700 - - improve failure message - -M lib/rust/enclone/src/info.rs - -commit c7a9b9df89e85a0bb3b774bda1f6f4097e82b1f1 -Author: DJ -AuthorDate: Fri Oct 18 14:07:13 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 14:07:13 2019 -0700 - - fix bug - -M lib/rust/enclone/src/join.rs - -commit 9f21304841d44beaed829294d455ccd13023eb37 -Author: DJ -AuthorDate: Fri Oct 18 12:36:29 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 12:36:29 2019 -0700 - - factor out json reading - -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -A lib/rust/enclone/src/read_json.rs - -commit 88c821d1a62bce9f475b77ec38456d424aeaa0dc -Author: DJ -AuthorDate: Fri Oct 18 11:35:23 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 11:35:23 2019 -0700 - - revise SEQC - -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 784fe0d25c6e2a10a4d3e4a20e0ff004e28c47bb -Author: DJ -AuthorDate: Fri Oct 18 11:29:16 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 11:29:16 2019 -0700 - - tighten test for SHM indels - -M lib/rust/enclone/src/main.rs - -commit 588963d7c7b91519dc78c3ef32e840be216e4a55 -Author: DJ -AuthorDate: Fri Oct 18 10:55:38 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 10:55:38 2019 -0700 - - add comment - -M lib/rust/enclone/src/main.rs - -commit 9acfec08d2d09dbab999d09cc34aac36853d42be -Author: DJ -AuthorDate: Fri Oct 18 10:50:27 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 10:50:27 2019 -0700 - - cosmetic - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5b39ebce1534c278fda8047b0f30b89626bfd02d -Author: DJ -AuthorDate: Fri Oct 18 10:46:28 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 10:46:28 2019 -0700 - - add option MERGE_ALL_IMPROPERS - -M lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/main.rs - -commit 
1b73d1acee90e660d1bc49305a420c5abe690e4f -Author: DJ -AuthorDate: Fri Oct 18 09:38:49 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 09:38:49 2019 -0700 - - add option to search for SHM indels - -M lib/rust/enclone/src/main.rs - -commit 9b26599274203956d4ba81097eb587db8308d744 -Author: DJ -AuthorDate: Fri Oct 18 08:57:13 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 08:57:13 2019 -0700 - - tweak filter - -M lib/rust/enclone/src/print_clonotypes.rs - -commit f3448f2aabbbf29e45ca583a2cda4784402418a3 -Author: DJ -AuthorDate: Fri Oct 18 08:42:51 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 08:42:51 2019 -0700 - - add option to turn off graph filtering - -M lib/rust/enclone/src/main.rs - -commit 2c131d9e4b74e5d06ef9606cb710757ee1275bea -Author: DJ -AuthorDate: Fri Oct 18 08:09:38 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 08:09:38 2019 -0700 - - tweak heavy chain filter - -M lib/rust/enclone/src/graph_filter.rs - -commit a2f22ae564257c02b8fa442c0d89b763bd9d3db9 -Author: DJ -AuthorDate: Fri Oct 18 07:55:19 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 07:55:19 2019 -0700 - - tweak threshold - -M lib/rust/enclone/src/graph_filter.rs - -commit dadd409a5648f34ee37599bad9b9060c16cad47d -Author: DJ -AuthorDate: Fri Oct 18 07:50:52 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 07:50:52 2019 -0700 - - fix bug in CDR3 option - -M lib/rust/enclone/src/main.rs - -commit bbfb7ab67157282b7615ac3262c8a08bd7d199aa -Author: DJ -AuthorDate: Fri Oct 18 07:47:46 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 07:47:46 2019 -0700 - - tweak NICE - -M lib/rust/enclone/src/main.rs - -commit 9a26b5e2ec351fd861141412b475e2cb8b0d36ca -Author: DJ -AuthorDate: Fri Oct 18 07:37:51 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 07:37:51 2019 -0700 - - fix bug in bold escape chains - -M lib/rust/enclone/src/print_clonotypes.rs - -commit e5c8c800fcc350faf3b783274ee34ed6a7c7d40f -Author: DJ -AuthorDate: Fri Oct 18 07:35:42 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 07:35:42 2019 -0700 - - 
add arg MAX_CHAINS - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 13d67d483c6fa6d0a1e8fb1efda0278098c270c2 -Author: DJ -AuthorDate: Fri Oct 18 07:26:14 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 07:26:14 2019 -0700 - - tweak threshold for heavy->light filtering - -M lib/rust/enclone/src/graph_filter.rs - -commit 4f4f633f052e1508862e73dbf52c91748911d108 -Author: DJ -AuthorDate: Fri Oct 18 07:20:19 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 07:20:19 2019 -0700 - - add option NICE - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 81b04f20027cf18c70d10d58d27064f232ee18fd -Author: DJ -AuthorDate: Fri Oct 18 06:22:40 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 06:22:40 2019 -0700 - - tweak heavy->light filtering heuristics - -M lib/rust/enclone/src/graph_filter.rs - -commit 7c434e6d6d6f88da5852ef5f655133de4d16be45 -Author: DJ -AuthorDate: Fri Oct 18 06:01:24 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 06:01:24 2019 -0700 - - track number of cells as part of weight - -M lib/rust/enclone/src/graph_filter.rs - -commit 9f8afbc37088959067985afe7dc57f16572cc488 -Author: DJ -AuthorDate: Fri Oct 18 05:54:55 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 05:54:55 2019 -0700 - - generalize CDR3 argument syntax - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 08f1c37ccc1dc88e6d8430824bdb1b08eac4c85c -Author: DJ -AuthorDate: Fri Oct 18 05:19:43 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 05:19:43 2019 -0700 - - don't print orbits - -M lib/rust/enclone/src/main.rs - -commit 6dee7b6515fde7230a1b3ed837dbdb636e1da568 -Author: DJ -AuthorDate: Fri Oct 18 05:13:16 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 05:13:16 2019 -0700 - - factor out build_info - -A lib/rust/enclone/src/info.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs - -commit 
ba8288cf14990fe5cdde5a0a9e8152ac2e451c92 -Author: DJ -AuthorDate: Fri Oct 18 05:03:32 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 05:03:32 2019 -0700 - - comments and tidying - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit 8c417d2d668fae4ad607fd5ab3c9daa123cf3384 -Author: DJ -AuthorDate: Fri Oct 18 04:51:19 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 04:51:19 2019 -0700 - - delete some unneeded stuff - -M lib/rust/enclone/src/graph_filter.rs -M lib/rust/enclone/src/main.rs - -commit b3d86e94259b94d0fd3a40e227d36a80bc5d50bf -Author: DJ -AuthorDate: Fri Oct 18 04:45:44 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 04:45:44 2019 -0700 - - factor out graph filtering - -A lib/rust/enclone/src/graph_filter.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs - -commit a0faa2755eed43e31d50508f854c7338b509c81d -Author: DJ -AuthorDate: Fri Oct 18 04:16:41 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 04:16:41 2019 -0700 - - add a little heavy->light filtering - -M lib/rust/enclone/src/main.rs - -commit 941ef9432ac483b38475813f4d07653b869a97fa -Author: DJ -AuthorDate: Fri Oct 18 04:07:12 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 04:07:12 2019 -0700 - - factor out allele finding - -A lib/rust/enclone/src/allele.rs -M lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 796bfc3a6e06ff0cd6ce371513836f11de8bac28 -Author: DJ -AuthorDate: Fri Oct 18 03:49:01 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 03:49:01 2019 -0700 - - capture dataset_list in ctl - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit d7d8fe0055c500f8502d2b6cdf3d6e45675e9c34 -Author: DJ -AuthorDate: Fri Oct 18 03:43:22 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 03:43:22 2019 -0700 - - preparation for some refactoring - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit 6117c545cecef95fb56875e2297d4bcf04fdbfe7 
-Author: DJ -AuthorDate: Fri Oct 18 03:23:43 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 03:23:43 2019 -0700 - - update test and results - -M lib/rust/enclone/src/enclone.out -M lib/rust/enclone/src/enclone.test - -commit 629883093eb245d04e5f6f2c227e1e81b0f67e03 -Author: DJ -AuthorDate: Fri Oct 18 03:15:06 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 18 03:15:06 2019 -0700 - - doc and tidying - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit 60ab24e78c0a037f16687176d3c6b34f5c3ecd29 -Author: DJ -AuthorDate: Thu Oct 17 16:40:05 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 16:40:05 2019 -0700 - - add arg ONESIE_MULT - -M lib/rust/enclone/src/main.rs - -commit 06898b2b20a64ab1d13db7fff6f2c0787344721c -Author: DJ -AuthorDate: Thu Oct 17 16:09:01 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 16:09:01 2019 -0700 - - tweak filtering - -M lib/rust/enclone/src/main.rs - -commit 79ef574f262d1db506a44a1ec4f3d4fb97bd9231 -Author: DJ -AuthorDate: Thu Oct 17 16:07:12 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 16:07:12 2019 -0700 - - always build graph and filter; GRAPH just gives logging - -M lib/rust/enclone/src/main.rs - -commit 08b48c03757c8672917cdf9a2ea7182d38f2d154 -Author: DJ -AuthorDate: Thu Oct 17 16:04:47 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 16:04:47 2019 -0700 - - now GRAPH causes filtering - -M lib/rust/enclone/src/main.rs - -commit 0e50eb366073dadaa223d1e81ee5bbd6d4fffbcf -Author: DJ -AuthorDate: Thu Oct 17 15:43:47 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 15:43:47 2019 -0700 - - move graph code to better place - -M lib/rust/enclone/src/main.rs - -commit b3599e7f3dd039f7f984d4be7088c31f3b7a83bd -Author: DJ -AuthorDate: Thu Oct 17 15:23:59 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 15:23:59 2019 -0700 - - ugly thing to force TRB columns first in clonotype table - -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit f5c0387f27e69f79abde94aa6fb60789d34bd7d6 -Author: DJ -AuthorDate: 
Thu Oct 17 14:32:44 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 14:32:44 2019 -0700 - - fix to documentation - -M lib/rust/enclone/src/main.rs - -commit de3c94eb7bd44e7d527d7f75372e903a9d55682f -Author: DJ -AuthorDate: Thu Oct 17 14:18:35 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 14:18:35 2019 -0700 - - replace CLONES by LOW and HIGH - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 9c73131c84d3be365821b52eb622edd980e7f8ce -Author: DJ -AuthorDate: Thu Oct 17 13:38:36 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 13:38:36 2019 -0700 - - improve def of unambiguous when merging onesies - -M lib/rust/enclone/src/join.rs - -commit 8cb4967909cbe16e0d57d71bc87832d10ad56325 -Author: DJ -AuthorDate: Thu Oct 17 13:23:00 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 13:23:00 2019 -0700 - - start of TCR dataset list - -A lib/rust/enclone/src/enclone.testdata.tcr - -commit fa7884392676f2e5a176d032a3fab81e9988765d -Author: DJ -AuthorDate: Thu Oct 17 13:00:54 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 13:00:54 2019 -0700 - - fix bug in filtering - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5290ab6fd4525d8f05b54e38b1ab48e419c95786 -Author: DJ -AuthorDate: Thu Oct 17 10:58:22 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 10:58:22 2019 -0700 - - now always create and merge onesies - -M lib/rust/enclone/src/main.rs - -commit d17c0f1e3ca53c3006fa5023344a9540d4e6ab4d -Author: DJ -AuthorDate: Thu Oct 17 10:50:03 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 10:50:03 2019 -0700 - - workaround for TRBC duplicate records - -M lib/rust/enclone/src/main.rs - -commit b6bc038c6dc5c7e9e40fcd86ada9e0a46e449cfc -Author: DJ -AuthorDate: Thu Oct 17 09:53:32 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 09:53:32 2019 -0700 - - support TCR - -M lib/rust/enclone/src/main.rs - -commit d8640096129f10bda17c836c7e4075addb7736ae -Author: DJ -AuthorDate: Thu Oct 17 08:25:30 2019 -0700 -Commit: DJ -CommitDate: 
Thu Oct 17 08:25:30 2019 -0700 - - onesie exact clonotypes only used if at least 1/10,000 - -M lib/rust/enclone/src/main.rs - -commit c6f7282f5a185a73db99ccaaba1ed42f63ab58d8 -Author: DJ -AuthorDate: Thu Oct 17 07:20:42 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 07:20:42 2019 -0700 - - switch to current11 - -M lib/rust/enclone/src/enclone.test -M lib/rust/enclone/src/main.rs - -commit 6a4577e30b48c0b8d5b46dea899cef0dcac78141 -Author: DJ -AuthorDate: Thu Oct 17 07:18:41 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 07:18:41 2019 -0700 - - lower MAX_DEGRADATION from 5 to 3 - -M lib/rust/enclone/src/join.rs - -commit f3b2f3c5c9c11bb5fad5abca1265d0ae08ef12cd -Author: DJ -AuthorDate: Thu Oct 17 07:06:56 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 07:06:56 2019 -0700 - - fix bug - -M lib/rust/enclone/src/main.rs - -commit b662af105959e96fb980429861c415ad4becd1c5 -Author: DJ -AuthorDate: Thu Oct 17 06:32:45 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 06:32:45 2019 -0700 - - major refactor, separate join_exact, BROKEN - -M lib/rust/enclone/src/defs.rs -A lib/rust/enclone/src/join.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs - -commit ff21f5d2164272fefaf9f1081920239aefe2f7df -Author: DJ -AuthorDate: Thu Oct 17 05:48:53 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 05:48:53 2019 -0700 - - move some code to clarify dependencies - -M lib/rust/enclone/src/main.rs - -commit a92d8bfbf493d19aa99bf4370a5d2c3c72155e4f -Author: DJ -AuthorDate: Thu Oct 17 05:45:29 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 05:45:29 2019 -0700 - - kill all_cl1 - -M lib/rust/enclone/src/main.rs - -commit 42969e1c6559b7be0e0cffe4e382d7bfb3507fb4 -Author: DJ -AuthorDate: Thu Oct 17 05:36:38 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 05:36:38 2019 -0700 - - kill a variable - -M lib/rust/enclone/src/main.rs - -commit 6cd20b82f6f87adda77b094e98f2c755dee9b3d9 -Author: DJ -AuthorDate: Thu Oct 17 05:25:57 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 05:25:57 2019 -0700 
- - kill a variable - -M lib/rust/enclone/src/main.rs - -commit 422cdb14cc6e9562416ec1fa96e4a7dd02f4bf62 -Author: DJ -AuthorDate: Thu Oct 17 05:24:35 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 17 05:24:35 2019 -0700 - - kill a variable - -M lib/rust/enclone/src/main.rs - -commit cd2c993c8f814ea71d052acf609a64dcb1b069c7 -Author: DJ -AuthorDate: Wed Oct 16 17:06:43 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 17:06:43 2019 -0700 - - comments - -M lib/rust/enclone/src/print_clonotypes.rs - -commit a214421154ed32e7f250faabde9f5ce01f627fcd -Author: DJ -AuthorDate: Wed Oct 16 16:58:38 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 16:58:38 2019 -0700 - - kill another variable - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit b8be64f65d4e57255bbcd1aade33a5ded846fdaf -Author: DJ -AuthorDate: Wed Oct 16 16:32:16 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 16:32:16 2019 -0700 - - default is now to show constant region - -M lib/rust/enclone/src/main.rs - -commit 4bb455c8e7c789b30a55499dc13d9927e6ac7046 -Author: DJ -AuthorDate: Wed Oct 16 16:30:24 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 16:30:24 2019 -0700 - - SPLIT_CONST now on by default - -M lib/rust/enclone/src/main.rs - -commit db320b05b60375515acb93227f22e62e8c3ab5ac -Author: DJ -AuthorDate: Wed Oct 16 16:27:03 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 16:27:03 2019 -0700 - - kill a variable - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit b5be673a4a2096fe26743cf34b1033153df617c1 -Author: DJ -AuthorDate: Wed Oct 16 16:22:58 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 16:22:58 2019 -0700 - - kill some variables - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit 0a3edc79ff0b4418a7ed7d5e60004c44b7fd35da -Author: DJ -AuthorDate: Wed Oct 16 16:01:55 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 16:01:55 2019 -0700 - - half kill a variable - -M lib/rust/enclone/src/main.rs - -commit 27cb3b407bc15a8dbd748b576472d6e9ea459791 
-Author: DJ -AuthorDate: Wed Oct 16 15:56:44 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 15:56:44 2019 -0700 - - add ExactClonotype::ncells() - -M lib/rust/enclone/src/defs.rs - -commit 6ba03eb243b1c81029ebb17e10515355853b95af -Author: DJ -AuthorDate: Wed Oct 16 15:44:26 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 15:44:26 2019 -0700 - - kill a variable - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs - -commit 1ba3cd78c88a44e6150b8f6c5d1d028a39b24bfd -Author: DJ -AuthorDate: Wed Oct 16 15:40:26 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 15:40:26 2019 -0700 - - move code to sort of kill a variable - -M lib/rust/enclone/src/main.rs - -commit fee62dd7898b22c535ea69e433ad3f783300b952 -Author: DJ -AuthorDate: Wed Oct 16 14:28:51 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 14:28:51 2019 -0700 - - kill unneeded variable - -M lib/rust/enclone/src/main.rs - -commit 63e425466004912ed546d02f8775e43ca6c0ec91 -Author: DJ -AuthorDate: Wed Oct 16 10:55:08 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 10:55:08 2019 -0700 - - add option CHAIN_BRIEF - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 1f770a80c38faeb3dce7097c81b3147fc5f17fc9 -Author: DJ -AuthorDate: Wed Oct 16 04:52:08 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 04:52:08 2019 -0700 - - add option FAIL_ONLY - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 8413a265f0f3c5248b4d048329fbac83e5cd9b27 -Author: DJ -AuthorDate: Wed Oct 16 04:41:46 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 04:41:46 2019 -0700 - - simplify first column in clonotype table - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 6436b1d6f0f5ac05faca0556efd624dbfe205fec -Author: DJ -AuthorDate: Wed Oct 16 04:30:06 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 16 04:30:06 2019 -0700 - - fix bug in annv in RE mode - -M lib/rust/enclone/src/main.rs - -commit 
2801fd9266aadde82fb65e0f516007b9e9c28c64 -Author: DJ -AuthorDate: Tue Oct 15 14:29:02 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 15 14:29:02 2019 -0700 - - need this - -A lib/rust/enclone/BUILD.bazel - -commit 06a300646b3c911cc92ec1f869ddd119e6e1d7fb -Author: DJ -AuthorDate: Tue Oct 15 11:41:53 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 15 11:41:53 2019 -0700 - - a little beautification - -M lib/rust/enclone/src/print_clonotypes.rs - -commit fc6492d47ceeb159bd4e176f2658e49c97230b88 -Author: DJ -AuthorDate: Tue Oct 15 06:24:02 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 15 06:24:02 2019 -0700 - - filter putative gel bead contamination - -M lib/rust/enclone/src/main.rs - -commit 63230da98314ed0d8c14e77738877b07f9039c09 -Author: DJ -AuthorDate: Tue Oct 15 04:18:37 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 15 04:18:37 2019 -0700 - - improve BU mode - -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 257bd9c89a234beabf2c813508b1f692e0471627 -Author: DJ -AuthorDate: Tue Oct 15 03:54:41 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 15 03:54:41 2019 -0700 - - for BU, show lena index for each barcode line - -M lib/rust/enclone/src/print_clonotypes.rs - -commit b23d813be143f25e4557380d25abafb7a5ae0748 -Author: DJ -AuthorDate: Mon Oct 14 16:54:06 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 16:54:06 2019 -0700 - - now use arg ONESIES to create and merge onesies - -M lib/rust/enclone/src/main.rs - -commit 198ae30b2f6ff8023a1b5b830eb73dba27106583 -Author: DJ -AuthorDate: Mon Oct 14 15:01:42 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 15:01:42 2019 -0700 - - add option PROTECT_BADS - -M lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/main.rs -M lib/rust/enclone/src/print_clonotypes.rs - -commit 5727010f4563101a9b42556d626b03cf5b9994fd -Author: DJ -AuthorDate: Mon Oct 14 14:24:48 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 14:24:48 2019 -0700 - - comments - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 
312820d7a66bf0e9cccadd4a649549c9c72571c3 -Author: DJ -AuthorDate: Mon Oct 14 13:05:39 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 13:05:39 2019 -0700 - - add back the onesies, but don't merge them - -M lib/rust/enclone/src/main.rs - -commit 33166ce5c3f832e852aaef8b3b7054e88f2f063f -Author: DJ -AuthorDate: Mon Oct 14 11:48:04 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 11:48:04 2019 -0700 - - more doc - -M lib/rust/enclone/src/main.rs - -commit 7a362c3fd6dda8f0ba907f15b961847a2b482398 -Author: DJ -AuthorDate: Mon Oct 14 11:44:31 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 11:44:31 2019 -0700 - - add some doc - -M lib/rust/enclone/src/main.rs - -commit c04fa20280462b447c774888d8aa5da77b380b15 -Author: DJ -AuthorDate: Mon Oct 14 11:34:24 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 11:34:24 2019 -0700 - - bugfix connected to onesies - -M lib/rust/enclone/src/main.rs - -commit 0cba4e7ded9e5ae07b50b627e5a351307bb2c43c -Author: DJ -AuthorDate: Mon Oct 14 11:04:32 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 11:04:32 2019 -0700 - - fix printing error - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 3d439f67f78f5c2b5470f890eaf81977300ba93c -Author: DJ -AuthorDate: Mon Oct 14 06:39:24 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 06:39:24 2019 -0700 - - tidy - -M lib/rust/enclone/src/print_clonotypes.rs - -commit c5e6df0640de0f7967257bc6ff1ffd5567566724 -Author: DJ -AuthorDate: Mon Oct 14 06:36:35 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 06:36:35 2019 -0700 - - rustfmt - -M lib/rust/enclone/src/print_clonotypes.rs - -commit 422c48c2cace2f218edd022ec9e84f55369e4a3e -Author: DJ -AuthorDate: Mon Oct 14 06:33:59 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 06:33:59 2019 -0700 - - split out print_clonotypes into separate file - -A lib/rust/enclone/src/defs.rs -M lib/rust/enclone/src/lib.rs -M lib/rust/enclone/src/main.rs -A lib/rust/enclone/src/print_clonotypes.rs - -commit 98d78086b4b6b31946188b790399d39b41e46f73 -Author: DJ -AuthorDate: Mon Oct 14 
06:07:29 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 06:07:29 2019 -0700 - - package up function print_clonotypes - -M lib/rust/enclone/src/main.rs - -commit 427c63b728565c2bcd5762f85dd185e59c6d21e0 -Author: DJ -AuthorDate: Mon Oct 14 05:52:30 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 05:52:30 2019 -0700 - - doc - -M lib/rust/enclone/src/main.rs - -commit 6e037271c157980b65fafa8976a3606e9efe195a -Author: DJ -AuthorDate: Mon Oct 14 05:19:51 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 05:19:51 2019 -0700 - - package SampleInfo into EncloneControl - -M lib/rust/enclone/src/main.rs - -commit 8634d0d6f2dd56ed30ffc9fad5c9fbb3c844450b -Author: DJ -AuthorDate: Mon Oct 14 05:13:06 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 05:13:06 2019 -0700 - - package constants in one place - -M lib/rust/enclone/src/main.rs - -commit 92a278c3ae4060b07aca3d70945739dc39500534 -Author: DJ -AuthorDate: Mon Oct 14 04:54:36 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 14 04:54:36 2019 -0700 - - turn off onesie inclusion for now - -M lib/rust/enclone/src/main.rs - -commit 0c6648b2ecc48d8c51c5306d241f776688195fbd -Author: DJ -AuthorDate: Sun Oct 13 08:17:14 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 13 08:17:14 2019 -0700 - - always print shared positions - -M lib/rust/enclone/src/main.rs - -commit 5fe2e53bc50b146ae1049f12e619185a8dc6bd77 -Author: DJ -AuthorDate: Sun Oct 13 07:50:28 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 13 07:50:28 2019 -0700 - - broken code to merge onesies, off - -M lib/rust/enclone/src/main.rs - -commit 33bd8c2285617ef525310f9261a6faad9c4784f7 -Author: DJ -AuthorDate: Sun Oct 13 07:18:24 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 13 07:18:24 2019 -0700 - - capture onesies in clonotypes but don't yet merge - -M lib/rust/enclone/src/main.rs - -commit ddbbebdf8ec4a26fb01c6bdfc86f9967623d6423 -Author: DJ -AuthorDate: Sat Oct 12 08:49:07 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 12 08:49:07 2019 -0700 - - move enclone to its own crate - -A 
lib/rust/enclone/Cargo.toml -A lib/rust/enclone/src/enclone.out -A lib/rust/enclone/src/enclone.test -A lib/rust/enclone/src/enclone.testdata -A lib/rust/enclone/src/lib.rs -A lib/rust/enclone/src/main.rs - -commit 977b9e0a960411293c9774d206076271d4378779 -Author: DJ -AuthorDate: Sat Oct 12 08:14:15 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 12 08:14:15 2019 -0700 - - kill use of gex in clonotype printing - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 45921918cf2b373d11337d41ad941814dbd14c26 -Author: DJ -AuthorDate: Sat Oct 12 08:09:28 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 12 08:09:28 2019 -0700 - - kill use of "graph" in clonotype printing - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit e60c2f9c6759e7692d6675ca834566e3eae1c156 -Author: DJ -AuthorDate: Sat Oct 12 08:07:33 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 12 08:07:33 2019 -0700 - - kill use of to_bc in clonotype printing - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit f1f3f8ca971091ac791ca118232f93d91932008d -Author: DJ -AuthorDate: Sat Oct 12 07:07:17 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 12 07:07:17 2019 -0700 - - capture show_donor in clono_print_opt - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 47e947d78b68426b89b468cb7e98154ea560de18 -Author: DJ -AuthorDate: Sat Oct 12 07:02:42 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 12 07:02:42 2019 -0700 - - capture heuristics in a data structure - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 2a76a3823cb202d60be795afe2467fd630200fab -Author: DJ -AuthorDate: Sat Oct 12 06:56:23 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 12 06:56:23 2019 -0700 - - add some doc - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit b6eaa8a9e78b4fdaec2cd24bc3a74189dadddbd9 -Author: DJ -AuthorDate: Sat Oct 12 06:48:44 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 12 06:48:44 2019 -0700 - - add data structure SampleInfo - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 6b897f391c329f79a4cae5d7a5793a8935b92a5b 
-Author: DJ -AuthorDate: Fri Oct 11 05:24:35 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 11 05:24:35 2019 -0700 - - add some doc - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 253d244a390fa99e70f6b645cf697753952e5142 -Author: DJ -AuthorDate: Thu Oct 10 16:56:48 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 16:56:48 2019 -0700 - - count gex bads - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 131d2afca95a36d4685288a0c5197c790f4b32eb -Author: DJ -AuthorDate: Thu Oct 10 16:20:04 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 16:20:04 2019 -0700 - - logging and comments - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 082490bed22d97a0e564d1edd32477696fb0dca9 -Author: DJ -AuthorDate: Thu Oct 10 16:14:51 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 16:14:51 2019 -0700 - - exclude large clones from whitelist contamination stats - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit c980331231004098f1cdb76f7a6f92b7f364d163 -Author: DJ -AuthorDate: Thu Oct 10 15:48:53 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 15:48:53 2019 -0700 - - corrections to whitelist contam tallying - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit e303858f3e5d2a6c16861088509ec88a30718dea -Author: DJ -AuthorDate: Thu Oct 10 14:48:12 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 14:48:12 2019 -0700 - - compute whitelist contamination rate - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit be925471fd9f5451a62b2f3e1ccd13409e22905b -Author: DJ -AuthorDate: Thu Oct 10 14:11:52 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 14:11:52 2019 -0700 - - lowercasing of bases is now optional - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 8161d05ba07ae4dece9a25cc2d6665021d8a62e8 -Author: DJ -AuthorDate: Thu Oct 10 13:42:16 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 13:42:16 2019 -0700 - - + option to show only clonotypes exhibiting whitelist contam - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 83947ff70685b6845cccb0295f5b60df28904891 
-Author: DJ -AuthorDate: Thu Oct 10 13:31:43 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 13:31:43 2019 -0700 - - refine whitelist contamination filtering - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 8d3a3fb367153a1e03430fa895f3aa3b953aeed1 -Author: DJ -AuthorDate: Thu Oct 10 11:59:32 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 11:59:32 2019 -0700 - - show gex for GEX + BC - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 213ff4e05151a7af4a9d18d7e36901d7958a0232 -Author: DJ -AuthorDate: Thu Oct 10 11:36:32 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 11:36:32 2019 -0700 - - make work with old CellRanger GEX data - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 6659e20f83684841b0f132fce01a571c61675957 -Author: DJ -AuthorDate: Thu Oct 10 11:00:15 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 11:00:15 2019 -0700 - - add option to regenerate annotations - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 4d66df483978b4eeea572c76f0f316d4520b08f6 -Author: DJ -AuthorDate: Thu Oct 10 05:34:46 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 10 05:34:46 2019 -0700 - - print number of datasets and donors - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 85b8579b424d55645f1985698862a4925d218f3d -Author: DJ -AuthorDate: Wed Oct 9 15:06:16 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 9 15:06:16 2019 -0700 - - add nearest neighbor - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit bce58699126103b4ab9d7100622571306168903c -Author: DJ -AuthorDate: Wed Oct 9 11:05:20 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 9 11:05:20 2019 -0700 - - show shared positions - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 3260ed9e9793f5b179fc633a1140ef8d2aaece08 -Author: DJ -AuthorDate: Wed Oct 9 06:49:55 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 9 06:49:55 2019 -0700 - - delete commented out code - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit da31c0cf9f466b1b714cc4a4bffcdea6bdf785c6 -Author: DJ -AuthorDate: Wed Oct 9 06:47:57 2019 
-0700 -Commit: DJ -CommitDate: Wed Oct 9 06:47:57 2019 -0700 - - use multicolumn feature of print_tabular_vbox - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 6494796407b7ab99fd7c7bae746ff860160a95d3 -Author: DJ -AuthorDate: Tue Oct 8 19:33:25 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 19:33:25 2019 -0700 - - split exact clonotypes by constant region if requested - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 9ffbd796dedb39ac2c2ddb6863d8f79df5e1faf2 -Author: DJ -AuthorDate: Tue Oct 8 15:09:56 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 15:09:56 2019 -0700 - - add option NCELLS - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 8bb2b5d808383dd060e2e5ee05671a134921b99d -Author: DJ -AuthorDate: Tue Oct 8 14:41:43 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 14:41:43 2019 -0700 - - fix printing problem - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 1e367444e92fb84301cfecdf5ee54a1d737549e0 -Author: DJ -AuthorDate: Tue Oct 8 14:09:39 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 14:09:39 2019 -0700 - - fix BU - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 1409bf33bc0af8c8a3b190949a34faa1ac9c95b1 -Author: DJ -AuthorDate: Tue Oct 8 13:03:50 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 13:03:50 2019 -0700 - - improve DONOR option - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit bfe67f9dee13f761bad8c05ebfb2dbd2f705d730 -Author: DJ -AuthorDate: Tue Oct 8 11:39:33 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 11:39:33 2019 -0700 - - add option DONOR - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit c422a955eec97125db09eccad7b26b002fdd6382 -Author: DJ -AuthorDate: Tue Oct 8 11:12:25 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 11:12:25 2019 -0700 - - add whitelist contamination metric - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 547fad560f94b74ccc6cc231a8a35ff1edbf5688 -Author: DJ -AuthorDate: Tue Oct 8 08:11:43 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 08:11:43 2019 -0700 - - remove dependency 
- -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 48b2247b1779f01c387a7d3c6d15310bcf696b1e -Author: DJ -AuthorDate: Tue Oct 8 06:13:20 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 06:13:20 2019 -0700 - - package clonotype print options - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 2c5299e99fc3f96893beb84c434cc39d022c3ecb -Author: DJ -AuthorDate: Tue Oct 8 05:53:40 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 05:53:40 2019 -0700 - - package clonotype filtering options - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 74759c1864c9d450011382c12904c2c2b2b3c4d0 -Author: DJ -AuthorDate: Tue Oct 8 05:33:44 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 05:33:44 2019 -0700 - - remove a dependency - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit ae02a1e427f5744bb91a5fd970a9dba2bb352c14 -Author: DJ -AuthorDate: Tue Oct 8 05:29:57 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 05:29:57 2019 -0700 - - remove a dependency - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 69198a56db9ff823b033e1746d722377833e804e -Author: DJ -AuthorDate: Tue Oct 8 05:26:14 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 05:26:14 2019 -0700 - - kill BL option - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit dce5f9594d9d6b013f2f5c99cf8c3e7a98cb2e89 -Author: DJ -AuthorDate: Tue Oct 8 05:01:43 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 05:01:43 2019 -0700 - - switch to current10 - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs -M lib/rust/vdj_asm_tools/src/bin/enclone.test - -commit b5a244cb51d8d54d7819e631c817224e75f9e661 -Author: DJ -AuthorDate: Tue Oct 8 04:52:25 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 04:52:25 2019 -0700 - - some reorg - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit a5a0c2a83da41e7c7dd8e783606e5d4cad6637d4 -Author: DJ -AuthorDate: Tue Oct 8 04:44:44 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 04:44:44 2019 -0700 - - add option SILENT - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 
3d9c2a4945a5d1da2c0dc65ea3992607606fa31b -Author: DJ -AuthorDate: Tue Oct 8 04:36:03 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 04:36:03 2019 -0700 - - add option CDR3 - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 4629f6c8f7d17f82b87c717cf458feb320af8287 -Author: DJ -AuthorDate: Tue Oct 8 04:26:55 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 04:26:55 2019 -0700 - - doc - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 8db96844d9794419ab71b8a26540aae05c70c876 -Author: DJ -AuthorDate: Tue Oct 8 04:02:55 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 04:02:55 2019 -0700 - - add missing newline - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 2ae07a6dd8d47f67d230d1b42345132a34cc1bf7 -Author: DJ -AuthorDate: Tue Oct 8 03:59:49 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 03:59:49 2019 -0700 - - delete duplicate sort - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit ed5e5dade0a586e8992a18ebe4da2a2d28d82ebb -Author: DJ -AuthorDate: Tue Oct 8 03:56:12 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 8 03:56:12 2019 -0700 - - use sort_by for tig_bc - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit b3ffae22560e090e6164034d87b4fbe7f449f5d4 -Author: DJ -AuthorDate: Mon Oct 7 07:41:32 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 7 07:41:32 2019 -0700 - - add comments - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit bdc4e4be56a0a4d32285f0ed053e48c8d387ccca -Author: DJ -AuthorDate: Mon Oct 7 06:46:50 2019 -0700 -Commit: DJ -CommitDate: Mon Oct 7 06:46:50 2019 -0700 - - for CON_CON, show barcodes - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 28d92b6398eef6e109c731dcf7a89ba25eaaa891 -Author: DJ -AuthorDate: Sun Oct 6 07:07:46 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 6 07:07:46 2019 -0700 - - truncate first base from reference IGHG4 - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 96f8f76e158b55a1c50d85d9c624dc17a2a53601 -Author: DJ -AuthorDate: Sun Oct 6 06:33:13 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 6 
06:33:13 2019 -0700 - - tweaks to CON_CON - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 2e9147b649bd1443073a7a8a30a49d398dd7392a -Author: DJ -AuthorDate: Sun Oct 6 06:08:05 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 6 06:08:05 2019 -0700 - - remove first base of reference C segment in certain cases - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit b1cf693d35c371a52a9d84830d9a53bd3099f485 -Author: DJ -AuthorDate: Sun Oct 6 05:50:51 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 6 05:50:51 2019 -0700 - - add option CON_CON - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit ef2d0e8a9168cdf9cd7aebaa29718eed3c236ee4 -Author: DJ -AuthorDate: Sun Oct 6 05:11:50 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 6 05:11:50 2019 -0700 - - fix UTR_CON - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit c4c107ee3c6cee76afd74404cef81154c8f2dc8e -Author: DJ -AuthorDate: Sun Oct 6 05:00:08 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 6 05:00:08 2019 -0700 - - hardcode current PRE value - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 6b8d12add0a287dc5898e4e6b8ec7be954bd1047 -Author: DJ -AuthorDate: Sun Oct 6 04:51:45 2019 -0700 -Commit: DJ -CommitDate: Sun Oct 6 04:51:45 2019 -0700 - - add option UTR_CON - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 919df4a7a2cf97559a5f9bc6db28e01ce12e6ec7 -Author: DJ -AuthorDate: Sat Oct 5 05:37:02 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 5 05:37:02 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 7c0e2605bd12961c3b91ac688b498cc9fe41ab5a -Author: DJ -AuthorDate: Sat Oct 5 05:21:21 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 5 05:21:21 2019 -0700 - - add to EXACT - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit f489d4a9723b40dcea61a3ffca0c1d72ee16b0f4 -Author: DJ -AuthorDate: Sat Oct 5 05:15:26 2019 -0700 -Commit: DJ -CommitDate: Sat Oct 5 05:15:26 2019 -0700 - - add option EXACT - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 
147323a72331905904b590174a889c43317d2085 -Author: DJ -AuthorDate: Fri Oct 4 15:46:08 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 15:46:08 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit f02bbbf7cc4fa86e7c2987385b08d26aef3ccbb6 -Author: DJ -AuthorDate: Fri Oct 4 15:22:11 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 15:22:11 2019 -0700 - - insert divider row - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit c4d6d0a2d7c78d03328f0bab21bebde2d9b432a1 -Author: DJ -AuthorDate: Fri Oct 4 14:59:34 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 14:59:34 2019 -0700 - - add arg CVARSP - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit ab27a5d807b1b5ff5848f10006a9f968578699ff -Author: DJ -AuthorDate: Fri Oct 4 14:48:31 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 14:48:31 2019 -0700 - - tidy output - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 689ac8b2807f0ae5c93f97f712833269f901c7ab -Author: DJ -AuthorDate: Fri Oct 4 14:44:31 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 14:44:31 2019 -0700 - - put chain descriptions in table - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 35cd65410c9b31eb4f013c30e620dc5e9db2aa2c -Author: DJ -AuthorDate: Fri Oct 4 13:21:08 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 13:21:08 2019 -0700 - - add option CVARS for const - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit f6fb39cb482e59a2628379b3f6d971f5b3a4382f -Author: DJ -AuthorDate: Fri Oct 4 12:51:22 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 12:51:22 2019 -0700 - - fix bug in dots row - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 17fadbf93a171e835c52dc4511aebb51043ab507 -Author: DJ -AuthorDate: Fri Oct 4 11:53:05 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 11:53:05 2019 -0700 - - start to implement near and far - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit ae2b2ca4f0c7cc3eec216a78c2746e63b2ec3e0f -Author: DJ -AuthorDate: Fri Oct 4 11:41:22 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 
11:41:22 2019 -0700 - - restore parallelization of loop - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit a1bbc5d14d17e62f42690862159a0ba8dc76ed4d -Author: DJ -AuthorDate: Fri Oct 4 11:39:26 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 11:39:26 2019 -0700 - - allow specification of arbitrary columns for each chain - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit e135654d7c28dc4eb32f2da28bb4185177457438 -Author: DJ -AuthorDate: Fri Oct 4 10:41:02 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 10:41:02 2019 -0700 - - change lenas to datasets - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit c0369e5d279db8f132406ee2efe0112425004cea -Author: DJ -AuthorDate: Fri Oct 4 10:30:53 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 10:30:53 2019 -0700 - - tidy - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 9e5da2423d2e7c87806858bb21745fe21847014b -Author: DJ -AuthorDate: Fri Oct 4 10:28:54 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 10:28:54 2019 -0700 - - tidy - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit e20873aca940ab9734de754a316cf664b69a872f -Author: DJ -AuthorDate: Fri Oct 4 10:26:00 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 10:26:00 2019 -0700 - - tidy - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 4f97b0f45ac5e55d9c4815bfe69095650038d78b -Author: DJ -AuthorDate: Fri Oct 4 10:22:33 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 10:22:33 2019 -0700 - - add field headings - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 467c920da28a2a209bfa6cf88f1a6c78b60c9606 -Author: DJ -AuthorDate: Fri Oct 4 04:18:02 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 04:18:02 2019 -0700 - - slight amelioration to cross filter - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit d4d0ecc8c2ec0216e7a6512c4541ecc5f33ba0a5 -Author: DJ -AuthorDate: Fri Oct 4 04:12:32 2019 -0700 -Commit: DJ -CommitDate: Fri Oct 4 04:12:32 2019 -0700 - - add cross filter - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 
ef02144c465c33e119a734aec21231a2de5ed3b6 -Author: DJ -AuthorDate: Thu Oct 3 06:20:39 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 3 06:20:39 2019 -0700 - - make GRAPH logging sort of work for multiple lenas - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit ac002ad86689d1b13028d60d7046e3917eadaf09 -Author: DJ -AuthorDate: Thu Oct 3 06:04:48 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 3 06:04:48 2019 -0700 - - if GRAPH, print branching - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 4bf0f19aeac0301aa405fe919dadd5be67354ff1 -Author: DJ -AuthorDate: Thu Oct 3 05:27:57 2019 -0700 -Commit: DJ -CommitDate: Thu Oct 3 05:27:57 2019 -0700 - - add some doc - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 52ad52e84e7c96d418019583d2d3972d7348f5b9 -Author: DJ -AuthorDate: Wed Oct 2 19:35:30 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 19:35:30 2019 -0700 - - use make_ascii_lowercase - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit c1380e25fb5ed5db311d97c81e8da463515ffb5e -Author: DJ -AuthorDate: Wed Oct 2 18:54:15 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 18:54:15 2019 -0700 - - sub in more uses of rpos - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit c73e9f31e806862f0001ab7558f472e103f4ecbd -Author: DJ -AuthorDate: Wed Oct 2 18:50:14 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 18:50:14 2019 -0700 - - sub in more uses of rpos - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 4bd68cc00c107c059a13b44ca2ebbf733273b749 -Author: DJ -AuthorDate: Wed Oct 2 18:45:48 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 18:45:48 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit c53ccf5c125dee9233a08a82daf10c13164d4fd7 -Author: DJ -AuthorDate: Wed Oct 2 16:28:38 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 16:28:38 2019 -0700 - - sub in a use of rpos - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 0a19916cac98ff05131e8a1c88477927bc63e314 -Author: DJ -AuthorDate: Wed Oct 2 16:19:35 2019 -0700 -Commit: DJ 
-CommitDate: Wed Oct 2 16:19:35 2019 -0700 - - factor out map rpos - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit dfed09d0478b09078d00dfa0ef5268dd53243266 -Author: DJ -AuthorDate: Wed Oct 2 15:42:28 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 15:42:28 2019 -0700 - - more tidying - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit bdfad8cc96df9ade0d95d489ccbad772397b2b24 -Author: DJ -AuthorDate: Wed Oct 2 15:24:44 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 15:24:44 2019 -0700 - - clean up some junk - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit ee3b220b30dd8104e873424cbddad16a075926ac -Author: DJ -AuthorDate: Wed Oct 2 15:20:25 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 15:20:25 2019 -0700 - - now use print_tabular_vbox - -M lib/rust/tenkit2/src/io.rs -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 6c9124e7f9419e0135bc7bedf3c80decb9a6e310 -Author: DJ -AuthorDate: Wed Oct 2 13:51:31 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 13:51:31 2019 -0700 - - placeholder for graph stuff - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit a82880df4ce8af7602ace356a1d9f5683672c644 -Author: DJ -AuthorDate: Wed Oct 2 13:37:27 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 13:37:27 2019 -0700 - - GRAPH now forms global object - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 4bc37364dde9c240352c0a1779c0153a78be5e6f -Author: DJ -AuthorDate: Wed Oct 2 13:21:08 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 13:21:08 2019 -0700 - - add option GRAPH - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 379623143da22a56dbad14f1070bdcc7f7417034 -Author: DJ -AuthorDate: Wed Oct 2 10:59:27 2019 -0700 -Commit: DJ -CommitDate: Wed Oct 2 10:59:27 2019 -0700 - - add option SEQC - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit e2898697223c64ded7949bdeb4fe6b0b68f57104 -Author: David Jaffe -AuthorDate: Wed Oct 2 07:14:55 2019 -0700 -Commit: David Jaffe -CommitDate: Wed Oct 2 07:14:55 2019 -0700 - - filter out exact clonotypes having low 
q support at var positions - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit ff20715403bb8068d9c65fe4b279251706eee7f1 -Author: DJ -AuthorDate: Tue Oct 1 16:48:55 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 16:48:55 2019 -0700 - - add arg MIN_CHAINS - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit a344b1368ed1027bcb79d6775c868ba2201be288 -Author: DJ -AuthorDate: Tue Oct 1 13:47:56 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 13:47:56 2019 -0700 - - parallelize loading of gex data - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 105e5c866283cb7f077bc3c9bf3302ad5dbe5f88 -Author: DJ -AuthorDate: Tue Oct 1 13:30:46 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 13:30:46 2019 -0700 - - add gex_max - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit b98038ef72e8206203390e597acc20542931037e -Author: DJ -AuthorDate: Tue Oct 1 13:14:44 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 13:14:44 2019 -0700 - - fix bug in handling of gex data - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit bf7ec9475b495bcee9d95eb2ad4211dfd19f2417 -Author: DJ -AuthorDate: Tue Oct 1 11:39:15 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 11:39:15 2019 -0700 - - add option TWO_LENAS - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 7a058836998642571ca91df208028243adfce823 -Author: DJ -AuthorDate: Tue Oct 1 11:23:44 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 11:23:44 2019 -0700 - - update doc - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit a8f728c5d1e9010807948cfe7620a9d430ae9053 -Author: DJ -AuthorDate: Tue Oct 1 11:21:00 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 11:21:00 2019 -0700 - - put gex counts in better place - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 99464a1f7d12a5d2e05c0fe7337db665e1a3c8ee -Author: DJ -AuthorDate: Tue Oct 1 10:58:01 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 10:58:01 2019 -0700 - - add GEX option - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit f931080302eb043962fe3147342c8dd65ebcb406 
-Author: DJ -AuthorDate: Tue Oct 1 06:27:21 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 06:27:21 2019 -0700 - - sub in hyphenated list - -M lib/rust/vdj_asm_tools/src/bin/enclone.testdata - -commit 4aba5470e4cf8ad8d979003b0b9c4c00d4e9af9b -Author: DJ -AuthorDate: Tue Oct 1 06:26:35 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 06:26:35 2019 -0700 - - allow hyphenated lists of lena ids - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 67cb2dbea836b76aab54840781fb2cf70c628444 -Author: DJ -AuthorDate: Tue Oct 1 04:12:32 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 04:12:32 2019 -0700 - - add option MIN_UMI - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 5108f928c691b2403baecd8451ee7a855c835cf2 -Author: DJ -AuthorDate: Tue Oct 1 03:45:36 2019 -0700 -Commit: DJ -CommitDate: Tue Oct 1 03:45:36 2019 -0700 - - fix bug that caused some joins to be missed - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 68def556f4c549a1f72ae601255d91a5b2c1df5a -Author: DJ -AuthorDate: Mon Sep 30 16:18:26 2019 -0700 -Commit: DJ -CommitDate: Mon Sep 30 16:18:26 2019 -0700 - - add some doc to code - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 6ce6d91369960f98fa2690e500f7c60ef3a63d85 -Author: DJ -AuthorDate: Mon Sep 30 16:04:30 2019 -0700 -Commit: DJ -CommitDate: Mon Sep 30 16:04:30 2019 -0700 - - more doc for optional args - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 63ac89157a7a271551fca0c29ca5384598bdc827 -Author: DJ -AuthorDate: Mon Sep 30 15:21:30 2019 -0700 -Commit: DJ -CommitDate: Mon Sep 30 15:21:30 2019 -0700 - - parallelize clonotype printing - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 04589c252fa55b2babb038d1c02fad0b8e7a899b -Author: DJ -AuthorDate: Mon Sep 30 08:56:19 2019 -0700 -Commit: DJ -CommitDate: Mon Sep 30 08:56:19 2019 -0700 - - add option FASTA - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 3cc4fab17f1d4d7851b3c26b84e65e80cea38371 -Author: DJ -AuthorDate: Mon Sep 30 05:53:18 2019 -0700 -Commit: DJ 
-CommitDate: Mon Sep 30 05:53:18 2019 -0700 - - add question - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 84aecd3a787b23fee3f30ce056939ddbf49fa565 -Author: DJ -AuthorDate: Sun Sep 29 17:02:07 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 17:02:07 2019 -0700 - - upgrade clonotype headers - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 62d9b11dce4abb2d0b3f1518a573056cb09104d2 -Author: DJ -AuthorDate: Sun Sep 29 11:44:04 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 11:44:04 2019 -0700 - - print the segments in chains - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit fb387c9619090311046a1d6842a4b3f6cd8bb234 -Author: DJ -AuthorDate: Sun Sep 29 11:00:48 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 11:00:48 2019 -0700 - - track D segments in TigData - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 8712cf0b799c6c880c2742db3f384bde1a455533 -Author: DJ -AuthorDate: Sun Sep 29 10:25:26 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 10:25:26 2019 -0700 - - add comments - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 54bd1419b972a1d713f3d0b553838a0c79196cba -Author: DJ -AuthorDate: Sun Sep 29 10:15:59 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 10:15:59 2019 -0700 - - add option TMAX - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 758f5013fffaae261f52e0c923db8c810e744a0d -Author: DJ -AuthorDate: Sun Sep 29 09:47:58 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 09:47:58 2019 -0700 - - add option TUMI - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 6857b4bda56380bb6075655e107cee6cb8835a9d -Author: DJ -AuthorDate: Sun Sep 29 09:22:24 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 09:22:24 2019 -0700 - - set PRE for test - -M lib/rust/vdj_asm_tools/src/bin/enclone.test - -commit abd82cabd27d72e552ef83b02c3e18c262439eaf -Author: DJ -AuthorDate: Sun Sep 29 08:56:48 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 08:56:48 2019 -0700 - - fix to clocks - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 
0e140390c5c8679767722dd2cf2261c144f8c56f -Author: DJ -AuthorDate: Sun Sep 29 08:40:40 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 08:40:40 2019 -0700 - - print more info for donor mixups - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 4c0838f3ad7cef467d02d8e3950a5ff6d9c8337d -Author: DJ -AuthorDate: Sun Sep 29 07:19:31 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 07:19:31 2019 -0700 - - add option QUIET - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit 303e965bbb2ef38d649ae843cea798c1abbcb191 -Author: DJ -AuthorDate: Sun Sep 29 07:15:05 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 07:15:05 2019 -0700 - - more organization of optional logging - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit f603c61127e376fadd4c66757fb496edbe36dbc1 -Author: DJ -AuthorDate: Sun Sep 29 07:08:52 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 07:08:52 2019 -0700 - - lowercase bases that lack Q60 support in exact clonotype - -M lib/rust/vdj_asm_tools/src/bin/enclone.rs - -commit d5841cc1b90a500b04059f9e81f10064c112e2e0 -Author: DJ -AuthorDate: Sun Sep 29 05:54:46 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 05:54:46 2019 -0700 - - rename simclone to enclone - -R095 lib/rust/vdj_asm_tools/src/bin/simclone.out lib/rust/vdj_asm_tools/src/bin/enclone.out -R099 lib/rust/vdj_asm_tools/src/bin/simclone.rs lib/rust/vdj_asm_tools/src/bin/enclone.rs -A lib/rust/vdj_asm_tools/src/bin/enclone.test -R098 lib/rust/vdj_asm_tools/src/bin/simclone.testdata lib/rust/vdj_asm_tools/src/bin/enclone.testdata -D lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 1164b7c7a2b4dd0bcd733ad3553c7331202db50f -Author: DJ -AuthorDate: Sun Sep 29 05:39:01 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 05:39:01 2019 -0700 - - delete reused barcodes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 230abb49eb11d2e317f5775848ba642efb1d44e6 -Author: DJ -AuthorDate: Sun Sep 29 05:02:56 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 05:02:56 2019 -0700 - - some reord of argument 
documentation - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 625be93e7f29ef86e415904651cf997bb12dc46e -Author: DJ -AuthorDate: Sun Sep 29 04:58:29 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 04:58:29 2019 -0700 - - add option REUSE - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ec050af3e65b4bcda8435526a5f891d55d9ca26f -Author: DJ -AuthorDate: Sun Sep 29 04:41:47 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 29 04:41:47 2019 -0700 - - add doc - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f591017d56aca5aa3c13beaac0ad19ab13aa666b -Author: DJ -AuthorDate: Sat Sep 28 13:51:06 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 13:51:06 2019 -0700 - - avoid unnneeded hardcoded constant - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 6ad843dd4b9085a4aca6fbb5cd1cb45f3292da23 -Author: DJ -AuthorDate: Sat Sep 28 13:46:06 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 13:46:06 2019 -0700 - - improve columns in orbits table - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2e49ed7846ff79179b885769b57bbc04b826678d -Author: DJ -AuthorDate: Sat Sep 28 08:11:45 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 08:11:45 2019 -0700 - - fix BU option - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bb1d0cedb359cc7fd81f5d48bb3b686b031d882a -Author: DJ -AuthorDate: Sat Sep 28 05:19:40 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 05:19:40 2019 -0700 - - flag if clonotype crosses donors - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 18990807584d4a28753f1f57615f47c4fc0b371d -Author: DJ -AuthorDate: Sat Sep 28 05:10:24 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 05:10:24 2019 -0700 - - add pass 2 orbit size check - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0449ba65148378c4209deb2ca1755e5936d2c2b2 -Author: DJ -AuthorDate: Sat Sep 28 05:02:13 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 05:02:13 2019 -0700 - - corrections to clonotype headers - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
ac46ffa39030028cc0b45498052e17d8c91ef45e -Author: DJ -AuthorDate: Sat Sep 28 04:46:10 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 04:46:10 2019 -0700 - - reorg some doc - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1b8f73c4be6848f9e75a87e2e5a85886970c49b6 -Author: DJ -AuthorDate: Sat Sep 28 04:42:46 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 04:42:46 2019 -0700 - - add option CTRLC - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3c642c4f86c174a364606ca27859ac971fe06cec -Author: DJ -AuthorDate: Sat Sep 28 04:35:43 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 04:35:43 2019 -0700 - - number clonotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5e4d9f23958d773d7c4689f6d4816b0e2d271f74 -Author: DJ -AuthorDate: Sat Sep 28 04:28:13 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 04:28:13 2019 -0700 - - remove redundant chain type from orbit table - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a7a054711e3b6ae57d9b72c49ae52a0e92d6a0f1 -Author: DJ -AuthorDate: Sat Sep 28 04:18:03 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 28 04:18:03 2019 -0700 - - add chain headers - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 74f3ae04a1636a4608803f00d75381c4255a8562 -Author: DJ -AuthorDate: Fri Sep 27 17:12:40 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 17:12:40 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 18a69f5670395533e3f7be24a27bcdc1075b8daa -Author: DJ -AuthorDate: Fri Sep 27 17:07:08 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 17:07:08 2019 -0700 - - slight abbreviation - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ffe6e904451e8a0256f0c6a22f3d18c301fa2391 -Author: DJ -AuthorDate: Fri Sep 27 16:52:11 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 16:52:11 2019 -0700 - - drop empty variant lines - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d358de0aed9a6638a85a53a8a320e711f4a44d6d -Author: DJ -AuthorDate: Fri Sep 27 16:23:25 2019 -0700 -Commit: DJ 
-CommitDate: Fri Sep 27 16:23:25 2019 -0700 - - tweak threshold - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c6abb854abd7743dd58e7e052168421c27c41a42 -Author: DJ -AuthorDate: Fri Sep 27 16:04:33 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 16:04:33 2019 -0700 - - show umi counts - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ea478aa28e2079bb89293736e6bec1a92fc1b68e -Author: DJ -AuthorDate: Fri Sep 27 14:47:54 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 14:47:54 2019 -0700 - - add column headers for orbit tables - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 780f54776b4fb328d95e46c1f0c7ac5d5e658e55 -Author: DJ -AuthorDate: Fri Sep 27 14:33:02 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 14:33:02 2019 -0700 - - show orbit matrix as pretty box - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 25f305f20c7dabe287267cf25fa0cc0649235577 -Author: DJ -AuthorDate: Fri Sep 27 12:52:16 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 12:52:16 2019 -0700 - - add blank line to nicify output - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e0113bbf07553be84bc6238a704218e8f99fe694 -Author: DJ -AuthorDate: Fri Sep 27 12:50:50 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 12:50:50 2019 -0700 - - delete weak columns in the orbit matrix - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3287f3e109c03af760ea60f2a10fd2b0a62aff4d -Author: DJ -AuthorDate: Fri Sep 27 11:26:58 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 11:26:58 2019 -0700 - - implement better equiv rel on CDR3_AAs that defines orbit columns - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2b5efb91e4c7d9081c7074a5573f0c912aae91c9 -Author: DJ -AuthorDate: Fri Sep 27 09:37:05 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 09:37:05 2019 -0700 - - comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 778b368ac2e59734836180a31fb987bf8bdae2ca -Author: DJ -AuthorDate: Fri Sep 27 09:25:28 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 09:25:28 
2019 -0700 - - delete code about orbits having errors - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2829e3975b9fb6b3b5f2062b25eafdc2c7bbc077 -Author: DJ -AuthorDate: Fri Sep 27 08:44:37 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 08:44:37 2019 -0700 - - tidy code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e1c94e5375c5000d4f4617297550edd4fdc1f3e4 -Author: DJ -AuthorDate: Fri Sep 27 06:26:11 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 06:26:11 2019 -0700 - - delete some dated doc - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9b49b5c83b089cd079a208d96b0d14ffdeb1cf4d -Author: DJ -AuthorDate: Fri Sep 27 06:25:19 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 06:25:19 2019 -0700 - - simplify notation - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0b3f5ddb05e692c95bcc3f153187aefd620454e3 -Author: DJ -AuthorDate: Fri Sep 27 05:50:18 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 05:50:18 2019 -0700 - - add option NVARS - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4a99803912265f635a95a8b3ea95445cf878647f -Author: DJ -AuthorDate: Fri Sep 27 04:55:23 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 04:55:23 2019 -0700 - - simplify variable name - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 37a02475e9f5c0ec927d64515127b0b25d9cf340 -Author: DJ -AuthorDate: Fri Sep 27 04:53:13 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 04:53:13 2019 -0700 - - delete unused stuff - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit fca00984e96e36dfd26f7c11d23e1bd531ad11b4 -Author: DJ -AuthorDate: Fri Sep 27 04:51:09 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 04:51:09 2019 -0700 - - delete unused stuff - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2d40e457d7ac82d482df0c472b099a6029ca6899 -Author: DJ -AuthorDate: Fri Sep 27 04:46:02 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 04:46:02 2019 -0700 - - delete some unused stuff - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
c1ba0daf9f714675a9529cdbe8350475138e7fed -Author: DJ -AuthorDate: Fri Sep 27 04:43:53 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 04:43:53 2019 -0700 - - delete some unused stuff - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 645dd608ea63137dc6362da29e1c4a6d647ed401 -Author: DJ -AuthorDate: Fri Sep 27 04:35:37 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 04:35:37 2019 -0700 - - fix for BU - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit addcd7e3abed7d03859edc9978a3c6cbebee1825 -Author: DJ -AuthorDate: Fri Sep 27 04:24:53 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 27 04:24:53 2019 -0700 - - add vertical separators - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7b95891d52d72338d210fd59be69f23ecb97c5bd -Author: DJ -AuthorDate: Thu Sep 26 14:37:43 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 14:37:43 2019 -0700 - - add in foursies but don't yet merge with anything - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 39af00b2c646522d76184d5cc0b263117890babc -Author: DJ -AuthorDate: Thu Sep 26 12:18:56 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 12:18:56 2019 -0700 - - don't allow bad ref match if other much better - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit beb97f12e8308d366211852d6343a892273ed56e -Author: DJ -AuthorDate: Thu Sep 26 09:55:05 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 09:55:05 2019 -0700 - - fix bug in ordering of TigData objects - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ca7294ab3c3b83409d80ecf2e635d2ecb4c053d8 -Author: DJ -AuthorDate: Thu Sep 26 09:24:44 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 09:24:44 2019 -0700 - - qualify some logging - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3a6418c6efc0f5ba658afc9188864172bcebf55e -Author: DJ -AuthorDate: Thu Sep 26 09:20:36 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 09:20:36 2019 -0700 - - dump old version - -D lib/rust/vdj_asm_tools/src/bin/simclone_old.rs - -commit ec3c9f2edbab699b91d2cb06c5417e2d8068383a 
-Author: DJ -AuthorDate: Thu Sep 26 09:19:56 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 09:19:56 2019 -0700 - - show correct clonotype size - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit cbe6ad548f75296b34046f88eca674685f249e26 -Author: DJ -AuthorDate: Thu Sep 26 07:31:00 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 07:31:00 2019 -0700 - - workaround for different length problem - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 967aa3249338dcef1e789ac81d94856a10a1ebd0 -Author: DJ -AuthorDate: Thu Sep 26 05:37:17 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 05:37:17 2019 -0700 - - comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5de8b3c894e4f531ab4b4220f1d074d127e4433f -Author: DJ -AuthorDate: Thu Sep 26 05:24:37 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 05:24:37 2019 -0700 - - make sure that we catch all errors - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f5b42c1bbd17db5ee7f42f6a1b34f916b931f275 -Author: DJ -AuthorDate: Thu Sep 26 05:01:56 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 05:01:56 2019 -0700 - - fix issue in extracting sample descriptions - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a04ff2247e163049e7101c99de3a13df8c1168b2 -Author: DJ -AuthorDate: Thu Sep 26 04:51:20 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 04:51:20 2019 -0700 - - comment out some logging - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8c35cf693aee17b901adf3962cfb929379346869 -Author: DJ -AuthorDate: Thu Sep 26 04:46:04 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 04:46:04 2019 -0700 - - make OUTS automatic with PRE - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0c02892454d3399c56dd5a53045f8ff9ebfde999 -Author: DJ -AuthorDate: Thu Sep 26 04:41:29 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 04:41:29 2019 -0700 - - delete unneeded code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3ea810bdcf1a02d46f802a2795a2b06f819a80c9 -Author: DJ -AuthorDate: Thu Sep 26 04:39:27 
2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 04:39:27 2019 -0700 - - avoid crash - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 71c9cb95b5c1384673290554ee6bb041212c5aab -Author: DJ -AuthorDate: Thu Sep 26 04:01:39 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 26 04:01:39 2019 -0700 - - use equiv rel to greatly improve comp performance - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f4221213cccd50555931f83849944092cc875272 -Author: DJ -AuthorDate: Wed Sep 25 15:57:31 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 15:57:31 2019 -0700 - - formatting tweak - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 17f1ca574f787dd206a656845f00c88f09feb932 -Author: DJ -AuthorDate: Wed Sep 25 15:29:40 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 15:29:40 2019 -0700 - - actually show the variant positions - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ee9a643a44c49fc7cd13965e04bc2ce1f7eca4ce -Author: DJ -AuthorDate: Wed Sep 25 14:42:28 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 14:42:28 2019 -0700 - - compute variant positions in orbits - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 00ed134860ec65973f9693e338b27c3832a35a52 -Author: DJ -AuthorDate: Wed Sep 25 11:26:20 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 11:26:20 2019 -0700 - - delete unused structure - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a42734f95e55ff942f345f77c595b2b4c7d187a2 -Author: DJ -AuthorDate: Wed Sep 25 10:27:21 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 10:27:21 2019 -0700 - - delete com'ed out line - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b83d2dac68fd89a08eadb51138443501be6d5e8a -Author: DJ -AuthorDate: Wed Sep 25 10:08:45 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 10:08:45 2019 -0700 - - EXP2 gone (always on) - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ee1d908b3b2f824ab2687494f448c16ea68ff44a -Author: DJ -AuthorDate: Wed Sep 25 10:07:17 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 10:07:17 
2019 -0700 - - tidying - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 82327f29be8c34338c3f7e93dd9cd95655638425 -Author: DJ -AuthorDate: Wed Sep 25 10:05:23 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 10:05:23 2019 -0700 - - delete old code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 03b137713dd8341ad1387a92ca99665179256916 -Author: DJ -AuthorDate: Wed Sep 25 10:01:29 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 10:01:29 2019 -0700 - - temp copy of old version - -A lib/rust/vdj_asm_tools/src/bin/simclone_old.rs - -commit ea62f5ff38dbd89fcf0a4cd26f12ba2753a9fbc6 -Author: DJ -AuthorDate: Wed Sep 25 07:52:06 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 07:52:06 2019 -0700 - - add option OUTS - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f4408dbded94249ad28bb1d799563f746a9f3a45 -Author: DJ -AuthorDate: Wed Sep 25 07:44:58 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 07:44:58 2019 -0700 - - delete some old debugging - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 31662137dc6beff435bfcf96bfaf2b53b51318af -Author: DJ -AuthorDate: Wed Sep 25 06:34:33 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 06:34:33 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 710f40de8d1ef9442df8f93a636ca519609c80ad -Author: DJ -AuthorDate: Wed Sep 25 06:21:29 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 06:21:29 2019 -0700 - - remove spurious newline - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 055bf459b70c5e1d6e6153e51405d99112185c6d -Author: DJ -AuthorDate: Wed Sep 25 06:07:57 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 06:07:57 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e45ecfc128eb5797bdd15ebd1c76f3b76b4dc1f3 -Author: DJ -AuthorDate: Wed Sep 25 05:23:55 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 05:23:55 2019 -0700 - - temp WWW lines - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a60d88b57eb2517d7aa01226abc68159ebb3ec6c -Author: DJ 
-AuthorDate: Wed Sep 25 05:05:59 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 05:05:59 2019 -0700 - - correct display of clonotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a9a111e3632b11161325a636cacdf50030ab5bad -Author: DJ -AuthorDate: Wed Sep 25 04:50:42 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 04:50:42 2019 -0700 - - add missing newline - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 826948fea01fab018d9383a537b51e95d9c8abdc -Author: DJ -AuthorDate: Wed Sep 25 04:48:02 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 04:48:02 2019 -0700 - - fix bug in splitting of 3-chain clonotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit dd5de87db7878a454ac671890aeb87d925a82978 -Author: DJ -AuthorDate: Wed Sep 25 04:19:19 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 04:19:19 2019 -0700 - - turn on sub-clonotype joining code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 19347fcadc76cdae5701b3be2d648213462bb227 -Author: DJ -AuthorDate: Wed Sep 25 03:53:37 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 03:53:37 2019 -0700 - - fix bug in new orbit printing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ee837124b0e216f8db9df365f297aac0581afc91 -Author: DJ -AuthorDate: Wed Sep 25 03:49:19 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 03:49:19 2019 -0700 - - port remaining tidbit - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d83a9d500b2b033869a8d444fdd46913e2ada47f -Author: DJ -AuthorDate: Wed Sep 25 03:45:38 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 25 03:45:38 2019 -0700 - - get new-style orbit printing to compile - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 87d81a7dd56b0f25f159622d66ad52753055f125 -Author: DJ -AuthorDate: Tue Sep 24 15:48:36 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 15:48:36 2019 -0700 - - ready more code for translation - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4c7e5d41de7c90ae9ebff96984ba1243432b1fff -Author: DJ -AuthorDate: Tue Sep 24 
15:25:25 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 15:25:25 2019 -0700 - - more translation - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2a2f8273214bed3bec6d43def09c4ea0325d2ea7 -Author: DJ -AuthorDate: Tue Sep 24 15:13:54 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 15:13:54 2019 -0700 - - third block now copied/translated - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 918285168158933f71212f85d82da9e9ecc90b8a -Author: DJ -AuthorDate: Tue Sep 24 14:58:09 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 14:58:09 2019 -0700 - - midway on translating giant block - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a9755639ebf1d5b7173551b20a62d6a950810967 -Author: DJ -AuthorDate: Tue Sep 24 14:17:13 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 14:17:13 2019 -0700 - - translate alt allele substitution - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 61b5cb6f09caee8ba32bdee48d1ed20898f8418a -Author: DJ -AuthorDate: Tue Sep 24 14:03:38 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 14:03:38 2019 -0700 - - translate allele finding to new approach - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f6c8fbc8c43ff7830cf736a1db351a19138e0444 -Author: DJ -AuthorDate: Tue Sep 24 13:48:53 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 13:48:53 2019 -0700 - - info2 now consists of two chain subclones - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a01f1a920a40aeca65594886de9f0d47ac28ac25 -Author: DJ -AuthorDate: Tue Sep 24 13:04:42 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 13:04:42 2019 -0700 - - delete dead code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d77426216d723ee50c41744519d488b50fd0b5b4 -Author: DJ -AuthorDate: Tue Sep 24 12:56:52 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 12:56:52 2019 -0700 - - next step in transition to from scratch - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 25091d3cbac091c35647965bfc9c9cca6d9133ac -Author: DJ -AuthorDate: Tue Sep 24 11:29:42 2019 -0700 
-Commit: DJ -CommitDate: Tue Sep 24 11:29:42 2019 -0700 - - deep into WIP on transition to clonotypes from scratch - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0035fcd182be3f8e67e4f00465fa29a7d9852d44 -Author: DJ -AuthorDate: Tue Sep 24 11:28:04 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 24 11:28:04 2019 -0700 - - remove 48624, which is TCR - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 25c4ba7f2c9662e6260f518fdb96f146569e40d8 -Author: David Jaffe -AuthorDate: Mon Sep 23 04:25:28 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Sep 23 04:25:28 2019 -0700 - - comment on, set aside UTR consensus computation - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4886bf83930d0c211804f910b65da85620d9fbb7 -Author: David Jaffe -AuthorDate: Sun Sep 22 07:17:09 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Sep 22 07:17:09 2019 -0700 - - filter out some unneeded printing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2a60205b0f3fdae7de17934fac2c3a9ce549c722 -Author: David Jaffe -AuthorDate: Sat Sep 21 16:35:06 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Sep 21 16:35:06 2019 -0700 - - tidy code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c8a7b7f05c318a617b0bf23b68bef24a87a6b894 -Author: David Jaffe -AuthorDate: Sat Sep 21 16:27:25 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Sep 21 16:27:25 2019 -0700 - - logging fix - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 52bb5f3e4ac38c34b01ca1ae1a60374cc7a12b30 -Author: David Jaffe -AuthorDate: Sat Sep 21 16:24:29 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Sep 21 16:24:29 2019 -0700 - - show vs for consensus - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 27499f129c40fda59f7e56e1b10eca90c02181b1 -Author: David Jaffe -AuthorDate: Sat Sep 21 07:50:14 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Sep 21 07:50:14 2019 -0700 - - add comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8519f9eb18e04a769e44d163c0cc52497261e96d 
-Author: David Jaffe -AuthorDate: Sat Sep 21 06:45:01 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Sep 21 06:45:01 2019 -0700 - - show even if only one seq - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a1660f1b57e5692ec2b64e95134db63a02383127 -Author: David Jaffe -AuthorDate: Sat Sep 21 06:40:19 2019 -0700 -Commit: David Jaffe -CommitDate: Sat Sep 21 06:40:19 2019 -0700 - - show utr ids - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8bd656d89a21b0c0598f5f318d057108ec755354 -Author: David Jaffe -AuthorDate: Fri Sep 20 15:09:03 2019 -0700 -Commit: David Jaffe -CommitDate: Fri Sep 20 15:09:03 2019 -0700 - - start of consensus code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bc002837197ea05fe534b1d7c080586ccb591ff3 -Author: DJ -AuthorDate: Fri Sep 20 06:15:06 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 20 06:15:06 2019 -0700 - - mess, WIP - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3473f3abc4a5930fd17d25cf101beb4db15dd694 -Author: DJ -AuthorDate: Thu Sep 19 16:02:29 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 19 16:02:29 2019 -0700 - - messy translation in progress - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bb78d32dc58b62eee99076ceaa56a46f68eecb34 -Author: DJ -AuthorDate: Thu Sep 19 15:10:19 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 19 15:10:19 2019 -0700 - - track chain type in new approach - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 54537430fa4caffdbb752898048f419cbc312e6a -Author: DJ -AuthorDate: Thu Sep 19 10:47:53 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 19 10:47:53 2019 -0700 - - add new computation of slobber - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit fc9873dd0fdb57b420831540668954b53db03a4a -Author: DJ -AuthorDate: Thu Sep 19 10:16:21 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 19 10:16:21 2019 -0700 - - add new computation of to_bc - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ea5351c1a8eb9632b7812770d2faa8cb460acc26 -Author: DJ -AuthorDate: Thu 
Sep 19 10:02:27 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 19 10:02:27 2019 -0700 - - comments and delete some duplicated code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 321862067337ae737b5de542a663b3cac145b71b -Author: DJ -AuthorDate: Thu Sep 19 06:06:30 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 19 06:06:30 2019 -0700 - - no longer need to add missing sequence - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0ec9a70ae47d0d56a8621fe930bf1383c25f64ee -Author: DJ -AuthorDate: Thu Sep 19 05:30:11 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 19 05:30:11 2019 -0700 - - clarify notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 15c50bb0025352d778cab29c2fd6e1a218fd78b6 -Author: DJ -AuthorDate: Thu Sep 19 05:27:45 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 19 05:27:45 2019 -0700 - - update results - -M lib/rust/vdj_asm_tools/src/bin/simclone.out - -commit 264105284b562f1f1b41144f369a927d7ab72f92 -Author: DJ -AuthorDate: Wed Sep 18 15:57:36 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 15:57:36 2019 -0700 - - dump missing sequence 4 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit aadab55d8082b0681c038da32267b1214abafa5b -Author: DJ -AuthorDate: Wed Sep 18 15:34:51 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 15:34:51 2019 -0700 - - make MIN_ALT accessible from command line - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 99af44712cdb810d954c57e027a0d3a4cb25fdf3 -Author: DJ -AuthorDate: Wed Sep 18 15:18:25 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 15:18:25 2019 -0700 - - dump missing sequence #5 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 33c56217e0cfd886766a33c02cf163c8e3afc803 -Author: DJ -AuthorDate: Wed Sep 18 15:09:20 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 15:09:20 2019 -0700 - - add options to exclude the other missing seqs - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f32af43343f780956bb4bcb7279d1d899180d821 -Author: DJ -AuthorDate: Wed Sep 18 14:55:44 2019 -0700 
-Commit: DJ -CommitDate: Wed Sep 18 14:55:44 2019 -0700 - - dump missing sequence #2 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a723e408d2c733755649b3eda5761705a41418a7 -Author: DJ -AuthorDate: Wed Sep 18 14:49:25 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 14:49:25 2019 -0700 - - dump missing sequence 1 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4b5e01d4ee83e08c6d044820f286d27c8c544659 -Author: DJ -AuthorDate: Wed Sep 18 14:47:28 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 14:47:28 2019 -0700 - - dump wherex - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 14bad1d47870bb2ab0eb7050da9ebe98bbe4fe7a -Author: DJ -AuthorDate: Wed Sep 18 14:45:46 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 14:45:46 2019 -0700 - - for allele-finding, consider first most common base if nonref - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f7000ea119d3d7f7c51f10cb4b146be53277efcf -Author: DJ -AuthorDate: Wed Sep 18 14:02:57 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 14:02:57 2019 -0700 - - add option NMISS1 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit db4759f30a3832587e1ba1c8f7aa66080bd57ddd -Author: DJ -AuthorDate: Wed Sep 18 13:12:22 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 13:12:22 2019 -0700 - - exit upon illegal argument - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 44066c419b210e5f3476f248613a2cc8a5004452 -Author: DJ -AuthorDate: Wed Sep 18 11:33:17 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 11:33:17 2019 -0700 - - print command-line args - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit dafcecb497946175f42bc473fd7e2606aea930af -Author: DJ -AuthorDate: Wed Sep 18 11:30:01 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 11:30:01 2019 -0700 - - make MIN_MULT accessible from command line - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit afeafead21046b210ef913be838749ba222f8e72 -Author: DJ -AuthorDate: Wed Sep 18 10:52:13 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 10:52:13 
2019 -0700 - - add some optional logging - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 42ac055c2dfe3202fa7a2d28e40f5f6eb577248f -Author: DJ -AuthorDate: Wed Sep 18 10:37:34 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 10:37:34 2019 -0700 - - add option NMISS - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3be234429663d7e7b2c4980dbe80bffa94748706 -Author: DJ -AuthorDate: Wed Sep 18 09:44:23 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 09:44:23 2019 -0700 - - doc and com'ed out logging - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b5050d15eea974a2d150102f7aff0d3c7bb1d5f9 -Author: DJ -AuthorDate: Wed Sep 18 09:25:44 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 09:25:44 2019 -0700 - - document some stuf - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5eba322061099f8a480f5010de4e141c7f8aeba2 -Author: DJ -AuthorDate: Wed Sep 18 04:54:40 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 04:54:40 2019 -0700 - - temp logging com'ed out - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4ccb084f1f31c39bfce25cd999fa400923ecb7fe -Author: DJ -AuthorDate: Wed Sep 18 03:41:34 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 03:41:34 2019 -0700 - - add logging - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f077649224fa6a1f297703d1390add37968c94ea -Author: DJ -AuthorDate: Wed Sep 18 03:33:08 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 03:33:08 2019 -0700 - - add logging - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8a5205ff4c190a42652a1edd1e2cce2e248ce832 -Author: DJ -AuthorDate: Wed Sep 18 02:36:09 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 18 02:36:09 2019 -0700 - - conditionalize some new code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 86a0ba76c22ae2c0827fcc025138132e58f16d08 -Author: DJ -AuthorDate: Tue Sep 17 15:59:47 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 15:59:47 2019 -0700 - - sort pair exact clonotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
b6bf86bcd8ac008d33a1291e2963a70f6169f784 -Author: DJ -AuthorDate: Tue Sep 17 15:25:21 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 15:25:21 2019 -0700 - - track V and J reference ids - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7cf1738b9410cf228a3f1a382c7f25c3959ffc28 -Author: DJ -AuthorDate: Tue Sep 17 15:14:00 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 15:14:00 2019 -0700 - - TigData: track full contig sequences - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bb3ac805de55155f54af6b1a421fdf107f22a2e3 -Author: DJ -AuthorDate: Tue Sep 17 14:53:18 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 14:53:18 2019 -0700 - - find pair exact clonotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5008a13c3568acf7be0b11fb6ab68eccbbf3882c -Author: DJ -AuthorDate: Tue Sep 17 14:36:30 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 14:36:30 2019 -0700 - - track umi count in TigData - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 097ae3483d3355183a9c57c6180d0c884d0cb96b -Author: DJ -AuthorDate: Tue Sep 17 12:54:02 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 12:54:02 2019 -0700 - - add ExactClonotype data structure that reduces duplication - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4174711108803b3ea8a46caa876d4c82357fa9db -Author: DJ -AuthorDate: Tue Sep 17 12:13:16 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 12:13:16 2019 -0700 - - print peak mem - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 87139fbf9636a4446f30fbdb6819d4b3fb82c95e -Author: DJ -AuthorDate: Tue Sep 17 05:41:38 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 05:41:38 2019 -0700 - - print max exact clonotype size - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4183964fb9f0ba4ae516812b9bbdc595aa1b1d38 -Author: DJ -AuthorDate: Tue Sep 17 05:27:04 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 05:27:04 2019 -0700 - - add logging - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 00a550141dd7044ddce252a02ff3afdfb10fd14d 
-Author: DJ -AuthorDate: Tue Sep 17 05:17:03 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 05:17:03 2019 -0700 - - exact clonotype finding now is across lenas - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ed59484e37141dfbbd3358c593e2228e11e10afe -Author: DJ -AuthorDate: Tue Sep 17 05:02:58 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 05:02:58 2019 -0700 - - track lena index in TigData - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0d651728a0c437740c873d1f7f678ef0e73932a4 -Author: DJ -AuthorDate: Tue Sep 17 04:54:54 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 04:54:54 2019 -0700 - - add some documentation - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7dc24670f45b4e23f9bb36b80cb6e585047f0ae8 -Author: DJ -AuthorDate: Tue Sep 17 04:43:04 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 04:43:04 2019 -0700 - - add notes on computational performance - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4ac0d44a0bea8e411aa47ff8aec67caf952ad716 -Author: DJ -AuthorDate: Tue Sep 17 04:39:41 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 04:39:41 2019 -0700 - - dump EXP3 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f638792d07a065868984f325ca48e5cded2227e2 -Author: DJ -AuthorDate: Tue Sep 17 04:03:00 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 04:03:00 2019 -0700 - - add experimental option EXP3 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 486f21c7f7307ed9353013ee22524492ef9ac324 -Author: DJ -AuthorDate: Tue Sep 17 03:55:59 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 17 03:55:59 2019 -0700 - - add option EXP2 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1c104e31c92190bbd6ca26171bd1fb84972aa34b -Author: DJ -AuthorDate: Sun Sep 15 05:22:24 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 15 05:22:24 2019 -0700 - - fix to last commit - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit da2e5bb07ca2886743ebfbd42cb79a9039148b36 -Author: DJ -AuthorDate: Sun Sep 15 05:19:52 2019 -0700 -Commit: DJ 
-CommitDate: Sun Sep 15 05:19:52 2019 -0700 - - EXP: exit when done - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 76da04d9e99fb155be0824258a30ad67159b90fe -Author: David Jaffe -AuthorDate: Fri Sep 13 15:30:51 2019 -0700 -Commit: David Jaffe -CommitDate: Fri Sep 13 15:30:51 2019 -0700 - - add option WEAK - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c1e5db117fac91511cf277ea19d13b1afbcf65b7 -Author: David Jaffe -AuthorDate: Fri Sep 13 14:43:11 2019 -0700 -Commit: David Jaffe -CommitDate: Fri Sep 13 14:43:11 2019 -0700 - - EXP: tweak printing heuristic - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 40daab8045871ba15f92219a0bc7efd05474ea0d -Author: David Jaffe -AuthorDate: Fri Sep 13 13:33:18 2019 -0700 -Commit: David Jaffe -CommitDate: Fri Sep 13 13:33:18 2019 -0700 - - EXP: revise printing criteria - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 633bf0c24cdefe9fa733e5843d42663021ecff06 -Author: DJ -AuthorDate: Wed Sep 11 03:25:24 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 11 03:25:24 2019 -0700 - - EXP: consecutively number cases - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5a8e3f4188e78f1623f59259b76d8a3aeb0fa4dd -Author: DJ -AuthorDate: Tue Sep 10 15:34:40 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 10 15:34:40 2019 -0700 - - declare more cases clear cut - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8afc73ffb854451eb535fed68f96468b1fe60a3e -Author: DJ -AuthorDate: Tue Sep 10 15:17:15 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 10 15:17:15 2019 -0700 - - EXP: resolve some cases - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b86830326301093f526c7de128b9b20be328c61e -Author: DJ -AuthorDate: Tue Sep 10 04:49:18 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 10 04:49:18 2019 -0700 - - comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2981f594de2ba60c79a98ee7de0619784140c9d7 -Author: DJ -AuthorDate: Tue Sep 10 04:24:39 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 10 04:24:39 
2019 -0700 - - ugly fix to sort order for TigData - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a1901290de2dc1ea1653623250eccc794e94d995 -Author: DJ -AuthorDate: Tue Sep 10 03:38:18 2019 -0700 -Commit: DJ -CommitDate: Tue Sep 10 03:38:18 2019 -0700 - - add a little printing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 905f120f348bdd79645e6951b156410f7caee973 -Author: DJ -AuthorDate: Sun Sep 8 07:03:04 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 8 07:03:04 2019 -0700 - - EXP: for now switch to more generous printing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9bcafc89848149ed445754adcabe261facb99e4b -Author: DJ -AuthorDate: Sun Sep 8 06:25:07 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 8 06:25:07 2019 -0700 - - EXP_BC = true always and gone as option - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b89cd9b4ee1e422192f7fb6ea2844dfb0035e5b8 -Author: DJ -AuthorDate: Sun Sep 8 06:11:06 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 8 06:11:06 2019 -0700 - - EXP: show cdr3s, fix bug in cdr3 usage - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ee9c754bb9051105e37f10c39b5707223444f147 -Author: DJ -AuthorDate: Sun Sep 8 05:51:34 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 8 05:51:34 2019 -0700 - - redo EXP_BC - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e88ec0087dac331799d1681165e089bdacdc87ef -Author: DJ -AuthorDate: Sun Sep 8 05:37:10 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 8 05:37:10 2019 -0700 - - tweak to EXP_BC - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0f27c60ef253ff3e8560e58744daad7799b86e83 -Author: DJ -AuthorDate: Sun Sep 8 05:11:12 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 8 05:11:12 2019 -0700 - - for EXP, add option to print actual barcode - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d6d2252bf71cc37ffca0f8c03ab57628e441ef00 -Author: DJ -AuthorDate: Sun Sep 8 05:01:10 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 8 05:01:10 2019 -0700 - - add printing - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 408a04b85ffbc6c58f855f9d113d5e1370edd8b8 -Author: DJ -AuthorDate: Sun Sep 8 04:30:26 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 8 04:30:26 2019 -0700 - - EXP: add a first Q60 filter - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f5f6182866a56ff148a41932ad6e219eafdbf988 -Author: DJ -AuthorDate: Sat Sep 7 07:03:03 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 7 07:03:03 2019 -0700 - - number cases - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5d98d1f0a859dad599742380aa43aa108ec6d970 -Author: DJ -AuthorDate: Sat Sep 7 06:27:06 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 7 06:27:06 2019 -0700 - - EXP: only print in imperfect case - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a32b45b63714b2228e16c43b48f00bf00c1b9a74 -Author: DJ -AuthorDate: Sat Sep 7 06:18:55 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 7 06:18:55 2019 -0700 - - EXP: show qual and barcode in singleton case - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4912eb4a44e576c3a55b3dab3ab90b64f6d1e4fb -Author: DJ -AuthorDate: Sat Sep 7 05:51:05 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 7 05:51:05 2019 -0700 - - more on EXP - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5c03fde08780c1e1ebec0cbb6538de0b960cd8fe -Author: DJ -AuthorDate: Sat Sep 7 05:46:02 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 7 05:46:02 2019 -0700 - - EXP: require both chain types - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7bb47c31d32bbb01628296af824a63bfe188e43f -Author: DJ -AuthorDate: Sat Sep 7 05:30:42 2019 -0700 -Commit: DJ -CommitDate: Sat Sep 7 05:30:42 2019 -0700 - - start exploratory code for exact clonotyping - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 81abc5360cb5e973553f3be868446c991f81da39 -Author: DJ -AuthorDate: Fri Sep 6 16:04:23 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 16:04:23 2019 -0700 - - correct sort order - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
c411cff4c0ecd4e52f83344319215eaeb4ad7d26 -Author: DJ -AuthorDate: Fri Sep 6 16:01:29 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 16:01:29 2019 -0700 - - subtract offset from qual scores - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 27b29657e15611902ee761edb2001e9f565b10e7 -Author: DJ -AuthorDate: Fri Sep 6 15:53:57 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 15:53:57 2019 -0700 - - in TigData, seq is now Vec - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3889a47bed045a192f05dff1b4d66dfc69d6f3ed -Author: DJ -AuthorDate: Fri Sep 6 15:41:46 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 15:41:46 2019 -0700 - - remove some debugging lines - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9ecc62a4bf66d9b5cccbfa629ab14432c9b67f8a -Author: DJ -AuthorDate: Fri Sep 6 15:39:58 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 15:39:58 2019 -0700 - - starting to build exact clonotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7141c273f38d2169bde2744138e909367b34bb8e -Author: DJ -AuthorDate: Fri Sep 6 06:32:04 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 06:32:04 2019 -0700 - - allow args - -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 48005b39607171b9f03f7a88885b1b0715284040 -Author: DJ -AuthorDate: Fri Sep 6 06:11:12 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 06:11:12 2019 -0700 - - add info on variable length - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 92faab8181c03ed2070d5bacd4b40d6e785ce835 -Author: DJ -AuthorDate: Fri Sep 6 05:56:05 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 05:56:05 2019 -0700 - - check for repeated variable length - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 135ab4bd443f7cb03252073f70e741577d21e010 -Author: DJ -AuthorDate: Fri Sep 6 05:45:47 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 05:45:47 2019 -0700 - - for BL, test for variable length - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d6254ef75de994964ceaa9ee090ffd52d8156b03 -Author: DJ 
-AuthorDate: Fri Sep 6 05:33:00 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 05:33:00 2019 -0700 - - exclude two lenas, not run through marsoc with 3.1 - -M lib/rust/vdj_asm_tools/src/bin/simclone.out -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 5c36dadf26947e8c943eb601274aff39fa2bc61b -Author: DJ -AuthorDate: Fri Sep 6 05:31:36 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 05:31:36 2019 -0700 - - add comment - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5f261a687e58d027727192a555fada7d1cfa0bc4 -Author: DJ -AuthorDate: Fri Sep 6 05:23:10 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 05:23:10 2019 -0700 - - add comment - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 6870049b893caf7559ad130e7d941a78a7b29597 -Author: DJ -AuthorDate: Fri Sep 6 04:25:16 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 04:25:16 2019 -0700 - - fix typo - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit d6d8377eb53595e3b9dfa152d28759ed1e30f9bb -Author: DJ -AuthorDate: Fri Sep 6 04:22:48 2019 -0700 -Commit: DJ -CommitDate: Fri Sep 6 04:22:48 2019 -0700 - - add option BL - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d8922d3e1ababad655b44c7c7db894d9267ee776 -Author: DJ -AuthorDate: Thu Sep 5 05:32:16 2019 -0700 -Commit: DJ -CommitDate: Thu Sep 5 05:32:16 2019 -0700 - - add arg PRE; doc tweaks - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit daf3355cc0160313c53047761782b8bbc972438c -Author: DJ -AuthorDate: Wed Sep 4 18:45:02 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 4 18:45:02 2019 -0700 - - comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 15f695105cf77b7683e3aafc1436fb517fcfc23a -Author: DJ -AuthorDate: Wed Sep 4 17:00:31 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 4 17:00:31 2019 -0700 - - add option to show barcodes and umi counts for clones - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1b930dd5d3a6860ff04ab5970d922db59ff8b736 -Author: DJ -AuthorDate: Wed Sep 4 14:07:51 2019 -0700 -Commit: 
DJ -CommitDate: Wed Sep 4 14:07:51 2019 -0700 - - set up map to track umi counts - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 92342125869cb153e1db3371b6f2f3ecfbee13f5 -Author: DJ -AuthorDate: Wed Sep 4 13:59:18 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 4 13:59:18 2019 -0700 - - add comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 90f5fc7d86537e42557d3882143f70810b789cc5 -Author: DJ -AuthorDate: Wed Sep 4 13:40:36 2019 -0700 -Commit: DJ -CommitDate: Wed Sep 4 13:40:36 2019 -0700 - - capture number of umis supporting each contig - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c40325f06ff414bae3e53af4bc497a2322a990fb -Author: David Jaffe -AuthorDate: Mon Sep 2 11:09:44 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Sep 2 11:09:44 2019 -0700 - - fix bug in handling of paths for lenas - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 64c55c14de2e6b348014fd7a3a58128381a80f73 -Author: David Jaffe -AuthorDate: Mon Sep 2 09:43:06 2019 -0700 -Commit: David Jaffe -CommitDate: Mon Sep 2 09:43:06 2019 -0700 - - make it work with paths instead of lenas - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ece2f643b99d271141db1d975b36f55bb59bceea -Author: DJ -AuthorDate: Sun Sep 1 08:30:05 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 1 08:30:05 2019 -0700 - - doc tweak - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7daf2ff074648c1504638caa71fbeaa8a3909759 -Author: DJ -AuthorDate: Sun Sep 1 07:14:04 2019 -0700 -Commit: DJ -CommitDate: Sun Sep 1 07:14:04 2019 -0700 - - print total cells - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit df3fd77cd6826539d34d8f71b7f818dfae8cbd23 -Author: DJ -AuthorDate: Sat Aug 31 04:19:40 2019 -0700 -Commit: DJ -CommitDate: Sat Aug 31 04:19:40 2019 -0700 - - complex mess to use new debruijn ndiffs function - -M lib/rust/Cargo.lock -M lib/rust/rust-utils-10x -M lib/rust/vdj_asm_tools/src/bin/simclone.out -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
b6af1b55ef723f2ab53f24fc04099797a97f06d5 -Author: DJ -AuthorDate: Fri Aug 30 06:11:31 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 30 06:11:31 2019 -0700 - - tidy file - -M lib/rust/vdj_asm_tools/src/bin/simclone.out - -commit ddb473cbb53fa9dc1638e6aae1e79b1cd543fd33 -Author: DJ -AuthorDate: Fri Aug 30 06:06:43 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 30 06:06:43 2019 -0700 - - lower bar for allele detection - -M lib/rust/vdj_asm_tools/src/bin/simclone.out -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b2b9bbdafb84a9995d01443a61aa76ef15953e89 -Author: DJ -AuthorDate: Fri Aug 30 05:35:22 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 30 05:35:22 2019 -0700 - - now there is only one heuristic - -D lib/rust/vdj_asm_tools/src/bin/simclone.fails -R099 lib/rust/vdj_asm_tools/src/bin/simclone.out2 lib/rust/vdj_asm_tools/src/bin/simclone.out -D lib/rust/vdj_asm_tools/src/bin/simclone.out1 -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test -D lib/rust/vdj_asm_tools/src/bin/simclone.test1 -D lib/rust/vdj_asm_tools/src/bin/simclone.test2 - -commit 856a129eb8051be0a5513bc0fe25fb8feba6ac85 -Author: DJ -AuthorDate: Thu Aug 29 16:04:21 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 16:04:21 2019 -0700 - - tidy - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1a80fdcad6bc946704921924179520edff815e8e -Author: DJ -AuthorDate: Thu Aug 29 15:21:10 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 15:21:10 2019 -0700 - - add BC to tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.out2 -M lib/rust/vdj_asm_tools/src/bin/simclone.test1 -M lib/rust/vdj_asm_tools/src/bin/simclone.test2 - -commit b97377da22c551ae8b00cadd9f9b2a6a1b84e651 -Author: DJ -AuthorDate: Thu Aug 29 14:53:29 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 14:53:29 2019 -0700 - - yet more tidying - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c3d2400947c0c57778fabb4f36d673da4e82ae24 -Author: DJ -AuthorDate: Thu Aug 29 14:25:12 2019 -0700 -Commit: DJ 
-CommitDate: Thu Aug 29 14:25:12 2019 -0700 - - more tidying - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 701560c02895c42d71e21c2e103e09bdc4db15bf -Author: DJ -AuthorDate: Thu Aug 29 14:23:37 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 14:23:37 2019 -0700 - - more tidying - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4192169f517d56d76cfe01e1f83bb7ae1ef212e4 -Author: DJ -AuthorDate: Thu Aug 29 14:14:12 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 14:14:12 2019 -0700 - - some tidying - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 52d1a2208e813859efaba13ddb92394c703dd024 -Author: DJ -AuthorDate: Thu Aug 29 13:55:41 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 13:55:41 2019 -0700 - - parallelize finding of barcodes - -M lib/rust/vdj_asm_tools/src/bin/simclone.out2 -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b58693f8cb64b3a5986f56abe7b7e2a62136e385 -Author: DJ -AuthorDate: Thu Aug 29 13:30:11 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 13:30:11 2019 -0700 - - update results table - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 07447c412c7b1d06c05b76d8ba2ab538b89a492c -Author: DJ -AuthorDate: Thu Aug 29 13:28:57 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 13:28:57 2019 -0700 - - favor larger joins - -M lib/rust/vdj_asm_tools/src/bin/simclone.out2 -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1c3618eee1fab621c8b561025a4ea5ca9f5ce2b7 -Author: DJ -AuthorDate: Thu Aug 29 06:00:14 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 06:00:14 2019 -0700 - - slight reorg that should not change results - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2f177f13c449bbbeb5596073a0a8bc2bfc2783b5 -Author: DJ -AuthorDate: Thu Aug 29 04:58:14 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 29 04:58:14 2019 -0700 - - add arg MAX_SCORE - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit eb41c820f4e41cba011972c94b23c6505a627814 -Author: DJ -AuthorDate: Wed Aug 28 15:51:23 2019 -0700 -Commit: DJ 
-CommitDate: Wed Aug 28 15:51:23 2019 -0700 - - add note - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 345da9f37eaa65c8350e63b2960be730e783fdb2 -Author: DJ -AuthorDate: Wed Aug 28 15:43:03 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 28 15:43:03 2019 -0700 - - show share positions - -M lib/rust/vdj_asm_tools/src/bin/simclone.out1 -M lib/rust/vdj_asm_tools/src/bin/simclone.out2 - -commit 4378a9a5869fe0180d02df838c98e69d19f5dd59 -Author: DJ -AuthorDate: Wed Aug 28 15:32:02 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 28 15:32:02 2019 -0700 - - print share pos - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0128f091589e0cf40527f897732368307ac5addc -Author: DJ -AuthorDate: Wed Aug 28 15:04:22 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 28 15:04:22 2019 -0700 - - for now, turn off matrix printing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 60d068becc71b0fecd51e1a82949c90407ffc1a0 -Author: DJ -AuthorDate: Wed Aug 28 15:03:18 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 28 15:03:18 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 16f002ae46beb2d88582363f848570e40153acd8 -Author: DJ -AuthorDate: Wed Aug 28 14:45:13 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 28 14:45:13 2019 -0700 - - add new heuristic and record results - -A lib/rust/vdj_asm_tools/src/bin/simclone.out1 -A lib/rust/vdj_asm_tools/src/bin/simclone.out2 -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -A lib/rust/vdj_asm_tools/src/bin/simclone.test1 -A lib/rust/vdj_asm_tools/src/bin/simclone.test2 - -commit efc9724a46baaac1df7d34a57c689526c36eae01 -Author: DJ -AuthorDate: Fri Aug 23 15:19:44 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 15:19:44 2019 -0700 - - improve arg processing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4653a173a801b79036ccfc7f15650e62a80d8537 -Author: DJ -AuthorDate: Fri Aug 23 15:15:23 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 15:15:23 2019 -0700 - - fix bug in ADD - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f5a8f561eb49045292830146d5077cf9d039784c -Author: DJ -AuthorDate: Fri Aug 23 15:09:40 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 15:09:40 2019 -0700 - - new option ADD - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b726e31bf702ea0dedb2fec6e7f1f25cbab9ec55 -Author: DJ -AuthorDate: Fri Aug 23 15:05:06 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 15:05:06 2019 -0700 - - add experimental options P1P2 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit cd3d40d626ef0cb8e002c020527c3eef2940390d -Author: DJ -AuthorDate: Fri Aug 23 06:58:30 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 06:58:30 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8edd13583c53788506b398ff3758895417470281 -Author: DJ -AuthorDate: Fri Aug 23 05:59:04 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 05:59:04 2019 -0700 - - add control over sizes of orbits to print - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d5516e5b89a0e3f1c039d809402900231f448a11 -Author: DJ -AuthorDate: Fri Aug 23 05:43:50 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 05:43:50 2019 -0700 - - update results - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit 94ac02cc44b9c6d772e2bb96ab5af6aff6af6f7a -Author: DJ -AuthorDate: Fri Aug 23 05:43:27 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 05:43:27 2019 -0700 - - relax join threshold - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1bfd07fec69d0d93d9bbf70d5545a6dfd90cc988 -Author: DJ -AuthorDate: Fri Aug 23 05:39:46 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 05:39:46 2019 -0700 - - impose higher bar on joins involving only two cells - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4a85561ab7df348724cf8679b86f6a7aa2117165 -Author: DJ -AuthorDate: Fri Aug 23 05:01:18 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 05:01:18 2019 -0700 - - update results - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit 
678d7d824dfc240cfb4d2080fc9ee76a880167ab -Author: DJ -AuthorDate: Fri Aug 23 05:00:58 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 05:00:58 2019 -0700 - - reduce memory usage - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e0a5bd5c97e764c565ef69b87999dbcc3fc18db7 -Author: DJ -AuthorDate: Fri Aug 23 04:51:52 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 04:51:52 2019 -0700 - - refactor to first find potential joins - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bfa80d813aedeeff674df6babb6495f465e86b2d -Author: DJ -AuthorDate: Fri Aug 23 04:13:54 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 23 04:13:54 2019 -0700 - - add option EASY - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3859d4da2556279ff0dcbdd9a1d80c37e5106cc8 -Author: DJ -AuthorDate: Thu Aug 22 11:09:34 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 22 11:09:34 2019 -0700 - - update results - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit f1c8a902cff8f1830907c65818a11145e3a81f60 -Author: DJ -AuthorDate: Thu Aug 22 11:08:29 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 22 11:08:29 2019 -0700 - - printing of clones now optional - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f80cdb1c10d99f8ed7b3adad01a00d6138021d92 -Author: DJ -AuthorDate: Thu Aug 22 11:06:25 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 22 11:06:25 2019 -0700 - - relax test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e7e4295b9e9a75e59d1eb7fbfacecd02afaf1700 -Author: DJ -AuthorDate: Thu Aug 22 06:56:53 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 22 06:56:53 2019 -0700 - - add option NOPAR - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9d25da801db3517a95ae46ee851f12ff5d6e606e -Author: DJ -AuthorDate: Thu Aug 22 06:43:40 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 22 06:43:40 2019 -0700 - - fix superclone size - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e032af5cf602abe4892c450518c36c1b3dcb2a1b -Author: DJ -AuthorDate: Thu Aug 22 06:39:40 2019 -0700 -Commit: 
DJ -CommitDate: Thu Aug 22 06:39:40 2019 -0700 - - add diff dots line - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 25486de3f2eb481a2e06475c1c24dc238ffabdad -Author: DJ -AuthorDate: Thu Aug 22 05:37:22 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 22 05:37:22 2019 -0700 - - order rows in orbit printing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ba6afeeb32b0feb886262abb3ec13ef917a0feec -Author: DJ -AuthorDate: Thu Aug 22 05:26:32 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 22 05:26:32 2019 -0700 - - print orbits with columns broken out - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 70fc8093700b00fbafdf93713f12ae87a09b7261 -Author: DJ -AuthorDate: Wed Aug 21 11:34:48 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 11:34:48 2019 -0700 - - combine twos - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 373477f641baa0674efb2903309323d8a7f6a29a -Author: DJ -AuthorDate: Wed Aug 21 11:19:07 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 11:19:07 2019 -0700 - - join orbits that cross within a clone - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 516b8c28a1105ae11f5c3666cfd699c6a9b82bb7 -Author: DJ -AuthorDate: Wed Aug 21 11:06:27 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 11:06:27 2019 -0700 - - bugfix to orbit printing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ace631896b02916ec6ceebddc264451e70e3b207 -Author: DJ -AuthorDate: Wed Aug 21 10:19:14 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 10:19:14 2019 -0700 - - first stab at seeing orbits - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a7bdefde5a6de10d3a429976d9639cbd4214c734 -Author: DJ -AuthorDate: Wed Aug 21 09:47:16 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 09:47:16 2019 -0700 - - print orbit sizes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit cbe46cdac274b366d7a817ca0aababf946f3204a -Author: DJ -AuthorDate: Wed Aug 21 06:17:05 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 06:17:05 2019 -0700 - - small tweaks - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 84315e531285d44235bfc98a57b679f1bda03b32 -Author: DJ -AuthorDate: Wed Aug 21 06:06:25 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 06:06:25 2019 -0700 - - exclude some T cell runs - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit fa3c69c5522b78ff88ea0e045f6d854203686009 -Author: DJ -AuthorDate: Wed Aug 21 06:03:46 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 06:03:46 2019 -0700 - - print descrips - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bfe416d01e9c702262547485264b5afa2d74d6f1 -Author: DJ -AuthorDate: Wed Aug 21 05:50:09 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 05:50:09 2019 -0700 - - exclude 140365 as contaminated - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 16bb486bf25ccd4ddb92650b8b9b276d45698c70 -Author: DJ -AuthorDate: Wed Aug 21 05:40:05 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 05:40:05 2019 -0700 - - exclude 118193 as contaminated - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit e35e59655716eed70b5a3832ac6d1d0e85edd98b -Author: DJ -AuthorDate: Wed Aug 21 05:22:33 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 05:22:33 2019 -0700 - - remove redundant lena - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 0921b361b726fd1b9146feed935d60c5656db76b -Author: DJ -AuthorDate: Wed Aug 21 05:18:15 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 05:18:15 2019 -0700 - - combine test groups - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit af3716da877be15afae65527dcd786de25b2e191 -Author: DJ -AuthorDate: Wed Aug 21 04:56:24 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 21 04:56:24 2019 -0700 - - update - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit bb56f747f3a02134cdb69f525405ef87d2620ee5 -Author: DJ -AuthorDate: Tue Aug 20 16:49:07 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 20 16:49:07 2019 -0700 - - update - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit cc2296b9996f08989dd072038dfaa299b24732ff -Author: DJ -AuthorDate: Tue Aug 20 16:44:51 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 20 16:44:51 2019 -0700 - - delete 47186 (cell line) - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 6312c19b756d7169897c7b29c51da3f47fa9bb05 -Author: DJ -AuthorDate: Tue Aug 20 16:40:01 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 20 16:40:01 2019 -0700 - - reject if barcode overlap - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3f6f9b96038b0c6a17f4a39e3b66ed80df3eb835 -Author: DJ -AuthorDate: Tue Aug 20 16:25:52 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 20 16:25:52 2019 -0700 - - exclude two lenas as contaminated - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 8bb5e9dfd89c6d38ca139169af70bce53e0f4010 -Author: DJ -AuthorDate: Tue Aug 20 16:14:13 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 20 16:14:13 2019 -0700 - - don't join along same barcode - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit dae82fb2553246cd9d95bf59a4506de74dc617af -Author: DJ -AuthorDate: Tue Aug 20 16:13:05 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 20 16:13:05 2019 -0700 - - add to list - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 97ff7071fb8eed52f865c21b9ce02f01244fecb2 -Author: DJ -AuthorDate: Tue Aug 20 15:55:00 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 20 15:55:00 2019 -0700 - - upgrade after switch to 3.1 - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 1b71e590a508c20de6043d466a56cacd270448f6 -Author: DJ -AuthorDate: Tue Aug 20 04:51:38 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 20 04:51:38 2019 -0700 - - improve behavior on error - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
c6a0cf2c8429c1b31e1918e2e42af3375b687ff9 -Author: DJ -AuthorDate: Mon Aug 19 15:07:58 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 15:07:58 2019 -0700 - - add bunch of notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 4f56a8b44d9e44060d22c628e90931eceaf1deb3 -Author: DJ -AuthorDate: Mon Aug 19 14:37:40 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 14:37:40 2019 -0700 - - split lena group - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 15b819985ad2ed1025949f63aca2b32295d54760 -Author: DJ -AuthorDate: Mon Aug 19 14:17:42 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 14:17:42 2019 -0700 - - exclude two lenas showing evidence of contamination - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 3f02d75fb72ee7e919bf6765d267670948cc2fe8 -Author: DJ -AuthorDate: Mon Aug 19 14:05:31 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 14:05:31 2019 -0700 - - split and document set - -M lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit 803cda34d522c78e8e8c5c1ade0b096b0444cb27 -Author: DJ -AuthorDate: Mon Aug 19 14:02:59 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 14:02:59 2019 -0700 - - factor out simclone.testdata - -M lib/rust/vdj_asm_tools/src/bin/simclone.test -A lib/rust/vdj_asm_tools/src/bin/simclone.testdata - -commit e2fe4f7de1285c9f870989747e6553b5f1bf7947 -Author: DJ -AuthorDate: Mon Aug 19 13:03:40 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 13:03:40 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bb5e46fbb46c30d3d8edae55b02669fbbeeea033 -Author: DJ -AuthorDate: Mon Aug 19 10:57:34 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 10:57:34 2019 -0700 - - print warning if shared barcodes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f81a523c1081cb0291b0fe6b33db50a6f8711131 -Author: DJ -AuthorDate: Mon Aug 19 10:43:41 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 10:43:41 2019 -0700 - - add FORCE option - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
91aeaaf3ef789f8fc6de54f1d066e5d2d43a85d8 -Author: DJ -AuthorDate: Mon Aug 19 06:10:07 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 06:10:07 2019 -0700 - - bug fix = sort keep - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7fb0e80df72eed5aa20ec3b64e6b8db6fdd514fd -Author: DJ -AuthorDate: Mon Aug 19 05:34:23 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 05:34:23 2019 -0700 - - fix bug in def of keep0 - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 6eca0db221bd8aa3253b5d8f538e5698b7ef1225 -Author: DJ -AuthorDate: Mon Aug 19 05:21:47 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 05:21:47 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 75d50a26f135e59ca593b4131f367e1b5e02d31a -Author: DJ -AuthorDate: Mon Aug 19 04:48:28 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 04:48:28 2019 -0700 - - print allele x lena matrix - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9f6027ee96fb761fc2bc96cd0fc772fbe4febb9c -Author: DJ -AuthorDate: Mon Aug 19 03:57:20 2019 -0700 -Commit: DJ -CommitDate: Mon Aug 19 03:57:20 2019 -0700 - - add doc and clocks - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9286d66f1b06c6fa231afe8e6c8d633c78f26d52 -Author: DJ -AuthorDate: Sun Aug 18 06:33:16 2019 -0700 -Commit: DJ -CommitDate: Sun Aug 18 06:33:16 2019 -0700 - - update notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit abbfc77586ce614ab4248fc63929f33098f5b146 -Author: DJ -AuthorDate: Sat Aug 17 08:54:58 2019 -0700 -Commit: DJ -CommitDate: Sat Aug 17 08:54:58 2019 -0700 - - remove duplicates from test - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit c414b88b4d01d1ab78313b6d3cc810fb9459d9fb -Author: DJ -AuthorDate: Sat Aug 17 08:24:16 2019 -0700 -Commit: DJ -CommitDate: Sat Aug 17 08:24:16 2019 -0700 - - add note - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c5028eda52d8c7b6e0eadb0db02c7d2211c04c4c -Author: DJ -AuthorDate: Sat Aug 17 
05:08:02 2019 -0700 -Commit: DJ -CommitDate: Sat Aug 17 05:08:02 2019 -0700 - - add option to print sample descriptions - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit fc6c8cac04f09e816a547f50565b226aa80d9fcf -Author: DJ -AuthorDate: Fri Aug 16 19:06:19 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 19:06:19 2019 -0700 - - turn off SEQ - -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 5fcd4094c6002bc5802548cd388edb64266abbb2 -Author: DJ -AuthorDate: Fri Aug 16 16:15:13 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 16:15:13 2019 -0700 - - add to test - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 25e3a7fcfa6bd86da66dc942301a3746b3b47cff -Author: DJ -AuthorDate: Fri Aug 16 15:43:35 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 15:43:35 2019 -0700 - - update results - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit 48acbd74c189ad8d53be54780065a53aec7e6d7c -Author: DJ -AuthorDate: Fri Aug 16 15:41:59 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 15:41:59 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1f904008b4ad24c6be2c848fc14219a2389675a4 -Author: DJ -AuthorDate: Fri Aug 16 15:31:03 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 15:31:03 2019 -0700 - - always compute alternate alleles - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1659cf5e41aa71670aaf8173f4dccf18b35865ab -Author: DJ -AuthorDate: Fri Aug 16 15:27:41 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 15:27:41 2019 -0700 - - use alt_refs - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a8ccc54495f22299af65a171289e4d9a6da11f92 -Author: DJ -AuthorDate: Fri Aug 16 15:10:35 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 15:10:35 2019 -0700 - - create alt_refs - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 87517a99f10a9cdc9dc3c0114c2bee29ffe33c2d -Author: DJ -AuthorDate: Fri Aug 16 14:54:22 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 14:54:22 2019 
-0700 - - use donor_id in place of n1class - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d79d8418133007a49d84529986f262ec4adb417d -Author: DJ -AuthorDate: Fri Aug 16 14:26:34 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 14:26:34 2019 -0700 - - dequadratify some code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0ebb56256c11de8ec8d93177393efbeec2d834cd -Author: DJ -AuthorDate: Fri Aug 16 13:59:35 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 13:59:35 2019 -0700 - - fasterify by dedumbing loop structure - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c3262f6d28f9d074c02153b3404f20e945187667 -Author: DJ -AuthorDate: Fri Aug 16 13:44:17 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 13:44:17 2019 -0700 - - add to happening whitelist - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ded31749ea3cfe12dcb316a6832bbb642159bf6e -Author: DJ -AuthorDate: Fri Aug 16 13:22:51 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 13:22:51 2019 -0700 - - track allelotypes donor by donor - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ac7ac81db6db46042a3ddca7328781124ef7fcab -Author: DJ -AuthorDate: Fri Aug 16 13:06:16 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 13:06:16 2019 -0700 - - for CON, track donor in all - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 495e2ea35e752fb9a9c6be00a5961307b26ff08e -Author: DJ -AuthorDate: Fri Aug 16 12:56:54 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 12:56:54 2019 -0700 - - update to do - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d63ae808b5b12cb87d15edfde51ffedfd104eae3 -Author: DJ -AuthorDate: Fri Aug 16 12:56:08 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 12:56:08 2019 -0700 - - for CON, prevent CDR3 dup across lenas from one donor - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b1a2b4dd83bc6ce9979da1b044c9eff368e5e361 -Author: DJ -AuthorDate: Fri Aug 16 11:54:28 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 11:54:28 2019 -0700 - - add to test - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 609a371bc3c3ce7df054c5c7cabba917071375d0 -Author: DJ -AuthorDate: Fri Aug 16 11:18:28 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 11:18:28 2019 -0700 - - del comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7aa4f09f87475021aef98105f273b6d2bc072de6 -Author: DJ -AuthorDate: Fri Aug 16 11:05:46 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 11:05:46 2019 -0700 - - add comment - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5e3361c83d11fe7949669017eadce9c068d290b1 -Author: DJ -AuthorDate: Fri Aug 16 10:49:42 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 10:49:42 2019 -0700 - - add to test set - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit e49e61d6835637d1bd8c6ebe3f87be5aaef3d20f -Author: DJ -AuthorDate: Fri Aug 16 07:22:39 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 07:22:39 2019 -0700 - - fix error in last commit - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 788183eb7ddfae9e5eb0a109dfa2009b2adcd1c5 -Author: DJ -AuthorDate: Fri Aug 16 07:13:05 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 07:13:05 2019 -0700 - - number fails - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 4afe39846da3cd9c85438b93bf1f488ebe11482a -Author: DJ -AuthorDate: Fri Aug 16 06:56:07 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 06:56:07 2019 -0700 - - no significant change, but update anyway - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit bac3e4b906de61e8c750a282c6785de58f5f8ae5 -Author: DJ -AuthorDate: Fri Aug 16 06:23:25 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 06:23:25 2019 -0700 - - print two versions of id - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 89f24be647b2c9e548dd382656fab64b8546f011 
-Author: DJ -AuthorDate: Fri Aug 16 06:13:28 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 06:13:28 2019 -0700 - - update to do list - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 75e280b636e7bf71c7b54107e0eb986a5d03bc2e -Author: DJ -AuthorDate: Fri Aug 16 05:44:16 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 05:44:16 2019 -0700 - - print ps - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit fa7bb131b36e20c9a888088234b64156fa13e0ce -Author: DJ -AuthorDate: Fri Aug 16 05:42:07 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 05:42:07 2019 -0700 - - tidying and comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 11fa1095ef81c2121ba9a7912fe8c6f63aea534d -Author: DJ -AuthorDate: Fri Aug 16 05:34:38 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 05:34:38 2019 -0700 - - remove pure reference columms - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 920b8f3cd7c451eb3c447cfe78044484704c3ba9 -Author: DJ -AuthorDate: Fri Aug 16 05:22:47 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 05:22:47 2019 -0700 - - refactor - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 40d97caa262165ef932b82b5bfbd4c96f7ecc1de -Author: DJ -AuthorDate: Fri Aug 16 05:11:45 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 05:11:45 2019 -0700 - - avoid discarding ref - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ba0290cf991b3f9ba6c1b14ce6047c053910e085 -Author: DJ -AuthorDate: Fri Aug 16 05:05:40 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 05:05:40 2019 -0700 - - remove empties - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8de0af61f85261315c47167b556b846081d82c96 -Author: DJ -AuthorDate: Fri Aug 16 05:00:13 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 05:00:13 2019 -0700 - - simplify - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit fdfafa2699801edc11714a170b9ce63580af6888 -Author: DJ -AuthorDate: Fri Aug 16 04:55:47 2019 -0700 -Commit: DJ -CommitDate: Fri Aug 16 04:55:47 2019 -0700 - - remove unneeded condition - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 11a716047ec3e4bed74d65673acb00f062054a6c -Author: DJ -AuthorDate: Thu Aug 15 16:37:50 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 16:37:50 2019 -0700 - - remove spurious newline - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit edd18648b429582c8f6701d23e835207b58fd416 -Author: DJ -AuthorDate: Thu Aug 15 16:35:08 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 16:35:08 2019 -0700 - - remove bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d1f3a7d137f4a3d0373f2964e866503ba323cc11 -Author: DJ -AuthorDate: Thu Aug 15 16:20:50 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 16:20:50 2019 -0700 - - set floor for allelotype frequency - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5cf88bff805cd2ddb19c05b394dbd7606309cc5c -Author: DJ -AuthorDate: Thu Aug 15 16:16:56 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 16:16:56 2019 -0700 - - show allelotypes for all V segments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8dbeb00c1800ceccb2917a807eea8643096f2501 -Author: DJ -AuthorDate: Thu Aug 15 16:07:09 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 16:07:09 2019 -0700 - - don't show errors if they're impossible - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e8a609f1e71ebbaedae6bb0a7cee4966aceab1d9 -Author: DJ -AuthorDate: Thu Aug 15 15:56:45 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 15:56:45 2019 -0700 - - don't show ref-only allelotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 570f8d3bffe78a94e08c81810aa59aec44db5db0 -Author: DJ -AuthorDate: Thu Aug 15 15:27:36 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 15:27:36 2019 -0700 - - show reference allele - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1348e687450a073d88a09c97d6d6b6e3ee22bdc1 -Author: DJ -AuthorDate: Thu Aug 15 15:22:03 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 15:22:03 2019 -0700 - - ignore small allelotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
533ce1fef7f80cfdd4be0a4a73e9e60dcaa4698d -Author: DJ -AuthorDate: Thu Aug 15 15:07:51 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 15:07:51 2019 -0700 - - don't show ps - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0819ea37257d93c71d78f6118377ad1522f75058 -Author: DJ -AuthorDate: Thu Aug 15 15:01:05 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 15:01:05 2019 -0700 - - tidy - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit dcc2fdea7ee71e52564e7b144431cb4f4c1042e3 -Author: DJ -AuthorDate: Thu Aug 15 14:56:36 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 14:56:36 2019 -0700 - - simplify and fix bug at vsids - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit cecb54aa2a60eb415b6cf3775ee9514151406231 -Author: DJ -AuthorDate: Thu Aug 15 14:43:53 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 14:43:53 2019 -0700 - - show purity - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bc9a75dd38d62f91d2c7239bbbde9b4d5ddcede7 -Author: DJ -AuthorDate: Thu Aug 15 14:36:33 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 14:36:33 2019 -0700 - - improve allelotype display - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 91a50533e6b72944ef6cb75a3c790811468bb808 -Author: DJ -AuthorDate: Thu Aug 15 13:52:18 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 13:52:18 2019 -0700 - - abbreviate list - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 183e0d6a531977142e848ef4614414cbe2f58d0a -Author: DJ -AuthorDate: Thu Aug 15 13:50:31 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 13:50:31 2019 -0700 - - dump some logging - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 87f325d84dd410b0cddfc23d6afc673cc49a674f -Author: DJ -AuthorDate: Thu Aug 15 13:49:23 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 13:49:23 2019 -0700 - - show partner list - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e87b16d2211646de78b930dc10831facba902ce7 -Author: DJ -AuthorDate: Thu Aug 15 13:39:29 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 
13:39:29 2019 -0700 - - dump the matrix - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2408e4ccba85480a2fb182de5c9119e6ea34e3a4 -Author: DJ -AuthorDate: Thu Aug 15 13:36:49 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 13:36:49 2019 -0700 - - print allelotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 725f6adc7a046eaefc169cb6b615655ff3b837b1 -Author: DJ -AuthorDate: Thu Aug 15 11:50:36 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 11:50:36 2019 -0700 - - show partner ref ids in intersection analysis - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bcb23d43ae590296e9dea771b99729c263b1c99b -Author: DJ -AuthorDate: Thu Aug 15 11:30:26 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 11:30:26 2019 -0700 - - intersection info in progress - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1c3ea4286470c4afe2952fafa08daac0823ef06a -Author: DJ -AuthorDate: Thu Aug 15 11:13:22 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 11:13:22 2019 -0700 - - towards intersection info - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4397c0c2535701968ac819a740bd827e0e3fdfca -Author: David Jaffe -AuthorDate: Thu Aug 15 10:28:03 2019 -0700 -Commit: David Jaffe -CommitDate: Thu Aug 15 10:28:03 2019 -0700 - - correct reference stop point for V seg consensus - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4b512fc68dd8f668411e81e266031dd7c33180a4 -Author: David Jaffe -AuthorDate: Thu Aug 15 10:01:04 2019 -0700 -Commit: David Jaffe -CommitDate: Thu Aug 15 10:01:04 2019 -0700 - - use human_ref - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 82c2c66696f7c45373fda6735fae0ad6d1cde72e -Author: David Jaffe -AuthorDate: Thu Aug 15 09:51:31 2019 -0700 -Commit: David Jaffe -CommitDate: Thu Aug 15 09:51:31 2019 -0700 - - add function human_ref() - -M lib/rust/vdj_ann/src/refx.rs -A lib/rust/vdj_ann/vdj_refs/human/regions.fa -A lib/rust/vdj_ann/vdj_refs/human/supp_regions.fa -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
8b7ddc5bdd18c8993e75360f5bf0bea8a95d0064 -Author: DJ -AuthorDate: Thu Aug 15 06:46:48 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 06:46:48 2019 -0700 - - cleaning experimental consensus code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1726d0d97e3df40545e4f7bb47967753012f5790 -Author: DJ -AuthorDate: Thu Aug 15 06:40:48 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 06:40:48 2019 -0700 - - tinkering with experimental consensus code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7d06e70bf95c1e6036389c2ab18d24ceddb9df03 -Author: DJ -AuthorDate: Thu Aug 15 06:23:36 2019 -0700 -Commit: DJ -CommitDate: Thu Aug 15 06:23:36 2019 -0700 - - initial experimental consensus code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit aa9c39a4b002e6ec14d5d8654f74da3314a35664 -Author: DJ -AuthorDate: Wed Aug 14 15:17:15 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 15:17:15 2019 -0700 - - another to do - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit fe4a9685908c5fa7f96eecee9d57ca7464b33989 -Author: DJ -AuthorDate: Wed Aug 14 15:15:24 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 15:15:24 2019 -0700 - - update to do list - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f5c0ee615c06c8411dfb2971278fdcd8955fde46 -Author: DJ -AuthorDate: Wed Aug 14 15:13:39 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 15:13:39 2019 -0700 - - update doc - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit cda555337ced4541dd308c3d88e3f2896a5d97d9 -Author: DJ -AuthorDate: Wed Aug 14 15:10:39 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 15:10:39 2019 -0700 - - fix cdr3 handling - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 14ea0eb088c776d725ae229aeaa4ce2d46204f32 -Author: DJ -AuthorDate: Wed Aug 14 14:36:48 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 14:36:48 2019 -0700 - - now actually allow three contigs - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M 
lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit dd0043bfc7772974fbb4beb3253fcf0f7a7faa5b -Author: DJ -AuthorDate: Wed Aug 14 14:13:10 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 14:13:10 2019 -0700 - - set up machine for splitting clonotypes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit fc49e201ffb2e8ea43ad0bc5b01105ed0aa2a27d -Author: DJ -AuthorDate: Wed Aug 14 13:31:18 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 13:31:18 2019 -0700 - - simplify a bit - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2698510f9da631ff43b791629ca6e8499ccdd6cd -Author: DJ -AuthorDate: Wed Aug 14 13:25:14 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 13:25:14 2019 -0700 - - tweak to CloneInfo creation strategy - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 6a226a72be731116254ecb20cbbad5fd8978b2e4 -Author: DJ -AuthorDate: Wed Aug 14 13:12:57 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 13:12:57 2019 -0700 - - formatting - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 6174929f525aa7c615fc5522086497b90479e98f -Author: DJ -AuthorDate: Wed Aug 14 13:11:13 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 13:11:13 2019 -0700 - - add some doc - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 66b408e21aa5d1988373c6740130a7c9f761a55b -Author: DJ -AuthorDate: Wed Aug 14 11:48:47 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 11:48:47 2019 -0700 - - remove stupid comment - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ab908fa9673725de6a62273f88daabf57fe8e2bd -Author: DJ -AuthorDate: Wed Aug 14 11:23:01 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 11:23:01 2019 -0700 - - output beautification - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f9859b34cbf6bc321544326558f7a97f28305a84 -Author: DJ -AuthorDate: Wed Aug 14 11:17:00 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 11:17:00 2019 -0700 - - report orbits having 
errors - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 73a866f351bd6f219b7bc95dc6bb0cb46b972a36 -Author: DJ -AuthorDate: Wed Aug 14 10:55:14 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 10:55:14 2019 -0700 - - don't join if it can have no effect on equiv rel - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b9125b399f53f47de862b92a1bc0ffb15a8c9b74 -Author: DJ -AuthorDate: Wed Aug 14 10:44:18 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 10:44:18 2019 -0700 - - find orbits - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit efeca91a0e4477875ec7ed7b35e9460a10cd96d1 -Author: DJ -AuthorDate: Wed Aug 14 10:09:37 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 10:09:37 2019 -0700 - - add note on computational performance - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 90e55011159618d6b76c41044d81e69cdd61882c -Author: DJ -AuthorDate: Wed Aug 14 10:07:45 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 10:07:45 2019 -0700 - - speed up a bit - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ce5deb3fd8f6f4ef539a4cea34e3df102bda7e1e -Author: DJ -AuthorDate: Wed Aug 14 09:39:00 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 09:39:00 2019 -0700 - - tweak happening targets - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 709c5485d582dca0a87776abfc7eb87a742750b9 -Author: DJ -AuthorDate: Wed Aug 14 09:32:11 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 09:32:11 2019 -0700 - - add happening mode - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit afcede9d92e9d6d37f07823934e845e0e2f73a46 -Author: DJ -AuthorDate: Wed Aug 14 09:03:08 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 09:03:08 2019 -0700 - - add to TO DO - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e06ef005045b624f8980a1f4cbec6020c564dc86 -Author: DJ 
-AuthorDate: Wed Aug 14 06:38:24 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 06:38:24 2019 -0700 - - restore unbusted lena - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit dc8a06fc0e15e245f617496f4b552af925fde4a1 -Author: DJ -AuthorDate: Wed Aug 14 06:34:54 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 06:34:54 2019 -0700 - - update notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit b305c1082ca0ba4f7ecfcffab5a5011b75cede84 -Author: DJ -AuthorDate: Wed Aug 14 06:22:38 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 06:22:38 2019 -0700 - - resolve a lena - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 3e8079997e6619a8befa285231076f32ffc32b76 -Author: DJ -AuthorDate: Wed Aug 14 06:14:52 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 06:14:52 2019 -0700 - - resolve a couple lenas - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit c2567c41bf49664251e0ed87f12a47f726589271 -Author: DJ -AuthorDate: Wed Aug 14 05:50:05 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 05:50:05 2019 -0700 - - update - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit 1940e9a7165749a4d064d0709d3dc23b9011864e -Author: DJ -AuthorDate: Wed Aug 14 05:46:43 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 05:46:43 2019 -0700 - - use only one ref if possible - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f4a030ca4350b7d09830740e76a99cded68b4981 -Author: DJ -AuthorDate: Wed Aug 14 05:16:28 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 05:16:28 2019 -0700 - - update notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 29d4cc9d179d68b4d63dca73e4164773f4d78c4e -Author: DJ -AuthorDate: Wed Aug 14 04:52:19 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 04:52:19 2019 -0700 - - output tweak - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9d31ad986cc385e776e2503b786fd20ce3ad0eab -Author: DJ -AuthorDate: Wed Aug 14 04:48:15 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 04:48:15 2019 -0700 - - add missing sequence - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1e4a1f8cbe5991deeb59e5619682676fa71487c3 -Author: DJ -AuthorDate: Wed Aug 14 04:37:35 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 04:37:35 2019 -0700 - - update - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit 5c36fc20055bcc2233664561e74cb218d7b042b6 -Author: DJ -AuthorDate: Wed Aug 14 04:27:13 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 04:27:13 2019 -0700 - - output tidying - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8d37d3239b1615e954cca83743b5575fa467fdc8 -Author: DJ -AuthorDate: Wed Aug 14 04:25:08 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 04:25:08 2019 -0700 - - show shared mutations in difference patterns - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 21ad5e11c08eb0a85187786701c180b39e75ad9f -Author: DJ -AuthorDate: Wed Aug 14 04:12:20 2019 -0700 -Commit: DJ -CommitDate: Wed Aug 14 04:12:20 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 838df3be471f1405c830fd055483ac06c0e874e7 -Author: DJ -AuthorDate: Tue Aug 13 19:21:53 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 19:21:53 2019 -0700 - - default to showing full annotations - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e55062f6213dbfca4b29ecd938560640e489b6ac -Author: DJ -AuthorDate: Tue Aug 13 17:57:20 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 17:57:20 2019 -0700 - - add option PFREQ - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ae920c6ea4307860251706062a9a43f44df8592e -Author: DJ -AuthorDate: Tue Aug 13 17:52:42 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 17:52:42 2019 -0700 - - update - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit fac28b985039f6ab10fca594fab28690ad9ca424 -Author: DJ 
-AuthorDate: Tue Aug 13 17:49:28 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 17:49:28 2019 -0700 - - show difference patterns - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 02f830e1ea11b59ad95f4e5996388a1ec6253992 -Author: DJ -AuthorDate: Tue Aug 13 16:10:04 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 16:10:04 2019 -0700 - - merge orbits - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit ad4b367d9e63f28790367ff8712316862b03d9d8 -Author: DJ -AuthorDate: Tue Aug 13 16:03:39 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 16:03:39 2019 -0700 - - output nicification - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d165df725e410114ac660b0a8087616c7e3ae71d -Author: DJ -AuthorDate: Tue Aug 13 15:59:16 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 15:59:16 2019 -0700 - - restore broken file - -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit efe8f3836d479e95654e24c9d302c40ce0b51351 -Author: DJ -AuthorDate: Tue Aug 13 15:53:43 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 15:53:43 2019 -0700 - - raise REF_V_TRIM - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit ec1c072f38e47bbf6dac2d024c130cc7370de530 -Author: DJ -AuthorDate: Tue Aug 13 15:42:20 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 15:42:20 2019 -0700 - - merge orbits - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit d8cfbe2b601aa0c10b996f8735bd9d7fe6909bcd -Author: DJ -AuthorDate: Tue Aug 13 15:21:02 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 15:21:02 2019 -0700 - - add missing sequence - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9826e6f2f89eab6ebecc354e5f13a218af694fb9 -Author: DJ -AuthorDate: Tue Aug 13 13:29:38 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 13:29:38 2019 -0700 - - a little 
output streamlining - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7281534c81f7f7070eae5a0b6fd581ea52b09e3c -Author: DJ -AuthorDate: Tue Aug 13 13:23:43 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 13:23:43 2019 -0700 - - add missing newline - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b21745ec2ba87bc98a49c3d0791118a485a546f8 -Author: DJ -AuthorDate: Tue Aug 13 13:09:09 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 13:09:09 2019 -0700 - - update results - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit 0fc973dfc76d61c672f2a621d2102bb531f3ec30 -Author: DJ -AuthorDate: Tue Aug 13 12:05:21 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 12:05:21 2019 -0700 - - run with COMP - -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit fabdaa9e6c79fb1c564e3e2dfd93e78088193aa6 -Author: DJ -AuthorDate: Tue Aug 13 12:04:04 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 12:04:04 2019 -0700 - - increase reference trim for J segments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9a47da93fe8eea8151d0563434b63150ab646c9a -Author: DJ -AuthorDate: Tue Aug 13 11:59:34 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 11:59:34 2019 -0700 - - add a reference sequence - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 612153c74560e09df98d22d3335ab8ab1ddcfe0f -Author: DJ -AuthorDate: Tue Aug 13 10:12:24 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 10:12:24 2019 -0700 - - fix to shares_details - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 71fda3e00772f04acec9f5250ab8b8d06d81fbe3 -Author: DJ -AuthorDate: Tue Aug 13 09:54:02 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 09:54:02 2019 -0700 - - make computational perf info optional - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 47d624c2737fe7e564eb3cd44d527ab47b96e272 -Author: DJ -AuthorDate: Tue Aug 13 09:45:41 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 09:45:41 2019 -0700 - - break down shares by V and J - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - 
-commit 54e1970ae7575355bc10ad1ef8eb7f2fbfbe6c29 -Author: DJ -AuthorDate: Tue Aug 13 06:19:04 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 06:19:04 2019 -0700 - - update notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit adb5a7d2784a900ff3cacebd11678a7d0fb865d4 -Author: DJ -AuthorDate: Tue Aug 13 06:17:06 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 06:17:06 2019 -0700 - - update notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 6db037907462d6c641341c12f5038fdbeb6fbdc3 -Author: DJ -AuthorDate: Tue Aug 13 06:09:01 2019 -0700 -Commit: DJ -CommitDate: Tue Aug 13 06:09:01 2019 -0700 - - update and workaround for marsoc weirdness - -M lib/rust/vdj_asm_tools/src/bin/simclone.fails -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 9b6012532bc1424be1f49ad4982a6f9950b498a0 -Author: DJ -AuthorDate: Wed Jul 31 19:07:28 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 19:07:28 2019 -0700 - - add SEQ ANN - -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 43ab22e3559b0ae5c7b347bd593d062001661dff -Author: DJ -AuthorDate: Wed Jul 31 14:44:19 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 14:44:19 2019 -0700 - - add missing sequence - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit aa660f6bd509a27af1a9baed49242792100129d5 -Author: DJ -AuthorDate: Wed Jul 31 10:03:30 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 10:03:30 2019 -0700 - - SEQ now shows original contigs - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5fcc1f49d577bbf6000c00bd88d2ae8c6c252a58 -Author: DJ -AuthorDate: Wed Jul 31 05:51:04 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 05:51:04 2019 -0700 - - temp list of fails - -A lib/rust/vdj_asm_tools/src/bin/simclone.fails - -commit feb42f61029bd6546929bf34b8bfbab0b3819303 -Author: DJ -AuthorDate: Wed Jul 31 05:46:45 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 05:46:45 2019 -0700 - - update test - -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit a760df068def158c72101d886e12b50504a77c6e 
-Author: DJ -AuthorDate: Wed Jul 31 05:36:16 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 05:36:16 2019 -0700 - - add tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 77b30d97d34a7bb9989cd3d3594b9ebec2a879df -Author: DJ -AuthorDate: Wed Jul 31 05:06:00 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 05:06:00 2019 -0700 - - add to tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit cb31f2fb02fff12fa8707932f46fabe1bd89c029 -Author: DJ -AuthorDate: Wed Jul 31 04:55:38 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 04:55:38 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2feba3feb9a29712d39357cbe43a6667879399c3 -Author: DJ -AuthorDate: Wed Jul 31 04:31:38 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 04:31:38 2019 -0700 - - add to tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 3a84c47e3f75944648b2d4e6e2606cc2ade7aa20 -Author: DJ -AuthorDate: Wed Jul 31 04:22:12 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 04:22:12 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 51f6a20c090e4790073ffdec4e9ece9b4a9b727a -Author: DJ -AuthorDate: Wed Jul 31 04:06:22 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 04:06:22 2019 -0700 - - add to tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 5f8460042189621d8e34ae8825668d22d495f490 -Author: DJ -AuthorDate: Wed Jul 31 03:46:33 2019 -0700 -Commit: DJ -CommitDate: Wed Jul 31 03:46:33 2019 -0700 - - add to tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 494417d97ae23db3a8c8c8e47bd4c0608ac835d8 -Author: DJ -AuthorDate: Tue Jul 30 16:39:51 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 16:39:51 2019 -0700 - - add to test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs 
-M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit bf1d80b25b3393ac96660871478b5a4938a18839 -Author: DJ -AuthorDate: Tue Jul 30 16:23:37 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 16:23:37 2019 -0700 - - add to tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit e86610c7884cf5a9c86cc02c371e4310ebe217d5 -Author: DJ -AuthorDate: Tue Jul 30 14:59:08 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 14:59:08 2019 -0700 - - remove a couple more dups - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit b5659cdfb7e0287f1044b63ec7a2d52544f092d7 -Author: DJ -AuthorDate: Tue Jul 30 14:50:32 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 14:50:32 2019 -0700 - - tidy dup finding code - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bf19a60a02120641a822cd4592b9d65187b4a5d2 -Author: DJ -AuthorDate: Tue Jul 30 14:46:53 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 14:46:53 2019 -0700 - - improve dup finding and remove some dups - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 74b82f5f237a9e660fb45fc32856704d81081d29 -Author: DJ -AuthorDate: Tue Jul 30 13:15:10 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 13:15:10 2019 -0700 - - remove some duplicate lena ids from test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit c6b933c1825760c98ea7b30ba3cc91de4041e925 -Author: DJ -AuthorDate: Tue Jul 30 10:41:27 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 10:41:27 2019 -0700 - - remove some dups from test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit bd51b9a9f6a8e205ffe3a9692e0edc685676b0ca -Author: DJ -AuthorDate: Tue Jul 30 10:15:46 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 10:15:46 2019 -0700 - - improve duplicate barcode test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - 
-commit d2b3fb084d3ab54534ffe0b31fa6ed974e0c4372 -Author: DJ -AuthorDate: Tue Jul 30 09:54:43 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 09:54:43 2019 -0700 - - showing of barcodes now off by default - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ce2a50b911e05d1371ab7fcf4be9a7ba681dbd4d -Author: DJ -AuthorDate: Tue Jul 30 09:48:24 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 09:48:24 2019 -0700 - - speed up a lot - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c886a7ceaed0cb0a49d6f3c2a81e580ae3df34ee -Author: DJ -AuthorDate: Tue Jul 30 09:43:00 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 09:43:00 2019 -0700 - - speed up a bit - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e494cbe77133dbe1fd34d479c6611fc5f0ffac2a -Author: DJ -AuthorDate: Tue Jul 30 09:34:21 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 09:34:21 2019 -0700 - - add more tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit e7aa64b47fd295af9296fdd030bb1ed5af50c482 -Author: DJ -AuthorDate: Tue Jul 30 08:36:25 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 08:36:25 2019 -0700 - - add to tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 3ac894c70fe646f080f9b611df6576ffaa337474 -Author: DJ -AuthorDate: Tue Jul 30 06:58:38 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 06:58:38 2019 -0700 - - add to tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit a5b20938abeb144491be5ed3f1837506e21e73b3 -Author: DJ -AuthorDate: Tue Jul 30 04:23:12 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 04:23:12 2019 -0700 - - add tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit e44cac4d89abf4f494812dfec3f270607d31e6e0 -Author: DJ -AuthorDate: Tue Jul 30 03:30:58 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 03:30:58 2019 -0700 - - add note - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b8bd48942f9d32be7bd5a4748017072942a66a37 -Author: DJ -AuthorDate: Tue Jul 30 03:27:17 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 03:27:17 2019 -0700 - - add option to show annotations - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0d5fdf53fb99f55b77cdb0380ddb8cd3e2f35a8d -Author: DJ -AuthorDate: Tue Jul 30 03:11:39 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 03:11:39 2019 -0700 - - add to tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit c03083ce93ec24bbb8960ebd1c63d724ee3049d7 -Author: DJ -AuthorDate: Tue Jul 30 02:51:29 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 02:51:29 2019 -0700 - - add tests - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit c1983ca3c7e836ad7a21b502370d742b83a0fafa -Author: DJ -AuthorDate: Tue Jul 30 02:38:26 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 02:38:26 2019 -0700 - - add to test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit ff5368fbfe74bcec12cacc358c139c865618efe4 -Author: DJ -AuthorDate: Tue Jul 30 02:27:08 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 02:27:08 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit fa401de24a0849c23ad3a0396cb27cb07e0fccba -Author: DJ -AuthorDate: Tue Jul 30 02:26:20 2019 -0700 -Commit: DJ -CommitDate: Tue Jul 30 02:26:20 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 86295d8f8e029601d297fd1ef1f1755476d6b50e -Author: DJ -AuthorDate: Mon Jul 29 15:33:53 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 15:33:53 2019 -0700 - - add to test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 9b6a9a0847d317897bd435cdd74ab8acc5e6a814 -Author: DJ -AuthorDate: Mon Jul 29 07:45:45 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 07:45:45 2019 
-0700 - - speed up - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3ce352565b96623f25b605fdd8cdccdb1072a163 -Author: DJ -AuthorDate: Mon Jul 29 07:32:50 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 07:32:50 2019 -0700 - - tweak test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit cc66ad6c8bbf64d1b2c251e999ce5483e9058383 -Author: DJ -AuthorDate: Mon Jul 29 07:28:00 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 07:28:00 2019 -0700 - - speed up - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 45ca8d9a4f6ee86a995e8df3065228cc49d54b52 -Author: DJ -AuthorDate: Mon Jul 29 07:24:27 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 07:24:27 2019 -0700 - - speed up - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c082029f4d5138941ed88e31950724dca31164dd -Author: DJ -AuthorDate: Mon Jul 29 06:14:52 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 06:14:52 2019 -0700 - - add clocks - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2ea6261e53c79f5e1a0434dc518b4691220ee4bd -Author: DJ -AuthorDate: Mon Jul 29 06:00:31 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 06:00:31 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 7b1a211165f0017c3ac26aa539615fa74ffa7e8c -Author: DJ -AuthorDate: Mon Jul 29 05:58:13 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 05:58:13 2019 -0700 - - more on adding to test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 724d7c1f75f750ead4475aa6df2c176cf61ae472 -Author: DJ -AuthorDate: Mon Jul 29 05:57:54 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 05:57:54 2019 -0700 - - add to test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 6e039574c0438a0d03b208699729f9c8e1d4e423 -Author: DJ -AuthorDate: Mon Jul 29 05:40:06 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 05:40:06 2019 -0700 - - add missing V segment - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
7bcb351ba2239ce2012d100ca72ad99a0e6387d6 -Author: DJ -AuthorDate: Mon Jul 29 04:55:43 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 04:55:43 2019 -0700 - - for now, just show errors - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e16302d9eb978859911bdaf4aa2d3fc7ccbc570c -Author: DJ -AuthorDate: Mon Jul 29 04:54:01 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 04:54:01 2019 -0700 - - add more examples - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit e7e0387153404b99a6090a67e15dc82514c20a9c -Author: DJ -AuthorDate: Mon Jul 29 04:43:44 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 04:43:44 2019 -0700 - - del item from to do list - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a00acabf005d0a3d3bbbe039c68ef43a32c8949d -Author: DJ -AuthorDate: Mon Jul 29 04:39:04 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 04:39:04 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f9ba39ba864d37c2bf15e371c365145d55aad5b7 -Author: DJ -AuthorDate: Mon Jul 29 04:08:08 2019 -0700 -Commit: DJ -CommitDate: Mon Jul 29 04:08:08 2019 -0700 - - set permissions - -M lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit 84b98a8f88ebfd107f11e95a9138df531220ede3 -Author: David Jaffe -AuthorDate: Sun Jul 28 11:53:52 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 11:53:52 2019 -0700 - - more notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 2eefa09f83ff0e5e71329ecb7aba6708740375a2 -Author: David Jaffe -AuthorDate: Sun Jul 28 11:22:43 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 11:22:43 2019 -0700 - - more notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b8cabae31f6941038e365993d1b94950278ac2d4 -Author: David Jaffe -AuthorDate: Sun Jul 28 11:00:04 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 11:00:04 2019 -0700 - - doc changes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5e01d17025787d43f3d12c07fdbf418f69200cd1 
-Author: David Jaffe -AuthorDate: Sun Jul 28 10:54:01 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 10:54:01 2019 -0700 - - don't skip small clones - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b3cf71dbd37c2cfe597adb6fa4d50c848e7c45fc -Author: David Jaffe -AuthorDate: Sun Jul 28 10:52:31 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 10:52:31 2019 -0700 - - add to do list - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit cfc96b20e7a297bd93988b7ebef9ffdfb45b46b3 -Author: David Jaffe -AuthorDate: Sun Jul 28 10:43:18 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 10:43:18 2019 -0700 - - tweak docs - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3a4d76a5555712b62326e34fe7ed23b5d0f1e641 -Author: David Jaffe -AuthorDate: Sun Jul 28 10:39:41 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 10:39:41 2019 -0700 - - separate out test script - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs -A lib/rust/vdj_asm_tools/src/bin/simclone.test - -commit fc2cbf3daa33d494dd741954931723d8e98a43b9 -Author: David Jaffe -AuthorDate: Sun Jul 28 10:29:00 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 10:29:00 2019 -0700 - - require BCR - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 05f9050c087b70e697a7bca7f11f8102e4b75353 -Author: David Jaffe -AuthorDate: Sun Jul 28 10:17:14 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 10:17:14 2019 -0700 - - use continue to avoid indentation - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8e2a4bfc80ed1bb184c793b6309980e0015ae77f -Author: David Jaffe -AuthorDate: Sun Jul 28 09:59:51 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 09:59:51 2019 -0700 - - add more notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ee74c934400f53fed7137bb7b6724facf011c5a1 -Author: David Jaffe -AuthorDate: Sun Jul 28 09:58:04 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 09:58:04 2019 -0700 - - add more doc - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 8e6b14d8b7d855a00f96d974e7948ab17fbb353e -Author: David Jaffe -AuthorDate: Sun Jul 28 09:15:32 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 09:15:32 2019 -0700 - - add more notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 59149356d7bf2dd97b759d021651bae1ad8a5bc5 -Author: David Jaffe -AuthorDate: Sun Jul 28 09:01:25 2019 -0700 -Commit: David Jaffe -CommitDate: Sun Jul 28 09:01:25 2019 -0700 - - add some doc - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit cd6c982fa3896fd1d61868081d8025261b0175a6 -Author: DJ -AuthorDate: Sun Jul 28 06:07:46 2019 -0700 -Commit: DJ -CommitDate: Sun Jul 28 06:07:46 2019 -0700 - - add examples - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0dc2fb8cff76aa0dabe8d95a2fc532c7332fa52d -Author: DJ -AuthorDate: Sun Jul 28 05:51:12 2019 -0700 -Commit: DJ -CommitDate: Sun Jul 28 05:51:12 2019 -0700 - - check for duplicate barcodes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5c9450cc64bef7b4fdd64975a6e9d861d4b14c58 -Author: DJ -AuthorDate: Sun Jul 28 05:36:22 2019 -0700 -Commit: DJ -CommitDate: Sun Jul 28 05:36:22 2019 -0700 - - fix bug, add example - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit cfd41c977e2c18a0a8e24a744b0b3cd22e2e033e -Author: DJ -AuthorDate: Sun Jul 28 05:06:24 2019 -0700 -Commit: DJ -CommitDate: Sun Jul 28 05:06:24 2019 -0700 - - note files used - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f59476d41250e10fe20f51b7dc530ff07a948b7b -Author: DJ -AuthorDate: Sun Jul 28 05:04:04 2019 -0700 -Commit: DJ -CommitDate: Sun Jul 28 05:04:04 2019 -0700 - - support lena groups - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e37bf42d0f59fba21a37e86ea43aae8bc530ef2f -Author: DJ -AuthorDate: Sat Jul 27 17:59:26 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 17:59:26 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit fd36cf643edc27003d08f0245d9973d9f498f9a2 -Author: DJ 
-AuthorDate: Sat Jul 27 17:36:50 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 17:36:50 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 75acf045e0e37b6eed5f6958cc8ca7f81fe082ea -Author: DJ -AuthorDate: Sat Jul 27 17:35:35 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 17:35:35 2019 -0700 - - fix bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ef3b0f0613dfdf2edf03d1249459b4c0b7331dc3 -Author: DJ -AuthorDate: Sat Jul 27 16:36:50 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 16:36:50 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 98e095de81a93622f6b77a7754c91053caf54105 -Author: DJ -AuthorDate: Sat Jul 27 16:21:35 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 16:21:35 2019 -0700 - - fix indel handling - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 80e492ddd4834edad1ad5db4bdf9651e430f4630 -Author: DJ -AuthorDate: Sat Jul 27 15:34:01 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 15:34:01 2019 -0700 - - save reference sequences rather than indices to them - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a8d4c91ff663d826fc738cc3ef6bd2c1f40a41ad -Author: DJ -AuthorDate: Sat Jul 27 15:26:26 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 15:26:26 2019 -0700 - - a little doc - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d4140ffdf5dfad949d8de621f797ea4fa46c00ac -Author: DJ -AuthorDate: Sat Jul 27 04:51:16 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 04:51:16 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 32a502e8370e072e2bbf021ac5da3f3ec7230bab -Author: DJ -AuthorDate: Sat Jul 27 04:45:25 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 04:45:25 2019 -0700 - - add option SEQ - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 99d18fdc3542ec8e0f969f574f7da841f37c46ec -Author: DJ -AuthorDate: Sat Jul 27 04:08:10 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 04:08:10 2019 -0700 - - redesign parallel loop to be much 
faster - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9a08c00824bd66c9f982883baa8107ab0437124e -Author: DJ -AuthorDate: Sat Jul 27 03:55:45 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 03:55:45 2019 -0700 - - define res - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5eb8ce31999bc821043f6fa06be2ab8c953078d7 -Author: DJ -AuthorDate: Sat Jul 27 03:47:40 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 03:47:40 2019 -0700 - - eliminate some continue statements - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 781544dc4d1277f2ff83d933c2050d541b43bcb4 -Author: DJ -AuthorDate: Sat Jul 27 03:36:33 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 03:36:33 2019 -0700 - - add clock - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9f2a758f1d08d59d12958202dca0a9c9ab1f144b -Author: DJ -AuthorDate: Sat Jul 27 03:28:20 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 03:28:20 2019 -0700 - - documentation and scoping - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit cfd62c5183bd734ea32c6f763f2ef23e69096767 -Author: DJ -AuthorDate: Sat Jul 27 03:14:47 2019 -0700 -Commit: DJ -CommitDate: Sat Jul 27 03:14:47 2019 -0700 - - define example under study - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 16215e68caddc15fd930dbbc8bdb4a56fac3adc8 -Author: DJ -AuthorDate: Fri Jul 26 16:16:45 2019 -0700 -Commit: DJ -CommitDate: Fri Jul 26 16:16:45 2019 -0700 - - show the CDRs - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 97ff422616cdb94dc47253e6660f456171e21d37 -Author: DJ -AuthorDate: Fri Jul 26 15:22:37 2019 -0700 -Commit: DJ -CommitDate: Fri Jul 26 15:22:37 2019 -0700 - - working version of dj/cr-1577, with master merged - -A lib/rust/vdj_asm_tools/src/bin/simclone.rs - -manually pruned history from cellranger branch dj/cr-1577 - -commit aabe2d42e97f9e67a30addd22f3b8ee169c33523 -Author: DJ -AuthorDate: Fri May 24 10:47:24 2019 -0700 -Commit: DJ -CommitDate: Fri May 24 10:47:24 2019 -0700 - - parallelize comparison - -M 
lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4721f415e14b02e556fc017489c592a219162862 -Author: DJ -AuthorDate: Thu May 23 16:53:33 2019 -0700 -Commit: DJ -CommitDate: Thu May 23 16:53:33 2019 -0700 - - more comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit e4497cfa089a8886e4c672656c78b7424e16e6b4 -Author: DJ -AuthorDate: Thu May 23 10:22:48 2019 -0700 -Commit: DJ -CommitDate: Thu May 23 10:22:48 2019 -0700 - - now can show barcodes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 71f11c83a522cd6e8dcc3b4fa8e7dd7a7582b6a9 -Author: DJ -AuthorDate: Thu May 23 06:23:16 2019 -0700 -Commit: DJ -CommitDate: Thu May 23 06:23:16 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4b94d5e4cf445329d0a26f409c6376a0a6171d3f -Author: DJ -AuthorDate: Thu May 23 06:13:37 2019 -0700 -Commit: DJ -CommitDate: Thu May 23 06:13:37 2019 -0700 - - tighten second test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 0925aa18cc9031dc88cdda09324531eec333977d -Author: DJ -AuthorDate: Wed May 22 15:34:05 2019 -0700 -Commit: DJ -CommitDate: Wed May 22 15:34:05 2019 -0700 - - add samples - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d7ad3786a282ef9bc77460209bd93184dbc03a1f -Author: DJ -AuthorDate: Wed May 22 06:44:36 2019 -0700 -Commit: DJ -CommitDate: Wed May 22 06:44:36 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a13fe54901ff4284397a8aa9eadc4b02aae8da1f -Author: DJ -AuthorDate: Wed May 22 06:36:42 2019 -0700 -Commit: DJ -CommitDate: Wed May 22 06:36:42 2019 -0700 - - make a bit faster - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit da0c305660909e6db0dcfeea2682fc0f19e11f60 -Author: DJ -AuthorDate: Wed May 22 06:21:21 2019 -0700 -Commit: DJ -CommitDate: Wed May 22 06:21:21 2019 -0700 - - add comments - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 6b25c44a8ad1e8e1d14750287331f38407398702 -Author: DJ -AuthorDate: Tue May 21 17:30:03 2019 -0700 -Commit: 
DJ -CommitDate: Tue May 21 17:30:03 2019 -0700 - - go back to skipping singletons - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 4c500db094b08f3b9fcc2155f6c7c2f0886682c8 -Author: DJ -AuthorDate: Tue May 21 17:17:09 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 17:17:09 2019 -0700 - - parallelize - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 3ab0ec522a30bf034ab02c66e7ec6b38b9e82881 -Author: DJ -AuthorDate: Tue May 21 16:36:56 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 16:36:56 2019 -0700 - - add notes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b40b2ebf8c1cc5d6626baac5a975caf397891bf3 -Author: DJ -AuthorDate: Tue May 21 16:21:10 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 16:21:10 2019 -0700 - - fix insane memory hog bug - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 092ca823e3a3964884e8543d0a4897976a948eab -Author: DJ -AuthorDate: Tue May 21 16:00:57 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 16:00:57 2019 -0700 - - allow singletons - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit f167a2c10ba84927f76f247672c1fe95cd6d907c -Author: DJ -AuthorDate: Tue May 21 15:55:28 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 15:55:28 2019 -0700 - - add samples - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 074347c522f72da5fd75e81a6b6a0c101e69833d -Author: DJ -AuthorDate: Tue May 21 15:49:14 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 15:49:14 2019 -0700 - - tighten second test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 65c71dd4dbfb80632b329a1f3fc657e20a402e1b -Author: DJ -AuthorDate: Tue May 21 15:05:32 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 15:05:32 2019 -0700 - - add samples - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 90c6271c6916ef7fd119cb7845477c24682719d1 -Author: DJ -AuthorDate: Tue May 21 11:38:24 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 11:38:24 2019 -0700 - - strengthen join test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 
78cc5109419862b0a420118f3eb22aefd1aedd5e -Author: DJ -AuthorDate: Tue May 21 11:36:34 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 11:36:34 2019 -0700 - - add examples - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 54e0764380e3f482805bf3931c0956482f422636 -Author: DJ -AuthorDate: Tue May 21 11:31:41 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 11:31:41 2019 -0700 - - add second test for acceptable join - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit a12a39eb6f07f4b60ceace064ab67d87913a121a -Author: DJ -AuthorDate: Tue May 21 11:18:51 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 11:18:51 2019 -0700 - - move print test - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 1f5a3fffdf28f6e2eda3762d8d35880eb12405f8 -Author: DJ -AuthorDate: Tue May 21 11:16:59 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 11:16:59 2019 -0700 - - centralize printing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 77174727602a5a78c2de1d1e24726e82cd5c6592 -Author: DJ -AuthorDate: Tue May 21 11:08:59 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 11:08:59 2019 -0700 - - midway on printing changes - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c25435b7ca4f61559b8ebe3b583ba42b01c2625f -Author: DJ -AuthorDate: Tue May 21 11:03:25 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 11:03:25 2019 -0700 - - compute CDR3 diffs - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 043b9e63092fd2ec510e61df400b2e830487a5c0 -Author: DJ -AuthorDate: Tue May 21 10:42:27 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 10:42:27 2019 -0700 - - fix bug, add dataset - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit c0e2f8750637f9b3379263a0e407acff41388fa5 -Author: DJ -AuthorDate: Tue May 21 10:29:19 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 10:29:19 2019 -0700 - - add knob - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit bc8a8088f41f418158899a9d59db51c6d7df81a0 -Author: DJ -AuthorDate: Tue May 21 10:09:42 2019 -0700 -Commit: DJ 
-CommitDate: Tue May 21 10:09:42 2019 -0700 - - analyze differences with reference - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 72bf331010843575543afb6679f833b1ebe08bc6 -Author: DJ -AuthorDate: Tue May 21 07:11:19 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 07:11:19 2019 -0700 - - various tidying - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 76be4fa886f5965488c77a3c0bde0db938a6d836 -Author: DJ -AuthorDate: Tue May 21 06:53:00 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 06:53:00 2019 -0700 - - track v segs, j segs, and funnies - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit d8a5369e4047801807aff9f8621fd94e16a51079 -Author: DJ -AuthorDate: Tue May 21 04:49:18 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 04:49:18 2019 -0700 - - report peak mem - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 31f4aaad3fcb56cb84c27efea215dc809d834bb0 -Author: DJ -AuthorDate: Tue May 21 04:43:24 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 04:43:24 2019 -0700 - - remove dead code thing - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 5c00d72f6a59a0b04b5f27219a92d4521510ccff -Author: DJ -AuthorDate: Tue May 21 04:42:00 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 04:42:00 2019 -0700 - - make CloneInfo.origin into sample index - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ed181879983eeed7231d2bcb29f9d1b42d9b713e -Author: DJ -AuthorDate: Tue May 21 04:34:30 2019 -0700 -Commit: DJ -CommitDate: Tue May 21 04:34:30 2019 -0700 - - replace anonymous struct by named struct - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 560668f40ad68e7af9d0599a721072a9f7d11f14 -Author: DJ -AuthorDate: Mon May 20 15:51:22 2019 -0700 -Commit: DJ -CommitDate: Mon May 20 15:51:22 2019 -0700 - - allow multiple datasets - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit ce04f24187ee3034ef88b1dc6ec12c551cbcb4d9 -Author: DJ -AuthorDate: Mon May 20 15:06:11 2019 -0700 -Commit: DJ -CommitDate: Mon May 20 15:06:11 2019 -0700 - - 
changes in progress - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit b6689644b05ec4af7f8944c8f8121c56e1941e4b -Author: DJ -AuthorDate: Sat May 18 06:20:45 2019 -0700 -Commit: DJ -CommitDate: Sat May 18 06:20:45 2019 -0700 - - log contig sequences - -M lib/rust/vdj_asm_tools/src/bin/simclone.rs - -commit 9345ab526043705df4a2bfa1b73263729e9109d9 -Author: DJ -AuthorDate: Fri May 17 16:22:18 2019 -0700 -Commit: DJ -CommitDate: Fri May 17 16:22:18 2019 -0700 - - experimental code to find similar clonotypes - -A lib/rust/vdj_asm_tools/src/bin/simclone.rs diff --git a/README b/README deleted file mode 100644 index dc33bed0d..000000000 --- a/README +++ /dev/null @@ -1,32 +0,0 @@ -Notes on building and testing: - -1. Run ./build to build. See notes there. - -2. Test speed with ./speed. This uses datasets that are not available externally. The - particular data used have a very large clonotype that causes the code to be slow. - -3. Test other things with - cargo t - assuming that you have installed enclone with the "large" option. - This automatically runs with -- --nocapture. - If you have only "medium" and not "large, you can use instead: - cd enclone; cargo test --features basic -- --nocapture - -4. This does not include the very large test given by enclone/src/enclone.test. - That test can only be run at 10x Genomics because it includes datasets that we don't - distribute (for example because they're not unambiguously consented for public release). - -5. The code for NOPAGER accidentally broke at one point and we - don't want that to recur. Some test cases that could be verified: - enclone BCR=... - enclone BCR=... NOPAGER - enclone help all - enclone help all NOPAGER - enclone help faq - enclone help faq NOPAGER. - This seems to require manual testing. - -6. In OS X Catalina, in full screen mode, at one point enclone appeared to have no output, - because the output was going to the alternate screen. This seems to require manual testing. - -7. 
We allow paths that start with ~ or ~user_name, but do not have automated testing for this. diff --git a/README.md b/README.md index 801889461..c1e514960 100644 --- a/README.md +++ b/README.md @@ -4,4 +4,4 @@ enclone banner -Please go here bit.ly/enclone for more information, including FAQs and documentation! The latest version of `enclone` is `v0.4.48`. +Please go here bit.ly/enclone for more information, including FAQs and documentation! The latest version of `enclone` is `v0.5.219`. diff --git a/build b/build deleted file mode 100755 index 60eb2256d..000000000 --- a/build +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/csh -e - -# Build script for enclone. The only thing missing from this is to run -# target/debug/merge_html BUILD, -# however you should only need to do that rarely, -# and if you do need to, "cargo t" will catch the problem and should tell you -# what to do. - -# WARNING: note harcoding of version14! - -# import and update enclone-data - -if ( !(-d enclone-data) ) then - git clone --depth=1 https://github.com/10XGenomics/enclone-data.git -endif -git -C enclone-data fetch --depth=1 origin `cat data_version` -git -C enclone-data switch --detach FETCH_HEAD - -# make outputs - -mkdir -p enclone_main/testx/outputs - -# run rustfmt - -cargo fmt --all - -# compile rust code - -cargo b - -# build help pages - -target/debug/enclone HTML STABLE_DOC > pages/auto/help.main.html -target/debug/enclone HTML STABLE_DOC help > pages/auto/help.setup.html -foreach x (quick how command glossary example1 example2 input input_tech parseable filter special lvars cvars amino display indels color faq developer all) - target/debug/enclone help $x HTML STABLE_DOC > pages/auto/help.$x.html -end - -# merge html pages - -target/debug/merge_html - -# update dataset checksums - -git -C enclone-data write-tree --prefix=big_inputs/version14/123085 > datasets_small_checksum -git -C enclone-data write-tree --prefix=big_inputs/version14 > datasets_medium_checksum diff --git 
a/enclone_help/Cargo.toml b/build_enclone_proto/Cargo.toml similarity index 60% rename from enclone_help/Cargo.toml rename to build_enclone_proto/Cargo.toml index bc8f8ab74..de41c9aeb 100644 --- a/enclone_help/Cargo.toml +++ b/build_enclone_proto/Cargo.toml @@ -1,26 +1,22 @@ [package] -name = "enclone_help" -version = "0.4.49" +name = "build_enclone_proto" +version = "0.5.219" authors = ["""David Jaffe , Keri Dockter , + Lance Hepler , Shaun Jackman , Sreenath Krishnan , Meryl Lewis , + Alvin Liang , Patrick Marks , Wyatt McDonnell """] -edition = "2018" +edition = "2021" +license-file = "LICENSE.txt" publish = false - -# Please do not edit crate versions within this file. Instead edit the file master.toml -# in the root of the enclone repo. +exclude = ["enclone.types.rs"] [dependencies] -ansi_escape = "0.1.0" -enclone_core = { path = "../enclone_core" } -io_utils = "0.2" -string_utils = "0.1.1" -tables = "0.1.2" - - - +prost-build = ">=0.9,<0.12" +[dev-dependencies] +tempfile = "3.4" diff --git a/build_enclone_proto/LICENSE.txt b/build_enclone_proto/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/build_enclone_proto/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/build_enclone_proto/enclone.types.rs b/build_enclone_proto/enclone.types.rs new file mode 120000 index 000000000..dc13ed71e --- /dev/null +++ b/build_enclone_proto/enclone.types.rs @@ -0,0 +1 @@ +../enclone_proto/src/enclone.types.rs \ No newline at end of file diff --git a/build_enclone_proto/src/main.rs b/build_enclone_proto/src/main.rs new file mode 100644 index 000000000..ffb838346 --- /dev/null +++ b/build_enclone_proto/src/main.rs @@ -0,0 +1,54 @@ +// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. + +// Rather than put this in a build.rs script in the enclone_proto crate to +// auto generate types.rs from types.proto, this is used to update types.rs +// offline. A unit test ensures that they're in sync. 
+// This allows dependent crates to avoid having `prost_build` in their +// transitive dependency tree, and also makes fresh builds (e.g. in CI) +// for dependent crates quite a lot faster. + +use prost_build::Config; +use std::path::{Path, PathBuf}; +use std::process::{exit, Command}; + +fn main() { + let manifest_dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()); + let out_dir = manifest_dir.join("../enclone_proto/src"); + make_output(manifest_dir.as_path(), out_dir.as_path()); +} + +fn make_output(manifest_dir: &Path, out_dir: &Path) { + std::env::set_var("OUT_DIR", out_dir.as_os_str()); + let mut config = Config::new(); + config.type_attribute(".", "#[derive(::serde::Serialize, ::serde::Deserialize)]"); + config + .compile_protos(&[manifest_dir.join("types.proto")], &[manifest_dir]) + .unwrap(); + + let status = Command::new("rustfmt") + .arg(out_dir.join("enclone.types.rs")) + .status() + .expect("failed to execute rustfmt"); + if !status.success() { + println!("rustfmt did not complete successfully!"); + exit(status.code().unwrap_or_default()); + } +} + +#[cfg(test)] +mod test { + use super::make_output; + use std::{fs::read_to_string, path::PathBuf}; + + // Ensure that the checked-in file is up to date. 
+ #[test] + fn check_output_unchanged() { + let manifest_dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()); + let out_dir = tempfile::tempdir().unwrap(); + make_output(manifest_dir.as_path(), out_dir.path()); + let current_source = + read_to_string(manifest_dir.join("enclone.types.rs").as_path()).unwrap(); + let new_source = read_to_string(out_dir.path().join("enclone.types.rs").as_path()).unwrap(); + assert_eq!(current_source, new_source); + } +} diff --git a/enclone_proto/types.proto b/build_enclone_proto/types.proto similarity index 85% rename from enclone_proto/types.proto rename to build_enclone_proto/types.proto index 73edebc78..ba484e9da 100644 --- a/enclone_proto/types.proto +++ b/build_enclone_proto/types.proto @@ -15,6 +15,14 @@ // In the above diagram, // - `Length` is an unsigned 32 bit integer stored in **Big endian** order. // - Multiple messages are stored consecutively following the same format. +// +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ +// +// THE ORDER OF FIELDS IN THIS FILE CAN NEVER BE CHANGED. +// +// ALWAYS ADD FIELDS TO THE END OF A DATA STRUCTURE. +// +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ syntax = "proto2"; @@ -22,11 +30,11 @@ package enclone.types; // Various regions within a VDJ transcript enum Region { - U = 0; // 5' untranslated region - V = 1; // Variable region - D = 2; // Diversity region - J = 3; // Joining region - C = 4; // Constant region + U = 0; // 5' untranslated region + V = 1; // Variable region + D = 2; // Diversity region + J = 3; // Joining region + C = 4; // Constant region } // Evidence that a given cell is iNKT or MAIT. Each ExactSubclonotype has one @@ -64,10 +72,11 @@ message ExactSubClonotypeChain { // same UVDJ regions. optional uint32 c_region_idx = 5; // Index of the start of the CDR3 sequence in the `nt_sequence`. 
The start of - // the CDR3 amino acid in the `aa_sequence` is `(cdr3_start - v_start)/3` + // the CDR3 amino acid in the `aa_sequence` is `(cdr3_start - v_start)/3`. required uint32 cdr3_start = 6; - // Index of the end of the CDR3 sequence in the `nt_sequence` (exclusive). The - // end of the CDR3 amino acid in the `aa_sequence` is `(cdr3_end - v_start)/3` + // Index of the end of the CDR3 sequence in the `nt_sequence` (exclusive). + // The end of the CDR3 amino acid in the `aa_sequence` is + // `(cdr3_end - v_start)/3`. required uint32 cdr3_end = 7; // UMI counts of contigs associated with this exact subclonotype chain. The // number of elements in this vector is equal to the number of barcodes @@ -95,6 +104,23 @@ message ExactSubClonotypeChain { // concatenated universal reference of this chain (defined elsewhere in this // file). required Alignment universal_reference_aln = 13; + // Index of the start of the FWR1 sequence in the `nt_sequence`. + optional uint32 fwr1_start = 14; + // Index of the start of the CDR1 sequence in the `nt_sequence`. + optional uint32 cdr1_start = 15; + // Index of the start of the FWR2 sequence in the `nt_sequence`. + optional uint32 fwr2_start = 16; + // Index of the start of the CDR2 sequence in the `nt_sequence`. + optional uint32 cdr2_start = 17; + // Index of the start of the FWR3 sequence in the `nt_sequence`. + optional uint32 fwr3_start = 18; + // Index of the end of the FWR4 sequence in the `nt_sequence` (exclusive). + optional uint32 fwr4_end = 19; + // Nucleotide percent identity with the donor reference, outside junction region. + required float dna_percent = 20; + // Amino acid percent identity with the donor reference, outside junction region. + required float aa_percent = 21; + } // The chains in a clonotype are ordered an hence they have a unique index. @@ -115,10 +141,8 @@ message ExactSubClonotypeChainInfo { // end of the J-REGION as well as the same C-REGION annotation for each chain. 
// TODO: Maybe mutations outside V-J? message ExactSubClonotype { - // The chains in an exact subclonotype. The number of elements in this vector - // is equal to the total number of chains in the parent clonotype. The order - // of chains is consistent with the order in the parent clonotype and at least - // one element in this vector is not a `None`. + // The chains in an exact subclonotype along with the index of the chain in + // the parent clonotype repeated ExactSubClonotypeChainInfo chains = 1; // List of cell barcodes in this exact subclonotype. The number of elements in // this list is equal to the number of elements in the `umi_counts` and @@ -218,6 +242,18 @@ message ClonotypeChain { required bytes aa_sequence_universal = 23; // AA sequence of the concatenated donor reference starting from the V regions required bytes aa_sequence_donor = 24; + // Index of the start of the FWR1 sequence in the `nt_sequence`. + optional uint32 fwr1_start = 25; + // Index of the start of the CDR1 sequence in the `nt_sequence`. + optional uint32 cdr1_start = 26; + // Index of the start of the FWR2 sequence in the `nt_sequence`. + optional uint32 fwr2_start = 27; + // Index of the start of the CDR2 sequence in the `nt_sequence`. + optional uint32 cdr2_start = 28; + // Index of the start of the FWR3 sequence in the `nt_sequence`. + optional uint32 fwr3_start = 29; + // Index of the end of the FWR4 sequence in the `nt_sequence` (exclusive). + optional uint32 fwr4_end = 30; } // Definition of a clonotype. @@ -305,6 +341,7 @@ message DonorReference { message GemWellInfo { required string donor = 1; required string origin = 2; + required string library_id = 3; // Data for all the additional columns. 
For convenience, we are storing it as // a map, rather than an array with the order defined by `additional_columns` // in Metadata diff --git a/cd_datasets b/cd_datasets deleted file mode 100644 index 15b11324d..000000000 --- a/cd_datasets +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/csh - -# This will only work if enclone-data has been imported, e.g. via ./build. - -cd enclone-data/big_inputs/current diff --git a/third_party/Apache-2.0 b/credits/Apache-2.0 similarity index 100% rename from third_party/Apache-2.0 rename to credits/Apache-2.0 diff --git a/third_party/MPL-2.0-source_code_availabliity b/credits/MPL-2.0-source_code_availabliity similarity index 100% rename from third_party/MPL-2.0-source_code_availabliity rename to credits/MPL-2.0-source_code_availabliity diff --git a/credits/MPL2.0 b/credits/MPL2.0 new file mode 100644 index 000000000..14e2f777f --- /dev/null +++ b/credits/MPL2.0 @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. 
+ +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. 
For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. 
+ +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/credits/acknowledgements b/credits/acknowledgements new file mode 100644 index 000000000..16781e009 --- /dev/null +++ b/credits/acknowledgements @@ -0,0 +1,28 @@ +We thank Albert Vilella for many suggestions, including the column variables cdr3_aa_conx and +cdr3_aa_conp, COLOR=codon-diffs and NOSPACES. + +enclone includes software developed by the OpenSSL Project +for use in the OpenSSL Toolkit (http://www.openssl.org/). + +Plain text tree visualization in enclone is based on +https://gitlab.com/Noughmad/ptree, by Miha Čančula. + +The script install.sh reuses code from the rust language installation script. + +We thank Ganesh Phad for suggesting localization by color in honeycomb plots. + +The file polygon.rs uses code taken from the lorikeet-dash crate version 0.1.0, whose license +is MIT/Apache-2.0. + +The crate bio_edit (in crates.io) is a copy of rust-bio, with one modified function, and many +other parts deleted. The cigar function is from rust-bio. + +The file convert_svg_to_png.rs is largely copied from the resvg crate. + +The file colors.rs is from Anton Mikhailov at Google. + +We redistribute LIBRA-seq data from Setliff et al. 2019, High-Throughput Mapping of B Cell +Receptor Sequences to Antigen Specificity. + +The file xwrap.rs (or possibly xwrap.rs.aside) is derived from the crate shotgun:2.2.1 and is +licensed under MPL 2.0.
diff --git a/third_party/arrayref b/credits/arrayref similarity index 100% rename from third_party/arrayref rename to credits/arrayref diff --git a/third_party/chromium b/credits/chromium similarity index 100% rename from third_party/chromium rename to credits/chromium diff --git a/third_party/cloudabi b/credits/cloudabi similarity index 100% rename from third_party/cloudabi rename to credits/cloudabi diff --git a/third_party/fuchsia b/credits/fuchsia similarity index 100% rename from third_party/fuchsia rename to credits/fuchsia diff --git a/credits/instant b/credits/instant new file mode 100644 index 000000000..a301457c0 --- /dev/null +++ b/credits/instant @@ -0,0 +1,27 @@ +Copyright (c) 2019, Sébastien Crozet +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the author nor the names of its contributors may be used + to endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/credits/mach b/credits/mach new file mode 100644 index 000000000..5e2b2e365 --- /dev/null +++ b/credits/mach @@ -0,0 +1,25 @@ +This is the BSD license for mach. + +Copyright (c) 2019, Nick Fitzgerald +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/credits/num_enum b/credits/num_enum new file mode 100644 index 000000000..b742e292a --- /dev/null +++ b/credits/num_enum @@ -0,0 +1,27 @@ +Copyright (c) 2018, Daniel Wagner-Hall +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of num_enum nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/credits/palaver b/credits/palaver new file mode 100644 index 000000000..d62067c7f --- /dev/null +++ b/credits/palaver @@ -0,0 +1,2 @@ +We copied the thread function from this crate into a new function count_threads, because the +crate had not been updated recently and we wanted to avoid duplication of other crates. diff --git a/credits/pasteboard b/credits/pasteboard new file mode 100644 index 000000000..4b1f2c28e --- /dev/null +++ b/credits/pasteboard @@ -0,0 +1,2 @@ +Some code from the pasteboard crate, https://github.com/segeljakt/pasteboard, +rev = 1046ec011598318731988325de6f9c42523a6754, May 14, 2021, was used in the file gui.rs. diff --git a/third_party/ring b/credits/ring similarity index 100% rename from third_party/ring rename to credits/ring diff --git a/credits/tiny-skia b/credits/tiny-skia new file mode 100644 index 000000000..d7a0cb542 --- /dev/null +++ b/credits/tiny-skia @@ -0,0 +1,30 @@ +Copyright (c) 2011 Google Inc. All rights reserved. +Copyright (c) 2020 Reizner Evgeniy All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/webpki b/credits/webpki similarity index 100% rename from third_party/webpki rename to credits/webpki diff --git a/data_transfer b/data_transfer deleted file mode 100644 index f19903af5..000000000 --- a/data_transfer +++ /dev/null @@ -1,11 +0,0 @@ -Instructions for transferring customer data to 10x genomics (for enclone support): - -1. The customer-facing instructions are here -https://support.10xgenomics.com/docs/transfers. The instructions do not explain what to do -with the typical file types customers would transfer -(especially all_contig_annotations.json) but they should just do the obvious thing, which -is to tar and gzip a directory containing these files, and proceed by analogy with the -published instructions. 
They should substitute enclone@10xgenomics.com for support@10xgenomics.com. - -2. There are internal-facing instructions here (only readable at 10x Genomics): -https://10xtech.atlassian.net/wiki/spaces/SFO/pages/938475641/How+to+work+with+customer+s+large+data+via+Redstone. diff --git a/data_version b/data_version deleted file mode 100644 index d0f69d35f..000000000 --- a/data_version +++ /dev/null @@ -1 +0,0 @@ -296b88f61893f1c9da170ac5b77570935fd55bc2 diff --git a/datasets_medium_checksum b/datasets_medium_checksum index dcaa16a6a..f886f37f9 100644 --- a/datasets_medium_checksum +++ b/datasets_medium_checksum @@ -1 +1 @@ -147559fcfc37835224753da203f47f7f7ec4cee4 +4f646563659e4fbd1a9511655fc93b37019849ac diff --git a/datasets_small_checksum b/datasets_small_checksum index ea87c2fc5..330ac75aa 100644 --- a/datasets_small_checksum +++ b/datasets_small_checksum @@ -1 +1 @@ -f7ff4d388f66f7fdc9eeb41551a9906770799deb +9f67152261fd93a75cb2fdec2fdd5245bf67c36d diff --git a/deny.toml b/deny.toml new file mode 100644 index 000000000..98dbe080c --- /dev/null +++ b/deny.toml @@ -0,0 +1,141 @@ +# If 1 or more target triples (and optionally, target_features) are specified, +# only the specified targets will be checked when running `cargo deny check`. +# This means, if a particular package is only ever used as a target specific +# dependency, such as, for example, the `nix` crate only being used via the +# `target_family = "unix"` configuration, that only having windows targets in +# this list would mean the nix crate, as well as any of its exclusive +# dependencies not shared by any other crates, would be ignored, as the target +# list here is effectively saying which targets you are building for. 
+targets = [ + { triple = "x86_64-apple-darwin" }, + { triple = "x86_64-pc-windows-gnu" }, + { triple = "x86_64-unknown-linux-gnu" }, +] + +# This section is considered when running `cargo deny check advisories` +# More documentation for the advisories section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html +[advisories] +# The path where the advisory database is cloned/fetched into +# db-path = "~/.cargo/advisory-db" +# The url(s) of the advisory databases to use +db-urls = ["https://github.com/rustsec/advisory-db"] +# The lint level for security vulnerabilities +vulnerability = "deny" +# The lint level for unmaintained crates +unmaintained = "warn" +# The lint level for crates that have been yanked from their source registry +yanked = "warn" +# The lint level for crates with security notices. Note that as of +# 2019-12-17 there are no security notice advisories in +# https://github.com/rustsec/advisory-db +notice = "warn" +# A list of advisory IDs to ignore. Note that ignored advisories will still +# output a note when they are encountered. +ignore = [ +] + +# This section is considered when running `cargo deny check licenses` +# More documentation for the licenses section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html +[licenses] +# The lint level for crates which do not have a detectable license +unlicensed = "deny" +# Allow us to keep a consistent list across projects without needing +# to customize deny.toml based on what's actually present. +unused-allowed-license = "allow" +# List of explicitly allowed licenses +# See https://spdx.org/licenses/ for list of possible licenses +# [possible values: any SPDX 3.11 short identifier (+ optional exception)].
+allow = [ + "0BSD", + "Apache-2.0", + "Apache-2.0 WITH LLVM-exception", + "BSD-2-Clause", + "BSD-3-Clause", + "ISC", + "MIT", + "MPL-2.0", + "OpenSSL", + "Unicode-DFS-2016", + "WTFPL", +] +# List of explicitly disallowed licenses +# See https://spdx.org/licenses/ for list of possible licenses +# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. +deny = [ + #"Nokia", +] +# Lint level for licenses considered copyleft +copyleft = "deny" +# Blanket approval or denial for OSI-approved or FSF Free/Libre licenses +# * both - The license will be approved if it is both OSI-approved *AND* FSF +# * either - The license will be approved if it is either OSI-approved *OR* FSF +# * osi-only - The license will be approved if it is OSI-approved *AND NOT* FSF +# * fsf-only - The license will be approved if it is FSF *AND NOT* OSI-approved +# * neither - This predicate is ignored and the default lint level is used +allow-osi-fsf-free = "neither" +# Lint level used when no other predicates are matched +# 1. License isn't in the allow or deny lists +# 2. License isn't copyleft +# 3. License isn't OSI/FSF, or allow-osi-fsf-free = "neither" +default = "deny" +# The confidence threshold for detecting a license from license text. +# The higher the value, the more closely the license text must match the +# canonical license text of a valid SPDX license file. +# [possible values: any between 0.0 and 1.0]. +confidence-threshold = 0.6 + +[licenses.private] +# If true, ignores workspace crates that aren't published, or are only +# published to private registries +ignore = true +# One or more private registries that you might publish crates to, if a crate +# is only published to private registries, and ignore is true, the crate will +# not have its license(s) checked +registries = [ + #"https://sekretz.com/registry" +] + +# This section is considered when running `cargo deny check bans`.
+# More documentation about the 'bans' section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html +[bans] +# Lint level for when multiple versions of the same crate are detected +multiple-versions = "deny" +# Lint level for when a crate version requirement is `*` +wildcards = "allow" +# The graph highlighting used when creating dotgraphs for crates +# with multiple versions +# * lowest-version - The path to the lowest versioned duplicate is highlighted +# * simplest-path - The path to the version with the fewest edges is highlighted +# * all - Both lowest-version and simplest-path are used +highlight = "all" +# List of crates that are allowed. Use with care! + +# This section is considered when running `cargo deny check sources`. +# More documentation about the 'sources' section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html +[sources] +# Lint level for what to happen when a crate from a crate registry that is not +# in the allow list is encountered +unknown-registry = "deny" +# Lint level for what to happen when a crate from a git repository that is not +# in the allow list is encountered +unknown-git = "deny" +# List of URLs for allowed crate registries. Defaults to the crates.io index +# if not specified. If it is specified but empty, no registries are allowed. 
+allow-registry = ["https://github.com/rust-lang/crates.io-index"] +# List of URLs for allowed Git repositories +allow-git = [ + # TODO: remove this + "https://github.com/Barandis/qd", +] + +[sources.allow-org] +# 1 or more github.com organizations to allow git sources for +github = ["10XGenomics"] +# 1 or more gitlab.com organizations to allow git sources for +gitlab = [] +# 1 or more bitbucket.org organizations to allow git sources for +bitbucket = [] diff --git a/enclone/Cargo.toml b/enclone/Cargo.toml index 067f47bdc..742b41b37 100644 --- a/enclone/Cargo.toml +++ b/enclone/Cargo.toml @@ -1,53 +1,58 @@ [package] name = "enclone" -version = "0.4.49" +version = "0.5.219" authors = ["""David Jaffe , + Nigel Delaney , Keri Dockter , + Jessica Hamel , + Lance Hepler , Shaun Jackman , Sreenath Krishnan , Meryl Lewis , + Alvin Liang , Patrick Marks , Wyatt McDonnell """] -edition = "2018" -license = "LICENSE.txt" +edition = "2021" +license-file = "LICENSE.txt" publish = false +include = ["LICENSE.txt", "src/*.rs", "src/*.json"] # Please do not edit crate versions within this file. Instead edit the file master.toml # in the root of the enclone repo. 
[dependencies] -amino = "0.1.1" -ansi_escape = "0.1.0" -assert_cmd = "0.12.0" -bio = "0.31.0" -debruijn = "0.3.2" -dirs = "2.0.2" +amino = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +ansi_escape = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +debruijn = "0.3" enclone_core = { path = "../enclone_core" } -equiv = "0.1.1" -graph_simple = "0.1.1" -io_utils = "0.2" -itertools = "0.9.0" -mirror_sparse_matrix = "0.1.4" -perf_stats = "0.1.2" -petgraph = "0.4.13" -pretty_trace = "0.3.2" -rayon = "1.0.2" -regex = "1.3.1" -serde = "1.0.90" -serde_derive = "1.0.102" -serde_json = "*" -stats_utils = "0.1.1" -stirling_numbers = "0.1.2" -string_utils = "0.1.1" -tilde-expand = "0.1.1" -vdj_ann = { git = "https://github.com/10XGenomics/rust-toolbox.git", rev="183e2d657e6436494072a32cf8da4f7b753d1e69" } -vector_utils = "0.1.3" +enclone_proto = { path = "../enclone_proto" } +equiv = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +graph_simple = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +io_utils = { version = "0.3", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +itertools.workspace = true +perf_stats = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +petgraph = "0.6" +pretty_trace = { version = "0.5", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +qd = { git = "https://github.com/Barandis/qd" } +rayon = "1" +stats_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +string_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vdj_ann = { version = "0.4", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vector_utils = { version = "0.1", git = 
"https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } + +[dev-dependencies] +assert_cmd = "2" [target.'cfg(not(windows))'.dependencies] -pager = "0.15.0" +pager = "0.16" -[dependencies.hdf5] +[target.'cfg(not(windows))'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" features = ["conda"] -git = "https://github.com/pmarks/hdf5-rs.git" -rev = "0c98e57b2af1f4247708c198b324ba3a8bc18dba" +default-features = false +[target.'cfg(windows)'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" +default-features = false diff --git a/enclone/LICENSE.txt b/enclone/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/enclone/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/enclone/src/UNDOC_OPTIONS b/enclone/src/UNDOC_OPTIONS index 54b7cea75..b2e2193f1 100644 --- a/enclone/src/UNDOC_OPTIONS +++ b/enclone/src/UNDOC_OPTIONS @@ -11,12 +11,12 @@ Optional arguments controlling printing of join events: - ANN0: print annotations of contig, after truncation to V..J - PFREQ=n: show data for 1/n joins even if correct - SHOW_BC: show barcodes -- PRINT_FAILED_JOINS: print failed join events. +- PRINT_FAILED_JOINS: print failed join events +- FAILS_ONLY: only print failed clonotypes. Optional arguments that control clonotype filtering: - WHITEF: only show clonotypes that exhibit whitelist contamination - PROTECT_BADS: don't delete putatively bad stuff -- FAIL_ONLY=true or false: only print failed clonotypes in the true case - VDUP: only show clonotypes having two chains with the same V segment - HAVE_ONESIE: only show clonotypes including a onesie exact subclonotype @@ -57,13 +57,8 @@ Other optional arguments: - RE: reannotate instead of using the annotation in the json file, useful for two purposes: 1. If you have a json file generated by an outdated code version; 2. 
You want to see the effect of changed annotation code. -- ONESIE_MULT=n: for a onesie exact subclonotype to be created, its number of cells must be at - least 1/n of the total number of cells in exact subclonotypes; by default - n = 10,000. Lowering the value of this will increase the number of onesie - exact subclonotypes, but increase the fraction that are incorrect. - NPLAIN: reverses PLAIN - INDELS: search for and list CDR3s from clonotypes with possible SHM indels (exploratory) -- INSERTIONS: search for and list CDR3s from clonotypes with possible SHM insertions (exploratory) - NOPRETTY: turn off pretty trace entirely - HEAVY_CHAIN_REUSE: look for instances of heavy chain reuse - BINARY=filename: generate binary output file @@ -87,9 +82,6 @@ composition in the external clonotyping, as clonotype_id[count], ... The input file should have lines of the form: sample barcode clonotype_id. -REQUIRED_FPS=n: exit(1) if the number of false positives is not n; here we define a false -positive to be a clonotype that contains cells from multiple donors. - SUMMARY_CLEAN: if SUMMARY specified, don't show computational performance stats, so we can regress on output @@ -119,11 +111,9 @@ LVARS entries - npe = total number of cells in this clonotype that are within PCA distance n of this cell - ppe = percent of all gex cells within PCA distance n of this cell that are in this clonotype -IMGT: specify the IMGT reference, only implemented for human, forces RE so slow -How this reference was created -- go into a new directory -- /mnt/home/jaffe/repos/cellranger/lib/bin/fetch-imgt --genome human-imgt-20200415 -- mkvdjref --genome=vdj_IMGT_human_20200415 --seqs=human-imgt-20200415-mkvdjref-input.fasta +NOTE: pe, npe and ppe cannot be used in bounds. + +IMGT: specify the IMGT reference, forces RE so slow IMGT_FIX: make certain hardcoded fixes to the IMGT reference. 
@@ -147,6 +137,7 @@ UMI_RATIO_FILT_MARK NON_CELL_MARK (only makes sense if you run NCELL) ECHO: echo command +ECHOC: echo command, preceded by "# ", and don't emit blank line --------------------------------------------------------------------------------------------------- @@ -178,14 +169,190 @@ BI=n, where 1 <= n <= 13 (for now): load BCR and GEX from datasets described in enclone.testdata.bcr.gex (This only works internally where we have these datasets available.) (BI short for "built-in".) +also BIB and BIP PRINT_CPU: print number of cpu cycles used PRINT_CPU_INFO: print info about cpu usage -recognizes S - PLOT_BY_MARK -n_b as LVAR = number of B cells - MAX_SCORE: was public, now deprecated, do not remove + +REQUIRE_UNBROKEN_OK: require that if a V reference sequence is not marked as broken, then all +its features can be computed, and exit after this test + +NALL_GEX: turn off all filters except GEX filter + +REPROD: accept nonproductive contigs, recompute productive, then reject nonproductive + +SPLIT_COMMAND: assuming that BCR and GEX have been provided with multiple entries each, split out +separate commands having them specified one by one, and run them, then exit + +OPTIONS THAT ALLOW METRIC VALUES TO BE LOCKED + +REQUIRED_FPS=n: exit(1) if the number of false positives is not n; here we define a false +positive to be a clonotype that contains cells from multiple donors. 
+ +REQUIRED_CELLS=n: exit(1) if the number of cells in clonotypes is not n + +REQUIRED_CLONOTYPES=n: exit(1) if the number of clonotypes is not n + +REQUIRED_DONORS=n: exit(1) if the number of donors is not n + +REQUIRED_TWO_CELL_CLONOTYPES=n: exit(1) if the number of clonotypes having >= 2 cells is not n + +REQUIRED_TWO_CHAIN_CLONOTYPES=n: exit(1) if the number of two chain clonotypes is not n + +REQUIRED_DATASETS=n: exit(1) if the number of datasets is not n + +GD_BC=x +translates into BC=x/outs/genetic_demux_results/clusters.tsv +to allow for convenient use of genetic demux results in the PD pipeline + +=================================================================================================== + +cvars + +nval = number of validated UMIs +nnval = number of non-validated UMIs +nival = number of invalidated UMIs +valumis = validated UMIs +nvalumis = non-validated UMIs +ivalumis = invalidated UMIs +valbcumis = validated UMIs, preceded by barcode +nvalbcumis = non-validated UMIs, preceded by barcode +ivalbcumis = invalidated UMIs, preceded by barcode + +All three classes of UMIs are capped at 20 UMIs. + +=================================================================================================== + +CLONOTYPE_GROUP_NAMES=filename +* The file should be a CSV file that includes fields new_group_name and group_id. This is + confusing nomenclature, see what follows. +* The group_id should be obtained by running enclone WITHOUT grouping, so that there is exactly + one clonotype per group. +* The new_group_name is the name of a group (of your own concoction) that you are assigning a + clonotype to. +* A clonotype may not be assigned to more than one group. +* The idea is that one would run enclone once to gather information that would + allow grouping of clonotypes. Then run it a second time with this argument. +* The argument only does something if honeycomb plotting is used. 
In that case, the honeycomb + plot is split by group and each group is given a different (light) background color. +* Clonotypes that are not assigned to a group are not displayed. + +=================================================================================================== + +read utilization - This is a stat generated with the SUMMARY option, in internal mode. It is the +fraction of reads that are assigned to chains by enclone. It has the following issues: +1. It doesn't account for reads that are lost because of capping in assembly. Internally, we +can work around this by removing the capping on a cellranger branch. (Done now.) +2. It would not be correct if read one was significantly longer than 28 bases, so that it could +contribute to assembly. + +NO_UNCAP_SIM: turn off uncapping simulation + +=================================================================================================== + +HAPS_DEBUG: turn on debug mode for the HAPS option + +PROFILE: turn on profiling, normally used with NOPRINT; paging is turned off because +profiling won't work with paging, however you can pipe to "less -r". 
+ +=================================================================================================== + +ROW_FILL_VERBOSE: special option for debugging + +TOP_GENES: list genes having the highest expression in the selected clonotypes, taking the median +across all cells (only implemented if .bin file has been generated using NH5) + +COMPE: COMP, plus enforce no unaccounted time (except up to 0.02 seconds) +UNACCOUNTED: show unaccounted time at each step +EVIL_EYE: print logging to facilitate diagnosis of mysterious hanging + +TOY_COM: compute clonotypes, then act as toy server, which can speak to enclone_client + +TOOLTIP: add tooltip text to honeycomb plots + +ALIGN_JUN_ALIGN_CONSISTENCY: test to see if they give consistent results +(run with ALIGN1 JUN_ALIGN1 PLAIN) + +junction region alignment penalties accessible from the command line: +- JSCORE_MATCH +- JSCORE_MISMATCH +- JSCORE_GAP_EXTEND +- JSCORE_GAP_OPEN +- JSCORE_BITS_MULT + +lvars fb and fb_n: for example, fb1 is the 15-base sequence of the most frequent feature +barcode, and fb1_n is the number of UMIs for that feature barcode +- only usable if there is just one dataset +- the file feature_barcode_matrix_top.bin must have been generated + +SIM_MAT_PLOT=filename,v1,...,vn +generate a plot of the all-vs-all per cell cosine similarity for the variables vi + +NO_BUG_REPORTS: don't automatically report bugs + +SUBSAMPLE: subsample barcodes at the indicated fraction; at present this is deliberately + randomly seeded and so nonreproducible + +=================================================================================================== + +ALL_BC=filename,field1,...,fieldn + +Dump a CSV file with fields + + dataset,barcode,field1,...,fieldn + +to filename, with one line for each barcode. All barcodes in the data files are included, whether +or not they correspond to VDJ cells. This would correspond (at least approximately) to all +whitelisted barcodes in the raw data. 
+ +Only certain other fields are allowed: +1. feature variables, e.g. CDR3_ab, representing the UMI count +2. gex = total gene expression UMI count +3. type = the cell type +4. clust = cluster id, from analysis_csv/clustering/graphclust/clusters.csv +5. cell = vdj or gex or gex_vdj or empty. + +This may also be used with VAR_DEF. + +ALL_BCH: same but human readable instead of CSV + +=================================================================================================== + +PRE_EVAL: evaluate sensitivity and specificity before calling print_clonotypes + +JOIN_BASIC_H=n: use a special join heuristic as follows (for demonstration purposes only) + same heavy chain V, same heavy chain J, + same heavy chain CDR3 length, + n% identity on nucleotides within heavy chain CDR3 +This automatically invokes PRE_EVAL because otherwise it would crash. +This option is particularly slow because it forces more comparisons in the join step. +Uses very high memory. Time and memory use increase as n is reduced. + +=================================================================================================== + +EXTERNAL_REF: if you set this to a IMGT reference fasta file, this will compare the internally +generated donor reference to it, do some analyses, and exit + +To generate the current IMGT reference within the 10x codebase, do something like this: +1. bazel build //:pd //:shimulate //:devpipes_env +2. cellranger/bazel-bin/devpipes_env.sh python cellranger/lib/bin/fetch_imgt_lib.py + --genome vdj_IMGT_human --species "Homo sapiens" + +=================================================================================================== + +POST_FILTER=filename +should be CSV +dataset,barcode + +Use this with PRE_EVAL. It filters out cells that are not on that list. 
+ +=================================================================================================== + +PCOLS_SHOW: use this list instead of PCOLS to head the parseable output + +SUPER_COMP_FILT=...: like JUN_SHARE but literally gates on junction sharing. Note that this has +verbose logging, which really should be controlled by a separate option. diff --git a/enclone/src/allele.rs b/enclone/src/allele.rs index 694931c6a..b8df12491 100644 --- a/enclone/src/allele.rs +++ b/enclone/src/allele.rs @@ -2,17 +2,17 @@ // This file provides functions to find alternate alleles and substitute them into references. -use vdj_ann::*; +use vdj_ann::refx; -use self::refx::*; -use debruijn::{dna_string::*, Mer}; -use enclone_core::defs::*; +use self::refx::RefData; +use debruijn::{dna_string::DnaString, Mer}; +use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype}; use itertools::Itertools; use rayon::prelude::*; -use stats_utils::*; -use std::cmp::*; +use stats_utils::percent_ratio; +use std::cmp::{max, min, PartialOrd}; use std::time::Instant; -use vector_utils::*; +use vector_utils::{erase_if, next_diff, next_diff1_2, next_diff1_3, reverse_sort, unique_sort}; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ @@ -21,8 +21,8 @@ use vector_utils::*; pub fn find_alleles( refdata: &RefData, ctl: &EncloneControl, - exact_clonotypes: &Vec, -) -> Vec<(usize, usize, DnaString)> { + exact_clonotypes: &[ExactClonotype], +) -> Vec<(usize, usize, DnaString, usize, bool)> { // Derive consensus sequences for alternate alleles of V segments. // // The priority of this algorithm is to reduce the likelihood of false positive joins. It @@ -51,12 +51,13 @@ pub fn find_alleles( // 3. Make alt_refs into a more efficient data structure. // 4. Speed up. 
- let mut alt_refs = Vec::<(usize, usize, DnaString)>::new(); // (donor, ref id, alt seq) + // (donor, ref id, alt seq, support, is_ref): + let mut alt_refs = Vec::<(usize, usize, DnaString, usize, bool)>::new(); // Organize data by reference ID. Note that we ignore exact subclonotypes having four chains. let mut allxy = - vec![Vec::<(usize, Vec, Vec, usize, usize)>::new(); refdata.refs.len()]; + vec![Vec::<(usize, Vec, Vec, usize, usize, String)>::new(); refdata.refs.len()]; for (m, x) in exact_clonotypes.iter().enumerate() { if x.share.len() >= 2 && x.share.len() <= 3 { for j in 0..x.share.len() { @@ -71,19 +72,20 @@ pub fn find_alleles( partner.push(x.share[ja].v_ref_id); } } - if !partner.is_empty() { - if y.seq_del.len() >= refdata.refs[id].len() - ctl.heur.ref_v_trim { - for l in 0..x.clones.len() { - let donor = x.clones[l][j].donor_index; - if donor.is_some() { - allxy[id].push(( - donor.unwrap(), - y.seq_del.clone(), - partner.clone(), - m, - x.clones[l][j].dataset_index, - )); - } + if !partner.is_empty() + && y.seq_del.len() >= refdata.refs[id].len() - ctl.heur.ref_v_trim + { + for clone in &x.clones { + let donor = clone[j].donor_index; + if let Some(donor) = donor { + allxy[id].push(( + donor, + y.seq_del.clone(), + partner.clone(), + m, + clone[j].dataset_index, + clone[0].barcode.clone(), + )); } } } @@ -99,7 +101,7 @@ pub fn find_alleles( vs.push(id); } } - let mut results = Vec::<(usize, Vec<(usize, usize, DnaString)>)>::new(); + let mut results = Vec::<(usize, Vec<(usize, usize, DnaString, usize, bool)>)>::new(); for v in vs.iter() { results.push((*v, Vec::new())); } @@ -110,28 +112,31 @@ pub fn find_alleles( // Divide by donor. 
allx.sort(); - let mut alls = Vec::, Vec, usize, usize)>>::new(); + let mut alls = Vec::, Vec, usize, usize, String)>>::new(); let mut i = 0; while i < allx.len() { - let j = next_diff1_5(&allx, i as i32) as usize; - let mut all = Vec::<(usize, Vec, Vec, usize, usize)>::new(); - for k in i..j { - all.push(allx[k].clone()); + // let j = next_diff1_6(&allx, i as i32) as usize; + let mut j = i + 1; + while j < allx.len() { + if allx[j].0 != allx[i].0 { + break; + } + j += 1; } - alls.push(all); + alls.push(allx[i..j].to_owned()); i = j; } // Process donor by donor. - for di in 0..alls.len() { + for (di, all) in alls.iter().enumerate() { // Data here are given by "all", the relevant entries of which are: // 1: V..J sequence for one chain of a given info entry // 2: the reference ID(s) of the partner chain(s) -- possibly not used // 3: the index in exact_clonotypes // 4: the dataset ID. - let mut all = alls[di].clone(); + let mut all = all.clone(); let donor_id = all[0].0; // If two entries have @@ -158,11 +163,11 @@ pub fn find_alleles( let mut to_delete = vec![false; all.len()]; { let mut trace = Vec::<((usize, usize, usize, usize), usize)>::new(); - for i in 0..all.len() { - let u = all[i].3; + for (i, item) in all.iter().enumerate() { + let u = item.3; let ex = &exact_clonotypes[u]; for j1 in 0..ex.share.len() { - if ex.share[j1].seq_del == all[i].1 { + if ex.share[j1].seq_del == item.1 { for j2 in 0..ex.share.len() { let (s1, s2) = (&ex.share[j1], &ex.share[j2]); if s2.left != s1.left { @@ -180,7 +185,7 @@ pub fn find_alleles( } } } - trace.sort(); + trace.sort_unstable(); let mut i = 0; while i < trace.len() { let j = next_diff1_2(&trace, i as i32) as usize; @@ -221,9 +226,9 @@ pub fn find_alleles( let j = next_diff1_3(&bases, i as i32) as usize; let mut x = Vec::::new(); let mut y = Vec::>::new(); - for m in i..j { - x.push(bases[m].1); - y.push(bases[m].2.clone()); + for base in &bases[i..j] { + x.push(base.1); + y.push(base.2.clone()); } freqs.push((j - i, x, 
y, bases[i].0)); i = j; @@ -248,7 +253,9 @@ pub fn find_alleles( } else { c = b'T'; } - if (freqs.len() > 0 && freqs[0].0 >= ctl.allele_alg_opt.min_alt && freqs[0].3 != c) + if (!freqs.is_empty() + && freqs[0].0 >= ctl.allele_alg_opt.min_alt + && freqs[0].3 != c) || (freqs.len() > 1 && ctl.allele_alg_opt.min_mult * freqs[1].0 >= bases.len() && freqs[1].0 >= ctl.allele_alg_opt.min_alt) @@ -256,7 +263,7 @@ pub fn find_alleles( ps.push(p); } } - if ps.len() == 0 { + if ps.is_empty() { continue; } if ctl.allele_print_opt.con_trace { @@ -273,11 +280,10 @@ pub fn find_alleles( // and sort. let mut types = Vec::<(Vec, usize)>::new(); - for loc in 0..all.len() { + for (loc, item) in all.iter().enumerate() { let mut t = Vec::::new(); - for i in 0..ps.len() { - let p = ps[i]; - let base = all[loc].1[p]; + for &p in &ps { + let base = item.1[p]; t.push(base); } types.push((t, loc)); @@ -287,7 +293,7 @@ pub fn find_alleles( // Traverse the types, grouping contigs that have an identical footprint at // the positions in ps. - let mut keep = Vec::<(Vec, usize, f64, bool)>::new(); + let mut keep = Vec::<(Vec, usize, f64, bool, Vec)>::new(); let mut i = 0; let mut have_ref = false; while i < types.len() { @@ -317,9 +323,11 @@ pub fn find_alleles( || is_ref { let mut q = Vec::>::new(); - for k in i..j { - let m = types[k].1; + let mut barcodes = Vec::::new(); + for t in &types[i..j] { + let m = t.1; q.push(all[m].2.clone()); + barcodes.push(all[m].5.clone()); } q.sort(); let (mut m, mut r) = (0, 0); @@ -329,21 +337,74 @@ pub fn find_alleles( r = s; } let purity = 100.0 - percent_ratio(m, q.len()); - keep.push((types[i].0.clone(), j - i, purity, is_ref)); + keep.push((types[i].0.clone(), j - i, purity, is_ref, barcodes)); if is_ref { have_ref = true; } } i = j; } - if keep.len() > 1 || (keep.len() > 0 && !have_ref) { - // Remove columns that are pure reference. + + // Delete rare alleles, which are probably artifacts. Commented out because it + // seemed to make results worse. 
+ + /* + let mut m = 0; + for i in 0..keep.len() { + m = max(m, keep[i].1); + } + let mut to_delete = vec![false; keep.len()]; + for i in 0..keep.len() { + if keep[i].1 * 10 < m { + to_delete[i] = true; + } + } + erase_if(&mut keep, &to_delete); + */ + + // Print. + + if ctl.allele_print_opt.con { + println!( + "\nDONOR {} ({})", + donor_id + 1, + ctl.origin_info.donor_list[donor_id] + ); + println!("{id} = |{}| = {}", refdata.id[id], refdata.name[id]); + println!("ps = {}", ps.iter().format(",")); + for x in keep.iter() { + let mut bases = String::new(); + for z in x.0.iter() { + bases.push(*z as char); + } + print!("{bases} [{}] {:.1}", x.1, x.2); + if x.3 { + print!(" (ref)"); + } + for i in 0..min(x.4.len(), 5) { + print!(" {}", x.4[i]); + } + println!(); + } + } + + let analysis_mode = !ctl.gen_opt.external_ref.is_empty(); + if (analysis_mode && !keep.is_empty()) + || keep.len() > 1 + || (!keep.is_empty() && !have_ref) + { + // Remove columns that are pure reference. We don't do this if the EXTERNAL_REF + // option was used. let mut to_delete = vec![false; keep[0].0.len()]; - for i in 0..keep[0].0.len() { + for (i, (&p, d)) in ps + .iter() + .take(keep[0].0.len()) + .zip(to_delete.iter_mut()) + .enumerate() + { let mut is_ref = true; - for j in 0..keep.len() { - let p = ps[i]; + for j in &keep { let x = refdata.refs[id].get(p); let c; if x == 0 { @@ -355,67 +416,44 @@ pub fn find_alleles( } else { c = b'T'; } - if c != keep[j].0[i] { + if c != j.0[i] { is_ref = false; } } - if is_ref { - to_delete[i] = true; + if is_ref && !analysis_mode { + *d = true; } } erase_if(&mut ps, &to_delete); - for j in 0..keep.len() { - erase_if(&mut keep[j].0, &to_delete); + for j in keep.iter_mut() { + erase_if(&mut j.0, &to_delete); } let mut keep0 = Vec::>::new(); - for i in 0..keep.len() { - keep0.push(keep[i].0.clone()); + for i in &keep { + keep0.push(i.0.clone()); } keep0.sort(); keep.sort_by(|a, b| a.partial_cmp(b).unwrap()); - // Print. 
- - if ctl.allele_print_opt.con { - println!( - "\nDONOR {} ({})", - donor_id + 1, - ctl.origin_info.donor_list[donor_id] - ); - println!("{} = |{}| = {}", id, refdata.id[id], refdata.name[id]); - println!("ps = {}", ps.iter().format(",")); - for x in keep.iter() { - let mut bases = String::new(); - for z in x.0.iter() { - bases.push(*z as char); - } - print!("{} [{}] {:.1}", bases, x.1, x.2); - if x.3 { - print!(" (ref)"); - } - println!(""); - } - } - // Save alternate references. for x in keep.iter() { - if !x.3 { + if !x.3 || analysis_mode { let mut b = refdata.refs[id].clone(); - for i in 0..ps.len() { + for (&x0, &ps) in x.0.iter().zip(ps.iter()) { let c; - if x.0[i] == b'A' { + if x0 == b'A' { c = 0; - } else if x.0[i] == b'C' { + } else if x0 == b'C' { c = 1; - } else if x.0[i] == b'G' { + } else if x0 == b'G' { c = 2; } else { c = 3; } - b.set_mut(ps[i], c); + b.set_mut(ps, c); } - res.1.push((donor_id, id, b)); + res.1.push((donor_id, id, b, x.1, x.3)); } } @@ -455,8 +493,8 @@ pub fn find_alleles( } } }); - for i in 0..results.len() { - alt_refs.append(&mut results[i].1); + for mut r in results { + alt_refs.append(&mut r.1); } alt_refs.sort(); alt_refs @@ -468,10 +506,11 @@ pub fn find_alleles( // Computational performance dubious because of full alt_refs traversal. 
pub fn sub_alts( + refdata: &RefData, ctl: &EncloneControl, - alt_refs: &Vec<(usize, usize, DnaString)>, + alt_refs: &[(usize, usize, DnaString, usize, bool)], info: &mut Vec, - exact_clonotypes: &mut Vec, + exact_clonotypes: &mut [ExactClonotype], ) { let t = Instant::now(); for i in 0..info.len() { @@ -504,46 +543,49 @@ pub fn sub_alts( unique_sort(&mut donors); for donor in donors { for m in 0..alt_refs.len() { - if alt_refs[m].0 == donor && alt_refs[m].1 == info[i].vsids[j] { - if alt_refs[m].2.len() - ctl.heur.ref_v_trim <= info[i].tigs[j].len() { - let mut alt_errs = 0; - for l in 0..alt_refs[m].2.len() - ctl.heur.ref_v_trim { - let x = alt_refs[m].2.get(l); - let c; - if x == 0 { - c = b'A'; - } else if x == 1 { - c = b'C'; - } else if x == 2 { - c = b'G'; - } else { - c = b'T'; - } - if info[i].tigs[j][l] != c { - alt_errs += 1; - } + if alt_refs[m].0 == donor + && refdata.name[alt_refs[m].1] == refdata.name[info[i].vsids[j]] + && alt_refs[m].2.len() - ctl.heur.ref_v_trim <= info[i].tigs[j].len() + { + let mut alt_errs = 0; + for l in 0..alt_refs[m].2.len() - ctl.heur.ref_v_trim { + let x = alt_refs[m].2.get(l); + let c; + if x == 0 { + c = b'A'; + } else if x == 1 { + c = b'C'; + } else if x == 2 { + c = b'G'; + } else { + c = b'T'; } - if alt_errs < errs { - info[i].vs[j] = alt_refs[m].2.clone(); - info[i].dref[j] = Some(m); // not sure we're actually using this - let ex = &mut exact_clonotypes[info[i].clonotype_id]; - for z in 0..ex.share.len() { - if ex.share[z].seq == info[i].tigs[j] { - ex.share[z].v_ref_id_donor = Some(m); - ex.share[z].v_ref_id_donor_donor = Some(donor); - let mut alts = 0; - let mut mm = m; - while mm >= 1 { - mm -= 1; - if alt_refs[mm].0 == donor - && alt_refs[mm].1 == alt_refs[m].1 - { - alts += 1; - } + if info[i].tigs[j][l] != c { + alt_errs += 1; + } + } + if alt_errs < errs { + info[i].vs[j] = alt_refs[m].2.clone(); + info[i].vsids[j] = alt_refs[m].1; + info[i].dref[j] = Some(m); // not sure we're actually using this + let ex 
= &mut exact_clonotypes[info[i].clonotype_id]; + for z in 0..ex.share.len() { + if ex.share[z].seq == info[i].tigs[j] { + ex.share[z].v_ref_id = alt_refs[m].1; + ex.share[z].v_ref_id_donor = Some(m); + ex.share[z].v_ref_id_donor_donor = Some(donor); + let mut alts = 0; + let mut mm = m; + while mm >= 1 { + mm -= 1; + if alt_refs[mm].0 == donor + && alt_refs[mm].1 == alt_refs[m].1 + { + alts += 1; } - ex.share[z].v_ref_id_donor_alt_id = Some(alts); - break; } + ex.share[z].v_ref_id_donor_alt_id = Some(alts); + break; } } } diff --git a/enclone/src/bin/bump_version.rs b/enclone/src/bin/bump_version.rs deleted file mode 100644 index 40d486047..000000000 --- a/enclone/src/bin/bump_version.rs +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Bump version x.y.z to x.y.z+1 in every Cargo.toml. - -use io_utils::*; -use itertools::Itertools; -use pretty_trace::*; -use std::fs::{read_dir, File}; -use std::io::{BufRead, BufReader, BufWriter, Write}; -use string_utils::*; -use vector_utils::*; - -fn main() { - PrettyTrace::new().on(); - let all = read_dir(".").unwrap(); - let mut versions = Vec::::new(); - for f in all { - let f = f.unwrap().path(); - let f = f.to_str().unwrap(); - let toml = format!("{}/Cargo.toml", f); - if path_exists(&toml) { - let g = open_for_read![&toml]; - let mut found_version = false; - for line in g.lines() { - let s = line.unwrap(); - if s.starts_with("version = \"") { - versions.push(s.between("\"", "\"").to_string()); - found_version = true; - } - } - if !found_version { - eprintln!("\nFailed to find version in {}.\n", toml); - std::process::exit(1); - } - } - } - unique_sort(&mut versions); - if versions.len() > 1 { - eprintln!( - "\nFound multiple versions: {}", - versions.iter().format(", ") - ); - std::process::exit(1); - } - let version = format!( - "{}.{}", - versions[0].rev_before("."), - versions[0].rev_after(".").force_usize() + 1 - ); - let all = read_dir(".").unwrap(); - for f in all { 
- let f = f.unwrap().path(); - let f = f.to_str().unwrap(); - let toml = format!("{}/Cargo.toml", f); - if path_exists(&toml) { - let mut newg = Vec::::new(); - { - let g = open_for_read![&toml]; - for line in g.lines() { - let s = line.unwrap(); - if s.starts_with("version = ") { - newg.push(format!("version = \"{}\"", version)); - } else { - newg.push(s.clone()); - } - } - } - let mut g = open_for_write_new![&toml]; - fwrite!(g, "{}\n", newg.iter().format("\n")); - } - } -} diff --git a/enclone/src/bin/merge_html.rs b/enclone/src/bin/merge_html.rs deleted file mode 100644 index a9714c623..000000000 --- a/enclone/src/bin/merge_html.rs +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Build html files by inserting other html files. -// -// If supplied the single argument BUILD, also rebuild from source. - -use enclone::html::*; -use enclone::misc3::parse_bsv; -use enclone_core::testlist::SITE_EXAMPLES; -use io_utils::*; -use itertools::Itertools; -use pretty_trace::*; -use rayon::prelude::*; -use std::env; -use std::fs::{read_dir, File}; -use std::io::{BufRead, BufReader, BufWriter, Write}; -use std::process::Command; -use string_utils::*; -use vector_utils::*; - -fn main() { - PrettyTrace::new().on(); - - // Build from source. 
- - let args: Vec = env::args().collect(); - if args.len() == 2 && args[1] == "BUILD" { - let mut results = Vec::<(usize, String)>::new(); - for i in 0..SITE_EXAMPLES.len() { - results.push((i, String::new())); - } - results.par_iter_mut().for_each(|r| { - let i = r.0; - let test = SITE_EXAMPLES[i].1; - let args = parse_bsv(&test); - let new = Command::new("target/debug/enclone") - .args(&args) - .arg("MAX_CORES=24") - .output() - .expect(&format!("failed to execute enclone")); - if new.status.code() != Some(0) { - eprint!( - "\nenclone_site_examples: example {} failed to execute, stderr =\n{}", - i + 1, - strme(&new.stderr), - ); - std::process::exit(1); - } - r.1 = stringme(&new.stdout); - }); - for i in 0..SITE_EXAMPLES.len() { - let example_name = SITE_EXAMPLES[i].0; - let out_file = format!("{}", example_name); - let mut f = open_for_write_new![&out_file]; - fwrite!(&mut f, "{}", results[i].1); - } - } - let mut site_ex = Vec::::new(); - for i in 0..SITE_EXAMPLES.len() { - let example_name = SITE_EXAMPLES[i].0; - let out_file = format!("{}", example_name); - site_ex.push(out_file); - } - unique_sort(&mut site_ex); - - // Apply insert_html. - - let all = read_dir("pages").unwrap(); - for f in all { - let f = f.unwrap().path(); - let f = f.to_str().unwrap(); - if f.ends_with(".html.src") { - let mut level = 2; - let mut target = format!("pages/auto/{}.html", f.between("/", ".")); - if target == "pages/auto/index.html".to_string() { - target = "index.html".to_string(); - level = 0; - } - insert_html(&f, &target, false, level); - } - } - - // This is ugly. Edit the html pages. 
- - let all = read_dir("pages/auto").unwrap(); - let mut allx = vec!["index.html".to_string()]; - for f in all { - let f = f.unwrap().path(); - let f = f.to_str().unwrap().to_string(); - if f.ends_with(".html") && !bin_member(&site_ex, &f) { - allx.push(f); - } - } - for f in allx { - let mut edited = false; - let mut lines = Vec::::new(); - { - let h = open_for_read![&format!("{}", f)]; - for line in h.lines() { - let s = line.unwrap(); - lines.push(s.clone()); - if s.contains("googletag") { - edited = true; - } - } - } - if !edited { - let x = format!("{}\n", lines.iter().format("\n")); - let mut h = open_for_write_new![&format!("{}", f)]; - fwrite!(h, "{}", edit_html(&x)); - } - } -} diff --git a/enclone/src/bin/post_process_test.rs b/enclone/src/bin/post_process_test.rs new file mode 100644 index 000000000..41ba74766 --- /dev/null +++ b/enclone/src/bin/post_process_test.rs @@ -0,0 +1,30 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Post process cargo test results to remove stuff we don't need to see. +// This is done without buffering. + +use std::io::{self, BufRead}; + +fn main() { + let reject = [ + "running ", + " Running ", + " Doc-tests ", + " filtered out", + " Compiling ", + " Finished ", + ]; + let stdin = io::stdin(); + for line in stdin.lock().lines() { + let line = line.unwrap(); + let mut rejected = false; + for r in reject.iter() { + if line.contains(r) { + rejected = true; + } + } + if !rejected && !line.is_empty() { + println!("{line}"); + } + } +} diff --git a/enclone/src/bin/review_main_tests.rs b/enclone/src/bin/review_main_tests.rs deleted file mode 100644 index f24b3eb3a..000000000 --- a/enclone/src/bin/review_main_tests.rs +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Walk through the main tests, show which have changed, and give option to update results. -// (First version, just prints output and pipes to less.) 
-// -// NOTE: you have to run this from the enclone_main directory. Otherwise it won't work. - -use enclone::run_test::*; -use enclone_core::testlist::*; -use pager::Pager; -use pretty_trace::*; -use rayon::prelude::*; - -fn main() { - PrettyTrace::new().on(); - Pager::with_pager("less -R -F -X").setup(); - let mut results = Vec::<(usize, bool, String)>::new(); - for i in 0..TESTS.len() { - results.push((i, false, String::new())); - } - results.par_iter_mut().for_each(|res| { - let mut out = String::new(); - run_test( - "enclone", - res.0, - &TESTS[res.0], - "test", - &mut res.1, - &mut res.2, - &mut out, - ); - }); - for i in 0..TESTS.len() { - if !results[i].1 { - print!("{}", results[i].2); - } - } -} diff --git a/enclone/src/bin/sync_to_master.rs b/enclone/src/bin/sync_to_master.rs deleted file mode 100644 index 068effa81..000000000 --- a/enclone/src/bin/sync_to_master.rs +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Sync all the crate versions in the workspace to the versions defined in the file master.toml -// in the top-level of the workspace. 
- -use io_utils::*; -use itertools::Itertools; -use pretty_trace::*; -use std::collections::HashMap; -use std::fs::{read_dir, File}; -use std::io::{BufRead, BufReader, BufWriter, Write}; -use string_utils::*; - -fn main() { - PrettyTrace::new().on(); - let mut version = HashMap::::new(); - let f = open_for_read!["master.toml"]; - for line in f.lines() { - let s = line.unwrap(); - if !s.starts_with('#') && s.contains("=") { - version.insert(s.before(" = ").to_string(), s.after(" = ").to_string()); - } - } - let all = read_dir(".").unwrap(); - for f in all { - let f = f.unwrap().path(); - let f = f.to_str().unwrap(); - let toml = format!("{}/Cargo.toml", f); - if path_exists(&toml) { - let mut newg = Vec::::new(); - { - let g = open_for_read![&toml]; - for line in g.lines() { - let s = line.unwrap(); - let mut t = s.clone(); - if s.contains(" =") && !s.contains(" = [") { - let cratex = s.before(" =").to_string(); - if version.contains_key(&cratex) { - t = format!("{} = {}", cratex, version[&cratex]); - } - } - newg.push(t); - } - } - let mut g = open_for_write_new![&toml]; - fwrite!(g, "{}\n", newg.iter().format("\n")); - } - } -} diff --git a/enclone/src/bin/traceback1.rs b/enclone/src/bin/traceback1.rs index 965ec0ca5..478761c23 100644 --- a/enclone/src/bin/traceback1.rs +++ b/enclone/src/bin/traceback1.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // Check that an out-of-range reference within a rayon parallel loop yields a correct traceback. // Make sure that line 17 stays as line 17. Otherwise change the reference to traceback1.rs:17. @@ -6,7 +6,7 @@ // This was originally engineered without PrettyTrace, but the problem with this was that if the // test failed, you get a godawful mess that is impossible to distangle. 
-use pretty_trace::*; +use pretty_trace::PrettyTrace; use rayon::prelude::*; fn main() { @@ -20,9 +20,9 @@ fn main() { #[test] fn test_traceback1() { - extern crate assert_cmd; + use assert_cmd; use assert_cmd::prelude::*; - use enclone_core::*; + use std::fmt::Write; use std::{env, process::Command}; let mut cmd = Command::cargo_bin("traceback1").expect( "\nAttempt to run traceback1 failed. The most likely explanation for this is that\n\ @@ -31,23 +31,22 @@ fn test_traceback1() { ); let cmd = cmd .output() - .expect(&format!("very strange, failed to execute test_traceback1")); + .unwrap_or_else(|_| panic!("{}", "very strange, failed to execute test_traceback1")); let morsel = "traceback1.rs:17"; let err = std::str::from_utf8(&cmd.stderr).unwrap(); - if !err.contains(&morsel) { + if !err.contains(morsel) { let mut head = String::new(); let lines = err.split('\n').collect::>(); const MAX_LINES: usize = 60; - for i in 0..std::cmp::min(lines.len(), MAX_LINES) { - head += &format!("{}\n", lines[i]); + for &line in lines.iter().take(MAX_LINES) { + writeln!(head, "{line}").unwrap(); } eprint!( "\n▓▓▓ test_traceback1 failed because did not find {} as expected;\n\n\ - this was using enclone version {} : {}\n\n\ + this was using enclone version {} \n\n\ ▓▓▓ traceback begins with\n{}", morsel, env!("CARGO_PKG_VERSION"), - version_string(), head, ); std::process::exit(1); diff --git a/enclone/src/bin/update_all_main_tests.rs b/enclone/src/bin/update_all_main_tests.rs deleted file mode 100644 index 70435228a..000000000 --- a/enclone/src/bin/update_all_main_tests.rs +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Update the output of all main tests. -// -// Do not do this unless you're highly confident that it's safe to do so, without -// manually examining the outputs. -// -// NOTE: you have to run this from the enclone_main directory. Otherwise it won't work. 
- -use enclone::run_test::*; -use enclone_core::testlist::*; -use io_utils::*; -use pretty_trace::*; -use rayon::prelude::*; -use std::fs::File; -use std::io::{BufWriter, Write}; - -fn main() { - PrettyTrace::new().on(); - let mut results = Vec::<(usize, bool, String)>::new(); - for i in 0..TESTS.len() { - if !TESTS[i].contains("EXPECT_FAIL") && !TESTS[i].contains("EXPECT_OK") { - results.push((i, false, String::new())); - } - } - results.par_iter_mut().for_each(|res| { - let it = res.0; - let testname = &TESTS[it]; - let mut out = String::new(); - run_test( - "enclone", it, &testname, "test", &mut res.1, &mut res.2, &mut out, - ); - if !res.1 { - let out_file = format!("testx/inputs/outputs/enclone_test{}_output", it + 1); - let mut f = open_for_write_new![&out_file]; - fwrite!(f, "{}", out); - } - }); -} diff --git a/enclone/src/bin/update_enclone_binary.rs b/enclone/src/bin/update_enclone_binary.rs new file mode 100644 index 000000000..b70da8b24 --- /dev/null +++ b/enclone/src/bin/update_enclone_binary.rs @@ -0,0 +1,44 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Update the internally public enclone binary. This is also done by start_release, but sometimes +// one wants to update the binary without making a release. 
+ +use enclone_core::defs::get_config; +use io_utils::path_exists; +use pretty_trace::PrettyTrace; +use std::collections::HashMap; +use std::env; + +#[cfg(not(target_os = "windows"))] +use std::os::unix::fs::PermissionsExt; + +fn main() { + PrettyTrace::new().on(); + let mut config = HashMap::::new(); + let mut config_file = String::new(); + for (key, value) in env::vars() { + if key == "ENCLONE_CONFIG" { + config_file = value.to_string(); + } + } + if get_config(&config_file, &mut config) { + let bin = &config["enclone_linux_bin"]; + if !path_exists(bin) { + std::fs::create_dir_all(bin).unwrap(); + } + let current = format!("{bin}/enclone"); + let last = format!("{bin}/enclone_last"); + if path_exists(&last) { + std::fs::remove_file(&last).unwrap(); + } + if path_exists(¤t) { + std::fs::rename(¤t, &last).unwrap(); + } + std::fs::copy("target/debug/enclone", ¤t).unwrap(); + #[cfg(not(target_os = "windows"))] + { + let perms = std::fs::Permissions::from_mode(0o775); + std::fs::set_permissions(¤t, perms).unwrap(); + } + } +} diff --git a/enclone/src/enclone.test b/enclone/src/enclone.test deleted file mode 100755 index b344bb4e2..000000000 --- a/enclone/src/enclone.test +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -rootdir=$(dirname $0) - -enclone \ - BCR=`cat $rootdir/../../enclone_core/src/enclone.testdata | grep -v '#' | tr -d ' ' | grep -v '^$' | head --bytes=-1 | \ - tr '\n' ';' | tr -s ';'` \ - ANN COMP SHOW_BC FAIL_ONLY=true PLAIN NSILENT PRINT_FAILED_JOINS \ - PRE=/mnt/assembly/vdj/current14 MIX_DONORS REQUIRED_FPS=15 $* diff --git a/enclone/src/enclone.test.tcr b/enclone/src/enclone.test.tcr index ee413d5dc..09f62caa8 100755 --- a/enclone/src/enclone.test.tcr +++ b/enclone/src/enclone.test.tcr @@ -5,5 +5,5 @@ set rootdir = `dirname $0` enclone \ TCR=`cat $rootdir/../../enclone_core/src/enclone.testdata.tcr | tail -n +3 | grep -v '#' | \ tr -d ' ' | head --bytes=-1 | tr '\n' ';' | tr -s ';'` \ - ANN COMP SHOW_BC FAIL_ONLY=true PLAIN NSILENT PRINT_FAILED_JOINS 
\ - PRE=/mnt/assembly/vdj/current14 $* + ANN COMP SHOW_BC MIN_DONORS=2 PLAIN NSILENT PRINT_FAILED_JOINS \ + PRE=/mnt/assembly/vdj/current15 $* diff --git a/enclone/src/enclone.test2 b/enclone/src/enclone.test2 index 64fd30e7f..02d8ff20f 100755 --- a/enclone/src/enclone.test2 +++ b/enclone/src/enclone.test2 @@ -5,5 +5,5 @@ set rootdir = `dirname $0` enclone \ BCR=`cat $rootdir/../../enclone_core/src/testdata.public.bcr.human | grep -v '#' | tr -d ' ' | grep -v '^$' | head --bytes=-1 | \ tr '\n' ';' | tr -s ';'` \ - ANN COMP SHOW_BC FAIL_ONLY=true PLAIN NSILENT PRINT_FAILED_JOINS \ - MIX_DONORS PRE=/mnt/assembly/vdj/current14 $* + ANN COMP SHOW_BC MIN_DONORS=2 PLAIN NSILENT PRINT_FAILED_JOINS \ + PRE=/mnt/assembly/vdj/current15 $* diff --git a/enclone/src/enclone.testdata.bcr.gex b/enclone/src/enclone.testdata.bcr.gex deleted file mode 100644 index 5fb73f878..000000000 --- a/enclone/src/enclone.testdata.bcr.gex +++ /dev/null @@ -1,73 +0,0 @@ -# These are matched BCR/GEX datasets, all human except one mouse. -# -# These overlap other dataset lists, and many of the ids given here are not given in any -# other list. All are included in the big VDJ dataset collection that is internal to 10x, -# and not all are distributable. -# -# Note that in some cases, multiple donors etc. are included within individual sets. 
- -DONOR=1 -NAME=B cells -BCR=140696-140711 -GEX=140712-140727 - -DONOR=2 -NAME=PBMC -BCR=135799-135830,140688-140695,140680-140687 -GEX=135767-135798,140612-140619,140620-140627 - -DONOR=3 -NAME=JWY545 -BCR=145309-145324,143550,179159,155246-155253,1016691-1016692,180025-180037 -GEX=145261-145276,143453,174145,155641-155648,1016669-1016670,180007-180019 - -DONOR=4 -NAME=AGBT pre/post-vaccination (4 donors, 2 timepoints) PBMC -BCR=174919-174951,174953-174958,174960-174966,174999-175003,175005-175006,175008-175010,175012-175013 -GEX=173169-173201,173203-173208,173210-173216,179831-179835,173223-173224,173226-173228,173230-173231 - -DONOR=5 -NAME=CRC-3111 tumor plus normal (PBMC) -BCR=1005384,1005385,1005386,1005387 -GEX=1010615,1010616,1005229,1005230 - -DONOR=6 -NAME=melanoma -BCR=1021300-1021303,1020665-1020668 -GEX=1019531-1019534,1020597-1020600 - -DONOR=7 -NAME=PBMC (JWY) -BCR=1009445-1009448,1009450-1009468,1016687-1016688,1018289-1018293,1020559 -GEX=1008841-1008844,1008846-1008864,1019932-1019933,1017640-1017644,1017655 - -DONOR=8 -NAME=lung cancer -BCR=163919,1020669-1020676,1021304-1021311,1032722-1032729,1027521-1027536 -GEX=160549,1020601-1020608,1019535-1019542,1032690-1032697,1027481-1027496 - -DONOR=9 -NAME=ovarian cancer -BCR=123085-123086,123089-123090 -GEX=123749-123750,123753-123754 - -DONOR=10 -NAME=MALT -BCR=83808-83809,86233-86234 -GEX=83216-83217,85651-85652 - -DONOR=11 -NAME=PBMC -BCR=129476-129486,131256,131270,131275,134532-134538 -GEX=131092-131102,127966,127948,127953,133968-133974 - -DONOR=12 -NAME= -BCR=86202-86208,86210-86216 -GEX=86518-86524,86218-86224 - -DONOR=m1 -NAME=mouse -BCR=1023660-1023661,1023652-1023653,1023933-1023934,1023925-1023926 -GEX=1018214-1018215,1018198-1018199,1018206-1018207,1018190-1018191 -SPECIES=mouse diff --git a/enclone/src/enclone.testdata.mouse b/enclone/src/enclone.testdata.mouse index d2b8ea670..3a6da7756 100644 --- a/enclone/src/enclone.testdata.mouse +++ b/enclone/src/enclone.testdata.mouse @@ -4,4 
+4,4 @@ 70838 = B6 BCR 77990 = BALB/c BCR -128043 +128043 = BCR diff --git a/enclone/src/enclone.testdata.targeted.bcr b/enclone/src/enclone.testdata.targeted.bcr new file mode 100644 index 000000000..f4b6671b8 --- /dev/null +++ b/enclone/src/enclone.testdata.targeted.bcr @@ -0,0 +1 @@ +1076423 = targeted GEX + BCR diff --git a/enclone/src/enclone.testlist.all b/enclone/src/enclone.testlist.all index 488b3a86c..ee16e6da8 100644 --- a/enclone/src/enclone.testlist.all +++ b/enclone/src/enclone.testlist.all @@ -1 +1,1296 @@ -35884 36368 37661 40043 40086 40935 40936 40937 40938 40939 40940 40942 40943 40944 40945 40946 40947 40948 40951 40952 40953 40954 40955 40956 40959 40960 40961 40962 40963 40964 40965 40966 40967 40968 40969 40970 40971 40972 42787 42788 42789 42817 43892 43893 43894 43895 43896 43897 43898 43899 44933 44934 44935 44936 44979 44980 44981 44982 44987 44988 45977 45987 46032 47199 47200 47201 47202 47203 47204 47211 47212 47213 47214 47215 47216 47674 47680 48600 48602 48612 48614 48616 48618 48620 48622 48624 48626 48630 48632 48634 52177 70838 74396 76829 77990 79619 79620 79621 79622 79627 79628 79629 79630 83808 83809 83812 83813 83815 83816 83817 83819 83821 85328 85329 85330 85331 85332 85333 85344 85345 85346 85347 85348 85349 85360 85361 85362 85363 85364 85365 86052 86189 86197 86202 86203 86204 86205 86206 86207 86208 86210 86211 86212 86213 86214 86215 86216 86225 86226 86227 86228 86229 86230 86231 86232 86233 86234 86237 86238 86239 86240 87252 87253 87254 87255 87256 87257 87258 87259 87302 87483 88348 88349 88350 88351 88352 88353 88354 88355 88356 88357 88358 88359 88360 88361 88362 88363 88364 89642 89643 89644 89645 90100 90103 90104 90105 90106 90107 90108 90109 90110 90111 90112 90113 91295 91296 91297 91298 91299 91300 91301 91302 91312 91314 91316 91318 91320 91322 91324 91673 92751 92758 92763 95455 99634 99635 99636 99637 99638 99639 99640 99641 99642 99643 99644 99645 101287 106060 106062 106227 106228 106229 106236 106237 
106240 106241 106242 106243 106244 107518 107520 107522 107524 107525 107527 107529 107531 107532 107533 107534 107537 107539 107541 107542 107544 107545 107546 107547 107548 107549 107550 107551 107555 107556 107557 107560 107574 107575 107576 107577 108161 108162 108165 108167 108168 109767 109768 109770 109772 110552 110553 110554 110555 110556 110557 110558 110559 110560 110561 110562 110563 110564 110565 110566 110567 110568 110569 110570 110571 110572 110573 110574 110575 110576 110577 110578 110579 110580 110581 110582 110583 110588 110592 110597 110601 110602 110603 110607 110608 110610 112740 112741 113526 113808 114510 114512 114515 114517 114519 114521 114524 114844 116087 116088 116089 116090 116091 116092 116093 116094 116095 116096 116097 116098 116099 116100 116101 116102 116103 116104 116105 116106 116107 116108 116109 116110 116111 116112 116156 116157 116158 116159 116164 116165 116166 116167 116168 116169 116170 117417 117458 117459 117703 117704 117707 118053 118175 118176 118177 118178 118179 118180 118181 118182 118183 118184 118185 118186 118187 118188 118189 118190 118191 118192 121352 121472 121473 121474 121475 121480 121481 121483 123083 123084 123085 123086 123089 123090 123092 123137 123138 123141 123142 123176 123178 123179 123182 123183 123186 123187 123190 123191 124477 124478 124479 124480 124481 124482 124483 124484 124485 124486 124487 124488 124489 124490 124493 124494 124547 124550 126184 126185 128024 128037 128040 128043 128045 128048 129476 129477 129478 129480 129481 129483 129486 129517 129518 129519 129520 129973 129974 129975 129976 129979 129980 131015 131036 131065 131066 131235 131237 131240 131256 131268 131270 131275 132723 132725 132882 132883 132884 132885 132888 132889 132890 132891 132895 132896 132897 132900 132901 132902 132903 132906 132907 132908 132909 132910 132912 132913 132914 132915 134532 134533 134534 134535 134536 134537 134538 134592 140330 140331 140332 140333 140334 140335 140336 140337 140338 
140339 140340 140341 140342 140343 140344 140345 140346 140347 140348 140349 140350 140351 140352 140353 140354 140355 140356 140357 140358 140359 140360 140361 140362 140363 140364 140366 140367 140368 140369 160326 160342 163911 163914 165807 165808 1021341 +# List of all enclone test and development datasets. These are largely +# internal to 10x Genomics, for various reasons including because the data +# are not in general consented for public release. The purpose of this catalog +# is to enable reconstruction of the datasets in the event of data loss. +# This file is autogenerated by enclone/src/bin/make_enclone_testlist_all.rs and should +# not be manually edited. + +id,cellranger_version +35884,unknown +36368,unknown +37661,unknown +40043,unknown +40086,unknown +40935,cellranger-pd-5.0.0 +40936,cellranger-pd-5.0.0 +40937,cellranger-pd-5.0.0 +40938,cellranger-pd-5.0.0 +40939,unknown +40940,unknown +40942,unknown +40943,cellranger-pd-5.0.0 +40944,cellranger-pd-5.0.0 +40945,cellranger-pd-5.0.0 +40946,cellranger-pd-5.0.0 +40947,unknown +40948,unknown +40951,cellranger-pd-5.0.0 +40952,cellranger-pd-5.0.0 +40953,cellranger-pd-5.0.0 +40954,cellranger-pd-5.0.0 +40955,unknown +40956,unknown +40959,cellranger-pd-5.0.0 +40960,cellranger-pd-5.0.0 +40961,cellranger-pd-5.0.0 +40962,cellranger-pd-5.0.0 +40963,unknown +40964,unknown +40965,unknown +40966,unknown +40967,unknown +40968,unknown +40969,unknown +40970,unknown +40971,unknown +40972,unknown +42787,cellranger-pd-5.0.0 +42788,cellranger-pd-5.0.0 +42789,cellranger-pd-5.0.0 +42817,unknown +43892,cellranger-pd-5.0.0 +43893,cellranger-pd-5.0.0 +43894,cellranger-pd-5.0.0 +43895,cellranger-pd-5.0.0 +43896,cellranger-pd-5.0.0 +43897,cellranger-pd-5.0.0 +43898,cellranger-pd-5.0.0 +43899,cellranger-pd-5.0.0 +44933,unknown +44934,unknown +44935,unknown +44936,unknown +44979,cellranger-pd-5.0.0 +44980,cellranger-pd-5.0.0 +44981,cellranger-pd-5.0.0 +44982,cellranger-pd-5.0.0 +44987,cellranger-pd-5.0.0 +44988,cellranger-pd-5.0.0 
+45977,cellranger-pd-5.0.0 +45987,unknown +46032,cellranger-pd-5.0.0 +47199,cellranger-pd-5.0.0 +47200,cellranger-pd-5.0.0 +47201,cellranger-pd-5.0.0 +47202,cellranger-pd-5.0.0 +47203,cellranger-pd-5.0.0 +47204,cellranger-pd-5.0.0 +47211,cellranger-pd-5.0.0 +47212,cellranger-pd-5.0.0 +47213,cellranger-pd-5.0.0 +47214,cellranger-pd-5.0.0 +47215,cellranger-pd-5.0.0 +47216,cellranger-pd-5.0.0 +47674,unknown +47680,unknown +48600,unknown +48602,unknown +48612,unknown +48614,unknown +48616,unknown +48618,unknown +48620,unknown +48622,unknown +48624,unknown +48626,unknown +48630,unknown +48632,unknown +48634,unknown +52177,cellranger-pd-5.0.0 +70838,unknown +74396,cellranger-pd-5.0.0 +76829,unknown +77990,unknown +79619,unknown +79620,unknown +79621,unknown +79622,unknown +79627,unknown +79628,unknown +79629,unknown +79630,unknown +82445,unknown +83216,unknown +83217,unknown +83808,cellranger-pd-5.0.0 +83809,unknown +83812,unknown +83813,unknown +83815,unknown +83816,unknown +83817,unknown +83819,unknown +83821,unknown +84542,unknown +84546,unknown +84547,unknown +84548,unknown +85328,unknown +85329,unknown +85330,unknown +85331,unknown +85332,unknown +85333,cellranger-pd-5.0.0 +85344,unknown +85345,unknown +85346,unknown +85347,unknown +85348,unknown +85349,unknown +85360,unknown +85361,unknown +85362,unknown +85363,unknown +85364,unknown +85365,unknown +85651,unknown +85652,unknown +85679,unknown +86052,unknown +86189,unknown +86197,unknown +86202,unknown +86203,unknown +86204,unknown +86205,unknown +86206,unknown +86207,unknown +86208,unknown +86210,unknown +86211,unknown +86212,unknown +86213,unknown +86214,unknown +86215,unknown +86216,unknown +86218,unknown +86219,unknown +86220,unknown +86221,unknown +86222,unknown +86223,unknown +86224,unknown +86225,unknown +86226,unknown +86227,unknown +86228,unknown +86229,unknown +86230,unknown +86231,cellranger-pd-5.0.0 +86232,unknown +86233,cellranger-pd-5.0.0 +86234,unknown +86237,cellranger-pd-5.0.0 +86238,unknown 
+86239,unknown +86240,unknown +86518,unknown +86519,unknown +86520,unknown +86521,unknown +86522,unknown +86523,unknown +86524,unknown +87252,unknown +87253,unknown +87254,unknown +87255,unknown +87256,unknown +87257,unknown +87258,unknown +87259,unknown +87302,unknown +87483,unknown +88348,unknown +88349,unknown +88350,unknown +88351,unknown +88352,unknown +88353,unknown +88354,unknown +88355,unknown +88356,unknown +88357,unknown +88358,unknown +88359,unknown +88360,unknown +88361,unknown +88362,unknown +88363,unknown +88364,unknown +89642,unknown +89643,unknown +89644,unknown +89645,unknown +90100,unknown +90103,unknown +90104,unknown +90105,unknown +90106,unknown +90107,unknown +90108,unknown +90109,unknown +90110,unknown +90111,unknown +90112,unknown +90113,unknown +91295,unknown +91296,unknown +91297,unknown +91298,unknown +91299,unknown +91300,unknown +91301,unknown +91302,unknown +91312,unknown +91314,unknown +91316,unknown +91318,unknown +91320,unknown +91322,unknown +91324,unknown +91673,unknown +92751,unknown +92758,unknown +92763,unknown +95455,unknown +99634,unknown +99635,unknown +99636,unknown +99637,unknown +99638,unknown +99639,unknown +99640,unknown +99641,unknown +99642,unknown +99643,unknown +99644,unknown +99645,unknown +101287,cellranger-pd-5.0.0 +106060,unknown +106062,unknown +106227,unknown +106228,unknown +106229,unknown +106236,unknown +106237,unknown +106240,unknown +106241,unknown +106242,unknown +106243,unknown +106244,unknown +107518,unknown +107520,unknown +107522,unknown +107524,unknown +107525,unknown +107527,unknown +107529,unknown +107531,unknown +107532,unknown +107533,unknown +107534,unknown +107537,unknown +107539,unknown +107541,unknown +107542,unknown +107544,unknown +107545,unknown +107546,unknown +107547,unknown +107548,unknown +107549,unknown +107550,unknown +107551,unknown +107555,unknown +107556,unknown +107557,unknown +107560,unknown +107574,unknown +107575,unknown +107576,unknown +107577,unknown +108161,unknown 
+108162,unknown +108165,unknown +108167,unknown +108168,unknown +109767,unknown +109768,unknown +109770,unknown +109772,unknown +110552,unknown +110553,unknown +110554,unknown +110555,unknown +110556,unknown +110557,unknown +110558,unknown +110559,unknown +110560,unknown +110561,unknown +110562,unknown +110563,unknown +110564,unknown +110565,unknown +110566,unknown +110567,unknown +110568,unknown +110569,unknown +110570,unknown +110571,unknown +110572,unknown +110573,unknown +110574,unknown +110575,unknown +110576,unknown +110577,unknown +110578,unknown +110579,unknown +110580,unknown +110581,unknown +110582,unknown +110583,unknown +110588,unknown +110592,unknown +110597,unknown +110601,unknown +110602,unknown +110603,unknown +110607,unknown +110608,unknown +110610,unknown +112740,unknown +112741,unknown +113526,unknown +113808,unknown +114510,unknown +114512,unknown +114515,unknown +114517,unknown +114519,unknown +114521,unknown +114524,unknown +114844,unknown +116087,unknown +116088,unknown +116089,unknown +116090,unknown +116091,unknown +116092,unknown +116093,unknown +116094,unknown +116095,unknown +116096,unknown +116097,unknown +116098,unknown +116099,unknown +116100,unknown +116101,unknown +116102,unknown +116103,unknown +116104,unknown +116105,unknown +116106,unknown +116107,unknown +116108,unknown +116109,unknown +116110,unknown +116111,unknown +116112,unknown +116156,unknown +116157,unknown +116158,unknown +116159,unknown +116164,unknown +116165,unknown +116166,unknown +116167,unknown +116168,unknown +116169,unknown +116170,unknown +117417,unknown +117458,unknown +117459,unknown +117703,unknown +117704,unknown +117707,unknown +118053,unknown +118175,unknown +118176,unknown +118177,unknown +118178,unknown +118179,unknown +118180,unknown +118181,unknown +118182,unknown +118183,unknown +118184,unknown +118185,unknown +118186,unknown +118187,unknown +118188,unknown +118189,unknown +118190,unknown +118191,unknown +118192,unknown +121289,unknown +121290,unknown 
+121293,unknown +121297,unknown +121298,unknown +121301,unknown +121302,unknown +121352,unknown +121472,unknown +121473,unknown +121474,unknown +121475,unknown +121480,unknown +121481,unknown +121483,unknown +121488,unknown +121489,unknown +121490,unknown +121491,unknown +121496,unknown +121497,unknown +121499,unknown +123083,unknown +123084,unknown +123085,cellranger-pd-5.0.0 +123086,unknown +123089,cellranger-pd-5.0.0 +123090,unknown +123092,unknown +123137,unknown +123138,unknown +123141,unknown +123142,unknown +123176,unknown +123178,unknown +123179,unknown +123182,unknown +123183,unknown +123186,unknown +123187,unknown +123190,unknown +123191,unknown +123201,unknown +123205,unknown +123217,unknown +123749,unknown +123750,unknown +123753,unknown +123754,unknown +124477,unknown +124478,unknown +124479,unknown +124480,unknown +124481,cellranger-pd-5.0.0 +124482,cellranger-pd-5.0.0 +124483,cellranger-pd-5.0.0 +124484,cellranger-pd-5.0.0 +124485,cellranger-pd-5.0.0 +124486,cellranger-pd-5.0.0 +124487,cellranger-pd-5.0.0 +124488,cellranger-pd-5.0.0 +124489,cellranger-pd-5.0.0 +124490,cellranger-pd-5.0.0 +124493,unknown +124494,unknown +124547,cellranger-pd-5.0.0 +126106,unknown +126184,unknown +126185,unknown +127798,unknown +127801,unknown +127948,unknown +127953,unknown +127966,unknown +128024,cellranger-pd-5.0.0 +128037,cellranger-pd-5.0.0 +128040,cellranger-pd-5.0.0 +128043,unknown +128045,cellranger-pd-5.0.0 +128048,cellranger-pd-5.0.0 +129476,unknown +129477,unknown +129478,unknown +129479,cellranger-pd-dependabot-cargo-lib-rust-regex-1.3.9-2020.0416.2-953-g85421d3d7 +129480,unknown +129481,unknown +129482,cellranger-pd-dependabot-cargo-lib-rust-regex-1.3.9-2020.0416.2-953-g85421d3d7 +129483,unknown +129484,cellranger-pd-dependabot-cargo-lib-rust-regex-1.3.9-2020.0416.2-953-g85421d3d7 +129485,cellranger-pd-dependabot-cargo-lib-rust-regex-1.3.9-2020.0416.2-953-g85421d3d7 +129486,unknown +129517,unknown +129518,unknown +129519,unknown +129520,unknown 
+129973,unknown +129974,unknown +129975,unknown +129976,unknown +129979,unknown +129980,unknown +131015,unknown +131036,unknown +131065,unknown +131066,unknown +131092,unknown +131093,unknown +131094,unknown +131095,unknown +131096,unknown +131097,unknown +131098,unknown +131099,unknown +131100,unknown +131101,unknown +131102,unknown +131235,unknown +131237,unknown +131240,unknown +131256,unknown +131268,unknown +131270,unknown +131275,unknown +132723,unknown +132725,unknown +132882,unknown +132883,unknown +132884,unknown +132885,unknown +132888,unknown +132889,unknown +132890,unknown +132891,unknown +132895,unknown +132896,unknown +132897,unknown +132900,unknown +132901,unknown +132902,unknown +132903,unknown +132906,unknown +132907,unknown +132908,unknown +132909,unknown +132910,unknown +132912,unknown +132913,unknown +132914,unknown +132915,unknown +133968,unknown +133969,unknown +133970,unknown +133971,unknown +133972,unknown +133973,unknown +133974,unknown +134351,unknown +134532,unknown +134533,unknown +134534,unknown +134535,unknown +134536,unknown +134537,unknown +134538,unknown +134592,unknown +135767,unknown +135768,unknown +135769,unknown +135770,unknown +135771,unknown +135772,unknown +135773,unknown +135774,unknown +135775,unknown +135776,unknown +135777,unknown +135778,unknown +135779,unknown +135780,unknown +135781,unknown +135782,unknown +135783,unknown +135784,unknown +135785,unknown +135786,unknown +135787,unknown +135788,unknown +135789,unknown +135790,unknown +135791,unknown +135792,unknown +135793,unknown +135794,unknown +135795,unknown +135796,unknown +135797,unknown +135798,unknown +135799,cellranger-pd-3.1-4003.1.5 +135800,cellranger-pd-3.1-4003.1.5 +135801,cellranger-pd-3.1-4003.1.5 +135802,cellranger-pd-3.1-4003.1.5 +135803,cellranger-pd-3.1-4003.1.5 +135804,cellranger-pd-3.1-4003.1.5 +135805,cellranger-pd-3.1-4003.1.5 +135806,cellranger-pd-3.1-4003.1.5 +135807,cellranger-pd-3.1-4003.1.5 +135808,cellranger-pd-3.1-4003.1.5 
+135809,cellranger-pd-3.1-4003.1.5 +135810,cellranger-pd-3.1-4003.1.5 +135811,cellranger-pd-3.1-4003.1.5 +135812,cellranger-pd-3.1-4003.1.5 +135813,cellranger-pd-3.1-4003.1.5 +135814,cellranger-pd-3.1-4003.1.5 +135815,cellranger-pd-3.1-4003.1.5 +135816,cellranger-pd-3.1-4003.1.5 +135817,cellranger-pd-3.1-4003.1.5 +135818,cellranger-pd-3.1-4003.1.5 +135819,cellranger-pd-3.1-4003.1.5 +135820,cellranger-pd-3.1-4003.1.5 +135821,cellranger-pd-3.1-4003.1.5 +135822,cellranger-pd-3.1-4003.1.5 +135823,cellranger-pd-3.1-4003.1.5 +135824,cellranger-pd-3.1-4003.1.5 +135825,cellranger-pd-3.1-4003.1.5 +135826,cellranger-pd-3.1-4003.1.5 +135827,cellranger-pd-3.1-4003.1.5 +135828,cellranger-pd-3.1-4003.1.5 +135829,cellranger-pd-3.1-4003.1.5 +135830,cellranger-pd-3.1-4003.1.5 +140330,unknown +140331,unknown +140332,unknown +140333,unknown +140334,unknown +140335,unknown +140336,unknown +140337,unknown +140338,unknown +140339,unknown +140340,unknown +140341,unknown +140342,unknown +140343,unknown +140344,unknown +140345,unknown +140346,unknown +140347,unknown +140348,unknown +140349,unknown +140350,unknown +140351,unknown +140352,unknown +140353,unknown +140354,unknown +140355,unknown +140356,unknown +140357,unknown +140358,unknown +140359,unknown +140360,unknown +140361,unknown +140362,unknown +140363,unknown +140364,unknown +140366,unknown +140367,unknown +140368,unknown +140369,unknown +140612,unknown +140613,unknown +140614,unknown +140615,unknown +140616,unknown +140617,unknown +140618,unknown +140619,unknown +140620,unknown +140621,unknown +140622,unknown +140623,unknown +140624,unknown +140625,unknown +140626,unknown +140627,unknown +140680,cellranger-pd-3.1-4003.1.5 +140681,cellranger-pd-3.1-4003.1.5 +140682,cellranger-pd-3.1-4003.1.5 +140683,cellranger-pd-3.1-4003.1.5 +140684,cellranger-pd-3.1-4003.1.5 +140685,cellranger-pd-3.1-4003.1.5 +140686,cellranger-pd-3.1-4003.1.5 +140687,cellranger-pd-3.1-4003.1.5 +140688,cellranger-pd-3.1-4003.1.5 +140689,cellranger-pd-3.1-4003.1.5 
+140690,cellranger-pd-3.1-4003.1.5 +140691,cellranger-pd-3.1-4003.1.5 +140692,cellranger-pd-3.1-4003.1.5 +140693,cellranger-pd-3.1-4003.1.5 +140694,cellranger-pd-3.1-4003.1.5 +140695,cellranger-pd-3.1-4003.1.5 +140696,cellranger-pd-3.1-4003.1.5 +140697,cellranger-pd-3.1-4003.1.5 +140698,cellranger-pd-3.1-4003.1.5 +140699,cellranger-pd-3.1-4003.1.5 +140700,cellranger-pd-3.1-4003.1.5 +140701,cellranger-pd-3.1-4003.1.5 +140702,cellranger-pd-3.1-4003.1.5 +140703,cellranger-pd-3.1-4003.1.5 +140704,cellranger-pd-3.1-4003.1.5 +140705,cellranger-pd-3.1-4003.1.5 +140706,cellranger-pd-3.1-4003.1.5 +140707,cellranger-pd-3.1-4003.1.5 +140708,cellranger-pd-3.1-4003.1.5 +140709,cellranger-pd-3.1-4003.1.5 +140710,cellranger-pd-3.1-4003.1.5 +140711,cellranger-pd-3.1-4003.1.5 +140712,unknown +140713,unknown +140714,unknown +140715,unknown +140716,unknown +140717,unknown +140718,unknown +140719,unknown +140720,unknown +140721,unknown +140722,unknown +140723,unknown +140724,unknown +140725,unknown +140726,unknown +140727,unknown +143453,unknown +143550,cellranger-pd-master-4009.44.1 +145261,unknown +145262,unknown +145263,unknown +145264,unknown +145265,unknown +145266,unknown +145267,unknown +145268,unknown +145269,unknown +145270,unknown +145271,unknown +145272,unknown +145273,unknown +145274,unknown +145275,unknown +145276,unknown +145309,cellranger-pd-master-4009.44.1 +145310,cellranger-pd-master-4009.44.1 +145311,cellranger-pd-master-4009.44.1 +145312,cellranger-pd-master-4009.44.1 +145313,cellranger-pd-master-4009.44.1 +145314,cellranger-pd-master-4009.44.1 +145315,cellranger-pd-master-4009.44.1 +145316,cellranger-pd-master-4009.44.1 +145317,cellranger-pd-master-4009.44.1 +145318,cellranger-pd-master-4009.44.1 +145319,cellranger-pd-master-4009.44.1 +145320,cellranger-pd-master-4009.44.1 +145321,cellranger-pd-master-4009.44.1 +145322,cellranger-pd-master-4009.44.1 +145323,cellranger-pd-master-4009.44.1 +145324,cellranger-pd-master-4009.44.1 +155246,cellranger-pd-master-4009.46.0 
+155247,cellranger-pd-master-4009.46.0 +155248,cellranger-pd-master-4009.46.0 +155249,cellranger-pd-master-4009.46.0 +155250,cellranger-pd-master-4009.46.0 +155251,cellranger-pd-master-4009.46.0 +155252,cellranger-pd-master-4009.46.0 +155253,cellranger-pd-master-4009.46.0 +155641,unknown +155642,unknown +155643,unknown +155644,unknown +155645,unknown +155646,unknown +155647,unknown +155648,unknown +160326,unknown +160342,unknown +160549,unknown +163911,cellranger-pd-5.0.0 +163914,cellranger-pd-5.0.0 +163919,cellranger-pd-master-4009.46.5 +165807,cellranger-pd-5.0.0 +165808,cellranger-pd-5.0.0 +173169,unknown +173170,unknown +173171,unknown +173172,unknown +173173,unknown +173174,unknown +173175,unknown +173176,unknown +173177,unknown +173178,unknown +173179,unknown +173180,unknown +173181,unknown +173182,unknown +173183,unknown +173184,unknown +173185,unknown +173186,unknown +173187,unknown +173188,unknown +173189,unknown +173190,unknown +173191,unknown +173192,unknown +173193,unknown +173194,unknown +173195,unknown +173196,unknown +173197,unknown +173198,unknown +173199,unknown +173200,unknown +173201,unknown +173203,unknown +173204,unknown +173205,unknown +173206,unknown +173207,unknown +173208,unknown +173210,unknown +173211,unknown +173212,unknown +173213,unknown +173214,unknown +173215,unknown +173216,unknown +173223,unknown +173224,unknown +173226,unknown +173227,unknown +173228,unknown +173230,unknown +173231,unknown +174145,unknown +174919,cellranger-pd-master-4009.49.1 +174920,cellranger-pd-master-4009.49.1 +174921,cellranger-pd-master-4009.49.1 +174922,cellranger-pd-master-4009.49.1 +174923,cellranger-pd-master-4009.49.1 +174924,cellranger-pd-master-4009.49.1 +174925,cellranger-pd-master-4009.49.1 +174926,cellranger-pd-master-4009.49.1 +174927,cellranger-pd-master-4009.49.1 +174928,cellranger-pd-master-4009.49.1 +174929,cellranger-pd-master-4009.49.1 +174930,cellranger-pd-master-4009.49.1 +174931,cellranger-pd-master-4009.49.1 
+174932,cellranger-pd-master-4009.49.1 +174933,cellranger-pd-master-4009.49.1 +174934,cellranger-pd-master-4009.49.1 +174935,cellranger-pd-master-4009.49.1 +174936,cellranger-pd-master-4009.49.1 +174937,cellranger-pd-master-4009.49.1 +174938,cellranger-pd-master-4009.49.1 +174939,cellranger-pd-master-4009.49.1 +174940,cellranger-pd-master-4009.49.1 +174941,cellranger-pd-master-4009.49.1 +174942,cellranger-pd-master-4009.49.1 +174943,cellranger-pd-master-4009.49.1 +174944,cellranger-pd-master-4009.49.1 +174945,cellranger-pd-master-4009.49.1 +174946,cellranger-pd-master-4009.49.1 +174947,cellranger-pd-master-4009.49.1 +174948,cellranger-pd-master-4009.49.1 +174949,cellranger-pd-master-4009.49.1 +174950,cellranger-pd-master-4009.49.1 +174951,cellranger-pd-master-4009.49.1 +174953,cellranger-pd-master-4009.49.1 +174954,cellranger-pd-master-4009.49.1 +174955,cellranger-pd-master-4009.49.1 +174956,cellranger-pd-master-4009.49.1 +174957,cellranger-pd-master-4009.49.1 +174958,cellranger-pd-master-4009.49.1 +174960,cellranger-pd-master-4009.49.1 +174961,cellranger-pd-master-4009.49.1 +174962,cellranger-pd-master-4009.49.1 +174963,cellranger-pd-master-4009.49.1 +174964,cellranger-pd-master-4009.49.1 +174965,cellranger-pd-master-4009.49.1 +174966,cellranger-pd-master-4009.49.1 +174999,cellranger-pd-master-4009.49.1 +175000,cellranger-pd-master-4009.49.1 +175001,cellranger-pd-master-4009.49.1 +175002,cellranger-pd-master-4009.49.1 +175003,cellranger-pd-master-4009.49.1 +175005,cellranger-pd-master-4009.49.1 +175006,cellranger-pd-master-4009.49.1 +175008,cellranger-pd-master-4009.49.1 +175009,cellranger-pd-master-4009.49.1 +175010,cellranger-pd-master-4009.49.1 +175012,cellranger-pd-master-4009.49.1 +175013,cellranger-pd-master-4009.49.1 +179159,cellranger-pd-master-4009.50.1 +179831,unknown +179832,unknown +179833,unknown +179834,unknown +179835,unknown +180007,unknown +180008,unknown +180009,unknown +180010,unknown +180011,unknown +180012,unknown +180013,unknown 
+180014,unknown +180015,unknown +180016,unknown +180017,unknown +180018,unknown +180019,unknown +180025,cellranger-pd-master-4009.51.0 +180026,cellranger-pd-master-4009.51.0 +180027,cellranger-pd-master-4009.51.0 +180028,cellranger-pd-master-4009.51.0 +180029,cellranger-pd-master-4009.51.0 +180030,cellranger-pd-master-4009.51.0 +180031,cellranger-pd-master-4009.51.0 +180032,cellranger-pd-master-4009.51.0 +180033,cellranger-pd-master-4009.51.0 +180034,cellranger-pd-master-4009.51.0 +180035,cellranger-pd-master-4009.51.0 +180036,cellranger-pd-master-4009.51.0 +180037,cellranger-pd-master-4009.51.0 +180517,unknown +1005229,unknown +1005230,unknown +1005384,cellranger-pd-master-4009.54.2 +1005385,cellranger-pd-master-4009.54.2 +1005386,cellranger-pd-master-4009.54.2 +1005387,cellranger-pd-master-4009.54.2 +1008841,unknown +1008842,unknown +1008843,unknown +1008844,unknown +1008846,unknown +1008847,unknown +1008848,unknown +1008849,unknown +1008850,unknown +1008851,unknown +1008852,unknown +1008853,unknown +1008854,unknown +1008855,unknown +1008856,unknown +1008857,unknown +1008858,unknown +1008859,unknown +1008860,unknown +1008861,unknown +1008862,unknown +1008863,unknown +1008864,unknown +1009445,cellranger-pd-master-4009.54.3 +1009446,cellranger-pd-master-4009.54.3 +1009447,cellranger-pd-master-4009.54.3 +1009448,cellranger-pd-master-4009.54.3 +1009450,cellranger-pd-master-4009.54.3 +1009451,cellranger-pd-master-4009.54.3 +1009452,cellranger-pd-master-4009.54.3 +1009453,cellranger-pd-master-4009.54.3 +1009454,cellranger-pd-master-4009.54.3 +1009455,cellranger-pd-master-4009.54.3 +1009456,cellranger-pd-master-4009.54.3 +1009457,cellranger-pd-master-4009.54.3 +1009458,cellranger-pd-master-4009.54.3 +1009459,cellranger-pd-master-4009.54.3 +1009460,cellranger-pd-master-4009.54.3 +1009461,cellranger-pd-master-4009.54.3 +1009462,cellranger-pd-master-4009.54.3 +1009463,cellranger-pd-master-4009.54.3 +1009464,cellranger-pd-master-4009.54.3 
+1009465,cellranger-pd-master-4009.54.3 +1009466,cellranger-pd-master-4009.54.3 +1009467,cellranger-pd-master-4009.54.3 +1009468,cellranger-pd-master-4009.54.3 +1010615,unknown +1010616,unknown +1016669,unknown +1016670,unknown +1016687,cellranger-pd-master-2020.0415.1-1-g503896194 +1016688,cellranger-pd-master-2020.0415.1-1-g503896194 +1016691,cellranger-pd-master-2020.0415.1-1-g503896194 +1016692,cellranger-pd-master-2020.0415.1-1-g503896194 +1017640,unknown +1017641,unknown +1017642,unknown +1017643,unknown +1017644,unknown +1017655,unknown +1017974,cellranger-pd-5.0.0 +1017975,cellranger-pd-5.0.0 +1017976,cellranger-pd-5.0.0 +1017977,cellranger-pd-5.0.0 +1018095,cellranger-pd-5.0.0 +1018096,cellranger-pd-5.0.0 +1018097,cellranger-pd-5.0.0 +1018098,cellranger-pd-5.0.0 +1018190,unknown +1018191,unknown +1018198,unknown +1018199,unknown +1018206,unknown +1018207,unknown +1018214,unknown +1018215,unknown +1018288,cellranger-pd-5.0.0 +1018289,cellranger-pd-5.0.0 +1018290,cellranger-pd-master-2020.0422.1 +1018291,cellranger-pd-5.0.0 +1018292,cellranger-pd-master-2020.0422.1 +1018293,cellranger-pd-5.0.0 +1018296,cellranger-pd-5.0.0 +1018297,cellranger-pd-5.0.0 +1018298,cellranger-pd-5.0.0 +1018301,cellranger-pd-5.0.0 +1019531,unknown +1019532,unknown +1019533,unknown +1019534,unknown +1019535,unknown +1019536,unknown +1019537,unknown +1019538,unknown +1019539,unknown +1019540,unknown +1019541,unknown +1019542,unknown +1019932,unknown +1019933,unknown +1020559,cellranger-pd-master-2020.0427.2 +1020597,unknown +1020598,unknown +1020599,unknown +1020600,unknown +1020601,unknown +1020602,unknown +1020603,unknown +1020604,unknown +1020605,unknown +1020606,unknown +1020607,unknown +1020608,unknown +1020665,cellranger-pd-master-2020.0427.2 +1020666,cellranger-pd-master-2020.0427.2 +1020667,cellranger-pd-master-2020.0427.2 +1020668,cellranger-pd-master-2020.0427.2 +1020669,cellranger-pd-master-2020.0427.2 +1020670,cellranger-pd-master-2020.0427.2 
+1020671,cellranger-pd-master-2020.0427.2 +1020672,cellranger-pd-master-2020.0427.2 +1020673,cellranger-pd-master-2020.0427.2 +1020674,cellranger-pd-master-2020.0427.2 +1020675,cellranger-pd-master-2020.0427.2 +1020676,cellranger-pd-master-2020.0427.2 +1021300,cellranger-pd-master-2020.0427.2 +1021301,cellranger-pd-master-2020.0427.2 +1021302,cellranger-pd-master-2020.0427.2 +1021303,cellranger-pd-master-2020.0427.2 +1021304,cellranger-pd-master-2020.0427.2 +1021305,cellranger-pd-master-2020.0427.2 +1021306,cellranger-pd-master-2020.0427.2 +1021307,cellranger-pd-master-2020.0427.2 +1021308,cellranger-pd-master-2020.0427.2 +1021309,cellranger-pd-master-2020.0427.2 +1021310,cellranger-pd-master-2020.0427.2 +1021311,cellranger-pd-master-2020.0427.2 +1021341,unknown +1021358,cellranger-pd-5.0.0 +1021360,cellranger-pd-5.0.0 +1021361,cellranger-pd-5.0.0 +1021362,cellranger-pd-5.0.0 +1023652,cellranger-pd-master-2020.0504.1 +1023653,cellranger-pd-master-2020.0504.1 +1023660,cellranger-pd-master-2020.0504.1 +1023661,cellranger-pd-master-2020.0504.1 +1023925,cellranger-pd-master-2020.0504.1 +1023926,cellranger-pd-master-2020.0504.1 +1023933,cellranger-pd-master-2020.0504.1 +1023934,cellranger-pd-master-2020.0504.1 +1027481,unknown +1027482,unknown +1027483,unknown +1027484,unknown +1027485,unknown +1027486,unknown +1027487,unknown +1027488,unknown +1027489,unknown +1027490,unknown +1027491,unknown +1027492,unknown +1027493,unknown +1027494,unknown +1027495,unknown +1027496,unknown +1027521,cellranger-pd-master-2020.0511.1 +1027522,cellranger-pd-master-2020.0511.1 +1027523,cellranger-pd-master-2020.0511.1 +1027524,cellranger-pd-master-2020.0511.1 +1027525,cellranger-pd-master-2020.0511.1 +1027526,cellranger-pd-master-2020.0511.1 +1027527,cellranger-pd-master-2020.0511.1 +1027528,cellranger-pd-master-2020.0511.1 +1027529,cellranger-pd-master-2020.0511.1 +1027530,cellranger-pd-master-2020.0511.1 +1027531,cellranger-pd-master-2020.0511.1 
+1027532,cellranger-pd-master-2020.0511.1 +1027533,cellranger-pd-master-2020.0511.1 +1027534,cellranger-pd-master-2020.0511.1 +1027535,cellranger-pd-master-2020.0511.1 +1027536,cellranger-pd-master-2020.0511.1 +1027553,cellranger-pd-master-2020.0511.1 +1027554,cellranger-pd-master-2020.0511.1 +1027555,cellranger-pd-master-2020.0511.1 +1027556,cellranger-pd-master-2020.0511.1 +1027558,cellranger-pd-master-2020.0511.1 +1027559,cellranger-pd-master-2020.0511.1 +1027560,cellranger-pd-master-2020.0511.1 +1027588,unknown +1027589,unknown +1027590,unknown +1027591,unknown +1027593,unknown +1027594,unknown +1027595,unknown +1029474,cellranger-pd-5.0.0 +1029475,cellranger-pd-5.0.0 +1029476,cellranger-pd-5.0.0 +1029477,cellranger-pd-5.0.0 +1031779,unknown +1031844,cellranger-pd-5.0.0 +1031845,cellranger-pd-5.0.0 +1031846,cellranger-pd-5.0.0 +1031847,cellranger-pd-5.0.0 +1031848,cellranger-pd-5.0.0 +1031850,cellranger-pd-5.0.0 +1031851,cellranger-pd-5.0.0 +1031853,cellranger-pd-5.0.0 +1031854,cellranger-pd-5.0.0 +1031856,cellranger-pd-5.0.0 +1031857,cellranger-pd-5.0.0 +1031858,cellranger-pd-5.0.0 +1032690,unknown +1032691,unknown +1032692,unknown +1032693,unknown +1032694,unknown +1032695,unknown +1032696,unknown +1032697,unknown +1032722,cellranger-pd-master-2020.0606.1 +1032723,cellranger-pd-master-2020.0606.1 +1032724,cellranger-pd-master-2020.0606.1 +1032725,cellranger-pd-master-2020.0606.1 +1032726,cellranger-pd-master-2020.0606.1 +1032727,cellranger-pd-master-2020.0606.1 +1032728,cellranger-pd-master-2020.0606.1 +1032729,cellranger-pd-master-2020.0606.1 +1076423,cellranger-pd-master-2020.1027.2 +1084461,cellranger-pd-master-2021.0402.1 +1084462,cellranger-pd-master-2021.0329.1 +1084463,cellranger-pd-master-2020.1203.1 +1084464,cellranger-pd-master-2020.1203.1 +1084465,cellranger-pd-master-2020.1203.1 +1084466,cellranger-pd-master-2020.1203.1 +1089848,unknown +1089851,unknown +1096354,cellranger-pd-master-2021.0120.2 +1096355,cellranger-pd-master-2021.0120.2 
+1096356,cellranger-pd-master-2021.0120.2 +1096357,cellranger-pd-master-2021.0120.2 +1096360,cellranger-pd-master-2021.0120.2 +1096361,cellranger-pd-master-2021.0120.2 +1096366,cellranger-pd-master-2021.0120.2 +1096367,cellranger-pd-master-2021.0120.2 +1096368,cellranger-pd-master-2021.0120.2 +1096369,cellranger-pd-master-2021.0120.2 +1101253,cellranger-pd-master-2021.0205.2 +1101254,cellranger-pd-master-2021.0205.2 +1117064,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117065,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117066,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117067,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117068,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117069,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117070,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117071,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117176,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117177,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117178,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117179,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117180,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117181,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117182,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1117183,cellranger-pd-cellranger6.0-cellranger-6.0.0 +1139876,unknown +1139877,unknown +1139878,unknown +1139879,unknown +1139880,unknown +1139881,unknown +1139882,unknown +1139883,unknown +1139884,unknown +1139885,unknown +1139886,unknown +1139887,unknown +1139888,unknown +1139889,unknown +1139890,unknown +1139891,unknown +1142262,cellranger-pd-master-2021.0519.2 +1142263,cellranger-pd-master-2021.0519.2 +1142264,cellranger-pd-master-2021.0519.2 +1142266,cellranger-pd-master-2021.0519.2 +1142267,cellranger-pd-master-2021.0519.2 +1142268,cellranger-pd-master-2021.0519.2 +1142269,cellranger-pd-master-2021.0519.2 +1142280,unknown +1142281,unknown +1142282,unknown +1142283,unknown +1142284,unknown +1142285,unknown +1145353,unknown +1145354,unknown 
+1145355,unknown +1145356,unknown +1145357,unknown +1145358,unknown +1145359,unknown +1145360,unknown +1145361,unknown +1145362,unknown +1145363,unknown +1145364,unknown +1145365,unknown +1145366,unknown +1145367,unknown +1145368,unknown +1145805,unknown +1145806,unknown +1145807,unknown +1145808,unknown +1145809,unknown +1145810,unknown +1145811,unknown +1145812,unknown diff --git a/enclone/src/explore.rs b/enclone/src/explore.rs deleted file mode 100644 index aaa1178a4..000000000 --- a/enclone/src/explore.rs +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -use enclone_core::defs::*; -use io_utils::*; -use std::io::Write; -use string_utils::*; -use vector_utils::*; - -// Exploratory code, in which we examine the data, so as to help decide on the -// best definition of exact subclonotype. - -pub fn explore(li: usize, tig_bc: &Vec>, ctl: &EncloneControl) { - if ctl.gen_opt.exp { - let mut logs = Vec::>::new(); - let mut r = 0; - while r < tig_bc.len() { - let mut s = r + 1; - while s < tig_bc.len() { - if tig_bc[s].len() != tig_bc[r].len() { - break; - } - let mut ok = true; - for m in 0..tig_bc[r].len() { - if tig_bc[s][m].cdr3_dna != tig_bc[r][m].cdr3_dna - || tig_bc[s][m].len != tig_bc[r][m].len - { - ok = false; - break; - } - } - if !ok { - break; - } - s += 1; - } - if s > r { - if s - r > 1 { - // if only one barcode, should save but not analyze - let mut log = Vec::::new(); - fwriteln!(log, "lena = {}", ctl.origin_info.dataset_id[li]); - fwriteln!(log, "there are {} barcodes", s - r); - - // printme!( r, s ); // YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY - /* - for t in r..s { // YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY - println!( "bc {}", tig_bc[t][0].barcode ); // YYYYYYYYYYYYYYYYYYYYY - } // YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY - */ - - fwriteln!(log, "there are {} chains", tig_bc[r].len()); - for m in 0..tig_bc[r].len() { - fwrite!( - log, - "{} = {} = {} = 
{}", - m + 1, - tig_bc[r][m].cdr3_aa, - tig_bc[r][m].cdr3_dna, - tig_bc[r][m].len - ); - if m < tig_bc[r].len() - 1 { - fwriteln!(log, " "); - } - } - fwriteln!(log, ""); - let mut bc_print = Vec::::new(); - // let mut different_columns = 0; - // let mut clear_cut = true; - let mut printme = false; - let mut bcs = Vec::<(usize, usize)>::new(); - for m in 0..tig_bc[r].len() { - // go through the chains - for p in 0..tig_bc[r][m].len { - // go through positions - - // Find {(base,qual,bc)}. - - let mut bqb = Vec::<(u8, u8, usize)>::new(); - for u in r..s { - bqb.push((tig_bc[u][m].seq[p], tig_bc[u][m].quals[p], u - r)); - } - bqb.sort(); - - // Ignore case where only one base seen. - - let mut nbases = 1; - for j in 1..bqb.len() { - if bqb[j].0 != bqb[j - 1].0 { - nbases += 1; - } - } - if nbases == 1 { - continue; - } - - // Gather frequency info. - - let mut freq = Vec::<(usize, u8, Vec<(u8, usize)>)>::new(); - let mut u = 0; - while u < bqb.len() { - let mut v = u + 1; - while v < bqb.len() { - if bqb[v].0 != bqb[u].0 { - break; - } - v += 1; - } - let mut y = Vec::<(u8, usize)>::new(); - for t in u..v { - y.push((bqb[t].1, bqb[t].2)); - } - freq.push((y.len(), bqb[u].0, y)); - u = v; - } - reverse_sort(&mut freq); - /* - if freq.len() > 1 { - different_columns += 1; - } - */ - - // A column is declared "clear cut" if there is at most one - // base having only non-Q60 support. - - /* - let mut non_q60s = 0; - for m in 0..freq.len() { - let mut have_q60 = false; - for j in 0..freq[m].2.len() { - if freq[m].2[j].0 >= 60 { - have_q60 = true; - } - } - if !have_q60 { - non_q60s += 1; - } - } - if non_q60s > 1 { - clear_cut = false; - } - */ - - // Print. 
- - fwrite!(log, "chain {}, pos {}:", m + 1, p + 1); - for f in 0..freq.len() { - if freq[f].0 > 1 && f == 0 { - fwrite!(log, " {}[{}]", freq[f].1 as char, freq[f].0); - } else { - fwrite!(log, " {}[{}", freq[f].1 as char, freq[f].0); - for j in 0..freq[f].2.len() { - let bc_id = freq[f].2[j].1; - bcs.push((bc_id, m)); - bc_print.push(bc_id); - fwrite!(log, "; q = {}, bc = {}", freq[f].2[j].0, bc_id); - } - fwrite!(log, "]"); - } - let mut have_q60 = false; - for j in 0..freq[f].2.len() { - if freq[f].2[j].0 >= 60 { - have_q60 = true; - } - } - if !have_q60 { - fwrite!(log, " = WEAK"); - } - } - fwriteln!(log, ""); - } - bcs.sort(); - let mut bcs_count = Vec::<(usize, usize)>::new(); - let mut i = 0; - while i < bcs.len() { - let j = next_diff(&bcs, i); - bcs_count.push((bcs[i].0, j - i)); - i = j; - } - let mut i = 0; - while i < bcs_count.len() { - let j = next_diff1_2(&bcs_count, i as i32) as usize; - if j - i == 1 && bcs_count[i].1 >= 10 { - printme = true; - } - i = j; - } - } - // if different_columns > 5 || !clear_cut { - if printme || ctl.gen_opt.weak { - unique_sort(&mut bc_print); - for j in 0..bc_print.len() { - let bc_id = bc_print[j]; - fwriteln!(log, "bc {} = {}", bc_id, tig_bc[bc_id + r][0].barcode); - } - - logs.push(log); - } - } - } - r = s; - } - for i in 0..logs.len() { - println!("\ncase {}", i + 1); - print!("{}", strme(&logs[i])); - } - std::process::exit(0); - } -} - -// Look for insertions (initial exploration). 
- -pub fn find_insertions(ctl: &EncloneControl, exact_clonotypes: &Vec) { - if ctl.gen_opt.insertions { - println!("CDR3s associated with possible SHM insertions"); - let mut z = Vec::<(String, usize, isize)>::new(); // {(cdr3_aa, v_ref_id, delta)} - for i in 0..exact_clonotypes.len() { - let ex = &exact_clonotypes[i]; - for j in 0..ex.share.len() { - let sh = &ex.share[j]; - if sh.annv.len() == 2 && sh.annv[1].0 > sh.annv[0].0 + sh.annv[0].1 { - let ins = sh.annv[1].0 - sh.annv[0].0 - sh.annv[0].1; - z.push((sh.cdr3_aa.clone(), sh.v_ref_id, ins as isize)); - } else if sh.annv.len() == 1 { - z.push((sh.cdr3_aa.clone(), sh.v_ref_id, 0)); - } - } - } - unique_sort(&mut z); - z.sort(); - let mut i = 0; - while i < z.len() { - let j = next_diff12_3(&z, i as i32) as usize; - if j - i > 1 { - let mut have_zero = false; - for k in i..j { - if z[k].2 == 0 { - have_zero = true; - } - } - if have_zero { - for k in i..j { - if z[k].2 > 0 { - println!("{} ==> {}", z[k].0, z[k].2); - } - } - } - } - i = j; - } - println!(""); - std::process::exit(0); - } -} diff --git a/enclone/src/graph_filter.rs b/enclone/src/graph_filter.rs index f9b5465c6..340794963 100644 --- a/enclone/src/graph_filter.rs +++ b/enclone/src/graph_filter.rs @@ -1,16 +1,18 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // This file provides the single function graph_filter. 
-use enclone_core::defs::*; +use enclone_core::barcode_fate::BarcodeFate; +use enclone_core::defs::{EncloneControl, TigData}; use graph_simple::GraphSimple; -use io_utils::*; +use io_utils::fwriteln; use petgraph::prelude::*; use rayon::prelude::*; -use std::cmp::*; +use std::cmp::{max, min}; +use std::collections::HashMap; use std::io::Write; -use string_utils::*; -use vector_utils::*; +use string_utils::strme; +use vector_utils::{bin_member, bin_position, erase_if, lower_bound, next_diff12_3, reverse_sort}; // Create a digraph which has one vertex for each V..J that appears in a productive // pair, and for a given light chain and a given heavy chain vertex, a weighted edge @@ -28,13 +30,17 @@ use vector_utils::*; // // Hmm, seems like the edges go from heavy to light. -pub fn graph_filter(mut tig_bc: &mut Vec>, graph: bool) { +pub fn graph_filter( + ctl: &EncloneControl, + tig_bc: &mut Vec>, + graph: bool, + fate: &mut [HashMap], +) { let mut ndels = 0; - let mut seqs = Vec::<(Vec, bool, String, usize)>::new(); - for i in 0..tig_bc.len() { - for j in 0..tig_bc[i].len() { - let x = &tig_bc[i][j]; - seqs.push((x.seq.clone(), x.left, x.cdr3_aa.clone(), x.v_ref_id)); + let mut seqs = Vec::<(&[u8], bool, &str, usize)>::new(); + for tigi in tig_bc.iter() { + for x in tigi { + seqs.push((x.seq(), x.left, x.cdr3_aa.as_str(), x.v_ref_id)); } } seqs.par_sort(); @@ -53,8 +59,8 @@ pub fn graph_filter(mut tig_bc: &mut Vec>, graph: bool) { } j += 1; } - for k in i + 1..j { - to_delete[k] = true; + for d in &mut to_delete[i + 1..j] { + *d = true; } i = j; } @@ -72,30 +78,29 @@ pub fn graph_filter(mut tig_bc: &mut Vec>, graph: bool) { for j1 in 0..tig_bc[i].len() { if tig_bc[i][j1].left { let x1 = &tig_bc[i][j1]; - let p1 = - lower_bound(&seqs, &(x1.seq.clone(), false, x1.cdr3_aa.clone(), 0)) as usize; + let p1 = lower_bound(&seqs, &(x1.seq(), false, x1.cdr3_aa.as_str(), 0)) as usize; for j2 in 0..tig_bc[i].len() { if !tig_bc[i][j2].left { let x2 = &tig_bc[i][j2]; - let p2 = 
lower_bound(&seqs, &(x2.seq.clone(), false, x2.cdr3_aa.clone(), 0)) - as usize; + let p2 = + lower_bound(&seqs, &(x2.seq(), false, x2.cdr3_aa.as_str(), 0)) as usize; res.1.push((p1, p2, min(x1.umi_count, x2.umi_count))); } } } } }); - for i in 0..results.len() { - edges0.append(&mut results[i].1.clone()); + for mut r in results { + edges0.append(&mut r.1); } - edges0.sort(); + edges0.sort_unstable(); let mut edges1 = Vec::<(usize, usize, (usize, usize))>::new(); let mut i = 0; while i < edges0.len() { let j = next_diff12_3(&edges0, i as i32) as usize; let mut weight = 0; - for k in i..j { - weight += edges0[k].2; + for e in &edges0[i..j] { + weight += e.2; } edges1.push((edges0[i].0, edges0[i].1, (weight, j - i))); i = j; @@ -106,16 +111,22 @@ pub fn graph_filter(mut tig_bc: &mut Vec>, graph: bool) { for i in 0..seqs.len() { g.add_node(i as u32); } - for e in 0..edges1.len() { - let v = edges1[e].0; - let w = edges1[e].1; - let weight = edges1[e].2; + for (v, w, weight) in edges1 { g.add_edge(NodeIndex::::new(v), NodeIndex::::new(w), weight); } // Kill weak branches from light to heavy chains. Also kill light chain onesies that // have too many heavy chain partners. + // + // ******************************************************************************************** + // THIS IS TURNED OFF. Reason: if a light chain is ubiquitous, then this code would be + // prejudiciously deleting pairs that use it. The code kills a lot of real cells. + // When we turned off this code, we got one additional false positive, but it seems like we + // "should" have the false positive. The code also resulted in the creation of a few more + // 5-chain clonotypes, and lots more 4-chain clonotypes (both in the ~400 dataset run). 
+ // ******************************************************************************************** + /* let mut log = Vec::::new(); fwriteln!(log, "\nBRANCHING FROM LIGHT CHAINS"); const MIN_RATIO_KILL: usize = 8; @@ -255,11 +266,22 @@ pub fn graph_filter(mut tig_bc: &mut Vec>, graph: bool) { ndels += 1; } } - erase_if(&mut tig_bc, &to_delete); + for i in 0..tig_bc.len() { + if to_delete[i] { + fate[tig_bc[i][0].dataset_index].insert( + tig_bc[i][0].barcode.clone(), + "failed GRAPH_FILTER filter".to_string(), + ); + } + } + if !ctl.gen_opt.ngraph_filter { + erase_if(&mut tig_bc, &to_delete); + } if graph { fwriteln!(log, ""); print!("{}", strme(&log)); } + */ // Kill weak branches from heavy to light chains. @@ -311,11 +333,11 @@ pub fn graph_filter(mut tig_bc: &mut Vec>, graph: bool) { } } }); - for i in 0..results.len() { - kills.append(&mut results[i].1.clone()); - log.append(&mut results[i].2.clone()); + for (_, mut r1, mut r2) in results { + kills.append(&mut r1); + log.append(&mut r2); } - kills.sort(); + kills.sort_unstable(); // presumably badly inefficient let mut to_delete = vec![false; tig_bc.len()]; let mut results = Vec::<(usize, bool)>::new(); @@ -329,14 +351,14 @@ pub fn graph_filter(mut tig_bc: &mut Vec>, graph: bool) { continue; } let x1 = &tig_bc[i][j1]; - let m1 = (x1.seq.clone(), x1.left, x1.cdr3_aa.clone(), x1.v_ref_id); + let m1 = (x1.seq(), x1.left, x1.cdr3_aa.as_str(), x1.v_ref_id); let p1 = bin_position(&seqs, &m1) as usize; for j2 in 0..tig_bc[i].len() { if tig_bc[i][j2].left { continue; } let x2 = &tig_bc[i][j2]; - let m2 = (x2.seq.clone(), x2.left, x2.cdr3_aa.clone(), x2.v_ref_id); + let m2 = (x2.seq(), x2.left, x2.cdr3_aa.as_str(), x2.v_ref_id); let p2 = bin_position(&seqs, &m2) as usize; if bin_member(&kills, &(p1, p2)) { res.1 = true; @@ -350,10 +372,18 @@ pub fn graph_filter(mut tig_bc: &mut Vec>, graph: bool) { ndels += 1; } } - erase_if(&mut tig_bc, &to_delete); + for i in 0..tig_bc.len() { + if to_delete[i] { + 
fate[tig_bc[i][0].dataset_index] + .insert(tig_bc[i][0].barcode.clone(), BarcodeFate::GraphFilter); + } + } + if !ctl.gen_opt.ngraph_filter { + erase_if(tig_bc, &to_delete); + } if graph { fwriteln!(log, ""); print!("{}", strme(&log)); - println!("total graph filter deletions = {}", ndels); + println!("total graph filter deletions = {ndels}"); } } diff --git a/enclone/src/html.rs b/enclone/src/html.rs deleted file mode 100644 index 43f8235f5..000000000 --- a/enclone/src/html.rs +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Utility for inserting html files. It also changes all instance of #enclone to -// a preset format for that. - -use io_utils::*; -use std::env; -use std::fs::File; -use std::io::{BufRead, BufReader, BufWriter, Write}; -use string_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn gtag() -> String { - "\n\ - \n\ - \n" - .to_string() -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Insert google tag and banner. - -pub fn edit_html(html: &str) -> String { - let mut lines2 = Vec::::new(); - for line in html.lines() { - if line == "" { - let g = gtag(); - for x in g.lines() { - lines2.push(x.to_string()); - } - } - lines2.push(line.to_string()); - if line == "" { - lines2.push("".to_string()); - lines2.push("
".to_string()); - lines2.push( - "\"enclone" - .to_string(), - ); - } - } - let mut x = String::new(); - for i in 0..lines2.len() { - x += &format!("{}\n", lines2[i]); - } - x -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn html_header(level: usize, title: &str) -> String { - assert!(level == 0 || level == 2); - let ltext; - if level == 0 { - ltext = "pages"; - } else { - ltext = ".."; - } - format!( - "\n\ - \n\ - \n\ - \n\ - \n\ - \n\ - {}\n\ - \n\ - {} - \n", - title, - ltext, - gtag() - ) -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn insert_html(in_file: &str, out_file: &str, up: bool, level: usize) { - const ENCLONE_FORMATTED: &str = - "enclone"; - let pwd = env::current_dir().unwrap(); - let pwd = pwd.to_str().unwrap(); - let mut title = String::new(); - { - let f = BufReader::new(File::open(&in_file).expect(&format!( - "In directory {}, could not open file \"{}\"", - pwd, &in_file - ))); - for line in f.lines() { - let s = line.unwrap(); - if s.contains("") { - title = s.between("<title>", "").to_string(); - break; - } - } - } - let f = open_for_read![&in_file]; - let mut g = open_for_write_new![&out_file]; - fwrite!(g, "{}", html_header(level, &title)); - fwriteln!( - g, - " - " - ); - for line in f.lines() { - let mut s = line.unwrap(); - if s.starts_with("") { - } else if s.starts_with("#include ") { - let mut f = format!("../{}", s.after("#include ")); - if !up { - f = format!("{}", s.after("#include ")); - } - let h = open_for_read![&f]; - let mut started = false; - let mut count = 0; - for line in h.lines() { - count += 1; - let t = line.unwrap(); - if t == "<body>" { - started = true; - } else if t == "</body>" { - break; - } else if started && !t.contains("enclone_banner") { - fwriteln!(g, "{}", t); - } - } - if count == 0 { - eprintln!("\nThe file {} is empty.\n", f); - std::process::exit(1); - } - } else { - s = 
s.replace("#enclone", ENCLONE_FORMATTED); - fwriteln!(g, "{}", s); - } - } -} diff --git a/enclone/src/info.rs b/enclone/src/info.rs index a32c42891..265208a13 100644 --- a/enclone/src/info.rs +++ b/enclone/src/info.rs @@ -1,25 +1,28 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // This file provides the single function build_info. -use vdj_ann::*; +use enclone_core::barcode_fate::BarcodeFate; +use vdj_ann::refx; -use self::refx::*; -use crate::read_json::*; -use amino::*; -use ansi_escape::*; -use debruijn::{dna_string::*, Mer}; -use enclone_core::defs::*; -use enclone_core::print_tools::*; +use self::refx::RefData; +use amino::{aa_seq, codon_to_aa}; +use ansi_escape::emit_end_escape; +use debruijn::{dna_string::DnaString, Mer}; +use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype}; +use enclone_core::print_tools::emit_codon_color_escape; use rayon::prelude::*; -use std::sync::atomic::AtomicBool; -use string_utils::*; -use vector_utils::*; +use std::collections::HashMap; +use std::convert::TryInto; +use std::fmt::Write; +use string_utils::strme; +use vector_utils::unique_sort; pub fn build_info( refdata: &RefData, ctl: &EncloneControl, - exact_clonotypes: &mut Vec<ExactClonotype>, + exact_clonotypes: &mut [ExactClonotype], + fate: &mut [HashMap<String, BarcodeFate>], ) -> Vec<CloneInfo> { // Build info about clonotypes. We create a data structure info. // An entry in info is a clonotype having appropriate properties. @@ -27,24 +30,25 @@ pub fn build_info( // Much of the information in a CloneInfo object is redundant. So we could probably // improve both time and space computational performance by reducing that redundancy. 
- let exiting = AtomicBool::new(false); - let mut total_clones = 0; - for i in 0..exact_clonotypes.len() { - total_clones += exact_clonotypes[i].ncells(); - } let mut info = Vec::<CloneInfo>::new(); - let mut results = Vec::<(usize, Vec<CloneInfo>, ExactClonotype)>::new(); - for i in 0..exact_clonotypes.len() { - results.push((i, Vec::new(), exact_clonotypes[i].clone())); + let mut results = Vec::<( + usize, + Vec<CloneInfo>, + ExactClonotype, + Vec<(usize, String, BarcodeFate)>, + )>::new(); + for (i, ct) in exact_clonotypes.iter().enumerate() { + results.push((i, Vec::new(), ct.clone(), Vec::new())); } results.par_iter_mut().for_each(|res| { let i = res.0; let mut lens = Vec::<usize>::new(); let mut tigs = Vec::<Vec<u8>>::new(); let mut tigs_amino = Vec::<Vec<u8>>::new(); + let mut aa_mod_indel = Vec::<Vec<u8>>::new(); + let mut tigs_ins = Vec::<Vec<(usize, Vec<u8>)>>::new(); let mut tigsp = Vec::<DnaString>::new(); let mut has_del = Vec::<bool>::new(); - let mut orig_tigs = Vec::<DnaString>::new(); let (mut vs, mut js) = (Vec::<DnaString>::new(), Vec::<DnaString>::new()); let mut vsids = Vec::<usize>::new(); let mut jsids = Vec::<usize>::new(); @@ -57,8 +61,6 @@ pub fn build_info( for j in 0..p.share.len() { let x = &mut p.share[j]; tigsp.push(DnaString::from_acgt_bytes(&x.seq)); - // INCORRECT, TO DO SOMETHING ABOUT LATER: - orig_tigs.push(DnaString::from_acgt_bytes(&x.full_seq)); let jid = x.j_ref_id; js.push(refdata.refs[jid].clone()); @@ -70,18 +72,14 @@ pub fn build_info( let mut annv = x.annv.clone(); vsids.push(vid); jsids.push(jid); + let mut vsnx = String::new(); + // DELETION if annv.len() == 2 && annv[1].0 == annv[0].0 + annv[0].1 { let mut t = Vec::<u8>::new(); let (mut del_start, mut del_stop) = (annv[0].1, annv[1].3); - for i in 0..del_start { - t.push(x.seq[i as usize]); - } - for _ in del_start..del_stop { - t.push(b'-'); - } - for i in (annv[1].0 as usize)..x.seq.len() { - t.push(x.seq[i as usize]); - } + 
t.extend(&x.seq[..del_start.try_into().unwrap()]); + t.resize(del_stop.try_into().unwrap(), b'-'); + t.extend(&x.seq[annv[1].0 as usize..]); lens.push(t.len()); tigs.push(t.clone()); if del_start % 3 != 0 { @@ -90,40 +88,90 @@ pub fn build_info( del_start -= offset; del_stop -= offset; t.clear(); - for i in 0..del_start { - t.push(x.seq[i as usize]); - } - for _ in del_start..del_stop { - t.push(b'-'); - } - for i in ((annv[1].0 - offset) as usize)..x.seq.len() { - t.push(x.seq[i as usize]); - } + t.extend(&x.seq[..del_start.try_into().unwrap()]); + t.resize(del_stop.try_into().unwrap(), b'-'); + t.extend(&x.seq[((annv[1].0 - offset) as usize)..]); } annv[0].1 += (del_stop - del_start) + annv[1].1; annv.truncate(1); - tigs_amino.push(t); + tigs_amino.push(t.clone()); + let mut aa = Vec::<u8>::new(); + for p in (0..=t.len() - 3).step_by(3) { + if t[p] == b'-' { + aa.push(b'-'); + } else { + aa.push(codon_to_aa(&t[p..p + 3])); + } + } + + aa_mod_indel.push(aa); + tigs_ins.push(Vec::new()); has_del.push(true); + // INSERTION } else if annv.len() == 2 && annv[1].3 == annv[0].3 + annv[0].1 { let ins_len = (annv[1].0 - annv[0].0 - annv[0].1) as usize; - let mut ins_pos = (annv[0].0 + annv[0].1) as usize; + let ins_pos = (annv[0].0 + annv[0].1) as usize; let mut t = Vec::<u8>::new(); + let mut nt = Vec::<u8>::new(); for i in 0..x.seq.len() { if i < ins_pos || i >= ins_pos + ins_len { t.push(x.seq[i]); + } else { + nt.push(x.seq[i]); } } has_del.push(true); // DOES NOT MAKE SENSE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! lens.push(t.len()); - tigs.push(t); - ins_pos -= ins_pos % 3; - let mut t = Vec::<u8>::new(); - for i in 0..x.seq.len() { - if i < ins_pos || i >= ins_pos + ins_len { - t.push(x.seq[i]); + tigs.push(t.clone()); + let ins = vec![(ins_pos, nt)]; + tigs_ins.push(ins); + tigs_amino.push(t); + + // Optimize to compute entry in aa_mod_indel and the inserted aa sequence. 
+ + let aa_full = aa_seq(&x.seq, 0); + let ref_aa = aa_seq(&refdata.refs[vid].to_ascii_vec(), 0); + let ins_len_aa = ins_len / 3; + const EXT: usize = 10; + let ins_pos_low = if ins_pos / 3 < EXT { + 0 + } else { + ins_pos / 3 - EXT + }; + let mut ins_pos_high = + std::cmp::min(ins_pos / 3 + EXT, aa_full.len() - ins_len_aa + 1); + ins_pos_high = std::cmp::min(ins_pos_high, ref_aa.len() - ins_len_aa + 1); + let mut mis = Vec::<(usize, usize, Vec<u8>)>::new(); + for j in ins_pos_low..ins_pos_high { + let mut y = Vec::<u8>::new(); + for (k, &aa) in aa_full.iter().enumerate() { + if k < j || k >= j + ins_len_aa { + y.push(aa); + } + } + let mut m = 0; + for l in 0..ref_aa.len() { + if l < y.len() && ref_aa[l] != y[l] { + m += 1; + } } + mis.push((m, j, y.clone())); } - tigs_amino.push(t); + mis.sort(); + aa_mod_indel.push(mis[0].2.clone()); + let ins_aa_pos = mis[0].1; + let mut aax = Vec::<u8>::new(); + let b = 3 * ins_aa_pos; + for p in 0..ins_len_aa { + emit_codon_color_escape(&x.seq[b + 3 * p..b + 3 * p + 3], &mut aax); + let aa = codon_to_aa(&x.seq[b + 3 * p..b + 3 * p + 3]); + aax.push(aa); + emit_end_escape(&mut aax); + } + vsnx = format!("ins = {} at {ins_aa_pos}", strme(&aax)); + + // Finish up ann. + annv[0].1 += annv[1].1; annv.truncate(1); } else { @@ -131,19 +179,16 @@ pub fn build_info( lens.push(x.seq.len()); tigs.push(x.seq.clone()); tigs_amino.push(x.seq.clone()); + aa_mod_indel.push(aa_seq(&x.seq, 0)); + tigs_ins.push(Vec::new()); } // Save reference V segment. However in the case where there is a // single indel between the contig and the reference sequence, edit the // reference V segment accordingly. 
- let rt = &refdata.refs[vid as usize]; - let mut vsnx = String::new(); + let rt = &refdata.refs[vid]; if x.annv.len() == 2 { - if x.annv[0].1 as usize > rt.len() { - let msg = format!("x.annv[0].1 = {}, rt.len() = {}", x.annv[0].1, rt.len()); - json_error(None, &ctl, &exiting, &msg); - } let mut r = rt.slice(0, x.annv[0].1 as usize).to_owned(); // deletion if x.annv[1].0 == x.annv[0].0 + x.annv[0].1 { @@ -156,7 +201,7 @@ pub fn build_info( "has deletion of {} bases relative to reference", x.annv[1].3 - x.annv[0].1 )); - vs_notesx.push("".to_string()); + vs_notesx.push(String::new()); // insertion } else if x.annv[1].3 == x.annv[0].3 + x.annv[0].1 { /* @@ -171,22 +216,7 @@ pub fn build_info( r.push(rt.get(m)); } vs.push(r.clone()); - vs_notes.push("".to_string()); - - // Make note on insertion. Rounded down to modulo 3 position. Note that - // rounding down doesn't necessarily make sense. - - let ins_len = (x.annv[1].0 - x.annv[0].0 - x.annv[0].1) as usize; - let mut ins_pos = (x.annv[0].0 + x.annv[0].1) as usize; - ins_pos -= ins_pos % 3; - let mut aax = Vec::<u8>::new(); - for p in 0..ins_len / 3 { - emit_codon_color_escape(&x.seq[3 * p..3 * p + 3], &mut aax); - let aa = codon_to_aa(&x.seq[3 * p..3 * p + 3]); - aax.push(aa); - emit_end_escape(&mut aax); - } - vsnx = format!("ins = {} at {}", strme(&aax), ins_pos / 3); + vs_notes.push(String::new()); } else { // maybe can't happen vs.push(rt.clone()); @@ -194,13 +224,13 @@ pub fn build_info( // This caused a traceback on "enclone 123085 RE". It is interesting because // the traceback did not get back to the main program, even with // "enclone 123085 RE NOPRETTY". 
- vs_notes.push("".to_string()); - vsnx = "".to_string(); + vs_notes.push(String::new()); + vsnx = String::new(); } } else { vs.push(rt.clone()); vs_notes.push(String::new()); - vsnx = "".to_string(); + vsnx = String::new(); } cdr3s.push(x.cdr3_dna.clone()); cdr3_aa.push(x.cdr3_aa.clone()); @@ -212,15 +242,13 @@ pub fn build_info( if z.c_start.is_some() { let delta = z.c_start.unwrap() as isize - z.j_stop as isize; if delta != 0 { - if vsnx.len() > 0 { + if !vsnx.is_empty() { vsnx += "; "; } if delta > 0 { - vsnx += &mut format!("gap from J stop to C start = {}", delta); - } else { - if delta != -1 || ctl.gen_opt.jc1 { - vsnx += &mut format!("J and C segs overlap by {}", -delta); - } + write!(vsnx, "gap from J stop to C start = {delta}").unwrap(); + } else if delta != -1 || ctl.gen_opt.jc1 { + write!(vsnx, "J and C segs overlap by {}", -delta).unwrap(); } } } @@ -231,6 +259,8 @@ pub fn build_info( x.seq_del = tigs[tigs.len() - 1].clone(); x.seq_del_amino = tigs_amino[tigs_amino.len() - 1].clone(); + x.aa_mod_indel = aa_mod_indel[aa_mod_indel.len() - 1].clone(); + x.ins = tigs_ins[tigs_ins.len() - 1].clone(); x.vs = vs[vs.len() - 1].clone(); x.vs_notesx = vs_notesx[vs_notesx.len() - 1].clone(); x.js = js[js.len() - 1].clone(); @@ -252,7 +282,6 @@ pub fn build_info( let tigs_aminox = [tigs_amino[i1].clone(), tigs_amino[i2].clone()].to_vec(); let tigspx = [tigsp[i1].clone(), tigsp[i2].clone()].to_vec(); let has_delx = [has_del[i1], has_del[i2]].to_vec(); - let orig_tigsx = [orig_tigs[i1].clone(), orig_tigs[i2].clone()].to_vec(); let vsx = [vs[i1].clone(), vs[i2].clone()].to_vec(); let jsx = [js[i1].clone(), js[i2].clone()].to_vec(); let cdr3sx = [cdr3s[i1].clone(), cdr3s[i2].clone()].to_vec(); @@ -268,9 +297,8 @@ pub fn build_info( tigs_amino: tigs_aminox, tigsp: tigspx, has_del: has_delx, - orig_tigs: orig_tigsx, clonotype_id: i, - exact_cols: exact_cols, + exact_cols, clonotype_index: i, // CLEARLY UNNEEDED origin: origin.clone(), vs: vsx.clone(), @@ -291,36 +319,40 
@@ pub fn build_info( // Incorporate improper cells if they are onesies. Note that we're dropping the // improper cells having two or more chains. - if !placed - && (shares.len() == 1 - && ctl.join_alg_opt.merge_onesies - && exact_clonotypes[i].ncells() * ctl.onesie_mult >= total_clones) - || ctl.merge_all_impropers - { + if !placed && shares.len() > 1 { + let ex = &exact_clonotypes[i]; + for j in 0..ex.clones.len() { + res.3.push(( + ex.clones[j][0].dataset_index, + ex.clones[j][0].barcode.clone(), + BarcodeFate::Improper, + )); + } + } + if !placed && (shares.len() == 1 || ctl.merge_all_impropers) { let mut exact_cols = Vec::<usize>::new(); for i in 0..tigs.len() { exact_cols.push(i); } res.1.push(CloneInfo { - lens: lens, - tigs: tigs, - tigs_amino: tigs_amino, - tigsp: tigsp, - has_del: has_del, - orig_tigs: orig_tigs, + lens, + tigs, + tigs_amino, + tigsp, + has_del, clonotype_id: i, - exact_cols: exact_cols, + exact_cols, clonotype_index: i, // CLEARLY UNNEEDED origin: origin.clone(), vs: vs.clone(), dref: vec![None; vs.len()], - vs_notesx: vs_notesx, - js: js, - vsids: vsids, - jsids: jsids, - cdr3s: cdr3s, - cdr3_aa: cdr3_aa, - chain_types: chain_types, + vs_notesx, + js, + vsids, + jsids, + cdr3s, + cdr3_aa, + chain_types, }); } }); @@ -330,6 +362,9 @@ pub fn build_info( for i in 0..results.len() { info.append(&mut results[i].1); exact_clonotypes[i] = results[i].2.clone(); + for j in 0..results[i].3.len() { + fate[results[i].3[j].0].insert(results[i].3[j].1.clone(), results[i].3[j].2.clone()); + } } // Sort info. diff --git a/enclone/src/innate.rs b/enclone/src/innate.rs index f8d8655f9..0a2332b98 100644 --- a/enclone/src/innate.rs +++ b/enclone/src/innate.rs @@ -1,17 +1,17 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // Functions relating to the identification if iNKT and MAIT cells. 
// species: return "human" or "mouse" or "unknown", based on a 60-base perfect match between -// the TRAC sequence in the provided reference sequences, and the internally provided reference -// sequences for human and mouse. +// the TRAC or IGHM sequence in the provided reference sequences, and internally provided reference +// sequences for human and mouse, for those regions. -use enclone_core::defs::*; -use string_utils::*; -use vdj_ann::refx::*; -use vector_utils::*; +use enclone_core::defs::ExactClonotype; +use string_utils::TextUtils; +use vdj_ann::refx::RefData; +use vector_utils::{bin_member, reverse_sort, unique_sort}; -pub fn species(refdata: &RefData) -> String { +pub fn species(refdata: &RefData) -> &'static str { let mut my_trac = Vec::<Vec<u8>>::new(); for i in 0..refdata.refs.len() { if refdata.name[i].starts_with("TRAC") || refdata.name[i].starts_with("IGHM") { @@ -19,43 +19,119 @@ pub fn species(refdata: &RefData) -> String { } } const K: usize = 60; - let mut kmers = Vec::<Vec<u8>>::new(); - for i in 0..my_trac.len() { - for j in 0..=my_trac[i].len() - K { - kmers.push(my_trac[i][j..j + K].to_vec()); + let mut kmers = Vec::<&[u8]>::new(); + for tr in &my_trac { + if tr.len() >= K { + for j in 0..=tr.len() - K { + kmers.push(&tr[j..j + K]); + } } } unique_sort(&mut kmers); - let mut counts = Vec::<(usize, String)>::new(); + let mut counts = Vec::<(usize, &'static str)>::new(); for pass in 1..=2 { let mut count = 0; - let species; - let refx; - if pass == 1 { - refx = human_ref(); - species = "human".to_string(); + let species = if pass == 1 { "human" } else { "mouse" }; + + // Build trac. This is the concatenation, with single space separation, of the all + // the human (pass = 1) or mouse (pass = 2) reference sequences that contain + // |TRAC or |IGHM, for particular versions of these reference sequences (and probably + // that choice doesn't matter much). 
+ + let trac = if pass == 1 { + b"GGAGTGCATCCGCCCCAACCCTTTTCCCCCTCGTCTCCTGTGAGAATTCCCCGTCGGATACGAGCAGCGTGGCCG\ + TTGGCTGCCTCGCACAGGACTTCCTTCCCGACTCCATCACTTTCTCCTGGAAATACAAGAACAACTCTGACATCA\ + GCAGCACCCGGGGCTTCCCATCAGTCCTGAGAGGGGGCAAGTACGCAGCCACCTCACAGGTGCTGCTGCCTTCCA\ + AGGACGTCATGCAGGGCACAGACGAACACGTGGTGTGCAAAGTCCAGCACCCCAACGGCAACAAAGAAAAGAACG\ + TGCCTCTTCCAGTGATTGCTGAGCTGCCTCCCAAAGTGAGCGTCTTCGTCCCACCCCGCGACGGCTTCTTCGGCA\ + ACCCCCGCAAGTCCAAGCTCATCTGCCAGGCCACGGGTTTCAGTCCCCGGCAGATTCAGGTGTCCTGGCTGCGCG\ + AGGGGAAGCAGGTGGGGTCTGGCGTCACCACGGACCAGGTGCAGGCTGAGGCCAAAGAGTCTGGGCCCACGACCT\ + ACAAGGTGACCAGCACACTGACCATCAAAGAGAGCGACTGGCTCGGCCAGAGCATGTTCACCTGCCGCGTGGATC\ + ACAGGGGCCTGACCTTCCAGCAGAATGCGTCCTCCATGTGTGTCCCCGATCAAGACACAGCCATCCGGGTCTTCG\ + CCATCCCCCCATCCTTTGCCAGCATCTTCCTCACCAAGTCCACCAAGTTGACCTGCCTGGTCACAGACCTGACCA\ + CCTATGACAGCGTGACCATCTCCTGGACCCGCCAGAATGGCGAAGCTGTGAAAACCCACACCAACATCTCCGAGA\ + GCCACCCCAATGCCACTTTCAGCGCCGTGGGTGAGGCCAGCATCTGCGAGGATGACTGGAATTCCGGGGAGAGGT\ + TCACGTGCACCGTGACCCACACAGACCTGCCCTCGCCACTGAAGCAGACCATCTCCCGGCCCAAGGGGGTGGCCC\ + TGCACAGGCCCGATGTCTACTTGCTGCCACCAGCCCGGGAGCAGCTGAACCTGCGGGAGTCGGCCACCATCACGT\ + GCCTGGTGACGGGCTTCTCTCCCGCGGACGTCTTCGTGCAGTGGATGCAGAGGGGGCAGCCCTTGTCCCCGGAGA\ + AGTATGTGACCAGCGCCCCAATGCCTGAGCCCCAGGCCCCAGGCCGGTACTTCGCCCACAGCATCCTGACCGTGT\ + CCGAAGAGGAATGGAACACGGGGGAGACCTACACCTGCGTGGTGGCCCATGAGGCCCTGCCCAACAGGGTCACCG\ + AGAGGACCGTGGACAAGTCCACCGGTAAACCCACCCTGTACAACGTGTCCCTGGTCATGTCCGACACAGCTGGCA\ + CCTGCTAC GGAGTGCATCCGCCCCAACCCTTTTCCCCCTCGTCTCCTGTGAGAATTCCCCGTCGGATACGAGCA\ + GCGTGGCCGTTGGCTGCCTCGCACAGGACTTCCTTCCCGACTCCATCACTTTCTCCTGGAAATACAAGAACAACT\ + CTGACATCAGCAGCACCCGGGGCTTCCCATCAGTCCTGAGAGGGGGCAAGTACGCAGCCACCTCACAGGTGCTGC\ + TGCCTTCCAAGGACGTCATGCAGGGCACAGACGAACACGTGGTGTGCAAAGTCCAGCACCCCAACGGCAACAAAG\ + AAAAGAACGTGCCTCTTCCAGTGATTGCTGAGCTGCCTCCCAAAGTGAGCGTCTTCGTCCCACCCCGCGACGGCT\ + TCTTCGGCAACCCCCGCAAGTCCAAGCTCATCTGCCAGGCCACGGGTTTCAGTCCCCGGCAGATTCAGGTGTCCT\ + 
GGCTGCGCGAGGGGAAGCAGGTGGGGTCTGGCGTCACCACGGACCAGGTGCAGGCTGAGGCCAAAGAGTCTGGGC\ + CCACGACCTACAAGGTGACCAGCACACTGACCATCAAAGAGAGCGACTGGCTCGGCCAGAGCATGTTCACCTGCC\ + GCGTGGATCACAGGGGCCTGACCTTCCAGCAGAATGCGTCCTCCATGTGTGTCCCCGATCAAGACACAGCCATCC\ + GGGTCTTCGCCATCCCCCCATCCTTTGCCAGCATCTTCCTCACCAAGTCCACCAAGTTGACCTGCCTGGTCACAG\ + ACCTGACCACCTATGACAGCGTGACCATCTCCTGGACCCGCCAGAATGGCGAAGCTGTGAAAACCCACACCAACA\ + TCTCCGAGAGCCACCCCAATGCCACTTTCAGCGCCGTGGGTGAGGCCAGCATCTGCGAGGATGACTGGAATTCCG\ + GGGAGAGGTTCACGTGCACCGTGACCCACACAGACCTGCCCTCGCCACTGAAGCAGACCATCTCCCGGCCCAAGG\ + GGGTGGCCCTGCACAGGCCCGATGTCTACTTGCTGCCACCAGCCCGGGAGCAGCTGAACCTGCGGGAGTCGGCCA\ + CCATCACGTGCCTGGTGACGGGCTTCTCTCCCGCGGACGTCTTCGTGCAGTGGATGCAGAGGGGGCAGCCCTTGT\ + CCCCGGAGAAGTATGTGACCAGCGCCCCAATGCCTGAGCCCCAGGCCCCAGGCCGGTACTTCGCCCACAGCATCC\ + TGACCGTGTCCGAAGAGGAATGGAACACGGGGGAGACCTACACCTGCGTGGTGGCCCATGAGGCCCTGCCCAACA\ + GGGTCACCGAGAGGACCGTGGACAAGTCCACCGAGGGGGAGGTGAGCGCCGACGAGGAGGGCTTTGAGAACCTGT\ + GGGCCACCGCCTCCACCTTCATCGTCCTCTTCCTCCTGAGCCTCTTCTACAGTACCACCGTCACCTTGTTCAAGG\ + TGAAA ATATCCAGAACCCTGACCCTGCCGTGTACCAGCTGAGAGACTCTAAATCCAGTGACAAGTCTGTCTGCC\ + TATTCACCGATTTTGATTCTCAAACAAATGTGTCACAAAGTAAGGATTCTGATGTGTATATCACAGACAAAACTG\ + TGCTAGACATGAGGTCTATGGACTTCAAGAGCAACAGTGCTGTGGCCTGGAGCAACAAATCTGACTTTGCATGTG\ + CAAACGCCTTCAACAACAGCATTATTCCAGAAGACACCTTCTTCCCCAGCCCAGAAAGTTCCTGTGATGTCAAGC\ + TGGTCGAGAAAAGCTTTGAAACAGATACGAACCTAAACTTTCAAAACCTGTCAGTGATTGGGTTCCGAATCCTCC\ + TCCTGAAAGTGGCCGGGTTTAATCTGCTCATGACGCTGCGGCTGTGGTCCAGC" } else { - refx = mouse_ref(); - species = "mouse".to_string(); - } - let mut trac = Vec::<u8>::new(); - let mut in_trac = false; - for line in refx.lines() { - if line.starts_with(">") && (line.contains("|TRAC") || line.contains("|IGHM")) { - in_trac = true; - continue; - } - if in_trac { - trac.append(&mut line.as_bytes().to_vec()); - trac.push(b' '); - in_trac = false; - } - } - if trac.len() < K { - return "unknown".to_string(); - } + b"AGAGTCAGTCCTTCCCAAATGTCTTCCCCCTCGTCTCCTGCGAGAGCCCCCTGTCTGATAAGAATCTGGTGGCCA\ 
+ TGGGCTGCCTGGCCCGGGACTTCCTGCCCAGCACCATTTCCTTCACCTGGAACTACCAGAACAACACTGAAGTCA\ + TCCAGGGTATCAGAACCTTCCCAACACTGAGGACAGGGGGCAAGTACCTAGCCACCTCGCAGGTGTTGCTGTCTC\ + CCAAGAGCATCCTTGAAGGTTCAGATGAATACCTGGTATGCAAAATCCACTACGGAGGCAAAAACAAAGATCTGC\ + ATGTGCCCATTCCAGCTGTCGCAGAGATGAACCCCAATGTAAATGTGTTCGTCCCACCACGGGATGGCTTCTCTG\ + GCCCTGCACCACGCAAGTCTAAACTCATCTGCGAGGCCACGAACTTCACTCCAAAACCGATCACAGTATCCTGGC\ + TAAAGGATGGGAAGCTCGTGGAATCTGGCTTCACCACAGATCCGGTGACCATCGAGAACAAAGGATCCACACCCC\ + AAACCTACAAGGTCATAAGCACACTTACCATCTCTGAAATCGACTGGCTGAACCTGAATGTGTACACCTGCCGTG\ + TGGATCACAGGGGTCTCACCTTCTTGAAGAACGTGTCCTCCACATGTGCTGCCAGTCCCTCCACAGACATCCTAA\ + CCTTCACCATCCCCCCCTCCTTTGCCGACATCTTCCTCAGCAAGTCCGCTAACCTGACCTGTCTGGTCTCAAACC\ + TGGCAACCTATGAAACCCTGAATATCTCCTGGGCTTCTCAAAGTGGTGAACCACTGGAAACCAAAATTAAAATCA\ + TGGAAAGCCATCCCAATGGCACCTTCAGTGCTAAGGGTGTGGCTAGTGTTTGTGTGGAAGACTGGAATAACAGGA\ + AGGAATTTGTGTGTACTGTGACTCACAGGGATCTGCCTTCACCACAGAAGAAATTCATCTCAAAACCCAATGAGG\ + TGCACAAACATCCACCTGCTGTGTACCTGCTGCCACCAGCTCGTGAGCAACTGAACCTGAGGGAGTCAGCCACAG\ + TCACCTGCCTGGTGAAGGGCTTCTCTCCTGCAGACATCAGTGTGCAGTGGCTTCAGAGAGGGCAACTCTTGCCCC\ + AAGAGAAGTATGTGACCAGTGCCCCGATGCCAGAGCCTGGGGCCCCAGGCTTCTACTTTACCCACAGCATCCTGA\ + CTGTGACAGAGGAGGAATGGAACTCCGGAGAGACCTATACCTGTGTTGTAGGCCACGAGGCCCTGCCACACCTGG\ + TGACCGAGAGGACCGTGGACAAGTCCACTGGTAAACCCACACTGTACAATGTCTCCCTGATCATGTCTGACACAG\ + GCGGCACCTGCTAT AGAGTCAGTCCTTCCCAAATGTCTTCCCCCTCGTCTCCTGCGAGAGCCCCCTGTCTGATA\ + AGAATCTGGTGGCCATGGGCTGCCTGGCCCGGGACTTCCTGCCCAGCACCATTTCCTTCACCTGGAACTACCAGA\ + ACAACACTGAAGTCATCCAGGGTATCAGAACCTTCCCAACACTGAGGACAGGGGGCAAGTACCTAGCCACCTCGC\ + AGGTGTTGCTGTCTCCCAAGAGCATCCTTGAAGGTTCAGATGAATACCTGGTATGCAAAATCCACTACGGAGGCA\ + AAAACAAAGATCTGCATGTGCCCATTCCAGCTGTCGCAGAGATGAACCCCAATGTAAATGTGTTCGTCCCACCAC\ + GGGATGGCTTCTCTGGCCCTGCACCACGCAAGTCTAAACTCATCTGCGAGGCCACGAACTTCACTCCAAAACCGA\ + TCACAGTATCCTGGCTAAAGGATGGGAAGCTCGTGGAATCTGGCTTCACCACAGATCCGGTGACCATCGAGAACA\ + AAGGATCCACACCCCAAACCTACAAGGTCATAAGCACACTTACCATCTCTGAAATCGACTGGCTGAACCTGAATG\ + 
TGTACACCTGCCGTGTGGATCACAGGGGTCTCACCTTCTTGAAGAACGTGTCCTCCACATGTGCTGCCAGTCCCT\ + CCACAGACATCCTAACCTTCACCATCCCCCCCTCCTTTGCCGACATCTTCCTCAGCAAGTCCGCTAACCTGACCT\ + GTCTGGTCTCAAACCTGGCAACCTATGAAACCCTGAATATCTCCTGGGCTTCTCAAAGTGGTGAACCACTGGAAA\ + CCAAAATTAAAATCATGGAAAGCCATCCCAATGGCACCTTCAGTGCTAAGGGTGTGGCTAGTGTTTGTGTGGAAG\ + ACTGGAATAACAGGAAGGAATTTGTGTGTACTGTGACTCACAGGGATCTGCCTTCACCACAGAAGAAATTCATCT\ + CAAAACCCAATGAGGTGCACAAACATCCACCTGCTGTGTACCTGCTGCCACCAGCTCGTGAGCAACTGAACCTGA\ + GGGAGTCAGCCACAGTCACCTGCCTGGTGAAGGGCTTCTCTCCTGCAGACATCAGTGTGCAGTGGCTTCAGAGAG\ + GGCAACTCTTGCCCCAAGAGAAGTATGTGACCAGTGCCCCGATGCCAGAGCCTGGGGCCCCAGGCTTCTACTTTA\ + CCCACAGCATCCTGACTGTGACAGAGGAGGAATGGAACTCCGGAGAGACCTATACCTGTGTTGTAGGCCACGAGG\ + CCCTGCCACACCTGGTGACCGAGAGGACCGTGGACAAGTCCACTGAGGGGGAGGTGAATGCTGAGGAGGAAGGCT\ + TTGAGAACCTGTGGACCACTGCCTCCACCTTCATCGTCCTCTTCCTCCTGAGCCTCTTCTACAGCACCACCGTCA\ + CCCTGTTCAAGGTGAAA ACATCCAGAACCCAGAACCTGCTGTGTACCAGTTAAAAGATCCTCGGTCTCAGGACA\ + GCACCCTCTGCCTGTTCACCGACTTTGACTCCCAAATCAATGTGCCGAAAACCATGGAATCTGGAACGTTCATCA\ + CTGACAAAACTGTGCTGGACATGAAAGCTATGGATTCCAAGAGCAATGGGGCCATTGCCTGGAGCAACCAGACAA\ + GCTTCACCTGCCAAGATATCTTCAAAGAGACCAACGCCACCTACCCCAGTTCAGACGTTCCCTGTGATGCCACGT\ + TGACTGAGAAAAGCTTTGAAACAGATATGAACCTAAACTTTCAAAACCTGTCAGTTATGGGACTCCGAATCCTCC\ + TGCTGAAAGTAGCCGGATTTAACCTGCTCATGACGCTGAGGCTGTGGTCCAGT" + }; + + // Test the kmers. + for i in 0..=trac.len() - K { - let kmer = trac[i..i + K].to_vec(); + let kmer = &trac[i..i + K]; if bin_member(&kmers, &kmer) { count += 1; } @@ -64,9 +140,9 @@ pub fn species(refdata: &RefData) -> String { } reverse_sort(&mut counts); if counts[0].0 == counts[1].0 { - return "unknown".to_string(); + "unknown" } else { - return counts[0].1.clone(); + counts[0].1 } } @@ -94,24 +170,24 @@ pub fn innate_cdr3(species: &str, class: &str) -> Vec<String> { // mark_innate: for each exact subclonotype, fill in iNKT and MAIT fields. 
pub fn mark_innate(refdata: &RefData, ex: &mut Vec<ExactClonotype>) { - let species = species(&refdata); - let inkt_cdr3 = innate_cdr3(&species, "iNKT"); - let mait_cdr3 = innate_cdr3(&species, "MAIT"); - for i in 0..ex.len() { + let species = species(refdata); + let inkt_cdr3 = innate_cdr3(species, "iNKT"); + let mait_cdr3 = innate_cdr3(species, "MAIT"); + for e in ex { let (mut have_mait_tra, mut have_mait_trb) = (false, false); let (mut have_mait_tra_cdr3, mut have_mait_trb_cdr3) = (false, false); let (mut have_inkt_tra, mut have_inkt_trb) = (false, false); let (mut have_inkt_tra_cdr3, mut have_inkt_trb_cdr3) = (false, false); - for j in 0..ex[i].share.len() { - let mut vname = refdata.name[ex[i].share[j].v_ref_id].clone(); + for share in &e.share { + let mut vname = refdata.name[share.v_ref_id].as_str(); if vname.contains('*') { - vname = vname.before("*").to_string(); + vname = vname.before("*"); } - let mut jname = refdata.name[ex[i].share[j].j_ref_id].clone(); + let mut jname = refdata.name[share.j_ref_id].as_str(); if jname.contains('*') { - jname = jname.before("*").to_string(); + jname = jname.before("*"); } - if species == "human".to_string() { + if species == "human" { if vname == "TRAV10" && jname == "TRAJ18" { have_inkt_tra = true; } @@ -126,7 +202,7 @@ pub fn mark_innate(refdata: &RefData, ex: &mut Vec<ExactClonotype>) { if vname.starts_with("TRBV20") || vname.starts_with("TRBV6") { have_mait_trb = true; } - } else if species == "mouse".to_string() { + } else if species == "mouse" { if vname == "TRAV1" && jname == "TRAJ33" { have_mait_tra = true; } @@ -144,31 +220,31 @@ pub fn mark_innate(refdata: &RefData, ex: &mut Vec<ExactClonotype>) { have_inkt_trb = true; } } - if ex[i].share[j].left { - if bin_member(&inkt_cdr3, &ex[i].share[j].cdr3_aa) { + if share.left { + if bin_member(&inkt_cdr3, &share.cdr3_aa) { have_inkt_trb_cdr3 = true; } - if bin_member(&mait_cdr3, &ex[i].share[j].cdr3_aa) { + if bin_member(&mait_cdr3, &share.cdr3_aa) { 
have_mait_trb_cdr3 = true; } } else { - if bin_member(&inkt_cdr3, &ex[i].share[j].cdr3_aa) { + if bin_member(&inkt_cdr3, &share.cdr3_aa) { have_inkt_tra_cdr3 = true; } - if bin_member(&mait_cdr3, &ex[i].share[j].cdr3_aa) { + if bin_member(&mait_cdr3, &share.cdr3_aa) { have_mait_tra_cdr3 = true; } } } - for j in 0..ex[i].share.len() { - ex[i].share[j].inkt_alpha_chain_gene_match = have_inkt_tra; - ex[i].share[j].inkt_alpha_chain_junction_match = have_inkt_tra_cdr3; - ex[i].share[j].inkt_beta_chain_gene_match = have_inkt_trb; - ex[i].share[j].inkt_beta_chain_junction_match = have_inkt_trb_cdr3; - ex[i].share[j].mait_alpha_chain_gene_match = have_mait_tra; - ex[i].share[j].mait_alpha_chain_junction_match = have_mait_tra_cdr3; - ex[i].share[j].mait_beta_chain_gene_match = have_mait_trb; - ex[i].share[j].mait_beta_chain_junction_match = have_mait_trb_cdr3; + for share in e.share.iter_mut() { + share.inkt_alpha_chain_gene_match = have_inkt_tra; + share.inkt_alpha_chain_junction_match = have_inkt_tra_cdr3; + share.inkt_beta_chain_gene_match = have_inkt_trb; + share.inkt_beta_chain_junction_match = have_inkt_trb_cdr3; + share.mait_alpha_chain_gene_match = have_mait_tra; + share.mait_alpha_chain_junction_match = have_mait_tra_cdr3; + share.mait_beta_chain_gene_match = have_mait_trb; + share.mait_beta_chain_junction_match = have_mait_trb_cdr3; } } } diff --git a/enclone/src/join.rs b/enclone/src/join.rs index d27724aa1..a355ba4ab 100644 --- a/enclone/src/join.rs +++ b/enclone/src/join.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // This file provides the single function join_exacts, which computes the equivalence relation // on exact subclonotypes. @@ -7,37 +7,42 @@ // contigs that represent the sequence of the "other" allele. This does not look easy to // execute. 
-use vdj_ann::*; +use vdj_ann::{annotate, refx}; -use self::annotate::*; -use self::refx::*; -use crate::join2::*; -use crate::join_core::*; -use crate::join_utils::*; -use debruijn::dna_string::*; -use enclone_core::defs::*; +use self::annotate::print_annotations; +use self::refx::RefData; +use crate::join2::finish_join; +use crate::join_core::join_core; +use debruijn::dna_string::DnaString; +use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype, PotentialJoin}; +use enclone_proto::types::DonorReferenceItem; use equiv::EquivRel; -use io_utils::*; +use io_utils::{fwrite, fwriteln}; use itertools::Itertools; +use qd::Double; use rayon::prelude::*; -use std::cmp::*; +use std::cmp::min; use std::collections::HashMap; use std::io::Write; use std::time::Instant; -use stirling_numbers::*; -use string_utils::*; -use vector_utils::*; +use vector_utils::{bin_member, erase_if, next_diff1_2}; pub fn join_exacts( is_bcr: bool, + to_bc: &HashMap<(usize, usize), Vec<String>>, refdata: &RefData, ctl: &EncloneControl, - exact_clonotypes: &Vec<ExactClonotype>, - info: &Vec<CloneInfo>, - mut join_info: &mut Vec<(usize, usize, bool, Vec<u8>)>, + exact_clonotypes: &[ExactClonotype], + info: &[CloneInfo], + join_info: &mut Vec<(usize, usize, bool, Vec<u8>)>, + raw_joins: &mut Vec<(i32, i32)>, + sr: &[Vec<Double>], + dref: &[DonorReferenceItem], ) -> EquivRel { + // // Run special option for joining by barcode identity. + let timer1 = Instant::now(); if ctl.join_alg_opt.bcjoin { let mut eq: EquivRel = EquivRel::new(info.len() as i32); let mut bcx = Vec::<(String, usize)>::new(); // {(barcode, info_index)} @@ -59,25 +64,6 @@ pub fn join_exacts( return eq; } - // Compute to_bc, which maps (dataset_index, clonotype_id) to {barcodes}. - // This is intended as a replacement for some old code below. 
- - let timer1 = Instant::now(); - let mut to_bc = HashMap::<(usize, usize), Vec<String>>::new(); - for i in 0..exact_clonotypes.len() { - for j in 0..exact_clonotypes[i].clones.len() { - let x = &exact_clonotypes[i].clones[j][0]; - if !to_bc.contains_key(&(x.dataset_index, i)) { - to_bc.insert((x.dataset_index, i), vec![x.barcode.clone()]); - } else { - to_bc - .get_mut(&(x.dataset_index, i)) - .unwrap() - .push(x.barcode.clone()); - } - } - } - // Find potential joins. let mut i = 0; @@ -92,7 +78,15 @@ pub fn join_exacts( while i < info.len() { let mut j = i + 1; while j < info.len() { - if info[j].lens != info[i].lens { + // Note that the organization of the loop here separates info entries by their + // contig lengths. One could rejigger the code to also separate by CDR3 lengths, + // but surprisingly this doesn't help much if any. It does perturb results very + // slightly. + if ctl.join_alg_opt.basic_h.is_some() { + if info[j].lens[0] != info[i].lens[0] { + break; + } + } else if info[j].lens != info[i].lens { break; } j += 1; @@ -107,20 +101,25 @@ pub fn join_exacts( )); i = j; } - // Not sure that fixing the size of this is safe. - let sr = stirling2_ratio_table::<f64>(3000); if !ctl.silent { println!("comparing {} simple clonotypes", info.len()); } ctl.perf_stats(&timer1, "join setup"); let timer2 = Instant::now(); - results.par_iter_mut().for_each(|r| { + + let joinf = |r: &mut ( + usize, + usize, + usize, + usize, + Vec<(usize, usize, bool, Vec<u8>)>, + Vec<(usize, usize)>, + )| { let (i, j) = (r.0, r.1); let joins = &mut r.2; let errors = &mut r.3; let logplus = &mut r.4; let mut pot = Vec::<PotentialJoin>::new(); - let mut eq: EquivRel = EquivRel::new((j - i) as i32); // Main join logic. If you change par_iter_mut to iter_mut above, and run happening, // a lot of time shows up on the following line. 
If further you manually inline join_core @@ -131,13 +130,14 @@ pub fn join_exacts( is_bcr, i, j, - &ctl, - &exact_clonotypes, - &info, - &to_bc, - &sr, - &mut eq, + ctl, + exact_clonotypes, + info, + to_bc, + sr, &mut pot, + refdata, + dref, ); // Run two passes. @@ -146,39 +146,44 @@ pub fn join_exacts( // Form the equivalence relation implied by the potential joins. let mut eq: EquivRel = EquivRel::new((j - i) as i32); - for pj in 0..pot.len() { - let (k1, k2) = (pot[pj].k1, pot[pj].k2); + for pot in &pot { + let (k1, k2) = (pot.k1, pot.k2); eq.join((k1 - i) as i32, (k2 - i) as i32); } // Impose a higher bar on joins that involve only two cells. (not documented) let mut to_pot = vec![Vec::<usize>::new(); j - i]; - for pj in 0..pot.len() { - let k1 = pot[pj].k1; + for (pj, pot) in pot.iter().enumerate() { + let k1 = pot.k1; to_pot[k1 - i].push(pj); } let mut to_delete = vec![false; pot.len()]; let mut reps = Vec::<i32>::new(); eq.orbit_reps(&mut reps); - for s in 0..reps.len() { + for rep in reps { // Examine a potential orbit. let mut x = Vec::<i32>::new(); - eq.orbit(reps[s], &mut x); + eq.orbit(rep, &mut x); // Count the number of cells in the orbit. let mut ncells = 0; - for t in 0..x.len() { - let k = x[t] as usize + i; + for &t in &x { + let k = t as usize + i; let mult = exact_clonotypes[info[k].clonotype_index].ncells(); ncells += mult; } // Impose more stringent conditions if number of cells is two. - if ncells == 2 && x.len() == 2 { + if ncells == 2 + && x.len() == 2 + && ctl.join_alg_opt.basic_h.is_none() + && ctl.join_alg_opt.basic.is_none() + && !ctl.join_alg_opt.basicx + { let (k1, k2) = (x[0] as usize + i, x[1] as usize + i); let k = min(k1, k2); for pj in to_pot[k - i].iter() { @@ -200,21 +205,23 @@ pub fn join_exacts( // Analyze potential joins. 
let mut eq: EquivRel = EquivRel::new((j - i) as i32); - for pj in 0..pot.len() { - let k1 = pot[pj].k1; - let k2 = pot[pj].k2; - let nrefs = pot[pj].nrefs; - let cd = pot[pj].cd; - let diffs = pot[pj].diffs; - let bcs1 = &pot[pj].bcs1; - let bcs2 = &pot[pj].bcs2; - let shares = &pot[pj].shares; - let indeps = &pot[pj].indeps; - let shares_details = &pot[pj].shares_details; - let share_pos_v = &pot[pj].share_pos_v; - let share_pos_j = &pot[pj].share_pos_j; - let score = pot[pj].score; - let err = pot[pj].err; + for pj in pot { + let k1 = pj.k1; + let k2 = pj.k2; + let nrefs = pj.nrefs; + let cd = pj.cd; + let diffs = pj.diffs; + let bcs1 = &pj.bcs1; + let bcs2 = &pj.bcs2; + let shares = &pj.shares; + let indeps = &pj.indeps; + let shares_details = &pj.shares_details; + let share_pos_v = &pj.share_pos_v; + let share_pos_j = &pj.share_pos_j; + let score = pj.score; + let err = pj.err; + let p1 = pj.p1; + let mult = pj.mult; // Do nothing if join could have no effect on equivalence relation. 
@@ -296,39 +303,29 @@ pub fn join_exacts( ctl.origin_info.descrips[*l2] ); } - let ci1 = info[k1].clonotype_index; - let ci2 = info[k2].clonotype_index; + let (ci1, ci2) = (info[k1].clonotype_index, info[k2].clonotype_index); + let (ex1, ex2) = (&exact_clonotypes[ci1], &exact_clonotypes[ci2]); let mut mega1 = String::new(); - for j in 0..exact_clonotypes[ci1].share.len() { - let x = &exact_clonotypes[ci1].share[j]; + for j in 0..ex1.share.len() { + let x = &ex1.share[j]; if j > 0 { mega1 += ";"; } mega1 += format!("{}:{}", x.chain_type, x.cdr3_aa).as_str(); } let mut mega2 = String::new(); - for j in 0..exact_clonotypes[ci2].share.len() { - let x = &exact_clonotypes[ci2].share[j]; + for j in 0..ex2.share.len() { + let x = &ex2.share[j]; if j > 0 { mega2 += ";"; } mega2 += format!("{}:{}", x.chain_type, x.cdr3_aa).as_str(); } - fwriteln!( - log, - "{}, mult = {}", - mega1, - exact_clonotypes[info[k1].clonotype_index].ncells() - ); + fwriteln!(log, "{}, mult = {}", mega1, ex1.ncells()); if ctl.join_print_opt.show_bc { fwriteln!(log, "bcs = {}", bcs1.iter().format(" ")); } - fwriteln!( - log, - "{}, mult = {}", - mega2, - exact_clonotypes[info[k2].clonotype_index].ncells() - ); + fwriteln!(log, "{}, mult = {}", mega2, ex2.ncells()); if ctl.join_print_opt.show_bc { fwriteln!(log, "bcs = {}", bcs2.iter().format(" ")); } @@ -364,10 +361,164 @@ pub fn join_exacts( fwriteln!(log, "{}", mega2); } + // Compute heavy chain FWR1, CDR1 and CDR2 nucleotide diffs. 
+ + let nchains = info[k1].lens.len(); + let (mut fwr1_len, mut cdr1_len, mut cdr2_len, mut cdr3_len) = (0, 0, 0, 0); + let (mut fwr1_diffs, mut cdr1_diffs, mut cdr2_diffs, mut cdr3_diffs) = (0, 0, 0, 0); + for m in 0..nchains { + let (j1, j2) = (info[k1].exact_cols[m], info[k2].exact_cols[m]); + let (x1, x2) = (&ex1.share[j1], &ex2.share[j2]); + if x1.left { + if x1.cdr1_start.is_some() && x2.cdr1_start.is_some() { + let fr1_start1 = x1.fr1_start; + let fr1_stop1 = x1.cdr1_start.unwrap(); + let fr1_start2 = x2.fr1_start; + let fr1_stop2 = x2.cdr1_start.unwrap(); + let len = fr1_stop1 - fr1_start1; + if fr1_stop2 - fr1_start2 == len { + let mut diffs = 0; + for p in 0..len { + if x1.seq_del_amino[p + fr1_start1] + != x2.seq_del_amino[p + fr1_start2] + { + diffs += 1; + } + } + fwriteln!(log, "heavy chain FWR1 diffs = {}", diffs); + fwr1_len = len; + fwr1_diffs = diffs; + } + } + if x1.cdr1_start.is_some() + && x1.fr2_start.is_some() + && x2.cdr1_start.is_some() + && x2.fr2_start.is_some() + { + let cdr1_start1 = x1.cdr1_start.unwrap(); + let cdr1_stop1 = x1.fr2_start.unwrap(); + let cdr1_start2 = x2.cdr1_start.unwrap(); + let cdr1_stop2 = x2.fr2_start.unwrap(); + let len = cdr1_stop1 - cdr1_start1; + if cdr1_stop2 - cdr1_start2 == len { + let mut diffs = 0; + for p in 0..len { + if x1.seq_del_amino[p + cdr1_start1] + != x2.seq_del_amino[p + cdr1_start2] + { + diffs += 1; + } + } + fwriteln!(log, "heavy chain CDR1 diffs = {}", diffs); + cdr1_len = len; + cdr1_diffs = diffs; + } + } + if x1.cdr2_start.is_some() + && x1.fr3_start.is_some() + && x2.cdr2_start.is_some() + && x2.fr3_start.is_some() + { + let cdr2_start1 = x1.cdr2_start.unwrap(); + let cdr2_stop1 = x1.fr3_start.unwrap(); + let cdr2_start2 = x2.cdr2_start.unwrap(); + let cdr2_stop2 = x2.fr3_start.unwrap(); + let len = cdr2_stop1 - cdr2_start1; + if cdr2_stop2 - cdr2_start2 == len { + let mut diffs = 0; + for p in 0..len { + if x1.seq_del_amino[p + cdr2_start1] + != x2.seq_del_amino[p + cdr2_start2] + { + 
diffs += 1; + } + } + fwriteln!(log, "heavy chain CDR2 diffs = {}", diffs); + cdr2_len = len; + cdr2_diffs = diffs; + } + } + let cdr3_start1 = x1.cdr3_start; + let cdr3_stop1 = cdr3_start1 + x1.cdr3_aa.len() * 3; + let cdr3_start2 = x2.cdr3_start; + let cdr3_stop2 = x2.cdr3_start + x2.cdr3_aa.len() * 3; + let len = cdr3_stop1 - cdr3_start1; + if cdr3_stop2 - cdr3_start2 == len { + let mut diffs = 0; + for p in 0..len { + if x1.seq_del_amino[p + cdr3_start1] + != x2.seq_del_amino[p + cdr3_start2] + { + diffs += 1; + } + } + fwriteln!(log, "heavy chain CDR3 diffs = {}", diffs); + cdr3_len = len; + cdr3_diffs = diffs; + } + } + } + if fwr1_len > 0 { + let len = fwr1_len; + let diffs = fwr1_diffs; + fwriteln!( + log, + "nucleotide identity on heavy chain FWR1 = {:.1}%", + 100.0 * (len - diffs) as f64 / len as f64 + ); + } + if cdr1_len > 0 && cdr2_len > 0 { + let len = cdr1_len + cdr2_len; + let diffs = cdr1_diffs + cdr2_diffs; + fwriteln!( + log, + "nucleotide identity on heavy chain CDR1-2 = {:.1}%", + 100.0 * (len - diffs) as f64 / len as f64 + ); + } + if cdr2_len > 0 && cdr3_len > 0 { + let len = cdr2_len + cdr3_len; + let diffs = cdr2_diffs + cdr3_diffs; + fwriteln!( + log, + "nucleotide identity on heavy chain CDR2-3 = {:.1}%", + 100.0 * (len - diffs) as f64 / len as f64 + ); + } + if cdr3_len > 0 { + let len = cdr3_len; + let diffs = cdr3_diffs; + fwriteln!( + log, + "nucleotide identity on heavy chain CDR3 = {:.1}%", + 100.0 * (len - diffs) as f64 / len as f64 + ); + } + + // Keep going. + + fwriteln!( + log, + "p1 = prob of getting so many shares by accident = {}", + p1 + ); + fwriteln!( + log, + "computed using k = {}, d = {}, n = {}", + pj.k, + pj.d, + pj.n + ); + fwriteln!( + log, + "mult = CDR3: partial_bernoulli_sum(3 * cn, cd as usize) = {}", + mult + ); + fwriteln!(log, "score = p1 * mult = {}", p1 * mult); + // Show difference patterns. And x denotes a different base. A ▓ denotes an // equal base that differs from the reference. Otherwise - is shown. 
- let nchains = info[k1].lens.len(); for m in 0..nchains { let (tig1, tig2) = (&info[k1].tigs[m], &info[k2].tigs[m]); fwriteln!(log, "difference pattern for chain {}", m + 1); @@ -396,39 +547,44 @@ pub fn join_exacts( let nchains = info[k1].lens.len(); for m in 0..nchains { let (tig1, tig2) = (&info[k1].tigs[m], &info[k2].tigs[m]); - let (otig1, otig2) = (&info[k1].orig_tigs[m], &info[k2].orig_tigs[m]); + let otig1 = + DnaString::from_acgt_bytes(&ex1.share[info[k1].exact_cols[m]].full_seq); + let otig2 = + DnaString::from_acgt_bytes(&ex2.share[info[k2].exact_cols[m]].full_seq); if ctl.join_print_opt.seq { - fwriteln!(log, "chain {}, tig 1 = {}", m + 1, otig1.to_string()); + fwriteln!(log, "\nchain {}, tig 1 = {}", m + 1, otig1.to_string()); } if ctl.join_print_opt.ann0 { // somewhat broken for the moment, because tig1 could have - characters if !info[k1].has_del[m] { fwriteln!(log, "chain {}, tig 1", m + 1); - let t1 = DnaString::from_acgt_bytes(&tig1); - print_annotations(&t1, &refdata, &mut log, false, true, false); + let t1 = DnaString::from_acgt_bytes(tig1); + print_annotations(&t1, refdata, &mut log, false, true, false); } } if ctl.join_print_opt.ann { fwriteln!(log, "chain {}, tig 1", m + 1); - print_annotations(&otig1, &refdata, &mut log, false, true, false); + print_annotations(&otig1, refdata, &mut log, false, true, false); } if ctl.join_print_opt.seq { - fwriteln!(log, "chain {}, tig 2 = {}", m + 1, otig2.to_string()); + fwriteln!(log, "\nchain {}, tig 2 = {}", m + 1, otig2.to_string()); } if ctl.join_print_opt.ann0 { // somewhat broken for the moment, because tig2 could have - characters if !info[k2].has_del[m] { fwriteln!(log, "chain {}, tig 2", m + 1); - let t2 = DnaString::from_acgt_bytes(&tig2); - print_annotations(&t2, &refdata, &mut log, false, true, false); + let t2 = DnaString::from_acgt_bytes(tig2); + print_annotations(&t2, refdata, &mut log, false, true, false); } } if ctl.join_print_opt.ann { fwriteln!(log, "chain {}, tig 2", m + 1); - 
print_annotations(&otig2, &refdata, &mut log, false, true, false); + print_annotations(&otig2, refdata, &mut log, false, true, false); } } } + // not sure why this logging is here, so turned off for now + /* if ctl.join_print_opt.seq { for x in 0..info[k1].lens.len() { fwriteln!(log, "{}", strme(&info[k1].tigs[x])); @@ -438,9 +594,18 @@ pub fn join_exacts( fwriteln!(log, "{:?}", strme(&info[k2].tigs[x])); } } + */ logplus.push((info[k1].clonotype_index, info[k2].clonotype_index, err, log)); } - }); + }; + + results.par_iter_mut().for_each(joinf); + ctl.perf_stats(&timer2, "in main part of join"); - finish_join(&ctl, &exact_clonotypes, &info, &results, &mut join_info) + for r in &results { + for &j in &r.5 { + raw_joins.push((j.0 as i32, j.1 as i32)); + } + } + finish_join(ctl, info, &results, join_info) } diff --git a/enclone/src/join2.rs b/enclone/src/join2.rs index bf2984395..e91d160f3 100644 --- a/enclone/src/join2.rs +++ b/enclone/src/join2.rs @@ -1,74 +1,56 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // This file provides the tail end code for join.rs, plus a small function used there. -use enclone_core::defs::*; +use enclone_core::defs::{CloneInfo, EncloneControl}; use equiv::EquivRel; -use stats_utils::*; +use stats_utils::percent_ratio; use std::time::Instant; -use vector_utils::*; - -// partial_bernoulli_sum( n, k ): return sum( choose(n,i), i = 0..=k ). -// -// Beware of overflow. 
- -pub fn partial_bernoulli_sum(n: usize, k: usize) -> f64 { - assert!(n >= 1); - assert!(k <= n); - let mut sum = 0.0; - let mut choose = 1.0; - for i in 0..=k { - sum += choose; - choose *= (n - i) as f64; - choose /= (i + 1) as f64; - } - sum -} +use vector_utils::next_diff1_2; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ pub fn finish_join( ctl: &EncloneControl, - exact_clonotypes: &Vec<ExactClonotype>, - info: &Vec<CloneInfo>, - results: &Vec<( + info: &[CloneInfo], + results: &[( usize, usize, usize, usize, Vec<(usize, usize, bool, Vec<u8>)>, Vec<(usize, usize)>, - )>, + )], join_info: &mut Vec<(usize, usize, bool, Vec<u8>)>, ) -> EquivRel { // Tally results. let (mut joins, mut errors) = (0, 0); let timer3 = Instant::now(); - for l in 0..results.len() { - joins += results[l].2; - errors += results[l].3; - for i in 0..results[l].4.len() { - let u1 = results[l].4[i].0; - let u2 = results[l].4[i].1; - let err = results[l].4[i].2; - let log = results[l].4[i].3.clone(); + for r in results { + joins += r.2; + errors += r.3; + for i in &r.4 { + let u1 = i.0; + let u2 = i.1; + let err = i.2; + let log = i.3.clone(); join_info.push((u1, u2, err, log)); } } if !ctl.silent { - println!("{} joins", joins); + println!("{joins} joins"); if ctl.origin_info.donors > 1 { - println!("{} errors", errors); + println!("{errors} errors"); } } // Make equivalence relation. let mut eq: EquivRel = EquivRel::new(info.len() as i32); - for l in 0..results.len() { - for j in 0..results[l].5.len() { - eq.join(results[l].5[j].0 as i32, results[l].5[j].1 as i32); + for r in results { + for j in &r.5 { + eq.join(j.0 as i32, j.1 as i32); } } @@ -76,11 +58,10 @@ pub fn finish_join( // clonotypes into two-chain clonotypes. 
let mut ox = Vec::<(usize, i32)>::new(); - for i in 0..info.len() { - let x: &CloneInfo = &info[i]; + for (i, x) in info.iter().enumerate() { ox.push((x.clonotype_id, eq.class_id(i as i32))); } - ox.sort(); + ox.sort_unstable(); let mut i = 0; while i < ox.len() { let j = next_diff1_2(&ox, i as i32) as usize; @@ -90,68 +71,10 @@ pub fn finish_join( i = j; } - // Merge onesies where totally unambiguous. Possibly inefficient and should optimize. - - if ctl.join_alg_opt.merge_onesies { - let mut ncells_total = 0; - for i in 0..exact_clonotypes.len() { - ncells_total += exact_clonotypes[i].ncells(); - } - let mut onesies = Vec::<usize>::new(); - for i in 0..info.len() { - if info[i].tigs.len() == 1 { - onesies.push(i); - } - } - let mut alltigs2 = Vec::<(Vec<u8>, usize)>::new(); - for i in 0..info.len() { - if info[i].tigs.len() >= 2 { - for j in 0..info[i].tigs.len() { - alltigs2.push((info[i].tigs[j].clone(), i)); - } - } - } - alltigs2.sort(); - for x in onesies.iter() { - let low = lower_bound1_2(&alltigs2, &info[*x].tigs[0]); - let high = upper_bound1_2(&alltigs2, &info[*x].tigs[0]); - let mut ms = Vec::<usize>::new(); - for m in low..high { - if alltigs2[m as usize].0 == info[*x].tigs[0] { - ms.push(m as usize); - } - } - let mut ok = ms.len() > 0; - let mut exacts = Vec::<usize>::new(); - for j in 0..ms.len() { - if eq.class_id(alltigs2[ms[j]].1 as i32) != eq.class_id(alltigs2[ms[0]].1 as i32) { - ok = false; - } - let mut o = Vec::<i32>::new(); - eq.orbit(alltigs2[ms[j]].1 as i32, &mut o); - for z in o.iter() { - exacts.push(info[*z as usize].clonotype_index); - } - } - unique_sort(&mut exacts); - let left = exact_clonotypes[info[*x].clonotype_index].share[0].left; - let mut ncells = exact_clonotypes[info[*x].clonotype_index].ncells(); - for j in 0..exacts.len() { - ncells += exact_clonotypes[exacts[j]].ncells(); - } - if ncells == 2 && ncells_total > 1000 && !left { - ok = false; - } - if ok { - eq.join(*x as i32, alltigs2[ms[0]].1 as i32); - } - } - } - // 
Tally whitelist contamination. // WARNING: THIS ONLY WORKS IF YOU RUN WITH CLONES=1 AND NO OTHER FILTERS. - let mut white = ctl.clono_filt_opt.whitef; + let mut white = ctl.clono_filt_opt_def.whitef; for j in 0..ctl.clono_print_opt.cvars.len() { if ctl.clono_print_opt.cvars[j] == "white" { white = true; @@ -160,12 +83,12 @@ pub fn finish_join( if white { let mut bads = 0; let mut denom = 0; - for i in 0..results.len() { - bads += results[i].2; - denom += results[i].3; + for r in results { + bads += r.2; + denom += r.3; } let bad_rate = percent_ratio(bads, denom); - println!("whitelist contamination rate = {:.2}%", bad_rate); + println!("whitelist contamination rate = {bad_rate:.2}%"); } ctl.perf_stats(&timer3, "in tail of join"); eq diff --git a/enclone/src/join_core.rs b/enclone/src/join_core.rs index 1d11e736d..6eab015b5 100644 --- a/enclone/src/join_core.rs +++ b/enclone/src/join_core.rs @@ -1,27 +1,27 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
-use crate::join2::*; -use crate::join_utils::*; -use debruijn::{dna_string::*, Mer}; -use enclone_core::defs::*; +use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype, PotentialJoin}; +use enclone_core::join_one::join_one; +use enclone_proto::types::DonorReferenceItem; use equiv::EquivRel; -use stats_utils::*; +use qd::Double; use std::collections::HashMap; -use stirling_numbers::*; -use vector_utils::*; +use vdj_ann::refx::RefData; -pub fn join_core( +pub fn join_core<'a>( is_bcr: bool, i: usize, j: usize, ctl: &EncloneControl, - exact_clonotypes: &Vec<ExactClonotype>, - info: &Vec<CloneInfo>, - to_bc: &HashMap<(usize, usize), Vec<String>>, - sr: &Vec<Vec<f64>>, - eq: &mut EquivRel, - pot: &mut Vec<PotentialJoin>, + exact_clonotypes: &[ExactClonotype], + info: &[CloneInfo], + to_bc: &'a HashMap<(usize, usize), Vec<String>>, + sr: &[Vec<Double>], + pot: &mut Vec<PotentialJoin<'a>>, + refdata: &RefData, + dref: &[DonorReferenceItem], ) { + let mut eq: EquivRel = EquivRel::new((j - i) as i32); for k1 in i..j { for k2 in k1 + 1..j { // Do nothing if join could have no effect on equivalence relation. @@ -33,318 +33,21 @@ pub fn join_core( if !ctl.force && (eq.class_id((k1 - i) as i32) == eq.class_id((k2 - i) as i32)) { continue; } - - // Do not merge onesies or foursies with anything. Deferred until later. - // Note that perhaps some foursies should be declared doublets and deleted. - // Note onesies merging above is turned off so this appears to be moot. - - let (clono1, clono2) = (info[k1].clonotype_id, info[k2].clonotype_id); - let chains1 = exact_clonotypes[clono1].share.len(); - let chains2 = exact_clonotypes[clono2].share.len(); - if chains1 < 2 || chains1 > 3 || chains2 < 2 || chains2 > 3 { - continue; - } - // NEED FOR THIS SEEMS LIKE A BUG: - if info[k1].vs.len() == 1 || info[k2].vs.len() == 4 { - continue; - } - if info[k1].vs.len() > 2 { - continue; - } - - // Require that CDR3s have the same length. Ugly. 
- // First part should be a tautology. - - let (x1, x2) = (&info[k1].cdr3s, &info[k2].cdr3s); - if x1.len() != x2.len() { - continue; - } - let mut unequal = false; - for i in 0..x1.len() { - if x1[i].len() != x2[i].len() { - unequal = true; - } - } - if unequal { - continue; - } - - // Compute number of differences. - - let mut diffs = 0 as usize; - for x in 0..info[k1].lens.len() { - if !info[k1].has_del[x] && !info[k2].has_del[x] { - // A great deal of time is spent in the call to ndiffs. Notes on this: - // 1. It is slower than if the computation is done outside - // the ndiffs function. This is mysterious but must have something to - // do with the storage of the 256-byte lookup table. - // 2. Adding #[inline(always)] in front of the ndiffs function definition - // doesn't help. - // 3. Adding a bounds test for diffs > ctl.heur.max_diffs inside the ndiffs - // function doesn't help, whether placed in the inner loop or the other - // loop. - diffs += ndiffs(&info[k1].tigsp[x], &info[k2].tigsp[x]); - } else { - for j in 0..info[k1].tigs[x].len() { - if info[k1].tigs[x][j] != info[k2].tigs[x][j] { - diffs += 1; - } - } - } - } - - // Another test for acceptable join. - - if diffs > ctl.heur.max_diffs { - continue; - } - if !is_bcr && diffs > 5 { - continue; - } - - // Unless MIX_DONORS specified, do not join across donors. - // And test for error. - // - // WARNING! There are actually two cases: where an individual exact subclonotype - // itself crosses donors, and where we cross donors in making a join. Note that - // the former case is most improbable, unless there is cross-sample contamination. - // And if that did happen, the output would be confusing and might have a greatly - // exaggerated number of fails. 
- - let (mut donors1, mut donors2) = (Vec::<usize>::new(), Vec::<usize>::new()); - let ex1 = &exact_clonotypes[info[k1].clonotype_index]; - let ex2 = &exact_clonotypes[info[k2].clonotype_index]; - for j in 0..ex1.clones.len() { - if ex1.clones[j][0].donor_index.is_some() { - donors1.push(ex1.clones[j][0].donor_index.unwrap()); - } - } - for j in 0..ex2.clones.len() { - if ex2.clones[j][0].donor_index.is_some() { - donors2.push(ex2.clones[j][0].donor_index.unwrap()); - } - } - unique_sort(&mut donors1); - unique_sort(&mut donors2); - if !ctl.clono_filt_opt.donor { - if donors1.len() > 0 && donors2.len() > 0 && donors1 != donors2 { - continue; - } - } - let err = donors1 != donors2 || donors1.len() != 1 || donors2.len() != 1; - - // Analyze the two clonotypes versus the reference. First traverse the reference - // sequences. Either we use the references for k1 or the references for k2, but - // these are nearly always the same. - - let mut nrefs = 1; - for m in 0..2 { - if info[k1].vs[m] != info[k2].vs[m] || info[k1].js[m] != info[k2].js[m] { - nrefs = 2; - } - } - let mut fail = false; - let mut shares = vec![0; nrefs]; // shared mutations from reference - let mut indeps = vec![0; nrefs]; // independent mutations from reference - let mut total = vec![vec![0; 2]; nrefs]; // total differences from reference - let mut shares_details = vec![vec![0; 4]; nrefs]; - let mut share_pos_v = vec![Vec::<usize>::new(); 2]; - let mut share_pos_j = vec![Vec::<usize>::new(); 2]; - for u in 0..nrefs { - let k: usize; - if u == 0 { - k = k1; - } else { - k = k2; - } - - // Traverse the chains in the clonotype. - - let nchains = info[k1].lens.len(); - for m in 0..nchains { - let (tig1, tig2) = (&info[k1].tigs[m], &info[k2].tigs[m]); - - // Traverse the two segments (V and J). 
- - for si in 0..2 { - let seg: &DnaString; - if si == 0 { - seg = &info[k].vs[m]; - } else { - seg = &info[k].js[m]; - } - let mut ref_trim = ctl.heur.ref_v_trim; - if si == 1 { - ref_trim = ctl.heur.ref_j_trim; - } - for p in 0..seg.len() - ref_trim { - let (t1, t2); - let r; - if si == 0 { - // Ugly bailout arising very rarely if the two reference - // sequences have different lengths. - if p >= tig1.len() || p >= tig2.len() { - fail = true; - break; - } - t1 = tig1[p]; - t2 = tig2[p]; - // r = seg.get(p); - let rx = seg.get(p); - if rx == 0 { - r = b'A'; - } else if rx == 1 { - r = b'C'; - } else if rx == 2 { - r = b'G'; - } else { - r = b'T'; - } - } else { - t1 = tig1[tig1.len() - p - 1]; - t2 = tig2[tig2.len() - p - 1]; - // r = seg.get( seg.len() - p - 1 ); - let rx = seg.get(seg.len() - p - 1); - if rx == 0 { - r = b'A'; - } else if rx == 1 { - r = b'C'; - } else if rx == 2 { - r = b'G'; - } else { - r = b'T'; - } - } - if t1 == t2 && t1 != r { - shares[u] += 1; - shares_details[u][2 * m + si] += 1; - if si == 0 { - share_pos_v[m].push(p); - } else { - share_pos_j[m].push(p); - } - } else if t1 == r && t2 != r { - indeps[u] += 1; - } else if t2 == r && t1 != r { - indeps[u] += 1; - } else if t1 != r && t2 != r { - indeps[u] += 2; - } - if t1 != r { - total[u][0] += 1; - } - if t2 != r { - total[u][1] += 1; - } - } - } - } - } - - // Don't allow different references if one is strongly favored. - // (not documented) - - if nrefs == 2 { - for m in 0..2 { - if abs_diff(total[0][m], total[1][m]) > ctl.heur.max_degradation { - fail = true; - } - } - } - if fail { - continue; - } - - // Compute junction diffs. Ugly. - - let mut cd = 0 as isize; - for l in 0..x1.len() { - for m in 0..x1[l].len() { - if x1[l].as_bytes()[m] != x2[l].as_bytes()[m] { - cd += 1; - } - } - } - - // Cap CDR3 diffs. - - if cd > ctl.join_alg_opt.max_cdr3_diffs as isize || (!is_bcr && cd > 0) { - continue; - } - - // Another test for acceptable join. 
(not fully documented) - - let min_shares = shares.iter().min().unwrap(); - let min_indeps = indeps.iter().min().unwrap(); - - // Reject if barcode overlap. (not documented) - - let (mut bcs1, mut bcs2) = (Vec::<String>::new(), Vec::<String>::new()); - for origin in info[k1].origin.iter() { - bcs1.append(&mut to_bc[&(*origin, info[k1].clonotype_id)].clone()); - } - for origin in info[k2].origin.iter() { - bcs2.append(&mut to_bc[&(*origin, info[k2].clonotype_id)].clone()); - } - unique_sort(&mut bcs1); - unique_sort(&mut bcs2); - if meet(&bcs1, &bcs2) { - continue; - } - - // Estimate the probability p1 that drawing k = min_indeps + 2 * min_shares - // objects from n = 3 * (sum of VJ contig lengths) yields d = min_shares or - // more duplicates. - - let n = 3 * (info[k1].tigs[0].len() + info[k1].tigs[1].len()); - let k = *min_indeps + 2 * *min_shares; - let d = *min_shares; - let p1 = - p_at_most_m_distinct_in_sample_of_x_from_n((k - d) as usize, k as usize, n, &sr); - assert!(!p1.is_infinite()); // TODO: IS THIS SAFE? - - // Multiply by the number of DNA sequences that differ from the given CDR3 - // sequences on <= cd bases. This is sum( choose(3cn, m), m = 0..=cd ). - - let mut cn = 0; - for l in 0..x1.len() { - cn += x1[l].len(); - } - let mult = partial_bernoulli_sum(3 * cn, cd as usize); - assert!(!mult.is_infinite()); // TODO: IS THIS SAFE? - let score = p1 * mult; - - // Threshold on score. - - if score > ctl.join_alg_opt.max_score { - continue; - } - - // Save potential joins. Note that this jacks up memory usage significantly, - // so it would likely be more efficient to duplicate some of the computations - // during the analysis phase. 
- - eq.join((k1 - i) as i32, (k2 - i) as i32); - if !ctl.join_print_opt.show_bc { - bcs1.clear(); - bcs2.clear(); + if join_one( + is_bcr, + k1, + k2, + ctl, + exact_clonotypes, + info, + to_bc, + sr, + pot, + refdata, + dref, + ) { + eq.join((k1 - i) as i32, (k2 - i) as i32); } - pot.push(PotentialJoin { - k1: k1, - k2: k2, - nrefs: nrefs, - cd: cd, - diffs: diffs, - bcs1: bcs1, - bcs2: bcs2, - shares: shares, - indeps: indeps, - shares_details: shares_details, - share_pos_v: share_pos_v, - share_pos_j: share_pos_j, - score: score, - err: err, - }); } } } diff --git a/enclone/src/join_utils.rs b/enclone/src/join_utils.rs deleted file mode 100644 index 2d2d49a2a..000000000 --- a/enclone/src/join_utils.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Potential join structure. - -pub struct PotentialJoin { - pub k1: usize, - pub k2: usize, - pub nrefs: usize, - pub cd: isize, - pub diffs: usize, - pub bcs1: Vec<String>, - pub bcs2: Vec<String>, - pub shares: Vec<isize>, - pub indeps: Vec<isize>, - pub shares_details: Vec<Vec<usize>>, - pub share_pos_v: Vec<Vec<usize>>, - pub share_pos_j: Vec<Vec<usize>>, - pub score: f64, - pub err: bool, -} diff --git a/enclone/src/lib.rs b/enclone/src/lib.rs index 5081e8958..55e07e0a9 100644 --- a/enclone/src/lib.rs +++ b/enclone/src/lib.rs @@ -1,26 +1,14 @@ -// Copyright (c) 2020 10x Genomics, Inc. All rights reserved. - -extern crate enclone_core; +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. 
pub mod allele; -pub mod explore; pub mod graph_filter; -pub mod html; pub mod info; pub mod innate; pub mod join; pub mod join2; pub mod join_core; -pub mod join_utils; -pub mod load_gex; pub mod misc1; pub mod misc2; pub mod misc3; -pub mod proc_args; -pub mod proc_args2; -pub mod proc_args3; -pub mod proc_args_check; -pub mod read_json; -pub mod run_test; pub mod secret; pub mod subset_json; diff --git a/enclone/src/load_gex.rs b/enclone/src/load_gex.rs deleted file mode 100644 index b8633455c..000000000 --- a/enclone/src/load_gex.rs +++ /dev/null @@ -1,594 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. -// -// Load gene expression and feature barcoding (antibody, antigen) data from -// Cell Ranger outputs. - -use enclone_core::defs::*; -use hdf5::types::FixedAscii; -use hdf5::Dataset; -use io_utils::*; -use mirror_sparse_matrix::*; -use rayon::prelude::*; -use std::{ - collections::HashMap, - fs::{remove_file, File}, - io::{BufRead, BufReader}, - time::Instant, -}; -use string_utils::*; -use vector_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn load_gex( - ctl: &mut EncloneControl, - gex_features: &mut Vec<Vec<String>>, - gex_barcodes: &mut Vec<Vec<String>>, - gex_matrices: &mut Vec<MirrorSparseMatrix>, - cluster: &mut Vec<HashMap<String, usize>>, - cell_type: &mut Vec<HashMap<String, String>>, - cell_type_specified: &mut Vec<bool>, - pca: &mut Vec<HashMap<String, Vec<f64>>>, - gex_mults: &mut Vec<f64>, - fb_mults: &mut Vec<f64>, - gex_cell_barcodes: &mut Vec<Vec<String>>, - have_gex: &mut bool, - have_fb: &mut bool, -) { - let t = Instant::now(); - let mut results = Vec::<( - usize, - Vec<String>, - Vec<String>, - MirrorSparseMatrix, - Option<f64>, - Option<f64>, - Vec<String>, - HashMap<String, usize>, - HashMap<String, String>, - HashMap<String, Vec<f64>>, - bool, - )>::new(); - for i in 0..ctl.origin_info.gex_path.len() { - results.push(( - i, - 
Vec::<String>::new(), - Vec::<String>::new(), - MirrorSparseMatrix::new(), - None, - None, - Vec::<String>::new(), - HashMap::<String, usize>::new(), - HashMap::<String, String>::new(), - HashMap::<String, Vec<f64>>::new(), - false, - )); - } - let gex_outs = &ctl.origin_info.gex_path; - // Here and in other places, where an error message can be printed in a parallel loop, it - // would be better if the thread could use a global lock to prevent multiple threads from - // issuing an error message. - // - // A lot of time is spent in this parallel loop. Some things are known about this: - // 1. When running it over a large number of datasets, the observed load average is ~2, so - // somehow the parallelism is not working. - // 2. We know where the time is spent in the loop, and this is marked below. - results.par_iter_mut().for_each(|r| { - let i = r.0; - if gex_outs[i].len() > 0 { - // First define the path where the GEX files should live, and make sure that the path - // exists. - - let root = gex_outs[i].clone(); - let mut outs = root.clone(); - if root.ends_with("/outs") && path_exists(&root) { - outs = root.clone(); - } else if root.ends_with("/outs") { - outs = root.before("/outs").to_string(); - if !path_exists(&outs) { - eprintln!( - "\nThe directory\n{}\ndoes not exist. Something must be amiss with \ - the arguments to PRE and/or GEX and/or META.\n", - outs - ); - std::process::exit(1); - } - } - - // Define the file paths and test for their existence. - - let mut h5_path = format!("{}/raw_feature_bc_matrix.h5", outs); - let h5_path_alt = format!("{}/raw_gene_bc_matrices_h5.h5", outs); - if !path_exists(&h5_path) && !path_exists(&h5_path_alt) { - eprintln!( - "\nThe file raw_feature_bc_matrix.h5 is not in the directory\n{}\n\ - and neither is the older-named version raw_gene_bc_matrices_h5.h5. 
Perhaps \ - something\nis amiss with the arguments to PRE and/or GEX and/or META.\n", - outs - ); - std::process::exit(1); - } - if !path_exists(&h5_path) { - h5_path = h5_path_alt; - } - let types_file = format!("{}/analysis_csv/celltypes/celltypes.csv", outs); - let mut pca_file = format!("{}/analysis_csv/pca/10_components/projection.csv", outs); - if !path_exists(&pca_file) { - pca_file = format!("{}/analysis/pca/10_components/projection.csv", outs); - } - let mut cluster_file = - format!("{}/analysis_csv/clustering/graphclust/clusters.csv", outs); - if !path_exists(&cluster_file) { - cluster_file = format!("{}/analysis/clustering/graphclust/clusters.csv", outs); - } - let bin_file = format!("{}/feature_barcode_matrix.bin", outs); - for f in [pca_file.clone(), cluster_file.clone()].iter() { - if !path_exists(&f) { - eprintln!( - "\nThe file\n{}\ndoes not exist. \ - Perhaps one of your directories is missing some stuff.\n", - f - ); - std::process::exit(1); - } - } - let csv1 = format!("{}/metrics_summary.csv", outs); - let csv2 = format!("{}/metrics_summary_csv.csv", outs); - if !path_exists(&csv1) && !path_exists(&csv2) { - eprintln!( - "\nSomething wrong with GEX or META argument:\ncan't find the file \ - metrics_summary.csv or metrics_summary_csv.csv in the directory\n\ - {}", - outs - ); - std::process::exit(1); - } - let mut csv = csv1.clone(); - if !path_exists(&csv1) { - csv = csv2.clone(); - } - - // Determine the state of affairs of the bin file. We end with one of three outcomes: - // - // 1. We're not using the bin file at all. - // 2. We are reading the bin file. - // 3. We are writing the bin file. - - let mut bin_file_state = 1; - if !ctl.gen_opt.force_h5 { - let bin_file_exists = path_exists(&bin_file); - if !bin_file_exists { - if !ctl.gen_opt.h5 { - bin_file_state = 3; - } - } else { - // THE FOLLOWING LINE HAS BEEN OBSERVED TO FAIL SPORADICALLY. THIS HAS - // HAPPENED AT LEAST ONCE. 
THE FAIL WAS IN - // binary_read_to_ref::<u32>(&mut ff, &mut x[0], 11).unwrap(); - // WHERE THE unwrap() FAILED ON - // UnexpectedEof, error: "failed to fill whole buffer". - - let v = get_code_version_from_file(&bin_file); - if v == 1 { - bin_file_state = 2; - } else { - bin_file_state = 3; - } - } - } - - // If we need to write feature_barcode_matrix.bin, make sure that's possible, before - // spending a lot of time reading other stuff. - - if bin_file_state == 3 { - let f = File::create(&bin_file); - if !f.is_ok() { - eprintln!( - "\nenclone is trying to create the path\n{}\n\ - but that path cannot be created. This path is for the binary GEX \ - matrix file that enclone can read\n\ - faster than the hdf5 file. Your options are:\n\ - 1. Make that location writable (or fix the path, if it's wrong).\n\ - 2. Find a new location where you can write.\n\ - 3. Don't specify NH5 (if you specified it).\n", - bin_file - ); - std::process::exit(1); - } - remove_file(&bin_file).unwrap(); - } - - // Read cell types. - - if path_exists(&types_file) { - let f = open_for_read![&types_file]; - let mut count = 0; - for line in f.lines() { - count += 1; - if count == 1 { - continue; - } - let s = line.unwrap(); - let barcode = s.before(","); - let cell_type = s.after(","); - r.8.insert(barcode.to_string(), cell_type.to_string()); - r.10 = true; - } - } else if ctl.gen_opt.mark_stats - || ctl.gen_opt.mark_stats2 - || ctl.clono_filt_opt.marked_b - { - eprintln!( - "\nIf you use MARK_STATS or MARK_STATS2 or MARKED_B, celltypes.csv has to \ - exist, and this file\n{}\ndoes not exist.\n", - types_file - ); - std::process::exit(1); - } - - // Read PCA file. - - let f = open_for_read![&pca_file]; - let mut count = 0; - for line in f.lines() { - count += 1; - if count == 1 { - continue; - } - let s = line.unwrap(); - let barcode = s.before(","); - let x = s.after(",").split(',').collect::<Vec<&str>>(); - // This assert is turned off because in fact there are not always 10 components. 
- // assert_eq!(x.len(), 10); - let mut y = Vec::<f64>::new(); - for i in 0..x.len() { - y.push(x[i].force_f64()); - } - r.9.insert(barcode.to_string(), y); - } - - // Read graph clusters, and also get the cell barcodes from that. - - let f = open_for_read![&cluster_file]; - let mut count = 0; - for line in f.lines() { - count += 1; - if count == 1 { - continue; - } - let s = line.unwrap(); - let (barcode, cluster) = (s.before(","), s.after(",").force_usize()); - r.7.insert(barcode.to_string(), cluster); - r.6.push(barcode.to_string()); - } - - // Get the multipliers gene and feature barcode counts. - - let mut gene_mult = None; - let f = open_for_read![&csv]; - let mut line_no = 0; - let mut rpc_field = None; - let mut rpc = None; - let mut fbrpc_field = None; - let mut fbrpc = None; - for line in f.lines() { - let s = line.unwrap(); - let fields = parse_csv(&s); - line_no += 1; - if line_no == 1 { - for i in 0..fields.len() { - if fields[i] == "Mean Reads per Cell" { - rpc_field = Some(i); - } else if fields[i] == "Antibody: Mean Reads per Cell" { - fbrpc_field = Some(i); - } - } - } else if line_no == 2 { - if rpc_field.is_some() && rpc_field.unwrap() >= fields.len() { - eprintln!( - "\nSomething appears to be wrong with the file\n{}:\n\ - the second line doesn't have enough fields.\n", - csv - ); - std::process::exit(1); - } else if rpc_field.is_some() { - let mut rpcx = fields[rpc_field.unwrap()].to_string(); - rpcx = rpcx.replace(",", ""); - rpcx = rpcx.replace("\"", ""); - if !rpcx.parse::<usize>().is_ok() { - eprintln!( - "\nSomething appears to be wrong with the file\n{}:\n\ - the Mean Reads per Cell field isn't an integer.\n", - csv - ); - std::process::exit(1); - } - rpc = Some(rpcx.force_usize() as isize); - } - if fbrpc_field.is_some() && fbrpc_field.unwrap() >= fields.len() { - eprintln!( - "\nSomething appears to be wrong with the file\n{}:\n\ - the second line doesn't have enough fields.\n", - csv - ); - std::process::exit(1); - } else if 
fbrpc_field.is_some() { - let mut fbrpcx = fields[fbrpc_field.unwrap()].to_string(); - fbrpcx = fbrpcx.replace(",", ""); - fbrpcx = fbrpcx.replace("\"", ""); - if !fbrpcx.parse::<usize>().is_ok() { - eprintln!( - "\nSomething appears to be wrong with the file\n{}:\n\ - the Antibody: Mean Reads per Cell field isn't an integer.\n", - csv - ); - std::process::exit(1); - } - fbrpc = Some(fbrpcx.force_usize() as isize); - } - } - } - if rpc.is_some() { - const RPC_EXPECTED: f64 = 20_000.0; - gene_mult = Some(RPC_EXPECTED / rpc.unwrap() as f64); - } - let mut fb_mult = None; - if fbrpc.is_some() { - const FB_RPC_EXPECTED: f64 = 5_000.0; - fb_mult = Some(FB_RPC_EXPECTED / fbrpc.unwrap() as f64); - } - r.4 = gene_mult; - r.5 = fb_mult; - - // Read the binary matrix file if appropriate. - - if bin_file_state == 2 { - read_from_file(&mut r.3, &bin_file); - let (n, k) = (r.3.nrows(), r.3.ncols()); - for i in 0..n { - r.2.push(r.3.row_label(i)); - } - for j in 0..k { - r.1.push(r.3.col_label(j)); - } - - // Otherwise we have to get stuff from the h5 file. - } else { - // Read barcodes from the h5 file. - - let h = hdf5::File::open(&h5_path).unwrap(); - let barcode_loc = h.dataset("matrix/barcodes").unwrap(); - let barcodes: Vec<FixedAscii<[u8; 18]>> = - barcode_loc.as_reader().read_raw().unwrap(); - for i in 0..barcodes.len() { - r.2.push(barcodes[i].to_string()); - } - - // Read features from the h5 file. 
- - let feature_id_loc = h.dataset("matrix/features/id").unwrap(); - let feature_ids: Vec<FixedAscii<[u8; 256]>> = - feature_id_loc.as_reader().read_raw().unwrap(); - let feature_name_loc = h.dataset("matrix/features/name").unwrap(); - let feature_names: Vec<FixedAscii<[u8; 256]>> = - feature_name_loc.as_reader().read_raw().unwrap(); - let feature_type_loc = h.dataset("matrix/features/feature_type").unwrap(); - let feature_types: Vec<FixedAscii<[u8; 256]>> = - feature_type_loc.as_reader().read_raw().unwrap(); - for i in 0..feature_ids.len() { - r.1.push(format!( - "{}\t{}\t{}", - feature_ids[i], feature_names[i], feature_types[i] - )); - } - - // If appropriate, construct the binary matrix file from the h5 file. - - if bin_file_state == 3 { - let data_loc = h.dataset("matrix/data").unwrap(); - let data: Vec<u32> = data_loc.as_reader().read_raw().unwrap(); - let ind_loc = h.dataset("matrix/indices").unwrap(); - let ind: Vec<u32> = ind_loc.as_reader().read_raw().unwrap(); - let ind_ptr_loc = h.dataset("matrix/indptr").unwrap(); - let ind_ptr: Vec<u32> = ind_ptr_loc.as_reader().read_raw().unwrap(); - let mut matrix = vec![Vec::<(i32, i32)>::new(); r.2.len()]; - for i in 0..matrix.len() { - for j in ind_ptr[i]..ind_ptr[i + 1] { - matrix[i].push((ind[j as usize] as i32, data[j as usize] as i32)); - } - } - r.3 = MirrorSparseMatrix::build_from_vec(&matrix, &r.2, &r.1); - write_to_file(&r.3, &bin_file); - } - } - } - unique_sort(&mut r.6); - }); - ctl.perf_stats(&t, "in load_gex main loop"); - - // Set have_gex and have_fb. - - let t = Instant::now(); - for i in 0..results.len() { - if results[i].4.is_some() { - *have_gex = true; - } - if results[i].5.is_some() { - *have_fb = true; - } - } - - // Save results. This avoids cloning, which saves a lot of time. 
- - let n = results.len(); - for (_i, (_x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10)) in - results.into_iter().take(n).enumerate() - { - gex_features.push(x1); - gex_barcodes.push(x2); - gex_matrices.push(x3); - let mut gex_mult = 1.0; - if x4.is_some() { - gex_mult = x4.unwrap(); - } - gex_mults.push(gex_mult); - let mut fb_mult = 1.0; - if x5.is_some() { - fb_mult = x5.unwrap(); - } - fb_mults.push(fb_mult); - gex_cell_barcodes.push(x6); - cluster.push(x7); - cell_type.push(x8); - pca.push(x9); - cell_type_specified.push(x10); - } - ctl.perf_stats(&t, "in load_gex tail"); -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Get gene expression and feature barcoding counts. - -pub fn get_gex_info(mut ctl: &mut EncloneControl) -> GexInfo { - let mut gex_features = Vec::<Vec<String>>::new(); - let mut gex_barcodes = Vec::<Vec<String>>::new(); - let mut gex_matrices = Vec::<MirrorSparseMatrix>::new(); - let mut cluster = Vec::<HashMap<String, usize>>::new(); - let mut cell_type = Vec::<HashMap<String, String>>::new(); - let mut cell_type_specified = Vec::<bool>::new(); - let mut pca = Vec::<HashMap<String, Vec<f64>>>::new(); - let mut gex_mults = Vec::<f64>::new(); - let mut fb_mults = Vec::<f64>::new(); - let mut gex_cell_barcodes = Vec::<Vec<String>>::new(); - let mut have_gex = false; - let mut have_fb = false; - load_gex( - &mut ctl, - &mut gex_features, - &mut gex_barcodes, - &mut gex_matrices, - &mut cluster, - &mut cell_type, - &mut cell_type_specified, - &mut pca, - &mut gex_mults, - &mut fb_mults, - &mut gex_cell_barcodes, - &mut have_gex, - &mut have_fb, - ); - let t = Instant::now(); - if ctl.gen_opt.gene_scan_test.is_some() && !ctl.gen_opt.accept_inconsistent { - let mut allf = gex_features.clone(); - unique_sort(&mut allf); - if allf.len() != 1 { - eprintln!( - "\nCurrently, SCAN requires that all datasets have identical \ - features, and they do not." 
- ); - eprintln!( - "There are {} datasets and {} feature sets after removal of \ - duplicates.", - gex_features.len(), - allf.len() - ); - eprintln!("Classification of features sets:\n"); - for i in 0..gex_features.len() { - let p = bin_position(&allf, &gex_features[i]); - eprintln!("{} ==> {}", ctl.origin_info.dataset_id[i], p); - } - eprintln!(""); - std::process::exit(1); - } - } - let mut h5_data = Vec::<Option<Dataset>>::new(); - let mut h5_indices = Vec::<Option<Dataset>>::new(); - let mut h5_indptr = Vec::<Vec<u32>>::new(); - if ctl.gen_opt.h5 { - let gex_outs = &ctl.origin_info.gex_path; - for i in 0..ctl.origin_info.dataset_path.len() { - let bin_file = format!("{}/feature_barcode_matrix.bin", gex_outs[i]); - if gex_outs[i].len() > 0 && !(path_exists(&bin_file) && !ctl.gen_opt.force_h5) { - let mut f = format!("{}/raw_feature_bc_matrix.h5", gex_outs[i]); - if !path_exists(&f) { - f = format!("{}/raw_gene_bc_matrices_h5.h5", gex_outs[i]); - } - if !path_exists(&f) { - eprintln!("\nThere's a missing input file:\n{}.\n", f); - std::process::exit(1); - } - let h = hdf5::File::open(&f).unwrap(); - h5_data.push(Some(h.dataset("matrix/data").unwrap())); - h5_indices.push(Some(h.dataset("matrix/indices").unwrap())); - let indptr = h.dataset("matrix/indptr").unwrap(); - let x: Vec<u32> = indptr.as_reader().read().unwrap().to_vec(); - h5_indptr.push(x); - } else { - h5_data.push(None); - h5_indices.push(None); - h5_indptr.push(Vec::<u32>::new()); - } - } - } - fn compute_feature_id(gex_features: &Vec<String>) -> HashMap<String, usize> { - let mut x = HashMap::<String, usize>::new(); - for j in 0..gex_features.len() { - let f = &gex_features[j]; - let ff = f.split('\t').collect::<Vec<&str>>(); - for z in 0..2 { - if ff[2].starts_with(&"Antibody") { - x.insert(format!("{}_ab", ff[z]), j); - } else if ff[2].starts_with(&"Antigen") { - x.insert(format!("{}_ag", ff[z]), j); - } else if ff[2].starts_with(&"CRISPR") { - x.insert(format!("{}_cr", ff[z]), j); - } else if 
ff[2].starts_with(&"CUSTOM") { - x.insert(format!("{}_cu", ff[z]), j); - } else if ff[2].starts_with(&"Gene") { - x.insert(format!("{}_g", ff[z]), j); - } - } - } - x - } - let n = gex_features.len(); - let pi = (0..n).into_par_iter(); - let mut feature_id = Vec::<HashMap<String, usize>>::new(); - pi.map(|i| compute_feature_id(&gex_features[i])) - .collect_into_vec(&mut feature_id); - let mut is_gex = Vec::<Vec<bool>>::new(); - for i in 0..gex_features.len() { - is_gex.push(vec![false; gex_features[i].len()]); - for j in 0..gex_features[i].len() { - let f = &gex_features[i][j]; - let ff = f.split('\t').collect::<Vec<&str>>(); - if ff[2].starts_with(&"Gene") { - is_gex[i][j] = true; - } - } - } - ctl.perf_stats(&t, "after load_gex"); - - // Answer. - - GexInfo { - gex_features: gex_features, - gex_barcodes: gex_barcodes, - gex_matrices: gex_matrices, - cluster: cluster, - cell_type: cell_type, - cell_type_specified: cell_type_specified, - pca: pca, - gex_cell_barcodes: gex_cell_barcodes, - gex_mults: gex_mults, - fb_mults: fb_mults, - h5_data: h5_data, - h5_indices: h5_indices, - h5_indptr: h5_indptr, - is_gex: is_gex, - feature_id: feature_id, - have_gex: have_gex, - have_fb: have_fb, - } -} diff --git a/enclone/src/misc1.rs b/enclone/src/misc1.rs index b8695614d..a5e50babe 100644 --- a/enclone/src/misc1.rs +++ b/enclone/src/misc1.rs @@ -1,16 +1,22 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // Miscellaneous functions. 
-use enclone_core::defs::*; -use equiv::*; -use itertools::*; +use enclone_core::{ + barcode_fate::BarcodeFate, + defs::{CloneInfo, EncloneControl, ExactClonotype, TigData}, +}; +use equiv::EquivRel; +use itertools::Itertools; #[cfg(not(target_os = "windows"))] use pager::Pager; -use perf_stats::*; +use perf_stats::elapsed; +use std::collections::HashMap; use std::time::Instant; -use string_utils::*; -use vector_utils::*; +use string_utils::stringme; +use vector_utils::{ + bin_member, bin_position, erase_if, next_diff, next_diff1_3, unique_sort, VecUtils, +}; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ @@ -28,7 +34,7 @@ use vector_utils::*; #[cfg(not(target_os = "windows"))] pub fn setup_pager(pager: bool) { // If the output is going to a terminal, set up paging so that output is in effect piped to - // "less -R -F -X". + // "less -R -F -X -K". // // ∙ The option -R is used to render ANSI escape characters correctly. We do not use // -r instead because if you navigate backwards in less -r, stuff gets screwed up, @@ -43,12 +49,12 @@ pub fn setup_pager(pager: bool) { // from enclone. This is really bad, so do not turn off this option! 
if pager { - Pager::with_pager("less -R -F -X").setup(); + Pager::with_pager("less -R -F -X -K").setup(); } } #[cfg(target_os = "windows")] -pub fn setup_pager(pager: bool) {} +pub fn setup_pager(_pager: bool) {} // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ @@ -60,8 +66,8 @@ pub fn setup_pager(pager: bool) {} pub fn lookup_heavy_chain_reuse( ctl: &EncloneControl, - exact_clonotypes: &Vec<ExactClonotype>, - info: &Vec<CloneInfo>, + exact_clonotypes: &[ExactClonotype], + info: &[CloneInfo], eq: &EquivRel, ) { if ctl.gen_opt.heavy_chain_reuse { @@ -116,12 +122,12 @@ pub fn lookup_heavy_chain_reuse( for z1 in 0..25 { for z2 in z1 + 1..25 { let mut xcdr3 = cdr3.clone(); - for i in 0..xcdr3.len() { - if xcdr3[i].0.len() > z2 { - let mut t = xcdr3[i].0.as_bytes().to_vec(); + for cdr in xcdr3.iter_mut() { + if cdr.0.len() > z2 { + let mut t = cdr.0.as_bytes().to_vec(); t[z1] = b'*'; t[z2] = b'*'; - xcdr3[i].0 = stringme(&t); + cdr.0 = stringme(&t); } } @@ -133,14 +139,14 @@ pub fn lookup_heavy_chain_reuse( while i < xcdr3.len() { let j = next_diff1_3(&xcdr3, i as i32) as usize; let mut ids = Vec::<usize>::new(); - for k in i..j { - ids.push(xcdr3[k].2); + for cdr in &xcdr3[i..j] { + ids.push(cdr.2); } unique_sort(&mut ids); if !ids.solo() { - let mut x = Vec::<String>::new(); - for k in i..j { - x.push(xcdr3[k].1.clone()); + let mut x = Vec::new(); + for cdr in &xcdr3[i..j] { + x.push(cdr.1.clone()); } unique_sort(&mut x); dio.push(x); @@ -150,14 +156,13 @@ pub fn lookup_heavy_chain_reuse( } } unique_sort(&mut dio); - for i in 0..dio.len() { - println!("{} = {}", i + 1, dio[i].iter().format(", ")); + for (i, d) in dio.into_iter().enumerate() { + println!("{} = {}", i + 1, d.iter().format(", ")); } println!( "\nused {:.2} seconds in heavy chain reuse calculation\n", elapsed(&t) ); - std::process::exit(0); } } @@ -190,100 +195,105 @@ pub fn lookup_heavy_chain_reuse( // original cells that were drawn (perhaps breaking up 
in the process of drawing), and was // subsequently distintegrated. -pub fn cross_filter(ctl: &EncloneControl, mut tig_bc: &mut Vec<Vec<TigData>>) { - if !ctl.clono_filt_opt.ncross { - // Get the list of dataset origins. Here we allow the same origin name to have been used for - // more than one donor, as we haven't explicitly prohibited that. +pub fn cross_filter( + ctl: &EncloneControl, + tig_bc: &mut Vec<Vec<TigData>>, + fate: &mut [HashMap<String, BarcodeFate>], +) { + // Get the list of dataset origins. Here we allow the same origin name to have been used + // for more than one donor, as we haven't explicitly prohibited that. - let mut origins = Vec::<(String, String)>::new(); - for i in 0..ctl.origin_info.n() { - origins.push(( - ctl.origin_info.donor_id[i].clone(), - ctl.origin_info.origin_id[i].clone(), - )); - } - unique_sort(&mut origins); - let mut to_origin = vec![0; ctl.origin_info.n()]; - for i in 0..ctl.origin_info.n() { - to_origin[i] = bin_position( - &origins, - &( - ctl.origin_info.donor_id[i].clone(), - ctl.origin_info.origin_id[i].clone(), - ), - ) as usize; - } + let mut origins = Vec::<(&str, &str)>::new(); + for i in 0..ctl.origin_info.n() { + origins.push(( + ctl.origin_info.donor_id[i].as_str(), + ctl.origin_info.origin_id[i].as_str(), + )); + } + unique_sort(&mut origins); + let to_origin = ctl + .origin_info + .donor_id + .iter() + .zip(ctl.origin_info.origin_id.iter()) + .map(|(donor_id, origin_id)| { + bin_position(&origins, &(donor_id.as_str(), origin_id.as_str())) as usize + }) + .collect::<Vec<_>>(); - // For each dataset index, and each origin, compute the total number of productive pairs. + // For each dataset index, and each origin, compute the total number of productive pairs. 
- let mut n_dataset_index = vec![0; ctl.origin_info.n()]; - let mut n_origin = vec![0; origins.len()]; - for i in 0..tig_bc.len() { - for j in 0..tig_bc[i].len() { - let x = &tig_bc[i][j]; - n_dataset_index[x.dataset_index] += 1; - n_origin[to_origin[x.dataset_index]] += 1; - } + let mut n_dataset_index = vec![0; ctl.origin_info.n()]; + let mut n_origin = vec![0; origins.len()]; + for tigi in tig_bc.iter() { + for x in tigi { + n_dataset_index[x.dataset_index] += 1; + n_origin[to_origin[x.dataset_index]] += 1; } + } - // Find all the V..J segments, and for each, the number of times it appears in each dataset ID. - // - // Note that there is no point running this unless we have at least two dataset IDs, and in - // fact unless there is an origin with at least two dataset IDs. Better: just gather data for - // the origin for which there are at least two dataset IDs. Also no point if NCROSS. + // Find all the V..J segments, and for each, the number of times it appears in each + // dataset ID. + // + // Note that there is no point running this unless we have at least two dataset IDs, and in + // fact unless there is an origin with at least two dataset IDs. Better: just gather data + // for the origin for which there are at least two dataset IDs. Also no point if NCROSS. 
- let mut vjx = Vec::<(Vec<u8>, usize, usize)>::new(); // (V..J, dataset index, count) - { - for i in 0..tig_bc.len() { - for j in 0..tig_bc[i].len() { - let x = &tig_bc[i][j]; - vjx.push((x.seq.clone(), x.dataset_index, 1)); - } + let vjx = { + let mut vjx = Vec::<(&[u8], usize, usize)>::new(); // (V..J, dataset index, count) + for tigi in tig_bc.iter() { + for x in tigi { + vjx.push((x.seq(), x.dataset_index, 1)); } - vjx.sort(); - let mut to_delete = vec![false; vjx.len()]; - let mut i = 0; - while i < vjx.len() { - let j = next_diff(&vjx, i); // actually only need to check first two fields - vjx[i].2 = j - i; - for k in i + 1..j { - to_delete[k] = true; - } - i = j; + } + vjx.sort(); + let mut to_delete = vec![false; vjx.len()]; + let mut i = 0; + while i < vjx.len() { + let j = next_diff(&vjx, i); // actually only need to check first two fields + vjx[i].2 = j - i; + for d in &mut to_delete[i + 1..j] { + *d = true; } - erase_if(&mut vjx, &to_delete); + i = j; } + erase_if(&mut vjx, &to_delete); + vjx + }; - // Now do the cross filter. + // Now do the cross filter. 
- let mut blacklist = Vec::<Vec<u8>>::new(); - let mut i = 0; - while i < vjx.len() { - let j = next_diff1_3(&vjx, i as i32) as usize; - if j - i == 1 { - let dataset_index = vjx[i].1; - let n = vjx[i].2; - let x = n_dataset_index[dataset_index]; - let y = n_origin[to_origin[dataset_index]]; - if y > 0 { - let p = (x as f64 / y as f64).powi(n as i32); - if p <= 1.0e-6 { - blacklist.push(vjx[i].0.clone()); - } + let mut blacklist = Vec::<&[u8]>::new(); + let mut i = 0; + while i < vjx.len() { + let j = next_diff1_3(&vjx, i as i32) as usize; + if j - i == 1 { + let dataset_index = vjx[i].1; + let n = vjx[i].2; + let x = n_dataset_index[dataset_index]; + let y = n_origin[to_origin[dataset_index]]; + if y > 0 { + let p = (x as f64 / y as f64).powi(n as i32); + if p <= 1.0e-6 { + blacklist.push(vjx[i].0); } } - i = j; } - blacklist.sort(); - let mut to_delete = vec![false; tig_bc.len()]; - const UMIS_SAVE: usize = 100; - for i in 0..tig_bc.len() { - for j in 0..tig_bc[i].len() { - if tig_bc[i][j].umi_count < UMIS_SAVE && bin_member(&blacklist, &tig_bc[i][j].seq) { + i = j; + } + blacklist.sort(); + let mut to_delete = vec![false; tig_bc.len()]; + const UMIS_SAVE: usize = 100; + for (i, tigi) in tig_bc.iter().enumerate() { + for tig in tigi { + if tig.umi_count < UMIS_SAVE && bin_member(&blacklist, &tig.seq()) { + fate[tigi[0].dataset_index].insert(tigi[0].barcode.clone(), BarcodeFate::Cross); + if !ctl.clono_filt_opt_def.ncross { to_delete[i] = true; } + break; } } - erase_if(&mut tig_bc, &to_delete); } + erase_if(tig_bc, &to_delete); } diff --git a/enclone/src/misc2.rs b/enclone/src/misc2.rs index 59c8d2db2..fa7375bf8 100644 --- a/enclone/src/misc2.rs +++ b/enclone/src/misc2.rs @@ -1,69 +1,89 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // Miscellaneous functions. 
-use crate::innate::*; -use crate::misc3::*; -use debruijn::dna_string::*; -use enclone_core::defs::*; -use io_utils::*; +use crate::innate::mark_innate; +use crate::misc3::study_consensus; +use amino::aa_seq; +use debruijn::dna_string::DnaString; +use enclone_core::barcode_fate::BarcodeFate; +use enclone_core::defs::{EncloneControl, ExactClonotype, Junction, TigData, TigData0, TigData1}; +use io_utils::{fwriteln, open_for_write_new}; use rayon::prelude::*; use std::cmp::{max, min}; -use std::fs::File; -use std::io::{BufWriter, Write}; -use string_utils::*; -use vdj_ann::refx::*; -use vector_utils::*; +use std::collections::HashMap; + +use std::fmt::Write as _; +use std::io::Write; +use std::time::Instant; +use string_utils::strme; +use vdj_ann::refx::RefData; +use vector_utils::{ + erase_if, next_diff, next_diff12_4, next_diff1_2, next_diff1_3, reverse_sort, unique_sort, +}; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ // Filter out putative gel bead contamination. We look for cases where inside a -// given exact subclonotype, the same first or last half of the barcode is reused, and one +// given exact subclonotype, the same first or last half of a barcode is reused, and one // instance has at least 10-fold higher UMI count. If the fraction of the "bad" // clones is at least 20%, delete them. 
-pub fn filter_gelbead_contamination(ctl: &EncloneControl, mut clones: &mut Vec<Vec<TigData0>>) { - if !ctl.gen_opt.nwhitef { - const GB_UMI_MULT: usize = 10; - const GB_MIN_FRAC: f64 = 0.2; - let mut bch = vec![Vec::<(usize, String, usize, usize)>::new(); 2]; - for l in 0..clones.len() { - let li = clones[l][0].dataset_index; - let bc = &clones[l][0].barcode; - let mut numi = 0; - for j in 0..clones[l].len() { - numi += clones[l][j].umi_count; - } - bch[0].push((li, bc[0..8].to_string(), numi, l)); - bch[1].push((li, bc[8..16].to_string(), numi, l)); +pub fn filter_gelbead_contamination( + ctl: &EncloneControl, + clones: &mut Vec<Vec<TigData0>>, + fate: &mut Vec<(usize, String, BarcodeFate)>, +) { + const GB_UMI_MULT: usize = 10; + const GB_MIN_FRAC: f64 = 0.2; + let mut bch = vec![Vec::<(usize, &str, usize, usize)>::new(); 2]; + for (l, clone) in clones.iter().enumerate() { + let li = clone[0].dataset_index; + let bc = clone[0].barcode.as_str(); + let mut numi = 0; + for tig in clone { + numi += tig.umi_count; } - let mut bad = vec![false; clones.len()]; - for l in 0..2 { - bch[l].sort(); - let mut m = 0; - while m < bch[l].len() { - let n = next_diff12_4(&bch[l], m as i32) as usize; - let mut count = 0; + bch[0].push((li, &bc[0..8], numi, l)); + bch[1].push((li, &bc[8..16], numi, l)); + } + let mut bad = vec![false; clones.len()]; + for l in 0..2 { + bch[l].sort_unstable(); + let mut m = 0; + while m < bch[l].len() { + let n = next_diff12_4(&bch[l], m as i32) as usize; + let mut count = 0; + for u1 in m..n { + for u2 in m..n { + if bch[l][u1].2 >= GB_UMI_MULT * bch[l][u2].2 { + count += 1; + } + } + } + if count as f64 / clones.len() as f64 >= GB_MIN_FRAC { for u1 in m..n { for u2 in m..n { if bch[l][u1].2 >= GB_UMI_MULT * bch[l][u2].2 { - count += 1; - } - } - } - if count as f64 / clones.len() as f64 >= GB_MIN_FRAC { - for u1 in m..n { - for u2 in m..n { - if bch[l][u1].2 >= GB_UMI_MULT * bch[l][u2].2 { - bad[bch[l][u2].3] = true; - } + bad[bch[l][u2].3] = 
true; } } } - m = n; } + m = n; } - erase_if(&mut clones, &bad); + } + for (&b, clone) in bad.iter().zip(clones.iter()) { + if b { + fate.push(( + clone[0].dataset_index, + clone[0].barcode.clone(), + BarcodeFate::GelBeadContamination, + )); + } + } + if !ctl.gen_opt.nwhitef { + erase_if(clones, &bad); } } @@ -71,10 +91,10 @@ pub fn filter_gelbead_contamination(ctl: &EncloneControl, mut clones: &mut Vec<V pub fn create_exact_subclonotype_core( // inputs: - tig_bc: &Vec<Vec<TigData>>, + tig_bc: &[Vec<TigData>], r: usize, s: usize, - to_delete: &Vec<bool>, + to_delete: &[bool], // outputs: share: &mut Vec<TigData1>, clones: &mut Vec<Vec<TigData0>>, @@ -88,7 +108,7 @@ pub fn create_exact_subclonotype_core( loop { let mut calls = Vec::<(u8, u8)>::new(); // (base,qual) for t in r..s { - if !to_delete[t - r] && tig_bc[t][m].v_start >= pos + 1 { + if !to_delete[t - r] && tig_bc[t][m].v_start > pos { let p = tig_bc[t][m].v_start - pos - 1; calls.push((tig_bc[t][m].full_seq[p], tig_bc[t][m].full_quals[p])); } @@ -97,14 +117,14 @@ pub fn create_exact_subclonotype_core( break; } last_calls = calls.len(); - calls.sort(); + calls.sort_unstable(); let mut callsx = Vec::<(usize, u8)>::new(); // (qual,base) let mut i = 0; while i < calls.len() { let j = next_diff1_2(&calls, i as i32) as usize; let mut q = 0; - for k in i..j { - q += calls[k].1 as usize; + for c in &calls[i..j] { + q += c.1 as usize; } callsx.push((q, calls[i].0)); i = j; @@ -132,14 +152,14 @@ pub fn create_exact_subclonotype_core( break; } last_calls = calls.len(); - calls.sort(); + calls.sort_unstable(); let mut callsx = Vec::<(usize, u8)>::new(); // (qual,base) let mut i = 0; while i < calls.len() { let j = next_diff1_2(&calls, i as i32) as usize; let mut q = 0; - for k in i..j { - q += calls[k].1 as usize; + for c in &calls[i..j] { + q += c.1 as usize; } callsx.push((q, calls[i].0)); i = j; @@ -152,22 +172,31 @@ pub fn create_exact_subclonotype_core( // Form full sequence. 
let mut full = utr.clone(); - let mut z = tig_bc[r][m].seq.clone(); + let mut z = tig_bc[r][m].seq().to_vec(); full.append(&mut z); full.append(&mut constx); - // Note that here we are taking the first entry (r), sort of assuming - // that all the entries are the same, which in principle they should be. + // Note that here we are taking the first entry (r), sort of assuming that all the entries + // are the same, which in principle they should be, but this is not actually always true. + // However this is hard to fix. + let aa = aa_seq(tig_bc[r][m].seq(), 0); + let mut d_start = None; + if tig_bc[r][m].d_start.is_some() { + d_start = Some(tig_bc[r][m].d_start.unwrap() + utr.len() - tig_bc[r][m].v_start); + } share.push(TigData1 { cdr3_dna: tig_bc[r][m].cdr3_dna.clone(), - seq: tig_bc[r][m].seq.clone(), - seq_del: tig_bc[r][m].seq.clone(), // may get changed later - seq_del_amino: tig_bc[r][m].seq.clone(), // may get changed later + seq: tig_bc[r][m].seq().to_vec(), + seq_del: tig_bc[r][m].seq().to_vec(), // may get changed later + seq_del_amino: tig_bc[r][m].seq().to_vec(), // may get changed later + ins: Vec::new(), // may get changed later + aa_mod_indel: aa, // may get changed later full_seq: full, v_start: utr.len(), v_stop: tig_bc[r][m].v_stop + utr.len() - tig_bc[r][m].v_start, v_stop_ref: tig_bc[r][m].v_stop_ref, + d_start, j_start: tig_bc[r][m].j_start + utr.len() - tig_bc[r][m].v_start, j_start_ref: tig_bc[r][m].j_start_ref, j_stop: tig_bc[r][m].j_stop + utr.len() - tig_bc[r][m].v_start, @@ -179,9 +208,10 @@ pub fn create_exact_subclonotype_core( d_ref_id: tig_bc[r][m].d_ref_id, j_ref_id: tig_bc[r][m].j_ref_id, c_ref_id: tig_bc[r][m].c_ref_id, - cdr1_aa: tig_bc[r][m].cdr1_aa.clone(), + fr1_start: tig_bc[r][m].fr1_start, + fr2_start: tig_bc[r][m].fr2_start, + fr3_start: tig_bc[r][m].fr3_start, cdr1_start: tig_bc[r][m].cdr1_start, - cdr2_aa: tig_bc[r][m].cdr2_aa.clone(), cdr2_start: tig_bc[r][m].cdr2_start, cdr3_aa: tig_bc[r][m].cdr3_aa.clone(), cdr3_start: 
tig_bc[r][m].cdr3_start, @@ -201,6 +231,7 @@ pub fn create_exact_subclonotype_core( mait_alpha_chain_junction_match: false, mait_beta_chain_gene_match: false, mait_beta_chain_junction_match: false, + jun: Junction::default(), }); } for t in r..s { @@ -209,8 +240,8 @@ pub fn create_exact_subclonotype_core( for m in 0..tig_bc[t].len() { x.push(TigData0 { quals: tig_bc[t][m].quals.clone(), - v_start: tig_bc[t][m].v_start.clone(), - j_stop: tig_bc[t][m].j_stop.clone(), + v_start: tig_bc[t][m].v_start, + j_stop: tig_bc[t][m].j_stop, c_start: tig_bc[t][m].c_start, full_seq: tig_bc[t][m].full_seq.clone(), barcode: tig_bc[t][m].barcode.clone(), @@ -221,7 +252,12 @@ pub fn create_exact_subclonotype_core( tag_index: tig_bc[t][m].tag_index, umi_count: tig_bc[t][m].umi_count, read_count: tig_bc[t][m].read_count, + validated_umis: tig_bc[t][m].validated_umis.clone(), + non_validated_umis: tig_bc[t][m].non_validated_umis.clone(), + invalidated_umis: tig_bc[t][m].invalidated_umis.clone(), + frac_reads_used: tig_bc[t][m].frac_reads_used, marked: false, + v_ref_id: tig_bc[t][m].v_ref_id, }); } clones.push(x); @@ -235,12 +271,14 @@ pub fn create_exact_subclonotype_core( pub fn find_exact_subclonotypes( ctl: &EncloneControl, - tig_bc: &Vec<Vec<TigData>>, + tig_bc: &[Vec<TigData>], refdata: &RefData, + fate: &mut [HashMap<String, BarcodeFate>], ) -> Vec<ExactClonotype> { let mut exact_clonotypes = Vec::<ExactClonotype>::new(); let mut r = 0; let mut groups = Vec::<(usize, usize)>::new(); + let t = Instant::now(); while r < tig_bc.len() { let mut s = r + 1; while s < tig_bc.len() { @@ -251,7 +289,7 @@ pub fn find_exact_subclonotypes( for m in 0..tig_bc[r].len() { let (cid1, cid2) = (tig_bc[r][m].c_ref_id, tig_bc[s][m].c_ref_id); if tig_bc[s][m].cdr3_dna != tig_bc[r][m].cdr3_dna - || tig_bc[s][m].seq != tig_bc[r][m].seq + || tig_bc[s][m].seq() != tig_bc[r][m].seq() // Working around a bug here. See above for explanation. 
@@ -262,12 +300,13 @@ pub fn find_exact_subclonotypes( && refdata.name[cid1.unwrap()] != refdata.name[cid2.unwrap()] ) || ( cid1.is_some() && cid2.is_some() - && tig_bc[r][m].c_start.unwrap() + tig_bc[s][m].j_stop < tig_bc[s][m].c_start.unwrap() + tig_bc[r][m].j_stop ) + && tig_bc[r][m].c_start.unwrap() + tig_bc[s][m].j_stop + < tig_bc[s][m].c_start.unwrap() + tig_bc[r][m].j_stop ) // Check for different donors if MIX_DONORS specified on command line. // Note funky redundancy in checking each chain - || ( !ctl.clono_filt_opt.donor + || ( !ctl.clono_filt_opt_def.donor && tig_bc[r][m].donor_index != tig_bc[s][m].donor_index ) { ok = false; @@ -282,9 +321,15 @@ pub fn find_exact_subclonotypes( groups.push((r, s)); r = s; } - let mut results = Vec::<(usize, Vec<ExactClonotype>)>::new(); + ctl.perf_stats(&t, "finding exact subclonotypes one"); + let t = Instant::now(); + let mut results = Vec::<( + usize, + Vec<ExactClonotype>, + Vec<(usize, String, BarcodeFate)>, + )>::new(); for i in 0..groups.len() { - results.push((i, Vec::new())); + results.push((i, Vec::new(), Vec::new())); } results.par_iter_mut().for_each(|res| { let i = res.0; @@ -305,7 +350,7 @@ pub fn find_exact_subclonotypes( for m in 0..tig_bc[t1].len() { print!(" {}", tig_bc[t1][m].umi_count); } - println!(""); + println!(); print!( "{}: numis =", ctl.origin_info.dataset_id[tig_bc[t2][0].dataset_index] @@ -324,58 +369,75 @@ pub fn find_exact_subclonotypes( // the case where a barcode was accidentally reused. 
let mut to_delete = vec![false; s - r]; - if ctl.clono_filt_opt.bc_dup { - for t1 in r..s { - for t2 in t1 + 1..s { - if tig_bc[t1][0].barcode == tig_bc[t2][0].barcode { - to_delete[t1 - r] = true; - to_delete[t2 - r] = true; + let mut bc = (r..s) + .map(|t| (tig_bc[t][0].barcode.as_str(), t)) + .collect::<Vec<_>>(); + bc.sort_unstable(); + let mut i = 0; + while i < bc.len() { + let j = next_diff1_2(&bc, i as i32) as usize; + if j - i >= 2 { + for bck in &bc[i..j] { + let t = bck.1; + if ctl.clono_filt_opt_def.bc_dup { + to_delete[t - r] = true; } + res.2.push(( + tig_bc[t][0].dataset_index, + tig_bc[t][0].barcode.clone(), + BarcodeFate::DuplicatedBarcode, + )); } } + i = j; } // Create the exact subclonotype. let mut share = Vec::<TigData1>::new(); let mut clones = Vec::<Vec<TigData0>>::new(); - create_exact_subclonotype_core(&tig_bc, r, s, &to_delete, &mut share, &mut clones); + create_exact_subclonotype_core(tig_bc, r, s, &to_delete, &mut share, &mut clones); // Explore consensus. let mut _count = 0; study_consensus( &mut _count, - &ctl, + ctl, &share, &clones, &exact_clonotypes, - &refdata, + refdata, ); // Filter out putative gel bead contamination. - filter_gelbead_contamination(&ctl, &mut clones); + filter_gelbead_contamination(ctl, &mut clones, &mut res.2); // Save exact subclonotype. 
- if clones.len() > 0 { - res.1.push(ExactClonotype { - share: share, - clones: clones, - }); + if (share.len() >= ctl.gen_opt.min_chains_exact + || (ctl.join_alg_opt.basic_h.is_none() && !ctl.gen_opt.pre_eval)) + && !clones.is_empty() + { + res.1.push(ExactClonotype { share, clones }); } }); + ctl.perf_stats(&t, "finding exact subclonotypes two"); + let t = Instant::now(); let mut max_exact = 0; for i in 0..results.len() { - if results[i].1.len() > 0 { + if !results[i].1.is_empty() { max_exact = max(max_exact, results[i].1[0].ncells()); exact_clonotypes.append(&mut results[i].1); } + for j in 0..results[i].2.len() { + fate[results[i].2[j].0].insert(results[i].2[j].1.clone(), results[i].2[j].2.clone()); + } } if ctl.gen_opt.utr_con || ctl.gen_opt.con_con { - println!(""); - std::process::exit(0); + println!(); + return Vec::new(); } if !ctl.silent { println!( @@ -383,33 +445,58 @@ pub fn find_exact_subclonotypes( exact_clonotypes.len(), tig_bc.len() ); - println!("max exact subclonotype size = {}", max_exact); + println!("max exact subclonotype size = {max_exact}"); } + // Edit if NWEAK_ONESIES not specified. + + /* + if ctl.clono_filt_opt.weak_onesies { + let mut total_cells = 0; + for i in 0..exact_clonotypes.len() { + total_cells += exact_clonotypes[i].ncells(); + } + let mut exacts2 = Vec::<ExactClonotype>::new(); + for i in 0..exact_clonotypes.len() { + let ex = &exact_clonotypes[i]; + if ex.share.len() == 1 && ex.ncells() > 1 && ex.ncells() * 1000 < total_cells { + for j in 0..ex.clones.len() { + exacts2.push(ExactClonotype { + share: ex.share.clone(), + clones: vec![ex.clones[j].clone()], + }); + } + } else { + exacts2.push(exact_clonotypes[i].clone()); + } + } + exact_clonotypes = exacts2; + } + */ + // Fill in iNKT and MAIT annotations. - mark_innate(&refdata, &mut exact_clonotypes); + mark_innate(refdata, &mut exact_clonotypes); // Do other stuff. 
- if ctl.gen_opt.fasta.len() > 0 { + if !ctl.gen_opt.fasta.is_empty() { let mut f = open_for_write_new![&ctl.gen_opt.fasta]; - for i in 0..exact_clonotypes.len() { - let x = &exact_clonotypes[i]; - for j in 0..x.share.len() { + for (i, x) in exact_clonotypes.iter().enumerate() { + for (j, s) in x.share.iter().enumerate() { fwriteln!( f, ">exact_clonotype{}.chain{}.VJ\n{}", i, j + 1, - strme(&x.share[j].seq) + strme(&s.seq) ); } } } - if ctl.gen_opt.exact.is_some() { - let ex = &exact_clonotypes[ctl.gen_opt.exact.unwrap()]; - println!("\nEXACT CLONOTYPE {}", ctl.gen_opt.exact.unwrap()); + if let Some(exact) = ctl.gen_opt.exact { + let ex = &exact_clonotypes[exact]; + println!("\nEXACT CLONOTYPE {exact}"); for i in 0..ex.share.len() { let vid = ex.share[i].v_ref_id; let jid = ex.share[i].j_ref_id; @@ -421,8 +508,8 @@ pub fn find_exact_subclonotypes( ex.share[i].cdr3_aa ); } - for i in 0..ex.clones.len() { - let x = &ex.clones[i][0]; + for (i, clone) in ex.clones.iter().enumerate() { + let x = &clone[0]; println!( "clone {} = {}.{}", i + 1, @@ -430,8 +517,9 @@ pub fn find_exact_subclonotypes( x.barcode ); } - println!(""); + println!(); } + ctl.perf_stats(&t, "finding exact subclonotypes three"); exact_clonotypes } @@ -439,20 +527,21 @@ pub fn find_exact_subclonotypes( // Search for SHM indels. Exploratory. 
-pub fn search_for_shm_indels(ctl: &EncloneControl, tig_bc: &Vec<Vec<TigData>>) { +pub fn search_for_shm_indels(ctl: &EncloneControl, tig_bc: &[Vec<TigData>]) { if ctl.gen_opt.indels { println!("CDR3s associated with possible SHM indels"); - let mut cs = Vec::<((String, usize), usize, String)>::new(); - for i in 0..tig_bc.len() { - for j in 0..tig_bc[i].len() { - let x = &tig_bc[i][j]; - cs.push(( - (x.cdr3_dna.clone(), x.v_ref_id), - x.seq.len(), - x.cdr3_aa.clone(), - )); - } - } + let mut cs: Vec<((&str, usize), usize, &str)> = tig_bc + .iter() + .flat_map(|tig| { + tig.iter().map(|x| { + ( + (x.cdr3_dna.as_str(), x.v_ref_id), + x.seq().len(), + x.cdr3_aa.as_str(), + ) + }) + }) + .collect(); unique_sort(&mut cs); let mut i = 0; while i < cs.len() { @@ -462,8 +551,7 @@ pub fn search_for_shm_indels(ctl: &EncloneControl, tig_bc: &Vec<Vec<TigData>>) { } i = j; } - println!(""); - std::process::exit(0); + println!(); } } @@ -472,14 +560,17 @@ pub fn search_for_shm_indels(ctl: &EncloneControl, tig_bc: &Vec<Vec<TigData>>) { // Look for barcode reuse. The primary purpose of this is to detect instances where two // datasets were obtained from the same cDNA (from the same GEM well). 
-pub fn check_for_barcode_reuse(ctl: &EncloneControl, tig_bc: &Vec<Vec<TigData>>) { +pub fn check_for_barcode_reuse( + ctl: &EncloneControl, + tig_bc: &[Vec<TigData>], +) -> Result<(), String> { if !ctl.gen_opt.accept_reuse { const MIN_REUSE_FRAC_TO_SHOW: f64 = 0.25; - let mut all = Vec::<(String, usize, usize)>::new(); + let mut all = Vec::<(&str, usize, usize)>::new(); let mut total = vec![0; ctl.origin_info.dataset_id.len()]; - for i in 0..tig_bc.len() { - all.push((tig_bc[i][0].barcode.clone(), tig_bc[i][0].dataset_index, i)); - total[tig_bc[i][0].dataset_index] += 1; + for (i, tig_i) in tig_bc.iter().enumerate() { + all.push((tig_i[0].barcode.as_str(), tig_i[0].dataset_index, i)); + total[tig_i[0].dataset_index] += 1; } all.par_sort(); let mut reuse = Vec::<(usize, usize)>::new(); @@ -509,9 +600,10 @@ pub fn check_for_barcode_reuse(ctl: &EncloneControl, tig_bc: &Vec<Vec<TigData>>) } i = j; } - reuse.sort(); - let mut found = false; + reuse.sort_unstable(); let mut i = 0; + let mut found = false; + let mut msg = String::new(); while i < reuse.len() { let j = next_diff(&reuse, i); let n = j - i; @@ -520,16 +612,16 @@ pub fn check_for_barcode_reuse(ctl: &EncloneControl, tig_bc: &Vec<Vec<TigData>>) let frac = n as f64 / min(n1, n2) as f64; if frac >= MIN_REUSE_FRAC_TO_SHOW { if !found { - eprintln!("\nSignificant barcode reuse detected. If at least 25% of the barcodes \ + found = true; + msg += "\nSignificant barcode reuse detected. If at least 25% of the barcodes \ in one dataset\nare present in another dataset, is is likely that two datasets \ arising from the\nsame library were included as input to enclone. Since this \ would normally occur\nonly by accident, enclone exits. 
\ If you wish to override this behavior,\nplease rerun with the argument \ - ACCEPT_REUSE.\n\nHere are the instances of reuse that were observed:\n" - ); - found = true; + ACCEPT_REUSE.\n\nHere are the instances of reuse that were observed:\n\n"; } - eprintln!( + writeln!( + msg, "{}, {} ==> {} of {}, {} barcodes ({:.1}%)", ctl.origin_info.dataset_id[l1], ctl.origin_info.dataset_id[l2], @@ -537,13 +629,14 @@ pub fn check_for_barcode_reuse(ctl: &EncloneControl, tig_bc: &Vec<Vec<TigData>>) n1, n2, 100.0 * frac - ); + ) + .unwrap(); } i = j; } if found { - eprintln!(""); - std::process::exit(1); + return Err(msg); } } + Ok(()) } diff --git a/enclone/src/misc3.rs b/enclone/src/misc3.rs index 8f1d3f160..e703bea30 100644 --- a/enclone/src/misc3.rs +++ b/enclone/src/misc3.rs @@ -1,57 +1,19 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // Miscellaneous functions. -use enclone_core::defs::*; -use io_utils::*; -use itertools::*; +use enclone_core::defs::{EncloneControl, ExactClonotype, TigData, TigData0, TigData1}; +use io_utils::{fwrite, fwriteln}; +use itertools::Itertools; use std::cmp::{max, min, Ordering}; use std::io::Write; -use string_utils::*; -use vdj_ann::refx::*; -use vector_utils::*; +use string_utils::strme; +use vdj_ann::refx::RefData; +use vector_utils::unique_sort; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -// Parse a line, breaking at blanks, but not if they're in quotes. And strip the quotes. -// Ridiculously similar to parse_csv, probably should refactor. 
- -pub fn parse_bsv(x: &str) -> Vec<String> { - let mut args = Vec::<String>::new(); - let mut w = Vec::<char>::new(); - for c in x.chars() { - w.push(c); - } - let (mut quotes, mut i) = (0, 0); - while i < w.len() { - let mut j = i; - while j < w.len() { - if quotes % 2 == 0 && w[j] == ' ' { - break; - } - if w[j] == '"' { - quotes += 1; - } - j += 1; - } - let (mut start, mut stop) = (i, j); - if stop - start >= 2 && w[start] == '"' && w[stop - 1] == '"' { - start += 1; - stop -= 1; - } - let mut s = String::new(); - for m in start..stop { - s.push(w[m]); - } - args.push(s); - i = j + 1; - } - args -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn sort_tig_bc(ctl: &EncloneControl, tig_bc: &mut Vec<Vec<TigData>>, refdata: &RefData) { +pub fn sort_tig_bc(ctl: &EncloneControl, tig_bc: &mut [Vec<TigData>], refdata: &RefData) { tig_bc.sort_by(|x, y| -> Ordering { for i in 0..x.len() { // Order by number of chains. @@ -74,9 +36,9 @@ pub fn sort_tig_bc(ctl: &EncloneControl, tig_bc: &mut Vec<Vec<TigData>>, refdata return Ordering::Greater; // Order by chain sequence. - } else if x[i].seq < y[i].seq { + } else if x[i].seq() < y[i].seq() { return Ordering::Less; - } else if x[i].seq > y[i].seq { + } else if x[i].seq() > y[i].seq() { return Ordering::Greater; } @@ -116,16 +78,16 @@ pub fn sort_tig_bc(ctl: &EncloneControl, tig_bc: &mut Vec<Vec<TigData>>, refdata return Ordering::Greater; // Order by donor if MIX_DONORS option used. 
- } else if !ctl.clono_filt_opt.donor && x[i].donor_index < y[i].donor_index { + } else if !ctl.clono_filt_opt_def.donor && x[i].donor_index < y[i].donor_index { return Ordering::Less; - } else if !ctl.clono_filt_opt.donor && x[i].donor_index > y[i].donor_index { + } else if !ctl.clono_filt_opt_def.donor && x[i].donor_index > y[i].donor_index { return Ordering::Greater; } } if x.len() < y.len() { return Ordering::Less; } - return Ordering::Equal; + Ordering::Equal }); } @@ -144,9 +106,9 @@ pub fn sort_tig_bc(ctl: &EncloneControl, tig_bc: &mut Vec<Vec<TigData>>, refdata pub fn study_consensus( _count: &mut usize, ctl: &EncloneControl, - share: &Vec<TigData1>, - clones: &Vec<Vec<TigData0>>, - exact_clonotypes: &Vec<ExactClonotype>, + share: &[TigData1], + clones: &[Vec<TigData0>], + exact_clonotypes: &[ExactClonotype], refdata: &RefData, ) { if ctl.gen_opt.utr_con { @@ -174,40 +136,40 @@ pub fn study_consensus( ); let _len = share[z].seq.len(); let mut lefts = Vec::<Vec<u8>>::new(); - for m in 0..clones.len() { - let start = clones[m][z].v_start; - let mut x = clones[m][z].full_seq[0..start].to_vec(); + for clone in clones { + let start = clone[z].v_start; + let mut x = clone[z].full_seq[0..start].to_vec(); x.reverse(); - lefts.push(x.to_vec()); + lefts.push(x); } let mut rutrs = Vec::<Vec<u8>>::new(); - for i in 0..utr_ids.len() { - let mut x = refdata.refs[utr_ids[i]].to_string().as_bytes().to_vec(); + for &id in &utr_ids { + let mut x = refdata.refs[id].to_string().as_bytes().to_vec(); x.reverse(); - rutrs.push(x.to_vec()); + rutrs.push(x); } let mut minlen = 1_000_000; let mut maxlen = 0; - for i in 0..lefts.len() { - minlen = min(minlen, lefts[i].len()); - maxlen = max(maxlen, lefts[i].len()); + for left in &lefts { + minlen = min(minlen, left.len()); + maxlen = max(maxlen, left.len()); } - for i in 0..rutrs.len() { - minlen = min(minlen, rutrs[i].len()); - maxlen = max(maxlen, rutrs[i].len()); + for r in &rutrs { + minlen = min(minlen, r.len()); + maxlen = 
max(maxlen, r.len()); } let mut dots = Vec::<u8>::new(); let mut diffs = 0; for j in 0..maxlen { let mut bases = Vec::<u8>::new(); - for i in 0..lefts.len() { - if j < lefts[i].len() { - bases.push(lefts[i][j]); + for left in &lefts { + if j < left.len() { + bases.push(left[j]); } } - for i in 0..rutrs.len() { - if j < rutrs[i].len() { - bases.push(rutrs[i][j]); + for r in &rutrs { + if j < r.len() { + bases.push(r[j]); } } let mut diff = false; @@ -224,14 +186,14 @@ pub fn study_consensus( } } fwriteln!(log, " {}", strme(&dots)); - for i in 0..rutrs.len() { - fwriteln!(log, " U = {}", strme(&rutrs[i])); + for r in rutrs { + fwriteln!(log, " U = {}", strme(&r)); } - for i in 0..lefts.len() { - if i + 1 <= 9 { + for (i, left) in lefts.iter().enumerate() { + if i < 9 { fwrite!(log, " "); } - fwriteln!(log, "{} = {}", i + 1, strme(&lefts[i])); + fwriteln!(log, "{} = {}", i + 1, strme(left)); } if !(minlen == maxlen && diffs == 0 && utr_ids.len() == 1) { print!("{}", strme(&log)); @@ -239,14 +201,13 @@ pub fn study_consensus( } } } - if ctl.gen_opt.con_con && clones.len() > 0 { + if ctl.gen_opt.con_con && !clones.is_empty() { // ??????????????????????????????????????? // NOTE TRUNCATED TO 120 BASES! 
const SHOW: usize = 120; for z in 0..clones[0].len() { let mut log = Vec::<u8>::new(); - let mut c_ref_ids = Vec::<Option<usize>>::new(); - c_ref_ids.push(share[z].c_ref_id); + let mut c_ref_ids = vec![share[z].c_ref_id]; unique_sort(&mut c_ref_ids); fwriteln!( log, @@ -259,57 +220,57 @@ pub fn study_consensus( let _len = share[z].seq.len(); let mut rights = Vec::<Vec<u8>>::new(); let mut bcs = Vec::<String>::new(); - for m in 0..clones.len() { - let start = clones[m][z].j_stop; - let mut x = clones[m][z].full_seq[start..].to_vec(); + for clone in clones { + let start = clone[z].j_stop; + let mut x = clone[z].full_seq[start..].to_vec(); if x.len() > SHOW { x.truncate(SHOW); } rights.push(x.to_vec()); - bcs.push(clones[m][0].barcode.clone()); - } - let mut rconst = Vec::<Vec<u8>>::new(); - for i in 0..c_ref_ids.len() { - let cid = c_ref_ids[i]; - if cid.is_none() { - continue; - } - let mut x = refdata.refs[cid.unwrap()].to_string().as_bytes().to_vec(); - if x.len() > SHOW { - x.truncate(SHOW); - } - /* - // WARNING! TO INVESTIGATE, AND NOT NECESSARILY VALID FOR MOUSE!!!!!!!!!!!!!!! - let n = refdata.name[cid.unwrap()].after("IG"); - if n == "HM" || n == "HA1" || n == "HA2" || n == "HG1" || n == "HG2" - || n == "HG4"{ - x.remove(0); - } - */ - rconst.push(x.to_vec()); + bcs.push(clone[0].barcode.clone()); } + let rconst = c_ref_ids + .iter() + .filter_map(|&cid| { + cid.map(|cid| { + let mut x = refdata.refs[cid].to_string().as_bytes().to_vec(); + if x.len() > SHOW { + x.truncate(SHOW); + } + /* + // WARNING! TO INVESTIGATE, AND NOT NECESSARILY VALID FOR MOUSE!!!!!!!!!!!!!!! 
+ let n = refdata.name[cid.unwrap()].after("IG"); + if n == "HM" || n == "HA1" || n == "HA2" || n == "HG1" || n == "HG2" + || n == "HG4"{ + x.remove(0); + } + */ + x + }) + }) + .collect::<Vec<_>>(); let mut minlen = 1_000_000; let mut maxlen = 0; - for i in 0..rights.len() { - minlen = min(minlen, rights[i].len()); - maxlen = max(maxlen, rights[i].len()); + for r in &rights { + minlen = min(minlen, r.len()); + maxlen = max(maxlen, r.len()); } - for i in 0..rights.len() { - minlen = min(minlen, rights[i].len()); - maxlen = max(maxlen, rights[i].len()); + for r in &rights { + minlen = min(minlen, r.len()); + maxlen = max(maxlen, r.len()); } let mut dots = Vec::<u8>::new(); let mut diffs = 0; for j in 0..maxlen { let mut bases = Vec::<u8>::new(); - for i in 0..rights.len() { - if j < rights[i].len() { - bases.push(rights[i][j]); + for r in &rights { + if j < r.len() { + bases.push(r[j]); } } - for i in 0..rconst.len() { - if j < rconst[i].len() { - bases.push(rconst[i][j]); + for r in &rconst { + if j < r.len() { + bases.push(r[j]); } } let mut diff = false; @@ -342,7 +303,7 @@ pub fn study_consensus( } } for i in 0..rights.len() { - if i + 1 <= 9 { + if i < 9 { fwrite!(log, " "); } fwriteln!(log, "{} = {} = {}", i + 1, strme(&rights[i]), bcs[i]); diff --git a/enclone/src/proc_args.rs b/enclone/src/proc_args.rs deleted file mode 100644 index 8cfd97e82..000000000 --- a/enclone/src/proc_args.rs +++ /dev/null @@ -1,1089 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -use crate::proc_args2::*; -use crate::proc_args3::*; -use crate::proc_args_check::*; -use enclone_core::defs::*; -use enclone_core::testlist::*; -use io_utils::*; -use itertools::Itertools; -use regex::Regex; -use std::fs::{remove_file, File}; -use std::io::{BufRead, BufReader}; -use std::{env, time::Instant}; -use string_utils::*; -use tilde_expand::*; -use vector_utils::*; - -// Process arguments. - -pub fn proc_args(mut ctl: &mut EncloneControl, args: &Vec<String>) { - // Knobs. 
- - let targs = Instant::now(); - let heur = ClonotypeHeuristics { - max_diffs: 50, - max_degradation: 3, - ref_v_trim: 15, - ref_j_trim: 15, - }; - ctl.heur = heur; - - // Form the combined set of command-line arguments and "command-line" arguments - // implied by environment variables. - - let mut args = args.clone(); - let mut args2 = Vec::<String>::new(); - args2.push(args[0].clone()); - for (key, value) in env::vars() { - if key.starts_with("ENCLONE_") { - args2.push(format!("{}={}", key.after("ENCLONE_"), value)); - } - } - for i in 1..args.len() { - args2.push(args[i].clone()); - } - args = args2; - - // Test for internal run. - - for (key, value) in env::vars() { - if (key == "HOST" || key == "HOSTNAME") && value.ends_with(".fuzzplex.com") { - ctl.gen_opt.internal_run = true; - } - } - for i in 1..args.len() { - if args[i] == "FORCE_EXTERNAL".to_string() { - ctl.gen_opt.internal_run = false; - } - } - if ctl.gen_opt.internal_run { - ctl.gen_opt.current_ref = true; // not sure this is right - ctl.gen_opt.pre = vec![ - format!("/mnt/assembly/vdj/current{}", TEST_FILES_VERSION), - format!("enclone/test/inputs"), - format!("enclone_main"), - ]; - } else if !ctl.gen_opt.cellranger { - let home = dirs::home_dir().unwrap().to_str().unwrap().to_string(); - ctl.gen_opt.pre = vec![ - format!("{}/enclone/datasets", home), - format!("{}/enclone/datasets2", home), - ]; - } - - // Set up general options. - - ctl.gen_opt.h5_pre = true; - ctl.gen_opt.min_cells_exact = 1; - ctl.gen_opt.min_chains_exact = 1; - ctl.gen_opt.exact = None; - for i in 1..args.len() { - if args[i].starts_with("PRE=") { - let pre = args[i].after("PRE=").split(',').collect::<Vec<&str>>(); - ctl.gen_opt.pre.clear(); - for x in pre.iter() { - ctl.gen_opt.pre.push(x.to_string()); - } - } - } - ctl.gen_opt.full_counts = true; - ctl.gen_opt.color = "codon".to_string(); - ctl.silent = true; - - // Set up clonotyping control parameters. 
- - ctl.clono_filt_opt.ncells_low = 1; - ctl.clono_filt_opt.ncells_high = 1_000_000_000; - ctl.clono_filt_opt.min_umi = 0; - ctl.clono_filt_opt.max_chains = 1000000; - ctl.clono_filt_opt.qual_filter = true; - ctl.clono_filt_opt.weak_chains = true; - ctl.clono_filt_opt.weak_onesies = true; - ctl.clono_filt_opt.weak_foursies = true; - ctl.clono_filt_opt.bc_dup = true; - ctl.clono_filt_opt.max_datasets = 1000000000; - ctl.clono_filt_opt.umi_filt = true; - ctl.clono_filt_opt.umi_ratio_filt = true; - - ctl.clono_print_opt.amino = vec![ - "cdr3".to_string(), - "var".to_string(), - "share".to_string(), - "donor".to_string(), - ]; - ctl.clono_print_opt.cvars = vec!["u".to_string(), "const".to_string(), "notes".to_string()]; - ctl.clono_print_opt.lvars = vec!["datasets".to_string(), "n".to_string()]; - - ctl.clono_group_opt.min_group = 1; - - ctl.allele_alg_opt.min_mult = 4; - ctl.allele_alg_opt.min_alt = 4; - - ctl.join_alg_opt.max_score = 1_000_000.0; - ctl.join_alg_opt.merge_onesies = true; // should just kill this as an option - ctl.join_alg_opt.max_cdr3_diffs = 10; - - ctl.join_print_opt.pfreq = 1_000_000_000; - ctl.join_print_opt.quiet = true; - - ctl.parseable_opt.pchains = 4; - - ctl.onesie_mult = 10_000; - - // Pretest for consistency amongst TCR, BCR, GEX and META. Also preparse GEX. 
- - let (mut have_tcr, mut have_bcr) = (false, false); - let mut have_gex = false; - let mut have_meta = false; - let mut gex = String::new(); - let mut bc = String::new(); - let mut metas = Vec::<String>::new(); - let mut xcrs = Vec::<String>::new(); - for i in 1..args.len() { - if args[i].starts_with("BI=") { - have_bcr = true; - have_gex = true; - } else if args[i].starts_with("TCR=") { - have_tcr = true; - } else if args[i].starts_with("BCR=") { - have_bcr = true; - } else if args[i].starts_with("GEX=") { - have_gex = true; - } else if args[i].starts_with("META=") { - have_meta = true; - } - if args[i].starts_with("GEX=") { - gex = args[i].after("GEX=").to_string(); - } - if args[i].starts_with("BC=") { - bc = args[i].after("BC=").to_string(); - } - if is_simple_arg(&args[i], "MARK_STATS") { - ctl.gen_opt.mark_stats = true; - } - if is_simple_arg(&args[i], "MARK_STATS2") { - ctl.gen_opt.mark_stats2 = true; - } - if is_simple_arg(&args[i], "MARKED_B") { - ctl.clono_filt_opt.marked_b = true; - } - } - if have_meta && (have_tcr || have_bcr || have_gex || bc.len() > 0) { - eprintln!("\nIf META is specified, then none of TCR, BCR, GEX or BC can be specified.\n"); - std::process::exit(1); - } - if have_tcr && have_bcr { - eprintln!("\nKindly please do not specify both TCR and BCR.\n"); - std::process::exit(1); - } - let mut using_plot = false; - - // Preprocess BI argument. 
- - if ctl.gen_opt.internal_run { - for i in 1..args.len() { - if args[i].starts_with("BI=") { - let x = args[i].after("BI=").split(',').collect::<Vec<&str>>(); - let mut y = Vec::<String>::new(); - for j in 0..x.len() { - if x[j].contains('-') { - let (start, stop) = (x[j].before("-"), x[j].after("-")); - if !start.parse::<usize>().is_ok() - || !stop.parse::<usize>().is_ok() - || start.force_usize() > stop.force_usize() - { - eprintln!("\nIllegal range in BI argument.\n"); - std::process::exit(1); - } - let (start, stop) = (start.force_usize(), stop.force_usize()); - for j in start..=stop { - y.push(format!("{}", j)); - } - } else { - y.push(x[j].to_string()); - } - } - let mut args2 = Vec::<String>::new(); - for j in 0..i { - args2.push(args[j].clone()); - } - let f = include_str!["enclone.testdata.bcr.gex"]; - let (mut bcrv, mut gexv) = (Vec::<String>::new(), Vec::<String>::new()); - for n in y.iter() { - if *n != "m1" { - if !n.parse::<usize>().is_ok() - || n.force_usize() < 1 - || n.force_usize() > 12 - { - eprintln!( - "\nBI only works for values n with if 1 <= n <= 12, or n = m1.\n" - ); - std::process::exit(1); - } - } else if y.len() > 1 { - eprintln!("\nFor BI, if you specify m1, you can only specify m1.\n"); - std::process::exit(1); - } - let mut found = false; - for s in f.lines() { - if s == format!("DONOR={}", n) { - found = true; - } else if found && s.starts_with("DONOR=") { - break; - } - if found { - if s.starts_with("BCR=") { - bcrv.push(s.after("BCR=").to_string()); - } - if s.starts_with("GEX=") { - gexv.push(s.after("GEX=").to_string()); - } - if s == "SPECIES=mouse" { - args2.push("MOUSE".to_string()); - } - } - } - } - args2.push(format!("BCR={}", bcrv.iter().format(";"))); - args2.push(format!("GEX={}", gexv.iter().format(";"))); - gex = format!("{}", gexv.iter().format(";")); - for j in i + 1..args.len() { - args2.push(args[j].clone()); - } - args = args2; - break; - } - } - } - - // Preprocess NALL. 
- - for i in 1..args.len() { - if args[i] == "NALL".to_string() || args[i] == "NALL_CELL" { - let f = [ - "NCELL", - "NGEX", - "NCROSS", - "NUMI", - "NUMI_RATIO", - "NGRAPH_FILTER", - "NQUAL", - "NWEAK_CHAINS", - "NWEAK_ONESIES", - "NFOURSIE_KILL", - "NWHITEF", - "NBC_DUP", - "MIX_DONORS", - "KEEP_IMPROPER", - ]; - for j in 0..f.len() { - if args[i] == "NALL" || f[j] != "NCELL" { - args.push(f[j].to_string()); - } - } - break; - } - } - - // Define arguments that set something to true. - - let mut set_true = vec![ - ("ACCEPT_INCONSISTENT", &mut ctl.gen_opt.accept_inconsistent), - ("ACCEPT_REUSE", &mut ctl.gen_opt.accept_reuse), - ("ALLOW_INCONSISTENT", &mut ctl.gen_opt.allow_inconsistent), - ("ANN", &mut ctl.join_print_opt.ann), - ("ANN0", &mut ctl.join_print_opt.ann0), - ("BARCODES", &mut ctl.clono_print_opt.barcodes), - ("BASELINE", &mut ctl.gen_opt.baseline), - ("BCJOIN", &mut ctl.join_alg_opt.bcjoin), - ("CDIFF", &mut ctl.clono_filt_opt.cdiff), - ("CHAIN_BRIEF", &mut ctl.clono_print_opt.chain_brief), - ("COMPLETE", &mut ctl.gen_opt.complete), - ("CON", &mut ctl.allele_print_opt.con), - ("CON_CON", &mut ctl.gen_opt.con_con), - ("CON_TRACE", &mut ctl.allele_print_opt.con_trace), - ("CURRENT_REF", &mut ctl.gen_opt.current_ref), - ("DEBUG_TABLE_PRINTING", &mut ctl.debug_table_printing), - ("DEL", &mut ctl.clono_filt_opt.del), - ("DESCRIP", &mut ctl.gen_opt.descrip), - ("EASY", &mut ctl.join_alg_opt.easy), - ("ECHO", &mut ctl.gen_opt.echo), - ("EXP", &mut ctl.gen_opt.exp), - ("FORCE", &mut ctl.force), - ("FULL_SEQC", &mut ctl.clono_print_opt.full_seqc), - ("GRAPH", &mut ctl.gen_opt.graph), - ("GROUP_HEAVY_CDR3", &mut ctl.clono_group_opt.heavy_cdr3_aa), - ("GROUP_VJ_REFNAME", &mut ctl.clono_group_opt.vj_refname), - ( - "GROUP_VJ_REFNAME_STRONG", - &mut ctl.clono_group_opt.vj_refname_strong, - ), - ("HAVE_ONESIE", &mut ctl.clono_filt_opt.have_onesie), - ("HEAVY_CHAIN_REUSE", &mut ctl.gen_opt.heavy_chain_reuse), - ("IMGT", &mut ctl.gen_opt.imgt), - ("IMGT_FIX", &mut 
ctl.gen_opt.imgt_fix), - ("INDELS", &mut ctl.gen_opt.indels), - ("INKT", &mut ctl.clono_filt_opt.inkt), - ("INSERTIONS", &mut ctl.gen_opt.insertions), - ("JC1", &mut ctl.gen_opt.jc1), - ("KEEP_IMPROPER", &mut ctl.merge_all_impropers), - ("MAIT", &mut ctl.clono_filt_opt.mait), - ("MARKED", &mut ctl.clono_filt_opt.marked), - ("MEAN", &mut ctl.clono_print_opt.mean), - ("MIX_DONORS", &mut ctl.clono_filt_opt.donor), - ("MOUSE", &mut ctl.gen_opt.mouse), - ("NCELL", &mut ctl.gen_opt.ncell), - ("NCROSS", &mut ctl.clono_filt_opt.ncross), - ("NEWICK", &mut ctl.gen_opt.newick), - ("NGEX", &mut ctl.clono_filt_opt.ngex), - ("NGRAPH_FILTER", &mut ctl.gen_opt.ngraph_filter), - ("NGROUP", &mut ctl.gen_opt.ngroup), - ("NON_CELL_MARK", &mut ctl.clono_filt_opt.non_cell_mark), - ("NOPRINT", &mut ctl.gen_opt.noprint), - ("NOTE_SIMPLE", &mut ctl.clono_print_opt.note_simple), - ("NPLAIN", &mut ctl.pretty), - ("NWHITEF", &mut ctl.gen_opt.nwhitef), - ("NWARN", &mut ctl.gen_opt.nwarn), - ("PCELL", &mut ctl.parseable_opt.pbarcode), - ("PER_CELL", &mut ctl.clono_print_opt.bu), - ("PROTECT_BADS", &mut ctl.clono_filt_opt.protect_bads), - ("RE", &mut ctl.gen_opt.reannotate), - ("REUSE", &mut ctl.gen_opt.reuse), - ("SEQC", &mut ctl.clono_print_opt.seqc), - ("SHOW_BC", &mut ctl.join_print_opt.show_bc), - ("STABLE_DOC", &mut ctl.gen_opt.stable_doc), - ("SUM", &mut ctl.clono_print_opt.sum), - ("SUMMARY", &mut ctl.gen_opt.summary), - ("SUMMARY_CLEAN", &mut ctl.gen_opt.summary_clean), - ("SUMMARY_CSV", &mut ctl.gen_opt.summary_csv), - ("TOY", &mut ctl.toy), - ("UMI_FILT_MARK", &mut ctl.clono_filt_opt.umi_filt_mark), - ( - "UMI_RATIO_FILT_MARK", - &mut ctl.clono_filt_opt.umi_ratio_filt_mark, - ), - ("UTR_CON", &mut ctl.gen_opt.utr_con), - ("VDUP", &mut ctl.clono_filt_opt.vdup), - ("WEAK", &mut ctl.gen_opt.weak), - ("WHITEF", &mut ctl.clono_filt_opt.whitef), - ]; - - // Define arguments that set something to false. 
- - let mut set_false = vec![ - ("H5_SLICE", &mut ctl.gen_opt.h5_pre), - ("NBC_DUP", &mut ctl.clono_filt_opt.bc_dup), - ("NFOURSIE_KILL", &mut ctl.clono_filt_opt.weak_foursies), - ("NQUAL", &mut ctl.clono_filt_opt.qual_filter), - ("NSILENT", &mut ctl.silent), - ("NUMI", &mut ctl.clono_filt_opt.umi_filt), - ("NUMI_RATIO", &mut ctl.clono_filt_opt.umi_ratio_filt), - ("NWEAK_CHAINS", &mut ctl.clono_filt_opt.weak_chains), - ("NWEAK_ONESIES", &mut ctl.clono_filt_opt.weak_onesies), - ("PRINT_FAILED_JOINS", &mut ctl.join_print_opt.quiet), - ]; - - // Define arguments that set something to a usize. - - let set_usize = [ - ("CHAINS_EXACT", &mut ctl.gen_opt.chains_exact), - ("MAX_CDR3_DIFFS", &mut ctl.join_alg_opt.max_cdr3_diffs), - ("MAX_DATASETS", &mut ctl.clono_filt_opt.max_datasets), - ("MAX_DEGRADATION", &mut ctl.heur.max_degradation), - ("MAX_DIFFS", &mut ctl.heur.max_diffs), - ("MIN_ALT", &mut ctl.allele_alg_opt.min_alt), - ("MIN_CELLS_EXACT", &mut ctl.gen_opt.min_cells_exact), - ("MIN_CHAINS_EXACT", &mut ctl.gen_opt.min_chains_exact), - ( - "MIN_DATASET_RATIO", - &mut ctl.clono_filt_opt.min_dataset_ratio, - ), - ("MIN_DATASETS", &mut ctl.clono_filt_opt.min_datasets), - ("MIN_EXACTS", &mut ctl.clono_filt_opt.min_exacts), - ("MIN_GROUP", &mut ctl.clono_group_opt.min_group), - ("MIN_MULT", &mut ctl.allele_alg_opt.min_mult), - ("MIN_UMI", &mut ctl.clono_filt_opt.min_umi), - ("ONESIE_MULT", &mut ctl.onesie_mult), - ("PCHAINS", &mut ctl.parseable_opt.pchains), - ("PFREQ", &mut ctl.join_print_opt.pfreq), - ]; - - // Define arguments that set something to a string. 
- - let set_string = [ - ("CLUSTAL_AA", &mut ctl.gen_opt.clustal_aa), - ("CLUSTAL_DNA", &mut ctl.gen_opt.clustal_dna), - ("EXT", &mut ctl.gen_opt.ext), - ("PHYLIP_AA", &mut ctl.gen_opt.phylip_aa), - ("PHYLIP_DNA", &mut ctl.gen_opt.phylip_dna), - ("POUT", &mut ctl.parseable_opt.pout), - ("REF", &mut ctl.gen_opt.refname), - ("TRACE_BARCODE", &mut ctl.gen_opt.trace_barcode), - ]; - - // Define arguments that set something to a string that is an output file name. - - let set_string_writeable = [ - ("BINARY", &mut ctl.gen_opt.binary), - ("DONOR_REF_FILE", &mut ctl.gen_opt.dref_file), - ("PROTO", &mut ctl.gen_opt.proto), - ]; - - // Define arguments that set something to a string that is an input file name. - - let set_string_readable = [("PROTO_METADATA", &mut ctl.gen_opt.proto_metadata)]; - - // Define arguments that do nothing (because already parsed). - - let set_nothing = [ - "BC", - "BI", - "CELLRANGER", - "COMP", - "COMP2", - "CTRLC", - "DUMP_INTERNAL_IDS", - "EMAIL", - "FORCE_EXTERNAL", - "GEX", - "HAPS", - "HTML", - "LONG_HELP", - "MARKED_B", - "MARK_STATS", - "MARK_STATS2", - "NALL", - "NALL_CELL", - "NOPAGER", - "NOPRETTY", - "PLAIN", - "PRE", - "PRINT_CPU", - "PRINT_CPU_INFO", - "SVG", - ]; - - // Traverse arguments. - - 'args_loop: for i in 1..args.len() { - let mut arg = args[i].to_string(); - - // Strip out certain quoted expressions. - - if arg.contains("=\"") && arg.ends_with("\"") { - let mut quotes = 0; - for c in arg.chars() { - if c == '\"' { - quotes += 1; - } - } - if quotes == 2 { - arg = format!("{}={}", arg.before("="), arg.between("\"", "\"")); - } - } - - // Check for weird case that might arise if testing code is screwed up. - - if arg.len() == 0 { - eprintln!( - "\nYou've passed a null argument to enclone. Normally that isn't \ - possible.\nPlease take a detailed look at how you're invoking enclone.\n" - ); - std::process::exit(1); - } - - // Process set_true arguments. 
- - for j in 0..set_true.len() { - if arg == set_true[j].0.to_string() { - *(set_true[j].1) = true; - continue 'args_loop; - } - } - - // Process set_false arguments. - - for j in 0..set_false.len() { - if arg == set_false[j].0.to_string() { - *(set_false[j].1) = false; - continue 'args_loop; - } - } - - // Process set_usize args. - - for j in 0..set_usize.len() { - if is_usize_arg(&arg, &set_usize[j].0) { - *(set_usize[j].1) = arg.after(&format!("{}=", set_usize[j].0)).force_usize(); - continue 'args_loop; - } - } - - // Process set_string args. - - for j in 0..set_string.len() { - if is_string_arg(&arg, &set_string[j].0) { - *(set_string[j].1) = arg.after(&format!("{}=", set_string[j].0)).to_string(); - continue 'args_loop; - } - } - - // Process set_string_writeable args. - - for j in 0..set_string_writeable.len() { - let var = &set_string_writeable[j].0; - if is_string_arg(&arg, var) { - *(set_string_writeable[j].1) = arg.after(&format!("{}=", var)).to_string(); - let val = &set_string_writeable[j].1; - let f = File::create(&val); - if f.is_err() { - eprintln!( - "\nYou've specified an output file\n{}\nthat cannot be written.\n", - val - ); - std::process::exit(1); - } - remove_file(&val).unwrap(); - continue 'args_loop; - } - } - - // Process set_string_readable args. - - for j in 0..set_string_readable.len() { - let var = &set_string_readable[j].0; - if is_string_arg(&arg, var) { - let val = arg.after(&format!("{}=", var)); - if val.is_empty() { - eprintln!("\nFilename input in {} cannot be empty\n", val); - std::process::exit(1); - } - *(set_string_readable[j].1) = Some(val.to_string()); - if let Err(e) = File::open(&val) { - eprintln!( - "\nYou've specified an input file\n{}\nthat cannot be read due to {}\n", - val, e - ); - std::process::exit(1); - } - continue 'args_loop; - } - } - - // Process set_nothing args. 
- - for j in 0..set_nothing.len() { - if arg == set_nothing[j].to_string() || arg.starts_with(&format!("{}=", set_nothing[j])) - { - continue 'args_loop; - } - } - - // Process the argument. - - if is_simple_arg(&arg, "SEQ") { - ctl.join_print_opt.seq = true; - - // Not movable. - } else if is_simple_arg(&arg, "H5") { - ctl.gen_opt.force_h5 = true; - } else if is_simple_arg(&arg, "NH5") { - ctl.gen_opt.force_h5 = false; - } else if arg == "LEGEND" { - ctl.gen_opt.use_legend = true; - } else if is_usize_arg(&arg, "REQUIRED_FPS") { - ctl.gen_opt.required_fps = Some(arg.after("REQUIRED_FPS=").force_usize()); - } else if is_usize_arg(&arg, "EXACT") { - ctl.gen_opt.exact = Some(arg.after("EXACT=").force_usize()); - } else if is_usize_arg(&arg, "MIN_CHAINS") { - ctl.clono_filt_opt.min_chains = arg.after("MIN_CHAINS=").force_usize(); - } else if is_usize_arg(&arg, "MAX_CHAINS") { - ctl.clono_filt_opt.max_chains = arg.after("MAX_CHAINS=").force_usize(); - } else if is_usize_arg(&arg, "MIN_CELLS") { - ctl.clono_filt_opt.ncells_low = arg.after("MIN_CELLS=").force_usize(); - } else if is_usize_arg(&arg, "MAX_CELLS") { - ctl.clono_filt_opt.ncells_high = arg.after("MAX_CELLS=").force_usize(); - } else if arg.starts_with("EXFASTA=") { - ctl.gen_opt.fasta = arg.after("EXFASTA=").to_string(); - } else if arg.starts_with("FASTA=") { - ctl.gen_opt.fasta_filename = arg.after("FASTA=").to_string(); - } else if arg.starts_with("FASTA_AA=") { - ctl.gen_opt.fasta_aa_filename = arg.after("FASTA_AA=").to_string(); - - // Other. 
- } else if arg.starts_with("COLOR=") { - ctl.gen_opt.color = arg.after("COLOR=").to_string(); - if ctl.gen_opt.color != "codon".to_string() - && ctl.gen_opt.color != "property".to_string() - { - eprintln!("\nThe only allowed values for COLOR are codon and property.\n"); - std::process::exit(1); - } - } else if arg == "TREE" { - ctl.gen_opt.tree = ".".to_string(); - } else if arg == "TREE=const" { - ctl.gen_opt.tree = "const".to_string(); - } else if arg.starts_with("FCELL=") { - let body = arg.after("FCELL="); - if !body.contains('=') { - eprintln!("\nFCELL usage incorrect.\n"); - std::process::exit(1); - } - let (var, val) = (body.before("=").to_string(), body.after("=").to_string()); - ctl.clono_filt_opt.fcell.push((var, val)); - } else if is_simple_arg(&arg, "FAIL_ONLY=true") { - ctl.clono_filt_opt.fail_only = true; - } else if arg.starts_with("LEGEND=") { - let x = parse_csv(&arg.after("LEGEND=")); - if x.len() == 0 || x.len() % 2 != 0 { - eprintln!("\nValue of LEGEND doesn't make sense.\n"); - std::process::exit(1); - } - ctl.gen_opt.use_legend = true; - for i in 0..x.len() / 2 { - ctl.gen_opt - .legend - .push((x[2 * i].clone(), x[2 * i + 1].clone())); - } - } else if arg.starts_with("BARCODE=") { - let bcs = arg.after("BARCODE=").split(',').collect::<Vec<&str>>(); - let mut x = Vec::<String>::new(); - for j in 0..bcs.len() { - if !bcs[j].contains('-') { - eprintln!( - "\nValue for a barcode in BARCODE argument is invalid, must contain -.\n" - ); - std::process::exit(1); - } - x.push(bcs[j].to_string()); - } - ctl.clono_filt_opt.barcode = x; - } else if arg.starts_with("F=") { - let filt = arg.after("F=").to_string(); - ctl.clono_filt_opt.bounds.push(LinearCondition::new(&filt)); - } else if arg.starts_with("SCAN=") { - let mut x = arg.after("SCAN=").to_string(); - x = x.replace(" ", "").to_string(); - let x = x.split(',').collect::<Vec<&str>>(); - if x.len() != 3 { - eprintln!("\nArgument to SCAN must have three components.\n"); - std::process::exit(1); - } 
- ctl.gen_opt.gene_scan_test = Some(LinearCondition::new(&x[0])); - ctl.gen_opt.gene_scan_control = Some(LinearCondition::new(&x[1])); - let threshold = LinearCondition::new(&x[2]); - for i in 0..threshold.var.len() { - if threshold.var[i] != "t".to_string() && threshold.var[i] != "c".to_string() { - eprintln!("\nIllegal variable in threshold for scan.\n"); - std::process::exit(1); - } - } - ctl.gen_opt.gene_scan_threshold = Some(threshold); - } else if arg.starts_with("PLOT=") { - using_plot = true; - let x = arg.after("PLOT=").split(',').collect::<Vec<&str>>(); - if x.is_empty() { - eprintln!("\nArgument to PLOT is invalid.\n"); - std::process::exit(1); - } - ctl.gen_opt.plot_file = x[0].to_string(); - for j in 1..x.len() { - if !x[j].contains("->") { - eprintln!("\nArgument to PLOT is invalid.\n"); - std::process::exit(1); - } - ctl.gen_opt - .origin_color_map - .insert(x[j].before("->").to_string(), x[j].after("->").to_string()); - } - } else if arg.starts_with("PLOT_BY_ISOTYPE=") { - ctl.gen_opt.plot_by_isotype = true; - ctl.gen_opt.plot_file = arg.after("PLOT_BY_ISOTYPE=").to_string(); - if ctl.gen_opt.plot_file.is_empty() { - eprintln!("\nFilename value needs to be supplied to PLOT_BY_ISOTYPE.\n"); - std::process::exit(1); - } - } else if arg.starts_with("PLOT_BY_MARK=") { - ctl.gen_opt.plot_by_mark = true; - ctl.gen_opt.plot_file = arg.after("PLOT_BY_MARK=").to_string(); - if ctl.gen_opt.plot_file.is_empty() { - eprintln!("\nFilename value needs to be supplied to PLOT_BY_MARK.\n"); - std::process::exit(1); - } - } else if is_simple_arg(&arg, "FAIL_ONLY=false") { - ctl.clono_filt_opt.fail_only = false; - } else if is_usize_arg(&arg, "MAX_CORES") { - let nthreads = arg.after("MAX_CORES=").force_usize(); - let _ = rayon::ThreadPoolBuilder::new() - .num_threads(nthreads) - .build_global(); - } else if arg.starts_with("PCOLS=") { - ctl.parseable_opt.pcols.clear(); - let p = arg.after("PCOLS=").split(',').collect::<Vec<&str>>(); - for i in 0..p.len() { - let mut 
x = p[i].to_string(); - x = x.replace("_sum", "_Σ"); - x = x.replace("_mean", "_μ"); - ctl.parseable_opt.pcols.push(x.to_string()); - ctl.parseable_opt.pcols_sort = ctl.parseable_opt.pcols.clone(); - ctl.parseable_opt.pcols_sortx = ctl.parseable_opt.pcols.clone(); - for j in 0..ctl.parseable_opt.pcols_sortx.len() { - if ctl.parseable_opt.pcols_sortx[j].contains(":") { - ctl.parseable_opt.pcols_sortx[j] = - ctl.parseable_opt.pcols_sortx[j].before(":").to_string(); - } - } - unique_sort(&mut ctl.parseable_opt.pcols_sort); - unique_sort(&mut ctl.parseable_opt.pcols_sortx); - } - } else if arg.starts_with("VJ=") { - ctl.clono_filt_opt.vj = arg.after("VJ=").as_bytes().to_vec(); - for c in ctl.clono_filt_opt.vj.iter() { - if !(*c == b'A' || *c == b'C' || *c == b'G' || *c == b'T') { - eprintln!("\nIllegal value for VJ, must be over alphabet ACGT.\n"); - std::process::exit(1); - } - } - } else if arg.starts_with("AMINO=") { - ctl.clono_print_opt.amino.clear(); - for x in arg.after("AMINO=").split(',').collect::<Vec<&str>>() { - if x != "" { - ctl.clono_print_opt.amino.push(x.to_string()); - } - } - for x in ctl.clono_print_opt.amino.iter() { - let mut ok = false; - if *x == "cdr3" || *x == "var" || *x == "share" || *x == "donor" || *x == "donorn" { - ok = true; - } else if x.contains('-') { - let (start, stop) = (x.before("-"), x.after("-")); - if start.parse::<usize>().is_ok() && stop.parse::<usize>().is_ok() { - if start.force_usize() <= stop.force_usize() { - ok = true; - } - } - } - if !ok { - eprintln!( - "\nUnrecognized variable {} for AMINO. 
Please type \ - \"enclone help amino\".\n", - x - ); - std::process::exit(1); - } - } - } else if arg.starts_with("CVARS=") { - ctl.clono_print_opt.cvars.clear(); - for x in arg.after("CVARS=").split(',').collect::<Vec<&str>>() { - if x.len() > 0 { - ctl.clono_print_opt.cvars.push(x.to_string()); - } - } - for x in ctl.clono_print_opt.cvars.iter_mut() { - *x = x.replace("_sum", "_Σ"); - *x = x.replace("_mean", "_μ"); - } - } else if arg.starts_with("CVARSP=") { - for x in arg.after("CVARSP=").split(',').collect::<Vec<&str>>() { - if x.len() > 0 { - ctl.clono_print_opt.cvars.push(x.to_string()); - } - } - for x in ctl.clono_print_opt.cvars.iter_mut() { - *x = x.replace("_sum", "_Σ"); - *x = x.replace("_mean", "_μ"); - } - } else if arg.starts_with("LVARS=") { - ctl.clono_print_opt.lvars.clear(); - for x in arg.after("LVARS=").split(',').collect::<Vec<&str>>() { - ctl.clono_print_opt.lvars.push(x.to_string()); - } - for x in ctl.clono_print_opt.lvars.iter_mut() { - *x = x.replace("_sum", "_Σ"); - *x = x.replace("_mean", "_μ"); - } - } else if arg.starts_with("LVARSP=") { - let lvarsp = arg.after("LVARSP=").split(',').collect::<Vec<&str>>(); - for x in lvarsp { - ctl.clono_print_opt.lvars.push(x.to_string()); - } - for x in ctl.clono_print_opt.lvars.iter_mut() { - *x = x.replace("_sum", "_Σ"); - *x = x.replace("_mean", "_μ"); - } - } else if is_f64_arg(&arg, "MAX_SCORE") { - ctl.join_alg_opt.max_score = arg.after("MAX_SCORE=").force_f64(); - } else if is_f64_arg(&arg, "MAX_LOG_SCORE") { - let x = arg.after("MAX_LOG_SCORE=").force_f64(); - ctl.join_alg_opt.max_score = 10.0_f64.powf(x); - } else if arg.starts_with("CDR3=") { - let reg = Regex::new(&format!("^{}$", arg.after("CDR3="))); - if !reg.is_ok() { - eprintln!( - "\nYour CDR3 value {} could not be parsed as a regular expression.\n", - arg.after("CDR3=") - ); - std::process::exit(1); - } - ctl.clono_filt_opt.cdr3 = Some(reg.unwrap()); - } else if is_usize_arg(&arg, "CHAINS") { - ctl.clono_filt_opt.min_chains = 
arg.after("CHAINS=").force_usize(); - ctl.clono_filt_opt.max_chains = arg.after("CHAINS=").force_usize(); - } else if arg.starts_with("SEG=") { - let fields = arg.after("SEG=").split('|').collect::<Vec<&str>>(); - let mut y = Vec::<String>::new(); - for x in fields.iter() { - y.push(x.to_string()); - } - y.sort(); - ctl.clono_filt_opt.seg.push(y); - } else if arg.starts_with("SEGN=") { - let fields = arg.after("SEGN=").split('|').collect::<Vec<&str>>(); - let mut y = Vec::<String>::new(); - for x in fields.iter() { - if !x.parse::<i32>().is_ok() { - eprintln!("\nInvalid argument to SEGN.\n"); - std::process::exit(1); - } - y.push(x.to_string()); - } - y.sort(); - ctl.clono_filt_opt.segn.push(y); - } else if is_usize_arg(&arg, "CELLS") { - ctl.clono_filt_opt.ncells_low = arg.after("CELLS=").force_usize(); - ctl.clono_filt_opt.ncells_high = ctl.clono_filt_opt.ncells_low; - } else if arg.starts_with("META=") { - let f = arg.after("META="); - metas.push(f.to_string()); - } else if arg.starts_with("TCR=") - || arg.starts_with("BCR=") - || (arg.len() > 0 && arg.as_bytes()[0] >= b'0' && arg.as_bytes()[0] <= b'9') - { - xcrs.push(arg.to_string()); - } else { - eprintln!("\nUnrecognized argument {}.\n", arg); - std::process::exit(1); - } - } - ctl.perf_stats(&targs, "in main args loop"); - - // Expand ~ and ~user in output file names. - - let t = Instant::now(); - let mut files = [ - &mut ctl.gen_opt.plot_file, - &mut ctl.gen_opt.fasta_filename, - &mut ctl.gen_opt.fasta_aa_filename, - &mut ctl.gen_opt.dref_file, - &mut ctl.parseable_opt.pout, - ]; - for f in files.iter_mut() { - **f = stringme(&tilde_expand(&f.as_bytes())); - } - - // Sanity check arguments. 
- - if ctl.gen_opt.clustal_aa != "".to_string() && ctl.gen_opt.clustal_aa != "stdout".to_string() { - if !ctl.gen_opt.clustal_aa.ends_with(".tar") { - eprintln!("\nIf the value of CLUSTAL_AA is not stdout, it must end in .tar.\n"); - std::process::exit(1); - } - } - if ctl.gen_opt.clustal_dna != "".to_string() && ctl.gen_opt.clustal_dna != "stdout".to_string() - { - if !ctl.gen_opt.clustal_dna.ends_with(".tar") { - eprintln!("\nIf the value of CLUSTAL_DNA is not stdout, it must end in .tar.\n"); - std::process::exit(1); - } - } - if ctl.gen_opt.phylip_aa != "".to_string() && ctl.gen_opt.phylip_aa != "stdout".to_string() { - if !ctl.gen_opt.phylip_aa.ends_with(".tar") { - eprintln!("\nIf the value of PHYLIP_AA is not stdout, it must end in .tar.\n"); - std::process::exit(1); - } - } - if ctl.gen_opt.phylip_dna != "".to_string() && ctl.gen_opt.phylip_dna != "stdout".to_string() { - if !ctl.gen_opt.phylip_dna.ends_with(".tar") { - eprintln!("\nIf the value of PHYLIP_DNA is not stdout, it must end in .tar.\n"); - std::process::exit(1); - } - } - if ctl.clono_filt_opt.umi_filt && ctl.clono_filt_opt.umi_filt_mark { - eprintln!( - "\nIf you use UMI_FILT_MARK, you should also use NUMI, to turn off \ - the filter,\nas otherwise nothing will be marked.\n" - ); - std::process::exit(1); - } - if ctl.clono_filt_opt.umi_ratio_filt && ctl.clono_filt_opt.umi_ratio_filt_mark { - eprintln!( - "\nIf you use UMI_RATIO_FILT_MARK, you should also use NUMI_RATIO, to turn off \ - the filter,\nas otherwise nothing will be marked.\n" - ); - std::process::exit(1); - } - ctl.perf_stats(&t, "after main args loop 1"); - let t = Instant::now(); - check_cvars(&ctl); - if metas.len() > 0 { - let f = &metas[metas.len() - 1]; - let f = get_path_fail(&f, &ctl, "META"); - proc_meta(&f, &mut ctl); - } - ctl.perf_stats(&t, "in proc_meta"); - if xcrs.len() > 0 { - let arg = &xcrs[xcrs.len() - 1]; - proc_xcr(&arg, &gex, &bc, have_gex, &mut ctl); - } - let t = Instant::now(); - let mut alt_bcs = 
Vec::<String>::new(); - for li in 0..ctl.origin_info.alt_bc_fields.len() { - for i in 0..ctl.origin_info.alt_bc_fields[li].len() { - alt_bcs.push(ctl.origin_info.alt_bc_fields[li][i].0.clone()); - } - } - unique_sort(&mut alt_bcs); - for con in ctl.clono_filt_opt.fcell.iter() { - if !bin_member(&alt_bcs, &con.0) { - eprintln!( - "\nYou've used a variable as part of an FCELL argument that has not\n\ - been specified using BC or bc (via META).\n" - ); - std::process::exit(1); - } - } - for i in 0..ctl.origin_info.n() { - let (mut cells_cr, mut rpc_cr) = (None, None); - if ctl.gen_opt.internal_run { - let p = &ctl.origin_info.dataset_path[i]; - let mut f = format!("{}/metrics_summary_csv.csv", p); - if !path_exists(&f) { - f = format!("{}/metrics_summary.csv", p); - } - if path_exists(&f) { - let f = open_for_read![&f]; - let mut count = 0; - let (mut cells_field, mut rpc_field) = (None, None); - for line in f.lines() { - count += 1; - let s = line.unwrap(); - let fields = parse_csv(&s); - for (i, x) in fields.iter().enumerate() { - if count == 1 { - if *x == "Estimated Number of Cells" { - cells_field = Some(i); - } else if *x == "Mean Read Pairs per Cell" { - rpc_field = Some(i); - } - } else if count == 2 { - if Some(i) == cells_field { - let mut n = x.to_string(); - if n.contains("\"") { - n = n.between("\"", "\"").to_string(); - } - n = n.replace(",", ""); - cells_cr = Some(n.force_usize()); - } else if Some(i) == rpc_field { - let mut n = x.to_string(); - if n.contains("\"") { - n = n.between("\"", "\"").to_string(); - } - n = n.replace(",", ""); - rpc_cr = Some(n.force_usize()); - } - } - } - } - } - } - ctl.origin_info.cells_cellranger.push(cells_cr); - ctl.origin_info - .mean_read_pairs_per_cell_cellranger - .push(rpc_cr); - } - if ctl.gen_opt.plot_by_isotype { - if using_plot || ctl.gen_opt.use_legend { - eprintln!("\nPLOT_BY_ISOTYPE cannot be used with PLOT or LEGEND.\n"); - std::process::exit(1); - } - if !have_bcr { - eprintln!("\nPLOT_BY_ISOTYPE can only 
be used with BCR data.\n"); - std::process::exit(1); - } - if ctl.gen_opt.plot_by_mark { - eprintln!("\nPLOT_BY_ISOTYPE and PLOT_BY_MARK cannot be used together.\n"); - std::process::exit(1); - } - } - if ctl.gen_opt.plot_by_mark { - if using_plot || ctl.gen_opt.use_legend { - eprintln!("\nPLOT_BY_MARK cannot be used with PLOT or LEGEND.\n"); - std::process::exit(1); - } - } - if ctl.parseable_opt.pbarcode && ctl.parseable_opt.pout.len() == 0 { - eprintln!("\nIt does not make sense to specify PCELL unless POUT is also specified.\n"); - std::process::exit(1); - } - if ctl.origin_info.n() == 0 { - eprintln!("\nNo TCR or BCR data have been specified.\n"); - std::process::exit(1); - } - let mut donors = Vec::<String>::new(); - let mut origins = Vec::<String>::new(); - let mut tags = Vec::<String>::new(); - let mut origin_for_bc = Vec::<String>::new(); - let mut donor_for_bc = Vec::<String>::new(); - for i in 0..ctl.origin_info.n() { - for x in ctl.origin_info.origin_for_bc[i].iter() { - origins.push(x.1.clone()); - origin_for_bc.push(x.1.clone()); - } - for x in ctl.origin_info.donor_for_bc[i].iter() { - donors.push(x.1.clone()); - donor_for_bc.push(x.1.clone()); - } - for x in ctl.origin_info.tag[i].iter() { - tags.push((x.1).clone()); - } - donors.push(ctl.origin_info.donor_id[i].clone()); - origins.push(ctl.origin_info.origin_id[i].clone()); - } - unique_sort(&mut donors); - unique_sort(&mut origins); - unique_sort(&mut tags); - unique_sort(&mut origin_for_bc); - unique_sort(&mut donor_for_bc); - ctl.origin_info.donors = donors.len(); - ctl.origin_info.dataset_list = ctl.origin_info.dataset_id.clone(); - unique_sort(&mut ctl.origin_info.dataset_list); - ctl.origin_info.origin_list = origins.clone(); - ctl.origin_info.donor_list = donors.clone(); - ctl.origin_info.tag_list = tags; - for i in 0..ctl.origin_info.donor_for_bc.len() { - if ctl.origin_info.donor_for_bc[i].len() > 0 { - ctl.clono_filt_opt.donor = true; - } - } - ctl.perf_stats(&t, "after main args loop 
2"); - proc_args_tail(&mut ctl, &args); - - // Check for invalid variables in linear conditions. - - for i in 0..ctl.clono_filt_opt.bounds.len() { - ctl.clono_filt_opt.bounds[i].require_valid_variables(&ctl); - } - if ctl.gen_opt.gene_scan_test.is_some() { - ctl.gen_opt - .gene_scan_test - .as_ref() - .unwrap() - .require_valid_variables(&ctl); - ctl.gen_opt - .gene_scan_control - .as_ref() - .unwrap() - .require_valid_variables(&ctl); - } -} diff --git a/enclone/src/proc_args2.rs b/enclone/src/proc_args2.rs deleted file mode 100644 index 963a26934..000000000 --- a/enclone/src/proc_args2.rs +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -use enclone_core::defs::*; -use io_utils::*; -use rayon::prelude::*; -use std::{ - fs::File, - io::{BufRead, BufReader}, - time::Instant, -}; -use string_utils::*; -use vector_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Simple arguments. We test for e.g. PLAIN or PLAIN=, the latter to allow for the case -// where the argument has been set by an environment variable. - -pub fn is_simple_arg(arg: &str, x: &str) -> bool { - if arg == x || arg == &format!("{}=", x) { - return true; - } else if arg.starts_with(&format!("{}=", x)) { - eprintln!( - "\nYour command line includes \"{}\", which is not a valid argument.\n\ - Perhaps you meant \"{}\".\n", - arg, x - ); - std::process::exit(1); - } - return false; -} - -// Usize arguments. We require that these are nonnegative integers. 
- -pub fn is_usize_arg(arg: &str, x: &str) -> bool { - if arg == x { - eprintln!( - "\nYour command line includes \"{}\", which is not a valid argument.\n\ - Perhaps you meant \"{}=n\", where n >= 0 is an integer.\n", - arg, x - ); - std::process::exit(1); - } else if arg.starts_with(&format!("{}=", x)) { - let val = arg.after(&format!("{}=", x)).parse::<usize>(); - if val.is_ok() { - return true; - } else { - eprintln!( - "\nYour command line includes \"{}\", which is not a valid argument.\n\ - Perhaps you meant \"{}=n\", where n >= 0 is an integer.\n", - arg, x - ); - std::process::exit(1); - } - } - return false; -} - -pub fn is_f64_arg(arg: &str, x: &str) -> bool { - if arg == x { - eprintln!( - "\nYour command line includes \"{}\", which is not a valid argument.\n\ - Perhaps you meant \"{}=n\", where n is a floating point number.\n", - arg, x - ); - std::process::exit(1); - } else if arg.starts_with(&format!("{}=", x)) { - let val = arg.after(&format!("{}=", x)).parse::<f64>(); - if val.is_ok() { - return true; - } else { - eprintln!( - "\nYour command line includes \"{}\", which is not a valid argument.\n\ - Perhaps you meant \"{}=n\", where n is a floating point number.\n", - arg, x - ); - std::process::exit(1); - } - } - return false; -} - -pub fn is_string_arg(arg: &str, x: &str) -> bool { - if arg == x { - eprintln!( - "\nYour command line includes \"{}\", which is not a valid argument.\n\ - Perhaps you meant \"{}=s\" for some string s.\n", - arg, x - ); - std::process::exit(1); - } else if arg.starts_with(&format!("{}=", x)) { - return true; - } - return false; -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn proc_args_tail(ctl: &mut EncloneControl, args: &Vec<String>) { - let tall = Instant::now(); - let mut lvars_specified = false; - for i in 1..args.len() { - if args[i].starts_with("LVARS=") { - lvars_specified = true; - } - } - if !ctl.clono_print_opt.amino.is_empty() { - 
ctl.clono_print_opt.cvars.insert(0, "amino".to_string()); - } - if ctl.gen_opt.mouse && ctl.gen_opt.refname.len() > 0 { - eprintln!( - "\nIf you specify REF, please do not also specify MOUSE. It is enough to\n\ - set REF to a mouse reference sequence.\n" - ); - std::process::exit(1); - } - - // Remove "datasets" from lvars if there is only one dataset and LVARS not specified. - - if !lvars_specified && ctl.origin_info.dataset_path.len() == 1 { - ctl.clono_print_opt.lvars.remove(0); - } - - // Print command line arguments and dataset summary. - - if !ctl.silent { - println!(""); - for i in 0..args.len() { - let mut x = args[i].clone(); - if i == 0 && x.contains("/") { - x = x.rev_after("/").to_string(); - } - if i > 0 { - print!(" "); - } - print!("{}", x); - } - println!(""); - println!( - "\nThere are {} datasets from {} donors.", - ctl.origin_info.dataset_path.len(), - ctl.origin_info.donors - ); - } - - // Check for duplicated directory paths. - - let mut dp = ctl.origin_info.dataset_path.clone(); - dp.sort(); - let mut i = 0; - while i < dp.len() { - let j = next_diff(&dp, i); - if j - i > 1 { - eprintln!("\nInput dataset path {} is duplicated.\n", dp[i]); - std::process::exit(1); - } - i = j; - } - if !ctl.silent { - println!(""); - } - - // Get origin descriptions. Flaky and particularly flaky when internal origin args are paths, - // since it will look in outs for the file. 
- - if ctl.gen_opt.internal_run { - ctl.origin_info.descrips.clear(); - let mut results = vec![(0, "".to_string()); ctl.origin_info.n()]; - for i in 0..ctl.origin_info.n() { - results[i].0 = i; - } - results.par_iter_mut().for_each(|res| { - let i = res.0; - let mut d = ctl.origin_info.dataset_id[i].clone(); - let mut dir = ctl.origin_info.dataset_path[i].clone(); - if dir.ends_with("/outs") { - dir = dir.rev_before("/outs").to_string(); - } - let invo = format!("{}/_invocation", dir); - if path_exists(&invo) { - let f = open_for_read![invo]; - for line in f.lines() { - let s = line.unwrap(); - // Leave sample_desc alone for internal architecture! - if s.contains("sample_desc ") { - d = s.between("\"", "\"").to_string(); - } - } - } - res.1 = d; - }); - for i in 0..ctl.origin_info.dataset_path.len() { - ctl.origin_info.descrips.push(results[i].1.clone()); - } - if ctl.gen_opt.descrip { - println!(""); - for i in 0..ctl.origin_info.n() { - if i > 0 { - println!(""); - } - println!( - "dataset {} ==> origin {} ==> donor {} ==> dataset descrip = {}", - ctl.origin_info.dataset_id[i], - // origin_id and donor_id don't make sense if bc specified in META - ctl.origin_info.origin_id[i], - ctl.origin_info.donor_id[i], - ctl.origin_info.descrips[i] - ); - println!("vdj path = {}", ctl.origin_info.dataset_path[i]); - if !ctl.origin_info.gex_path.is_empty() { - println!("gex path = {}", ctl.origin_info.gex_path[i]); - } - } - } - } - ctl.perf_stats(&tall, "in proc_args_tail"); -} diff --git a/enclone/src/proc_args3.rs b/enclone/src/proc_args3.rs deleted file mode 100644 index f099308da..000000000 --- a/enclone/src/proc_args3.rs +++ /dev/null @@ -1,781 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// This file contains the two functions proc_xcr and proc_meta. 
- -use enclone_core::defs::*; -use io_utils::*; -use itertools::Itertools; -use rayon::prelude::*; -use std::collections::HashMap; -use std::fs::File; -use std::io::{BufRead, BufReader}; -use std::process::Command; -use std::thread; -use std::time; -use std::time::Instant; -use string_utils::*; -use tilde_expand::*; -use vector_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -fn expand_integer_ranges(x: &str) -> String { - let mut tokens = Vec::<String>::new(); - let mut token = String::new(); - for c in x.chars() { - if c == ',' || c == ':' || c == ';' { - if token.len() > 0 { - tokens.push(token.clone()); - token.clear(); - } - tokens.push(c.to_string()); - } else { - token.push(c); - } - } - if token.len() > 0 { - tokens.push(token); - } - let mut tokens2 = Vec::<String>::new(); - for i in 0..tokens.len() { - if tokens[i].contains("-") - && tokens[i].before("-").parse::<usize>().is_ok() - && tokens[i].after("-").parse::<usize>().is_ok() - { - let n1 = tokens[i].before("-").force_usize(); - let n2 = tokens[i].after("-").force_usize(); - if n1 <= n2 { - for n in n1..=n2 { - if n > n1 { - tokens2.push(",".to_string()); - } - tokens2.push(format!("{}", n)); - } - continue; - } - } - tokens2.push(tokens[i].clone()); - } - let mut y = String::new(); - for i in 0..tokens2.len() { - y += &tokens2[i]; - } - y -} - -fn expand_analysis_sets(x: &str) -> String { - let mut tokens = Vec::<String>::new(); - let mut token = String::new(); - for c in x.chars() { - if c == ',' || c == ':' || c == ';' { - if token.len() > 0 { - tokens.push(token.clone()); - token.clear(); - } - tokens.push(c.to_string()); - } else { - token.push(c); - } - } - if token.len() > 0 { - tokens.push(token); - } - let mut tokens2 = Vec::<String>::new(); - for i in 0..tokens.len() { - if tokens[i].starts_with('S') { - let setid = tokens[i].after("S"); - // do not use xena.txgmesh.net, does not work from inside enclone - let url = 
format!("https://xena.fuzzplex.com/api/analysis_sets/{}", setid); - let o = Command::new("curl") - .arg(url) - .output() - .expect("failed to execute xena http"); - let m = String::from_utf8(o.stdout).unwrap(); - if m.contains("502 Bad Gateway") { - // do not use xena.txgmesh.net, does not work from inside enclone - eprintln!( - "\nWell, this is sad. The URL \ - http://xena.fuzzplex.com/api/analysis_sets/{} returned a 502 Bad Gateway \ - message. Please try again later or ask someone for help.\n\n", - setid - ); - std::process::exit(1); - } - // printme!(m); - if m.contains("\"analysis_ids\":[") { - let mut ids = m.between("\"analysis_ids\":[", "]").to_string(); - ids = ids.replace(" ", ""); - ids = ids.replace("\n", ""); - let ids = ids.split(',').collect::<Vec<&str>>(); - let mut ids2 = Vec::<String>::new(); - - // Remove wiped analysis IDs. - - for j in 0..ids.len() { - // do not use xena.txgmesh.net, does not work from inside enclone - let url = format!("https://xena.fuzzplex.com/api/analyses/{}", ids[j]); - let o = Command::new("curl") - .arg(url) - .output() - .expect("failed to execute xena http"); - let m = String::from_utf8(o.stdout).unwrap(); - if m.contains("502 Bad Gateway") { - // do not use xena.txgmesh.net, does not work from inside enclone - eprintln!( - "\nWell, this is sad. The URL \ - http://xena.fuzzplex.com/api/analyses/{} returned a 502 Bad Gateway \ - message. Please try again later or ask someone for help.\n", - ids[j] - ); - std::process::exit(1); - } - if !m.contains("\"wiped\"") { - ids2.push(ids[j].to_string()); - } - } - - // Proceed. 
- - for j in 0..ids2.len() { - if j > 0 { - tokens2.push(",".to_string()); - } - tokens2.push(ids2[j].to_string()); - } - continue; - } else { - eprintln!( - "\nIt looks like you've provided an incorrect analysis set ID {}.\n", - setid - ); - std::process::exit(1); - } - } - tokens2.push(tokens[i].clone()); - } - let mut y = String::new(); - for i in 0..tokens2.len() { - y += &tokens2[i]; - } - y -} - -// Functions to find the path to data. - -pub fn get_path_fail(p: &str, ctl: &EncloneControl, source: &str) -> String { - for x in ctl.gen_opt.pre.iter() { - let pp = format!("{}/{}", x, p); - if path_exists(&pp) { - return pp; - } - } - if !path_exists(&p) { - if ctl.gen_opt.pre.is_empty() { - let path = std::env::current_dir().unwrap(); - eprintln!( - "\nIn directory {}, unable to find the path {}. This came from the {} argument.\n", - path.display(), - p, - source - ); - } else { - let path = std::env::current_dir().unwrap(); - eprintln!( - "\nIn directory {}, unable to find the\npath {},\n\ - even if prepended by any of the directories \ - in\nPRE={}.\nThis came from the {} argument.\n", - path.display(), - p, - ctl.gen_opt.pre.iter().format(","), - source - ); - } - std::process::exit(1); - } - p.to_string() -} - -fn get_path(p: &str, ctl: &EncloneControl, ok: &mut bool) -> String { - *ok = false; - for x in ctl.gen_opt.pre.iter() { - let mut pp = format!("{}/{}", x, p); - if pp.starts_with("~") { - pp = stringme(&tilde_expand(&pp.as_bytes())); - } - if path_exists(&pp) { - *ok = true; - return pp; - } - } - let mut pp = p.to_string(); - if pp.starts_with("~") { - pp = stringme(&tilde_expand(&pp.as_bytes())); - } - *ok = path_exists(&pp); - pp -} - -fn get_path_or_internal_id( - p: &str, - ctl: &EncloneControl, - source: &str, - current_ref: &mut bool, -) -> String { - let mut ok = false; - let mut pp = get_path(&p, &ctl, &mut ok); - if !ok { - if !ctl.gen_opt.internal_run { - get_path_fail(&pp, &ctl, source); - } else { - // For internal runs, try much harder. 
This is so that internal users can - // just type an internal numerical id for a dataset and have it always - // work. The code that's used here should be placed somewhere else. - - if p.parse::<usize>().is_ok() { - // do not use xena.txgmesh.net, does not work from inside enclone - let url = format!("https://xena.fuzzplex.com/api/analyses/{}", p); - let o = Command::new("curl") - .arg(url.clone()) - .output() - .expect("failed to execute xena http"); - let m = String::from_utf8(o.stdout).unwrap(); - if m.contains("502 Bad Gateway") { - // do not use xena.txgmesh.net, does not work from inside enclone - eprintln!( - "\nWell this is sad. The URL \ - http://xena.fuzzplex.com/api/analyses/{} yielded a 502 Bad Gateway \ - message. Please try again later or ask someone for help.\n", - p - ); - std::process::exit(1); - } - if m.contains("\"path\":\"") { - let path = m.between("\"path\":\"", "\"").to_string(); - *current_ref = true; - pp = format!("{}/outs", path); - if !path_exists(&pp) { - thread::sleep(time::Duration::from_millis(100)); - if path_exists(&pp) { - eprintln!( - "\nYou are experiencing unstable filesystem access: 100 milliseconds ago, \ - the path\n\ - {}\nwas not visible, but now it is. You might consider posting this problem on \ - the slack channel #seqops-bespin.\nOr retry again. 
enclone is giving up because \ - if filesystem access blinks in and out of existence,\n\ - other more cryptic events are likely to occur.\n", - pp - ); - } else { - eprintln!( - "\nIt looks like you've provided an analysis ID for \ - which the pipeline outs folder\n{}\nhas not yet been generated.\n\ - This path did not exist:\n{}\n\n", - p, pp - ); - } - std::process::exit(1); - } - } else { - eprintln!( - "\nIt looks like you've provided either an incorrect \ - analysis ID {} or else one for which\n\ - the pipeline outs folder has not yet been generated.\n\ - This URL\n{}\ndid not provide a path.\n", - p, url - ); - std::process::exit(1); - } - } else { - eprintln!( - "\nAfter searching high and low, your path for {} \ - cannot be found.\nPlease check its value and also the value \ - for PRE if you provided that.\n", - source - ); - std::process::exit(1); - } - } - } - if !pp.ends_with("/outs") && path_exists(&format!("{}/outs", pp)) { - pp = format!("{}/outs", pp); - } - pp -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Parse barcode-level information file. 
- -fn parse_bc(mut bc: String, ctl: &mut EncloneControl, call_type: &str) { - let mut origin_for_bc = HashMap::<String, String>::new(); - let mut donor_for_bc = HashMap::<String, String>::new(); - let mut tag = HashMap::<String, String>::new(); - let mut barcode_color = HashMap::<String, String>::new(); - let mut alt_bc_fields = Vec::<(String, HashMap<String, String>)>::new(); - if bc != "".to_string() { - bc = get_path_fail(&bc, &ctl, call_type); - let f = open_for_read![&bc]; - let mut first = true; - let mut fieldnames = Vec::<String>::new(); - let mut barcode_pos = 0; - let (mut origin_pos, mut donor_pos, mut tag_pos, mut color_pos) = (None, None, None, None); - let mut to_alt = Vec::<isize>::new(); - for line in f.lines() { - let s = line.unwrap(); - if first { - let fields = s.split(',').collect::<Vec<&str>>(); - to_alt = vec![-1 as isize; fields.len()]; - if !fields.contains(&"barcode") { - let mut origin = "from the bc field used in META"; - if call_type == "BC" { - origin = "from the BC argument"; - } - eprintln!( - "\nThe file\n{}\n{}\nis missing the barcode field.\n", - bc, origin, - ); - std::process::exit(1); - } - for x in fields.iter() { - fieldnames.push(x.to_string()); - } - for i in 0..fields.len() { - if fields[i] == "barcode" { - barcode_pos = i; - } else if fields[i] == "origin" { - origin_pos = Some(i); - } else if fields[i] == "donor" { - donor_pos = Some(i); - } else if fields[i] == "tag" { - tag_pos = Some(i); - } else if fields[i] == "color" { - color_pos = Some(i); - } else { - to_alt[i] = alt_bc_fields.len() as isize; - alt_bc_fields - .push((fields[i].to_string(), HashMap::<String, String>::new())); - } - } - first = false; - } else { - let fields = s.split(',').collect::<Vec<&str>>(); - if fields.len() != fieldnames.len() { - let mut origin = "bc in META"; - if call_type == "BC" { - origin = "BC"; - } - eprintln!( - "\nThere is a line\n{}\nin a CSV file defined by {}\n\ - that has {} fields, which isn't right, because the header line \ 
- has {} fields. This is for the file\n{}.\n", - s, - origin, - fields.len(), - fieldnames.len(), - bc - ); - std::process::exit(1); - } - for i in 0..fields.len() { - if to_alt[i] >= 0 { - alt_bc_fields[to_alt[i] as usize] - .1 - .insert(fields[barcode_pos].to_string(), fields[i].to_string()); - } - } - if !fields[barcode_pos].contains('-') { - let mut origin = "bc in META"; - if call_type == "BC" { - origin = "BC"; - } - eprintln!( - "\nThe barcode \"{}\" appears in the file\n{}\ndefined \ - by {}. That doesn't make sense because a barcode\n\ - should include a hyphen.\n", - fields[barcode_pos], bc, origin - ); - std::process::exit(1); - } - if origin_pos.is_some() { - origin_for_bc.insert( - fields[barcode_pos].to_string(), - fields[origin_pos.unwrap()].to_string(), - ); - } - if donor_pos.is_some() { - donor_for_bc.insert( - fields[barcode_pos].to_string(), - fields[donor_pos.unwrap()].to_string(), - ); - } - if tag_pos.is_some() { - let tag_pos = tag_pos.unwrap(); - tag.insert(fields[barcode_pos].to_string(), fields[tag_pos].to_string()); - } - if color_pos.is_some() { - let color_pos = color_pos.unwrap(); - barcode_color.insert( - fields[barcode_pos].to_string(), - fields[color_pos].to_string(), - ); - } - } - } - } - ctl.origin_info.origin_for_bc.push(origin_for_bc); - ctl.origin_info.donor_for_bc.push(donor_for_bc); - ctl.origin_info.tag.push(tag); - ctl.origin_info.barcode_color.push(barcode_color); - ctl.origin_info.alt_bc_fields.push(alt_bc_fields); -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn proc_xcr(f: &str, gex: &str, bc: &str, have_gex: bool, mut ctl: &mut EncloneControl) { - ctl.origin_info = OriginInfo::default(); - if (ctl.gen_opt.tcr && f.starts_with("BCR=")) || (ctl.gen_opt.bcr && f.starts_with("TCR=")) { - eprintln!("\nOnly one of TCR or BCR can be specified.\n"); - std::process::exit(1); - } - let t = Instant::now(); - ctl.gen_opt.tcr = f.starts_with("TCR="); - 
ctl.gen_opt.bcr = f.starts_with("BCR="); - let mut val: String; - if ctl.gen_opt.tcr { - val = f.after("TCR=").to_string(); - } else if ctl.gen_opt.bcr { - val = f.after("BCR=").to_string(); - } else { - val = f.to_string(); - } - if val == "".to_string() { - eprintln!( - "\nYou can't write {} with no value on the right hand side.", - f - ); - eprintln!("Perhaps you need to remove some white space from your command line.\n"); - std::process::exit(1); - } - val = expand_integer_ranges(&val); - if ctl.gen_opt.internal_run { - val = expand_analysis_sets(&val); - } - let donor_groups = val.split(';').collect::<Vec<&str>>(); - let mut gex2 = expand_integer_ranges(&gex); - if ctl.gen_opt.internal_run { - gex2 = expand_analysis_sets(&gex2); - } - let donor_groups_gex = gex2.split(';').collect::<Vec<&str>>(); - let donor_groups_bc = bc.split(';').collect::<Vec<&str>>(); - let mut xcr = "TCR".to_string(); - if ctl.gen_opt.bcr { - xcr = "BCR".to_string(); - } - if have_gex && donor_groups_gex.len() != donor_groups.len() { - eprintln!( - "\nThere are {} {} donor groups and {} GEX donor groups, so \ - the {} and GEX arguments do not exactly mirror each \ - other's structure.\n", - xcr, - donor_groups.len(), - donor_groups_gex.len(), - xcr - ); - std::process::exit(1); - } - if !bc.is_empty() && donor_groups_bc.len() != donor_groups.len() { - eprintln!( - "\nThe {} and BC arguments do not exactly mirror each \ - other's structure.\n", - xcr - ); - std::process::exit(1); - } - ctl.perf_stats(&t, "in proc_xcr 1"); - let t = Instant::now(); - for (id, d) in donor_groups.iter().enumerate() { - let origin_groups = (*d).split(':').collect::<Vec<&str>>(); - let mut origin_groups_gex = Vec::<&str>::new(); - if have_gex { - origin_groups_gex = donor_groups_gex[id].split(':').collect::<Vec<&str>>(); - if origin_groups_gex.len() != origin_groups.len() { - eprintln!( - "\nFor donor {}, there are {} {} origin groups and {} GEX origin groups, so \ - the {} and GEX arguments do not exactly 
mirror each \ - other's structure.\n", - id + 1, - xcr, - origin_groups.len(), - origin_groups_gex.len(), - xcr - ); - std::process::exit(1); - } - } - let mut origin_groups_bc = Vec::<&str>::new(); - if !bc.is_empty() { - origin_groups_bc = donor_groups_bc[id].split(':').collect::<Vec<&str>>(); - if origin_groups_bc.len() != origin_groups.len() { - eprintln!( - "\nThe {} and BC arguments do not exactly mirror each \ - other's structure.\n", - xcr - ); - std::process::exit(1); - } - } - for (is, s) in origin_groups.iter().enumerate() { - let datasets = (*s).split(',').collect::<Vec<&str>>(); - let datasets_gex: Vec<&str>; - let mut datasets_bc = Vec::<&str>::new(); - if have_gex { - datasets_gex = origin_groups_gex[is].split(',').collect::<Vec<&str>>(); - if datasets_gex.len() != datasets.len() { - eprintln!( - "\nSee {} {} datasets and {} GEX datasets, so \ - the {} and GEX arguments do not exactly mirror each \ - other's structure.\n", - xcr, - datasets.len(), - datasets_gex.len(), - xcr - ); - std::process::exit(1); - } - } - if !bc.is_empty() { - datasets_bc = origin_groups_bc[is].split(',').collect::<Vec<&str>>(); - if datasets_bc.len() != datasets.len() { - eprintln!( - "\nThe {} and BC arguments do not exactly mirror each \ - other's structure.\n", - xcr - ); - std::process::exit(1); - } - } - for (ix, x) in datasets.iter().enumerate() { - ctl.origin_info.color.push("".to_string()); - ctl.origin_info.tag.push(HashMap::<String, String>::new()); - let donor_name = format!("d{}", id + 1); - let origin_name = format!("s{}", is + 1); - ctl.origin_info.donor_id.push(donor_name); - ctl.origin_info.origin_id.push(origin_name); - let mut dataset_name = (*x).to_string(); - if dataset_name.contains('/') { - dataset_name = dataset_name.rev_after("/").to_string(); - } - ctl.origin_info.descrips.push(dataset_name.clone()); - ctl.origin_info.dataset_id.push(dataset_name.clone()); - - // Now work on the BC path. 
- - let mut bcx = String::new(); - if !bc.is_empty() { - bcx = datasets_bc[ix].to_string(); - } - parse_bc(bcx, &mut ctl, "BC"); - } - } - } - - // Get paths. This will need to change when cellranger switches to multi. This code is - // parallelized because this code can indirectly make many calls to path_exists, and the wall - // clock time for these can add up. There should be a way to do this that does not involve - // multithreading. - - let mut source = f.clone(); - if f.contains('=') { - source = f.before("="); - } - let mut results = Vec::<(String, String, bool)>::new(); - for (id, d) in donor_groups.iter().enumerate() { - let origin_groups = (*d).split(':').collect::<Vec<&str>>(); - let mut origin_groups_gex = Vec::<&str>::new(); - if have_gex { - origin_groups_gex = donor_groups_gex[id].split(':').collect::<Vec<&str>>(); - } - for (is, s) in origin_groups.iter().enumerate() { - let datasets = (*s).split(',').collect::<Vec<&str>>(); - let mut datasets_gex = Vec::<&str>::new(); - if have_gex { - datasets_gex = origin_groups_gex[is].split(',').collect::<Vec<&str>>(); - } - for (ix, x) in datasets.iter().enumerate() { - let p = (*x).to_string(); - let mut pg = String::new(); - if have_gex { - pg = datasets_gex[ix].to_string(); - } - results.push((p, pg, false)); - } - } - } - results.par_iter_mut().for_each(|res| { - let (p, pg) = (&mut res.0, &mut res.1); - let mut current_ref = &mut res.2; - *p = get_path_or_internal_id(&p, &ctl, source, &mut current_ref); - if have_gex { - *pg = get_path_or_internal_id(&pg, &ctl, "GEX", &mut current_ref); - } - }); - for i in 0..results.len() { - ctl.origin_info.dataset_path.push(results[i].0.clone()); - ctl.origin_info.gex_path.push(results[i].1.clone()); - if results[i].2 { - ctl.gen_opt.current_ref = true; - } - } - ctl.perf_stats(&t, "in proc_xcr 2"); -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn proc_meta(f: &str, mut ctl: &mut EncloneControl) { - if 
!path_exists(&f) { - eprintln!("\nCan't find the file referenced by your META argument.\n"); - std::process::exit(1); - } - let fx = File::open(&f); - if fx.is_err() { - eprintln!( - "\nProblem with META: unable to read from the file\n\ - \"{}\".\nPlease check that that path makes sense and that you have read \ - permission for it.\n", - f - ); - std::process::exit(1); - } - let f = BufReader::new(fx.unwrap()); - let mut fields = Vec::<String>::new(); - let mut donors = Vec::<String>::new(); - for (count, line) in f.lines().enumerate() { - let s = line.unwrap(); - if count == 0 { - let x = s.split(',').collect::<Vec<&str>>(); - for i in 0..x.len() { - fields.push(x[i].to_string()); - } - let mut fields_sorted = fields.clone(); - unique_sort(&mut fields_sorted); - if fields_sorted.len() < fields.len() { - eprintln!( - "\nThe CSV file that you specified using the META argument \ - has duplicate field names\nin its first line.\n" - ); - std::process::exit(1); - } - let allowed_fields = vec![ - "bc".to_string(), - "bcr".to_string(), - "donor".to_string(), - "gex".to_string(), - "origin".to_string(), - "tcr".to_string(), - "color".to_string(), - ]; - for x in fields.iter() { - if !allowed_fields.contains(&x) { - eprintln!( - "\nThe CSV file that you specified using the META argument \ - has an illegal field name ({}) in its first line.\n", - x - ); - std::process::exit(1); - } - } - ctl.gen_opt.tcr = fields.contains(&"tcr".to_string()); - ctl.gen_opt.bcr = fields.contains(&"bcr".to_string()); - if !ctl.gen_opt.tcr && !ctl.gen_opt.bcr { - eprintln!( - "\nThe CSV file that you specified using the META argument \ - has neither the field tcr or bcr in its first line.\n" - ); - std::process::exit(1); - } - if ctl.gen_opt.tcr && ctl.gen_opt.bcr { - eprintln!( - "\nThe CSV file that you specified using the META argument \ - has both the fields tcr and bcr in its first line.\n" - ); - std::process::exit(1); - } - } else if !s.starts_with('#') { - let val = 
s.split(',').collect::<Vec<&str>>(); - if val.len() != fields.len() { - eprintln!( - "\nMETA file line {} has a different number of fields than the \ - first line of the file.\n", - count + 1 - ); - std::process::exit(1); - } - let mut path = String::new(); - let mut abbr = String::new(); - let mut gpath = String::new(); - let mut origin = "s1".to_string(); - let mut donor = "d1".to_string(); - let mut color = "".to_string(); - let mut bc = "".to_string(); - for i in 0..fields.len() { - let x = &fields[i]; - let mut y = val[i].to_string(); - if y.starts_with('"') && y.ends_with('"') { - y = y.after("\"").rev_before("\"").to_string(); - } - if *x == "tcr" || *x == "bcr" { - if y.contains(':') { - path = y.after(":").to_string(); - abbr = y.before(":").to_string(); - } else { - path = y.to_string(); - if path.contains("/") { - abbr = path.rev_after("/").to_string(); - } else { - abbr = path.clone(); - } - } - } else if *x == "gex" { - gpath = y.to_string(); - } else if *x == "origin" { - origin = y.to_string(); - } else if *x == "donor" { - donor = y.to_string(); - } else if *x == "color" { - color = y.to_string(); - } else if *x == "bc" && y.len() > 0 { - bc = y.to_string(); - } - } - - // Parse bc and finish up. 
- - parse_bc(bc.clone(), &mut ctl, "META"); - let mut current_ref = false; - path = get_path_or_internal_id(&path, &ctl, "META", &mut current_ref); - if gpath.len() > 0 { - gpath = get_path_or_internal_id(&gpath, &mut ctl, "META", &mut current_ref); - } - if current_ref { - ctl.gen_opt.current_ref = true; - } - let mut dp = None; - for j in 0..donors.len() { - if donor == donors[j] { - dp = Some(j); - break; - } - } - if dp.is_none() { - donors.push(donor.clone()); - } - ctl.origin_info.descrips.push(abbr.clone()); - ctl.origin_info.dataset_path.push(path); - ctl.origin_info.gex_path.push(gpath); - ctl.origin_info.dataset_id.push(abbr); - ctl.origin_info.donor_id.push(donor); - ctl.origin_info.origin_id.push(origin); - ctl.origin_info.color.push(color); - } - } -} diff --git a/enclone/src/proc_args_check.rs b/enclone/src/proc_args_check.rs deleted file mode 100644 index 84bc45d20..000000000 --- a/enclone/src/proc_args_check.rs +++ /dev/null @@ -1,540 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Check lvars, cvars, and pcols. 
- -use enclone_core::defs::*; -use regex::Regex; -use string_utils::*; -use vector_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn is_pattern(x: &String, parseable: bool) -> bool { - let ends0 = [ - "_g", "_ab", "_ag", "_cr", "_cu", "_g_μ", "_ab_μ", "_ag_μ", "_cr_μ", "_cu_μ", "_g_%", - ]; - let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; - let mut ends = Vec::<String>::new(); - for z in ends0.iter() { - for y in suffixes.iter() { - ends.push(format!("{}{}", z, y)); - } - } - let mut x = x.clone(); - if x.contains(':') { - x = x.rev_after(":").to_string(); - } - if parseable && x.ends_with("_cell") { - x = x.rev_before("_cell").to_string(); - } - let mut pat = false; - for y in ends.iter() { - if x.ends_with(y) { - let p = x.rev_before(y); - if !p.is_empty() && Regex::new(&p).is_ok() { - let mut ok = true; - let mut special = false; - let p = p.as_bytes(); - for i in 0..p.len() { - if !((p[i] >= b'A' && p[i] <= b'Z') - || (p[i] >= b'a' && p[i] <= b'z') - || (p[i] >= b'0' && p[i] <= b'9') - || b".-_[]()|*".contains(&p[i])) - { - ok = false; - break; - } - if b"[]()|*".contains(&p[i]) { - special = true; - } - } - if ok && special { - pat = true; - break; - } - } - } - } - pat -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -fn check_gene_fb(ctl: &EncloneControl, gex_info: &GexInfo, to_check: &Vec<String>, category: &str) { - let g_ends0 = ["_g"]; - let fb_ends0 = ["_ab", "_ag", "_cr", "_cu"]; - let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; - let suffixes_g = ["", "_min", "_max", "_μ", "_Σ", "_%"]; - let (mut g_ends, mut fb_ends) = (Vec::<String>::new(), Vec::<String>::new()); - for x in g_ends0.iter() { - for y in suffixes_g.iter() { - g_ends.push(format!("{}{}", x, y)); - } - } - for x in fb_ends0.iter() { - for y in suffixes.iter() { - fb_ends.push(format!("{}{}", x, y)); - } - } - for x in to_check.iter() { - if 
!gex_info.have_gex { - let mut problem = false; - for y in g_ends.iter() { - if x.ends_with(y) { - problem = true; - } - } - if problem - || *x == "gex".to_string() - || x.starts_with("gex_") - || *x == "n_gex_cell".to_string() - || *x == "n_gex".to_string() - || *x == "clust".to_string() - || *x == "type".to_string() - || *x == "entropy".to_string() - || *x == "cred".to_string() - || *x == "cred_cell".to_string() - { - if category == "parseable" { - eprintln!( - "\nParseable field {} does not make sense because gene expression \ - data\nwere not provided as input.\n", - x - ); - } else { - eprintln!( - "\nLead variable {} does not make sense because gene expression \ - data\nwere not provided as input.\n", - x - ); - } - std::process::exit(1); - } - } - if !gex_info.have_fb { - for y in fb_ends.iter() { - if x.ends_with(y) { - if category == "parseable" { - eprintln!( - "\nParseable field {} does not make sense because feature \ - barcode data\nwere not provided as input.\n", - x - ); - } else { - eprintln!( - "\nLead variable {} does not make sense because feature barcode \ - data\nwere not provided as input.\n", - x - ); - } - std::process::exit(1); - } - } - } - } - let mut known_features = Vec::<String>::new(); - for i in 0..gex_info.gex_features.len() { - for j in 0..gex_info.gex_features[i].len() { - let f = &gex_info.gex_features[i][j]; - let ff = f.split('\t').collect::<Vec<&str>>(); - if ff.len() != 3 { - eprintln!("Unexpected structure of features file, at this line\n{}", f); - eprintln!("Giving up.\n"); - std::process::exit(1); - } - for z in 0..2 { - if ff[2].starts_with("Antibody") { - for s in suffixes.iter() { - known_features.push(format!("{}_ab{}", ff[z], s)); - } - } else if ff[2].starts_with("Antigen") { - for s in suffixes.iter() { - known_features.push(format!("{}_ag{}", ff[z], s)); - } - } else if ff[2].starts_with("CRISPR") { - for s in suffixes.iter() { - known_features.push(format!("{}_cr{}", ff[z], s)); - } - } else if 
ff[2].starts_with("CUSTOM") { - for s in suffixes.iter() { - known_features.push(format!("{}_cu{}", ff[z], s)); - } - } else { - for s in suffixes_g.iter() { - known_features.push(format!("{}_g{}", ff[z], s)); - } - } - } - } - } - unique_sort(&mut known_features); - for i in 0..to_check.len() { - let mut x = to_check[i].clone(); - if x.contains(':') { - x = x.after(":").to_string(); - } - let mut y = x.clone(); - if category == "parseable" && y.ends_with("_cell") { - y = y.before("_cell").to_string(); - } - if !bin_member(&known_features, &y) { - let mut n_var = false; - if x.starts_with("n_") { - n_var = true; - let mut is_dataset_name = false; - let mut is_origin_name = false; - let mut is_donor_name = false; - let mut is_tag_name = false; - let name = x.after("n_").to_string(); - let s = ctl.origin_info.n(); - for j in 0..s { - if ctl.origin_info.dataset_id[j] == name { - is_dataset_name = true; - } - } - for j in 0..ctl.origin_info.origin_list.len() { - if ctl.origin_info.origin_list[j] == name { - is_origin_name = true; - } - } - for j in 0..ctl.origin_info.donor_list.len() { - if ctl.origin_info.donor_list[j] == name { - is_donor_name = true; - } - } - for j in 0..ctl.origin_info.tag_list.len() { - if ctl.origin_info.tag_list[j] == name { - is_tag_name = true; - } - } - let msg = "\nSuggested reading: \"enclone help input\" and \ - \"enclone help glossary\".\n"; - if !is_dataset_name && !is_origin_name && !is_donor_name && !is_tag_name { - eprintln!( - "\nYou've used the {} variable {}, and yet {} \ - does not name a dataset, nor an origin,\nnor a donor, nor a tag.\n{}", - category, x, name, msg - ); - std::process::exit(1); - } - let mut types = 0; - if is_dataset_name { - types += 1; - } - if is_origin_name { - types += 1; - } - if is_donor_name { - types += 1; - } - if is_tag_name { - types += 1; - } - if is_dataset_name && is_origin_name && is_donor_name { - eprintln!( - "\nYou've used the {} variable {}, and yet {} \ - names a dataset, an origin, and a 
donor. That's ambiguous.\n{}", - category, x, name, msg - ); - std::process::exit(1); - } - if is_dataset_name && is_origin_name { - eprintln!( - "\nYou've used the {} variable {}, and yet {} \ - names a dataset and an origin. That's ambiguous.\n{}", - category, x, name, msg - ); - std::process::exit(1); - } - if is_dataset_name && is_donor_name { - eprintln!( - "\nYou've used the {} variable {}, and yet {} \ - names a dataset and a donor. That's ambiguous.\n{}", - category, x, name, msg - ); - std::process::exit(1); - } - if is_origin_name && is_donor_name { - eprintln!( - "\nYou've used the {} variable {}, and yet {} \ - names an origin and a donor. That's ambiguous.\n{}", - category, x, name, msg - ); - std::process::exit(1); - } - if types != 1 { - eprintln!( - "\nYou've used the {} variable {}, and yet {} \ - names a tag and also a dataset, origin or donor.\n\ - That's ambiguous.\n{}", - category, x, name, msg - ); - std::process::exit(1); - } - } - if !n_var { - if category == "lead" { - eprintln!( - "\nThe variable {} for LVARS is unrecognized. Please type \ - \"enclone help lvars\".\n", - x - ); - } else { - eprintln!( - "\nUnrecognized parseable variable {}. Please type \ - \"enclone help parseable\".\n", - x - ); - } - std::process::exit(1); - } - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Check pcols args. 
- -pub fn check_pcols(ctl: &EncloneControl, gex_info: &GexInfo) { - let mut alt_bcs = Vec::<String>::new(); - for li in 0..ctl.origin_info.alt_bc_fields.len() { - for i in 0..ctl.origin_info.alt_bc_fields[li].len() { - alt_bcs.push(ctl.origin_info.alt_bc_fields[li][i].0.clone()); - } - } - unique_sort(&mut alt_bcs); - let mut to_check = Vec::<String>::new(); - let pchains = ctl.parseable_opt.pchains; - for x in ctl.parseable_opt.pcols.iter() { - let mut ok = false; - if bin_member(&alt_bcs, x) { - ok = true; - } - for y in PLVARS_ALLOWED.iter() { - if *x == *y { - ok = true; - } - } - for y in ctl.origin_info.dataset_list.iter() { - if *x == format!("{}_barcodes", y) { - ok = true; - } - } - if ctl.parseable_opt.pbarcode { - if *x == "barcode" { - ok = true; - } - for y in ctl.origin_info.dataset_list.iter() { - if *x == format!("{}_barcode", y) { - ok = true; - } - } - } - let gpvar = x.starts_with('g') && x.after("g").parse::<usize>().is_ok(); - if !gex_info.have_gex && (x.starts_with("gex") || x.starts_with("n_gex") || x == "clust") - || x == "type" - { - eprintln!( - "\nCan't use parseable variable {} without having gene \ - expression data.\n", - x - ); - std::process::exit(1); - } - if LVARS_ALLOWED.contains(&x.as_str()) || gpvar { - ok = true; - } else if is_pattern(&x, true) { - ok = true; - } else { - for p in 1..=pchains { - let ps = format!("{}", p); - if x.ends_with(&ps) { - let y = x.rev_before(&ps); - if CVARS_ALLOWED.contains(&y) - || (ctl.parseable_opt.pbarcode && CVARS_ALLOWED_PCELL.contains(&y)) - { - ok = true; - } else if PCVARS_ALLOWED.contains(&y) { - ok = true; - } else if y.starts_with('q') - && y.ends_with('_') - && y.between("q", "_").parse::<usize>().is_ok() - { - ok = true; - } else if y.starts_with("ndiff") - && y.ends_with("vj") - && y.between("ndiff", "vj").parse::<usize>().is_ok() - && y.between("ndiff", "vj").force_usize() >= 1 - { - ok = true; - break; - } - } - } - } - if !ok { - to_check.push(x.clone()); - } - } - if 
!to_check.is_empty() { - check_gene_fb(&ctl, &gex_info, &to_check, "parseable"); - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Check cvars args. - -pub fn check_cvars(ctl: &EncloneControl) { - for x in ctl.clono_print_opt.cvars.iter() { - let mut ok = CVARS_ALLOWED.contains(&(*x).as_str()); - if x.starts_with("ndiff") - && x.ends_with("vj") - && x.between("ndiff", "vj").parse::<usize>().is_ok() - && x.between("ndiff", "vj").force_usize() >= 1 - { - ok = true; - } - if !ok { - eprintln!( - "\nUnrecognized variable {} for CVARS or CVARSP. \ - Please type \"enclone help cvars\".\n", - x - ); - std::process::exit(1); - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Check lvars args. - -pub fn check_lvars(ctl: &EncloneControl, gex_info: &GexInfo) { - let mut to_check = Vec::<String>::new(); - let ends0 = [ - "_g", "_ab", "_ag", "_cr", "_cu", "_g_μ", "_ab_μ", "_ag_μ", "_cr_μ", "_cu_μ", "_g_%", - ]; - let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; - let mut ends = Vec::<String>::new(); - for x in ends0.iter() { - for y in suffixes.iter() { - ends.push(format!("{}{}", x, y)); - } - } - let mut nd_used = false; - 'main_loop: for x in ctl.clono_print_opt.lvars.iter() { - // See if type is ok. - - if *x == "type" { - let mut specified = false; - for i in 0..gex_info.cell_type_specified.len() { - if gex_info.cell_type_specified[i] { - specified = true; - } - } - if !ctl.gen_opt.internal_run { - eprintln!( - "\nUnrecognized variable {} for LVARS. 
Please type \ - \"enclone help lvars\".\n", - x - ); - std::process::exit(1); - } - if !specified { - eprintln!( - "\nYou've used the lead variable \"type\", but the file \ - cell_types.csv was not found.\n\ - This could be because you're using a GEX pipestance that was \ - run using too old a version of Cell Ranger.\n\ - Or it might have been generated using the CS pipeline.\n\ - Or you might have copied the pipestance outs but not included \ - that file.\n" - ); - std::process::exit(1); - } - } - - // Check alt_bc_fields. - - for li in 0..ctl.origin_info.alt_bc_fields.len() { - for i in 0..ctl.origin_info.alt_bc_fields[li].len() { - if ctl.origin_info.alt_bc_fields[li][i].0 == *x { - continue 'main_loop; - } - } - } - - // Check for nd<k>. - - if x.starts_with("nd") - && x.after("nd").parse::<usize>().is_ok() - && x.after("nd").force_usize() >= 1 - { - if nd_used { - eprintln!("\nOnly one instance of the lead variable nd<k> is allowed.\n"); - std::process::exit(1); - } - nd_used = true; - continue; - } - - // Check for pe<n> and npe<n> and ppe<n>. - - if x.starts_with("pe") && x.after("pe").parse::<usize>().is_ok() { - continue; - } - if x.starts_with("npe") && x.after("npe").parse::<usize>().is_ok() { - continue; - } - if x.starts_with("ppe") && x.after("ppe").parse::<usize>().is_ok() { - continue; - } - - // Check for patterns. - - if is_pattern(&x, false) { - continue; - } - - // The rest. 
- - if !gex_info.have_gex - && (x.starts_with("gex") || x.starts_with("n_gex") || x == "clust" || x == "type") - { - eprintln!( - "\nCan't use LVARS or LVARSP variable {} without having gene \ - expression data.\n", - x - ); - std::process::exit(1); - } - if x.ends_with("_cell") { - eprintln!("\nFields ending with _cell cannot be used in LVARS or LVARSP.\n"); - std::process::exit(1); - } - let gpvar = x.starts_with('g') && x.after("g").parse::<usize>().is_ok(); - if !(LVARS_ALLOWED.contains(&x.as_str()) || gpvar) { - let mut end_ok = false; - for i in 0..ends.len() { - if x.ends_with(&ends[i]) { - end_ok = true; - } - } - if !end_ok && !x.starts_with("n_") { - eprintln!( - "\nUnrecognized variable {} for LVARS. Please type \ - \"enclone help lvars\".\n", - x - ); - std::process::exit(1); - } else { - to_check.push(x.clone()); - } - } - } - if !to_check.is_empty() { - check_gene_fb(&ctl, &gex_info, &to_check, "lead"); - } -} diff --git a/enclone/src/read_json.rs b/enclone/src/read_json.rs deleted file mode 100644 index f8beee46e..000000000 --- a/enclone/src/read_json.rs +++ /dev/null @@ -1,757 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -use vdj_ann::*; - -use self::annotate::*; -use self::refx::*; -use self::transcript::*; -use crate::explore::*; -use debruijn::dna_string::*; -use enclone_core::defs::*; -use io_utils::*; -use itertools::Itertools; -use rayon::prelude::*; -use serde_json::Value; -use std::sync::atomic::AtomicBool; -use std::sync::atomic::Ordering; -use std::{collections::HashMap, io::BufReader}; -use string_utils::*; -use vector_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn json_error(json: Option<&str>, ctl: &EncloneControl, exiting: &AtomicBool, msg: &str) { - // The following line prevents error messages from this function from being - // printed multiple times. 
- if !exiting.swap(true, Ordering::Relaxed) { - eprint!( - "\nThere is something wrong with the contig annotations in the Cell Ranger output \ - file" - ); - if json.is_some() { - eprint!("\n{}.", json.unwrap()); - } else { - eprint!("."); - } - eprint!("\n\npossibly relevant internal data: {}", msg); - if ctl.gen_opt.internal_run { - eprint!( - "\n\nATTENTION INTERNAL 10X USERS!\n\ - Quite possibly you are using data from a Cell Ranger run carried out using a \ - version\n\ - between 3.1 and 4.0. For certain of these versions, it is necessary to add the\n\ - argument CURRENT_REF to your command line. If that doesn't work, please see below." - ); - } - eprintln!( - "\n\nHere are possible sources of this problem:\n\n\ - 1. If the file was generated using \ - Cell Ranger version < 3.1, please either\nregenerate the file using the \ - current Cell Ranger version, or else run this program with the RE option to\n\ - regenerate annotations from scratch, but we warn you that this code \ - is not guaranteed to run\ncorrectly on outdated json files.\n\n\ - 2. Make sure you have the correct chain type, TCR or BCR.\n\n\ - 3. Make sure you have the correct reference sequence. See \ - \"enclone help faq\".\n\n\ - 4. If none of these apply, please report the problem to \ - enclone@10xgenomics.com. 
But please\nfirst rerun with RE to confirm the problem.\n" - ); - std::process::exit(1); - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -fn parse_vector_entry_from_json( - x: &Vec<u8>, - json: &String, - accept_inconsistent: bool, - origin_info: &OriginInfo, - li: usize, - refdata: &RefData, - to_ref_index: &HashMap<usize, usize>, - reannotate: bool, - ctl: &EncloneControl, - vdj_cells: &mut Vec<String>, - gex_cells: &mut Vec<String>, - gex_cells_specified: &mut bool, - cr_version: &mut String, - tigs: &mut Vec<TigData>, - exiting: &AtomicBool, -) { - let v: Value = serde_json::from_str(strme(&x)).unwrap(); - let barcode = &v["barcode"].to_string().between("\"", "\"").to_string(); - - // Get cell status. Sometime after CR 4.0 was released, and before 4.1 was released, - // we added new fields is_asm_cell and is_gex_cell to the json file. The value of - // is_asm_cell is the original determination of "cell" in the VDJ pipeline, whereas the - // value of is_gex_cell is that for the GEX pipeline. - - let mut is_cell = v["is_cell"].as_bool().unwrap_or(false); - let is_asm_cell = v["is_asm_cell"].as_bool().unwrap_or(false); - if is_asm_cell { - is_cell = true; - } - - let is_gex_cell = v["is_gex_cell"].as_bool(); - if is_gex_cell.is_some() { - *gex_cells_specified = true; - } - if is_gex_cell == Some(true) { - gex_cells.push(barcode.clone()); - } - - if !ctl.gen_opt.ncell && !is_cell { - return; - } - if is_cell { - vdj_cells.push(barcode.clone()); - } - - // Proceed. 
- - if !v["productive"].as_bool().unwrap_or(false) { - return; - } - if !ctl.gen_opt.ncell && !v["high_confidence"].as_bool().unwrap_or(false) { - return; - } - let tigname = &v["contig_name"].to_string().between("\"", "\"").to_string(); - let full_seq = &v["sequence"].to_string().between("\"", "\"").to_string(); - let mut left = false; - let (mut v_ref_id, mut j_ref_id) = (1000000, 0); - let mut d_ref_id: Option<usize> = None; - let mut c_ref_id = None; - let mut chain_type = String::new(); - let mut u_ref_id = None; - let (mut tig_start, mut tig_stop) = (-1 as isize, -1 as isize); - let mut v_stop = 0; - let mut v_stop_ref = 0; - let mut j_start = 0; - let mut j_start_ref = 0; - let mut c_start = None; - let mut annv = Vec::<(i32, i32, i32, i32, i32)>::new(); - let cdr1_aa: String = String::new(); // NEED TO POPULATE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - let cdr2_aa: String = String::new(); // NEED TO POPULATE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - let cdr3_aa: String; - let cdr3_dna: String; - let cdr1_start: Option<usize> = None; // NEED TO POPULATE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - let cdr2_start: Option<usize> = None; // NEED TO POPULATE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - let mut cdr3_start: usize; - if v.get("version").is_some() { - *cr_version = v["version"].to_string().between("\"", "\"").to_string(); - } - - // Reannotate. - - if reannotate { - let x = DnaString::from_dna_string(&full_seq); - let mut ann = Vec::<(i32, i32, i32, i32, i32)>::new(); - annotate_seq(&x, &refdata, &mut ann, true, false, true); - - // If there are multiple V segment alignments, possibly reduce to just one. 
- - let mut ann2 = Vec::<(i32, i32, i32, i32, i32)>::new(); - let mut j = 0; - while j < ann.len() { - let t = ann[j].2 as usize; - let mut k = j + 1; - while k < ann.len() { - if refdata.segtype[ann[k].2 as usize] != refdata.segtype[t] { - break; - } - k += 1; - } - if refdata.segtype[t] == "V".to_string() && k - j > 1 { - let mut entries = 1; - if j < ann.len() - 1 && ann[j + 1].2 as usize == t { - if (ann[j].0 + ann[j].1 == ann[j + 1].0 && ann[j].3 + ann[j].1 < ann[j + 1].3) - || (ann[j].0 + ann[j].1 < ann[j + 1].0 - && ann[j].3 + ann[j].1 == ann[j + 1].3) - { - entries = 2; - } - } - for l in j..j + entries { - ann2.push(ann[l].clone()); - } - } else { - for l in j..k { - ann2.push(ann[l].clone()); - } - } - j = k; - } - ann = ann2; - - // Proceed. - - if ctl.gen_opt.trace_barcode == barcode.to_string() { - let mut log = Vec::<u8>::new(); - print_some_annotations(&refdata, &ann, &mut log, false); - print!("\n{}", strme(&log)); - } - let mut log = Vec::<u8>::new(); - if ctl.gen_opt.trace_barcode == barcode.to_string() { - if !is_valid(&x, &refdata, &ann, true, &mut log) { - print!("{}", strme(&log)); - println!("invalid"); - return; - } - } else if !is_valid(&x, &refdata, &ann, false, &mut log) { - return; - } - let mut cdr3 = Vec::<(usize, Vec<u8>, usize, usize)>::new(); - get_cdr3_using_ann(&x, &refdata, &ann, &mut cdr3); - cdr3_aa = stringme(&cdr3[0].1); - cdr3_start = cdr3[0].0; - cdr3_dna = x - .slice(cdr3_start, cdr3_start + 3 * cdr3_aa.len()) - .to_string(); - let mut seen_j = false; - for i in 0..ann.len() { - let t = ann[i].2 as usize; - if refdata.is_u(t) { - u_ref_id = Some(t); - } else if refdata.is_v(t) && !seen_j { - v_ref_id = t; - annv.push(ann[i].clone()); - chain_type = refdata.name[t][0..3].to_string(); - if chain_type == "IGH".to_string() || chain_type == "TRB".to_string() { - left = true; - } - if ann[i].3 == 0 { - tig_start = ann[i].0 as isize; - if tig_start > cdr3_start as isize { - panic!( - "Something is wrong with the CDR3 start for 
this contig:\n\n{}.", - &full_seq - ); - } - cdr3_start -= tig_start as usize; - } - v_stop = (ann[i].0 + ann[i].1) as usize; - v_stop_ref = (ann[i].3 + ann[i].1) as usize; - } else if refdata.is_d(t) { - d_ref_id = Some(t); - } else if refdata.is_j(t) { - j_ref_id = t; - tig_stop = (ann[i].0 + ann[i].1) as isize; - j_start = ann[i].0 as usize; - j_start_ref = ann[i].3 as usize; - seen_j = true; - } else if refdata.is_c(t) { - c_ref_id = Some(t); - c_start = Some(ann[i].0 as usize); - } - } - for i in (0..annv.len()).rev() { - annv[i].0 -= annv[0].0; - } - } else { - // Use annotations from json file. - - cdr3_aa = v["cdr3"].to_string().between("\"", "\"").to_string(); - cdr3_dna = v["cdr3_seq"].to_string().between("\"", "\"").to_string(); - cdr3_start = v["cdr3_start"].as_u64().unwrap() as usize; - let ann = v["annotations"].as_array().unwrap(); - let mut cigarv = String::new(); // cigar for V segment - for i in 0..ann.len() { - let a = &ann[i]; - let region_type = &a["feature"]["region_type"]; - let feature_id = a["feature"]["feature_id"].as_u64().unwrap() as usize; - if !to_ref_index.contains_key(&feature_id) { - continue; - } - let feature_idx = to_ref_index[&feature_id]; - let ref_start = a["annotation_match_start"].as_u64().unwrap() as usize; - if region_type == "L-REGION+V-REGION" { - v_stop = a["contig_match_end"].as_i64().unwrap() as usize; - v_stop_ref = a["annotation_match_end"].as_i64().unwrap() as usize; - } - let gene_name = a["feature"]["gene_name"] - .to_string() - .between("\"", "\"") - .to_string(); - if refdata.name[feature_idx] != gene_name && !accept_inconsistent { - if !exiting.swap(true, Ordering::Relaxed) { - eprintln!( - "\nThere is an inconsistency between the reference \ - file used to create the Cell Ranger output files in\n{}\nand the \ - reference that enclone is using.\n\nFor example, the feature \ - numbered {} is\nthe gene {} in one and the gene {} in the other.\n\n\ - As far as we know, this type of error can only occur with Cell 
Ranger \ - versions before 4.0.\n\n\ - If this is mouse data, please use the argument MOUSE, and that may \ - solve the problem.\n\n\ - A solution that should always work is to supply\n\ - REF=vdj_reference_fasta_filename as an argument to enclone.\n", - json.rev_before("/"), - feature_id, - gene_name, - refdata.name[feature_idx] - ); - std::process::exit(1); - } - } - if region_type == "L-REGION+V-REGION" && ref_start == 0 { - let chain = a["feature"]["chain"] - .to_string() - .between("\"", "\"") - .to_string(); - // if !chain.starts_with("IG") { continue; } // ******************* - tig_start = a["contig_match_start"].as_i64().unwrap() as isize; - cdr3_start -= tig_start as usize; - chain_type = chain.clone(); - if chain == "IGH".to_string() || chain == "TRB".to_string() { - left = true; - } - v_ref_id = feature_idx; - cigarv = a["cigar"].to_string().between("\"", "\"").to_string(); - } else { - // also check for IG chain????????????????????????????????????????? - let ref_stop = a["annotation_match_end"].as_u64().unwrap() as usize; - let ref_len = a["annotation_length"].as_u64().unwrap() as usize; - if region_type == "J-REGION" && ref_stop == ref_len { - tig_stop = a["contig_match_end"].as_i64().unwrap() as isize; - j_ref_id = feature_idx; - j_start = a["contig_match_start"].as_i64().unwrap() as usize; - j_start_ref = a["annotation_match_start"].as_i64().unwrap() as usize; - } - if region_type == "5'UTR" { - u_ref_id = Some(feature_idx); - } - if region_type == "D-REGION" { - d_ref_id = Some(feature_idx); - } - if region_type == "C-REGION" { - c_ref_id = Some(feature_idx); - c_start = Some(a["contig_match_start"].as_i64().unwrap() as usize); - } - } - } - if v_ref_id == 1000000 { - return; - } - - // Compute annv from cigarv. We don't compute the mismatch entry. 
- - let mut cg = Vec::<Vec<u8>>::new(); // pieces of cigar string - let mut piece = Vec::<u8>::new(); - for c in cigarv.chars() { - piece.push(c as u8); - if c.is_ascii_alphabetic() { - cg.push(piece.clone()); - piece.clear(); - } - } - let t = v_ref_id as i32; - let (mut len1, mut len2) = (0, 0); - let (mut ins, mut del) = (0, 0); - for i in 0..cg.len() { - let x = strme(&cg[i][0..cg[i].len() - 1]).force_i32(); - if cg[i][cg[i].len() - 1] == b'M' { - if len1 == 0 { - len1 = x; - } else if len2 == 0 { - len2 = x; - } else { - // probably can't happen - len1 = 0; - len2 = 0; - break; - } - } - if cg[i][cg[i].len() - 1] == b'I' { - ins = x; - } - if cg[i][cg[i].len() - 1] == b'D' { - del = x; - } - } - annv.push((0 as i32, len1, t, 0, 0)); - if ins > 0 && ins % 3 == 0 && del == 0 && len2 > 0 { - let start = (len1 + ins) as i32; - annv.push((start, len2, t, len1, 0)); - } else if del > 0 && del % 3 == 0 && ins == 0 && len2 > 0 { - annv.push((len1, len2, t, len1 + del, 0)); - } - } - - // Correct CDR3 start for insertion. - - if annv.len() == 2 && annv[1].0 > annv[0].0 + annv[0].1 { - let ins = annv[1].0 - annv[0].0 - annv[0].1; - cdr3_start -= ins as usize; - } - - // Keep going. 
- - if tig_start < 0 || tig_stop < 0 { - let msg = format!("tig_start = {}, tig_stop = {}", tig_start, tig_stop); - json_error(Some(&json), &ctl, exiting, &msg); - } - let (tig_start, tig_stop) = (tig_start as usize, tig_stop as usize); - let quals0 = v["quals"].to_string(); - let quals0 = quals0.after("\"").as_bytes(); - let mut quals = Vec::<u8>::new(); - let mut slashed = false; - for i in 0..quals0.len() - 1 { - if !slashed && quals0[i] == b'\\' - /* && ( i == 0 || quals0[i-1] != b'\\' ) */ - { - slashed = true; - continue; - } - slashed = false; - quals.push(quals0[i]); - } - assert_eq!(full_seq.len(), quals.len()); - let seq = full_seq[tig_start..tig_stop].to_string(); - for i in 0..quals.len() { - quals[i] -= 33 as u8; - } - let full_quals = quals.clone(); - let quals = quals[tig_start..tig_stop].to_vec(); - // let cdr3_dna = &v["cdr3_seq"].to_string().between("\"", "\"").to_string(); - let umi_count = v["umi_count"].as_i64().unwrap() as usize; - let read_count = v["read_count"].as_i64().unwrap() as usize; - let mut origin = None; - let mut donor = None; - let mut tag = None; - if origin_info.origin_for_bc[li].contains_key(&barcode.clone()) { - origin = Some(origin_info.origin_for_bc[li][&barcode.clone()].clone()); - } else { - // the way we use s1 here is flaky - if origin_info.origin_id[li].len() > 0 - && (origin_info.origin_id[li] != "s1".to_string() - || origin_info.origin_for_bc[li].len() == 0) - { - origin = Some(origin_info.origin_id[li].clone()); - } - } - if origin_info.donor_for_bc[li].contains_key(&barcode.clone()) { - donor = Some(origin_info.donor_for_bc[li][&barcode.clone()].clone()); - } else { - // the way we use d1 here is flaky - if origin_info.origin_id[li].len() > 0 - && (origin_info.donor_id[li] != "d1".to_string() - || origin_info.donor_for_bc[li].len() == 0) - { - donor = Some(origin_info.donor_id[li].clone()); - } - } - if origin_info.tag[li].contains_key(&barcode.clone()) { - tag = Some(origin_info.tag[li][&barcode.clone()].clone()); 
- } - let mut origin_index = None; - let mut donor_index = None; - let mut tag_index = None; - if origin.is_some() { - if origin.is_some() { - origin_index = Some(bin_position(&origin_info.origin_list, &origin.unwrap()) as usize); - } - if donor.is_some() { - donor_index = Some(bin_position(&origin_info.donor_list, &donor.unwrap()) as usize); - } - } - if tag.is_some() { - tag_index = Some(bin_position(&origin_info.tag_list, &tag.unwrap()) as usize); - } - tigs.push(TigData { - cdr3_dna: cdr3_dna.to_string(), - len: seq.len(), - seq: seq.as_bytes().to_vec(), - v_start: tig_start, - v_stop: v_stop, - v_stop_ref: v_stop_ref, - j_start: j_start, - j_start_ref: j_start_ref, - j_stop: tig_stop, - c_start: c_start, - full_seq: full_seq.as_bytes().to_vec(), - v_ref_id: v_ref_id, - d_ref_id: d_ref_id, - j_ref_id: j_ref_id, - c_ref_id: c_ref_id, - u_ref_id: u_ref_id, - cdr1_aa: cdr1_aa.to_string(), - cdr1_start: cdr1_start, - cdr2_aa: cdr2_aa.to_string(), - cdr2_start: cdr2_start, - cdr3_aa: cdr3_aa.to_string(), - cdr3_start: cdr3_start, - quals: quals, - full_quals: full_quals, - barcode: barcode.to_string(), - tigname: tigname.to_string(), - left: left, - dataset_index: li, - origin_index: origin_index, - donor_index: donor_index, - tag_index: tag_index, - umi_count: umi_count, - read_count: read_count, - chain_type: chain_type.clone(), - annv: annv.clone(), - }); -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Parse the JSON annotations file. -// -// In the future could be converted to LazyWrite: -// https://martian-lang.github.io/martian-rust/doc/martian_filetypes/json_file/ -// index.html#lazy-readwrite-example. -// -// Tracking contigs using bc_cdr3_aa; could improve later. -// -// This section requires 3.1. If you want to avoid that, do something to make tig_start -// and tig_stop always nonnegative. Or use the RE option. -// -// Computational performance. 
It would appear that nearly all the time here is spent in -// two lines: -// -// read_vector_entry_from_json(&mut f) { -// let v: Value = serde_json::from_str(strme(&x)).unwrap(); -// (Should retest.) -// -// and simply reading the file lines is several times faster. So the way we parse the -// files is suboptimal. If we want to make this faster, one option would be to speed up -// this code. Another would be to write out a binary version of the JSON file that contains -// only the information that we need. - -pub fn read_json( - accept_inconsistent: bool, - origin_info: &OriginInfo, - li: usize, - json: &String, - refdata: &RefData, - to_ref_index: &HashMap<usize, usize>, - reannotate: bool, - cr_version: &mut String, - ctl: &EncloneControl, - mut vdj_cells: &mut Vec<String>, - mut gex_cells: &mut Vec<String>, - gex_cells_specified: &mut bool, -) -> Vec<Vec<TigData>> { - *gex_cells_specified = false; - let mut tigs = Vec::<TigData>::new(); - let mut jsonx = json.clone(); - if !path_exists(&json) { - jsonx = format!("{}.lz4", json); - } - if jsonx.contains('/') { - let p = jsonx.rev_before("/"); - if !path_exists(&p) { - eprintln!( - "\nThere should be a directory\n\ - \"{}\"\n\ - but it does not exist. Please check how you have specified the\n\ - input files to enclone, including the PRE argument.\n", - p - ); - std::process::exit(1); - } - } - if !path_exists(&jsonx) { - eprintln!( - "\nThe path\n\ - \"{}\"\n\ - does not exist. Please check how you have specified the\n\ - input files to enclone, including the PRE argument.\n", - jsonx - ); - std::process::exit(1); - } - let mut f = BufReader::new(open_maybe_compressed(&jsonx)); - // ◼ This loop could be speeded up, see comments above. 
- let mut xs = Vec::<Vec<u8>>::new(); - loop { - match read_vector_entry_from_json(&mut f) { - None => break, - Some(x) => { - xs.push(x); - } - } - } - let mut results = Vec::<(usize, Vec<String>, Vec<String>, bool, String, Vec<TigData>)>::new(); - for i in 0..xs.len() { - results.push(( - i, - Vec::<String>::new(), - Vec::<String>::new(), - false, - String::new(), - Vec::<TigData>::new(), - )); - } - let exiting = AtomicBool::new(false); - results.par_iter_mut().for_each(|res| { - let i = res.0; - parse_vector_entry_from_json( - &xs[i], - &json, - accept_inconsistent, - &origin_info, - li, - &refdata, - &to_ref_index, - reannotate, - &ctl, - &mut res.1, - &mut res.2, - &mut res.3, - &mut res.4, - &mut res.5, - &exiting, - ); - }); - for i in 0..xs.len() { - vdj_cells.append(&mut results[i].1); - gex_cells.append(&mut results[i].2); - if results[i].3 { - *gex_cells_specified = true; - } - if results[i].4.len() > 0 { - *cr_version = results[i].4.clone(); - } - tigs.append(&mut results[i].5); - } - unique_sort(&mut gex_cells); - let mut tig_bc = Vec::<Vec<TigData>>::new(); - let mut r = 0; - while r < tigs.len() { - let mut s = r + 1; - while s < tigs.len() { - if tigs[s].barcode != tigs[r].barcode { - break; - } - s += 1; - } - /* - let (mut have_left, mut have_right) = (false, false); - for u in r..s { - if tigs[u].left { - have_left = true; - } else { - have_right = true; - } - } - */ - - // For now we require at most four contigs (but we don't yet merge foursies). - - if - /* have_left && have_right && */ - s - r <= 4 { - let mut bc_tigs = Vec::<TigData>::new(); - for u in r..s { - bc_tigs.push(tigs[u].clone()); - } - bc_tigs.sort(); - tig_bc.push(bc_tigs); - } - r = s; - } - unique_sort(&mut vdj_cells); - tig_bc -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Parse the JSON annotations file(s). 
- -pub fn parse_json_annotations_files( - mut ctl: &mut EncloneControl, - tig_bc: &mut Vec<Vec<TigData>>, - refdata: &RefData, - to_ref_index: &HashMap<usize, usize>, - vdj_cells: &mut Vec<Vec<String>>, - gex_cells: &mut Vec<Vec<String>>, - gex_cells_specified: &mut Vec<bool>, -) { - // (origin index, contig name, V..J length): (?) - let mut results = Vec::<( - usize, - Vec<(String, usize)>, - Vec<Vec<TigData>>, - Vec<Vec<u8>>, // logs - String, - Vec<String>, - Vec<String>, - bool, - )>::new(); - for i in 0..ctl.origin_info.dataset_path.len() { - results.push(( - i, - Vec::<(String, usize)>::new(), - Vec::<Vec<TigData>>::new(), - Vec::<Vec<u8>>::new(), - String::new(), - Vec::<String>::new(), - Vec::<String>::new(), - false, - )); - } - // Note: only tracking truncated seq and quals initially - let ann; - if !ctl.gen_opt.cellranger { - ann = "all_contig_annotations.json"; - } else { - ann = "contig_annotations.json"; - } - results.par_iter_mut().for_each(|res| { - let li = res.0; - let json = format!("{}/{}", ctl.origin_info.dataset_path[li], ann); - let json_lz4 = format!("{}/{}.lz4", ctl.origin_info.dataset_path[li], ann); - if !path_exists(&json) && !path_exists(&json_lz4) { - eprintln!("\ncan't find {} or {}\n", json, json_lz4); - std::process::exit(1); - } - let tig_bc: Vec<Vec<TigData>> = read_json( - ctl.gen_opt.accept_inconsistent, - &ctl.origin_info, - li, - &json, - &refdata, - &to_ref_index, - ctl.gen_opt.reannotate, - &mut res.4, - &ctl, - &mut res.5, - &mut res.6, - &mut res.7, - ); - res.5.sort(); - explore(li, &tig_bc, &ctl); - res.2 = tig_bc; - }); - let mut versions = Vec::<String>::new(); - for i in 0..results.len() { - tig_bc.append(&mut results[i].2.clone()); - ctl.gen_opt.cr_version = results[i].4.clone(); - if results[i].4.len() == 0 { - versions.push("≤3.1".to_string()); - } else { - versions.push(results[i].4.clone()); - } - vdj_cells.push(results[i].5.clone()); - gex_cells.push(results[i].6.clone()); - 
gex_cells_specified.push(results[i].7.clone()); - } - if !ctl.gen_opt.internal_run { - unique_sort(&mut versions); - if versions.len() > 1 - && versions != vec!["4.0".to_string(), "4009.52.0-82-g2244c685a".to_string()] - { - eprintln!( - "\nYou're using output from multiple Cell Ranger versions = {},\n\ - which is not allowed.\n", - versions.iter().format(", ") - ); - std::process::exit(1); - } - } -} diff --git a/enclone/src/run_test.rs b/enclone/src/run_test.rs deleted file mode 100644 index 01eb72f63..000000000 --- a/enclone/src/run_test.rs +++ /dev/null @@ -1,296 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -use crate::misc3::parse_bsv; -use ansi_escape::*; -use enclone_core::testlist::*; -use io_utils::*; -use itertools::Itertools; -use std::cmp::min; -use std::fs::read_to_string; -use std::io::Write; -use std::process::Command; -use string_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Run an enclone test. - -pub fn run_test( - enclone: &str, // name of the enclone executable - it: usize, // test number - test: &str, // arguments for the test - testname: &str, // test category e.g. 
"test" or "ext_test" - ok: &mut bool, // true if test passes - logx: &mut String, // logging from test - out: &mut String, // stdout of test -) { - let mut test = test.replace("\n", ""); - for _ in 0..3 { - test = test.replace(" ", " "); - } - let mut expect_null = false; - let mut expect_fail = false; - let mut expect_ok = false; - let mut set_in_stone = false; - let mut no_pre = false; - let mut nforce = false; - let mut ncores = false; - if test.contains(" EXPECT_NULL") { - test = test.replace(" EXPECT_NULL", ""); - expect_null = true; - } - if test.contains(" EXPECT_FAIL") { - test = test.replace(" EXPECT_FAIL", ""); - expect_fail = true; - } - if test.contains(" EXPECT_OK") { - test = test.replace(" EXPECT_OK", ""); - expect_ok = true; - } - if test.contains(" SET_IN_STONE") { - test = test.replace(" SET_IN_STONE", ""); - set_in_stone = true; - } - if test.contains(" NO_PRE") { - test = test.replace(" NO_PRE", ""); - no_pre = true; - } - if test.contains(" NFORCE") { - test = test.replace(" NFORCE", ""); - nforce = true; - } - if test.contains(" NCORES") { - test = test.replace(" NCORES", ""); - ncores = true; - } - test = test.replace("{TEST_FILES_VERSION}", &format!("{}", TEST_FILES_VERSION)); - let mut log = Vec::<u8>::new(); - let out_file = format!("testx/inputs/outputs/enclone_{}{}_output", testname, it + 1); - let mut pre_arg = format!( - "PRE=../enclone-data/big_inputs/version{}", - TEST_FILES_VERSION - ); - let mut local_pre_arg = format!( - "PRE=enclone-data/big_inputs/version{},enclone_main", - TEST_FILES_VERSION - ); - if no_pre { - pre_arg = String::new(); - local_pre_arg = String::new(); - } - if !path_exists(&out_file) && !expect_fail && !expect_ok { - fwriteln!(log, "\nYou need to create the output file {}.\n", out_file); - fwriteln!( - log, - "Do this by executing the following command from \ - the top level of the enclone repo:\n" - ); - emit_bold_escape(&mut log); - fwriteln!( - log, - "enclone {} {} > 
enclone_main/testx/inputs/outputs/enclone_{}{}_output; \ - git add enclone_main/testx/inputs/outputs/enclone_{}{}_output\n", - local_pre_arg, - test, - testname, - it + 1, - testname, - it + 1 - ); - emit_end_escape(&mut log); - *logx = stringme(&log); - } else { - let mut old = String::new(); - if !expect_fail && !expect_ok { - old = read_to_string(&out_file).unwrap(); - } - let args = parse_bsv(&test); - - // Form the command and execute it. - - let mut new = Command::new(&enclone); - let mut new = new.arg(&args[0]); - if !no_pre { - new = new.arg(&pre_arg); - } - for i in 1..args.len() { - new = new.arg(&args[i]); - } - if !nforce { - new = new.arg("FORCE_EXTERNAL") - } - if !ncores { - // Cap number of cores at 24. Surprisingly, for testing on a 64-core - // server, this significantly reduces wallclock. And substituting either - // 16 or 64 is slower. Slower at the time of testing! As we add tests or - // change the algorithms, this may change. - new = new.arg("MAX_CORES=24") - } - // dubious use of expect: - let new = new - .output() - .expect(&format!("failed to execute enclone for test{}", it + 1)); - let new_err = strme(&new.stderr).split('\n').collect::<Vec<&str>>(); - let new2 = stringme(&new.stdout); - *out = new2.clone(); - - // Process tests that were supposed to fail or supposed to succeed. 
- - if expect_fail || expect_ok { - *ok = false; - if new.status.code().is_none() { - fwriteln!(log, "\nCommand for subtest {} failed.", it + 1); - fwriteln!( - log, - "Something really funky happened, status code unavailable.\n" - ); - } else { - let status = new.status.code().unwrap(); - if expect_fail { - if status == 0 { - fwriteln!(log, "\nCommand for subtest {} failed.", it + 1); - fwriteln!( - log, - "That test was supposed to have failed, but instead \ - succeeded.\n" - ); - } else if status != 1 { - fwriteln!(log, "\nCommand for subtest {} failed.", it + 1); - fwriteln!( - log, - "That test was supposed to have failed with exit status 1,\n\ - but instead failed with exit status {}.\n", - status - ); - } else { - *ok = true; - } - } else { - if status != 0 { - fwriteln!(log, "\nCommand for subtest {} failed.", it + 1); - fwrite!( - log, - "That test was supposed to have succeeded, but instead \ - failed, with stderr = {}", - new_err.iter().format("\n") - ); - } else { - *ok = true; - } - } - } - *logx = stringme(&log); - - // Process tests that yield the expected stdout. - } else if old == new2 { - *ok = true; - if old.len() <= 1 && !expect_null { - fwriteln!( - log, - "\nWarning: old output for subtest {} has {} bytes.\n", - it + 1, - old.len() - ); - } - if new.stderr.len() > 0 { - fwriteln!(log, "Command for subtest {} failed.\n", it + 1); - fwriteln!(log, "stderr has {} bytes:", new.stderr.len()); - fwrite!(log, "{}", strme(&new.stderr)); - *ok = false; - } - *logx = stringme(&log); - - // Process tests that yield unexpected stdout. 
- } else { - fwriteln!(log, "\nSubtest {}: old and new differ", it + 1); - fwriteln!( - log, - "old has u8 length {} and new has u8 length {}", - old.len(), - new2.len() - ); - let mut oldc = Vec::<char>::new(); - let mut newc = Vec::<char>::new(); - for c in old.chars() { - oldc.push(c); - } - for c in new2.chars() { - newc.push(c); - } - fwriteln!( - log, - "old has char length {} and new has char length {}", - oldc.len(), - newc.len() - ); - for i in 0..min(oldc.len(), newc.len()) { - if oldc[i] != newc[i] { - fwriteln!( - log, - "the first difference is at character {}: old = \"{}\", \ - new = \"{}\"\n", - i, - oldc[i], - newc[i] - ); - break; - } - } - fwrite!(log, "old:\n{}", old); - fwrite!(log, "new:\n{}", new2); - if new_err.len() != 1 || new_err[0].len() != 0 { - fwriteln!(log, "stderr has {} lines:", new_err.len()); - for i in 0..new_err.len() { - fwriteln!(log, "{}", new_err[i]); - } - } - // let f = format!( - // "testx/inputs/version{}/{}/outs/all_contig_annotations.json.lz4", - // version, args[0].after("=") ); - // if !path_exists(&f) { - // println!( "Perhaps you forgot to lz4 compress the json file.\n" ); - // std::process::exit(1); - // } - // println!( "The size of {} is {} bytes.", f, fs::metadata(&f).unwrap().len() ); - - fwriteln!( - log, - "enclone subtest {} failed. If you are happy with the new output, \ - you can replace the\noutput by executing the following command from \ - the top level of the enclone repo (essential):\n", - it + 1 - ); - if set_in_stone { - fwriteln!( - log, - "🔴 However, the output of this test was not supposed to have changed.\n\ - 🔴 Please be extremely careful if you change it.\n", - ); - } - emit_bold_escape(&mut log); - fwriteln!( - log, - "enclone {} {} \ - > enclone_main/testx/inputs/outputs/enclone_{}{}_output\n", - local_pre_arg, - test, - testname, - it + 1 - ); - emit_end_escape(&mut log); - fwrite!(log, "and then committing the changed file. 
"); - fwriteln!( - log, - "You can then retest using:\n\n\ - cargo test -p enclone enclone -- --nocapture" - ); - if new2.len() > 0 { - fwriteln!(log, ""); - *logx = stringme(&log); - } else if old != new2 { - fwriteln!(log, "old != new"); - *logx = stringme(&log); - } - } - } -} diff --git a/enclone/src/secret.rs b/enclone/src/secret.rs index ef742f4e3..ecfb05c3e 100644 --- a/enclone/src/secret.rs +++ b/enclone/src/secret.rs @@ -1,29 +1,38 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // Get counts for secreted and membrane proteins. - -use enclone_core::defs::*; -use std::collections::HashMap; +// +// The basic problem with this is the relevant exon junctions are too far from the 5' end, +// so there is not enough signal to be useful. +// +// Also what is done below appears not to handle the IGHG case correctly. +// +// For IGHG, the boundaries should be +// (A)CH2-(B)CH3-CHS [secreted] +// (A)CH2-(B)Mx [membrane]. + +use enclone_core::defs::EncloneControl; use std::process::Command; -use string_utils::*; -use vector_utils::*; +use std::{collections::HashMap, path::Path}; +use string_utils::{strme, TextUtils}; +use vector_utils::next_diff1_3; // copied from tenkit2/pack_dna.rs: -pub fn reverse_complement(x: &mut Vec<u8>) { +pub fn reverse_complement(x: &mut [u8]) { x.reverse(); - for j in 0..x.len() { - x[j] = match x[j] { + for xj in x { + *xj = match *xj { b'A' => b'T', b'C' => b'G', b'G' => b'C', b'T' => b'A', - _ => x[j], + _ => *xj, } } } -pub fn fetch_secmem(ctl: &mut EncloneControl) { +pub fn fetch_secmem(ctl: &mut EncloneControl) -> Result<(), String> { // Define the CH3 exon boundaries, and the sequences that could follow it, both in // GRCh38 or GRCm38 coordinates. 
@@ -31,12 +40,12 @@ pub fn fetch_secmem(ctl: &mut EncloneControl) { let ch3; let fol; if species == "human" { - ch3 = vec![ + ch3 = [ ('-', "chr14:105600482-105600805"), ('-', "chr14:105840368-105840691"), ('-', "chr14:105854918-105855235"), ]; - fol = vec![ + fol = [ ("TACCTG", "M1"), ("GTGAAA", "M2"), ("GTGAAG", "M2"), @@ -48,14 +57,15 @@ pub fn fetch_secmem(ctl: &mut EncloneControl) { ("GCCCGC", "S"), ("GGACAG", "S"), ("GGGGTG", "S"), - ]; + ] + .as_ref(); } else { - ch3 = vec![ + ch3 = [ ('-', "chr12:113414273-113414593"), ('-', "chr12:113271711-113272031"), ('-', "chr12:113421370-113421686"), ]; - fol = vec![ + fol = [ ("GAGCTAGAC", "M1"), ("GAGCTGGAA", "M1"), ("GAGGGGGAG", "M1"), @@ -75,24 +85,25 @@ pub fn fetch_secmem(ctl: &mut EncloneControl) { ("GAGGTGCAC", "S"), ("GCCAGCGCT", "S"), ("GGCCAGCGC", "S"), - ]; + ] + .as_ref(); } // Traverse the datasets. - for q in 0..ctl.origin_info.n() { + for gex_path in ctl.origin_info.gex_path.iter().take(ctl.origin_info.n()) { let mut data = Vec::<(String, String, String)>::new(); // (barcode, umi, class) - let bam = format!("{}/possorted_genome_bam.bam", ctl.origin_info.gex_path[q]); + let bam = Path::new(gex_path).join("possorted_genome_bam.bam"); // Traverse the boundaries. - for i in 0..ch3.len() { + for ch3i in ch3 { // Call samtools. let o = Command::new("samtools") .arg("view") .arg(&bam) - .arg(&ch3[i].1) + .arg(ch3i.1) .output() .expect("failed to execute samtools"); @@ -104,15 +115,15 @@ pub fn fetch_secmem(ctl: &mut EncloneControl) { let pos = fields[3].force_usize(); let cigar = fields[5]; let seq = fields[9]; - let (mut barcode, mut umi) = (String::new(), String::new()); - for j in 11..fields.len() { - if fields[j].starts_with("CB:Z:") { - barcode = fields[j].after("CB:Z:").to_string(); - } else if fields[j].starts_with("UB:Z:") { - umi = fields[j].after("UB:Z:").to_string(); + let (mut barcode, mut umi) = ("", ""); + for &fj in &fields[11..] 
{ + if fj.starts_with("CB:Z:") { + barcode = fj.after("CB:Z:"); + } else if fj.starts_with("UB:Z:") { + umi = fj.after("UB:Z:"); } } - if barcode.len() == 0 { + if barcode.is_empty() { continue; } @@ -137,43 +148,36 @@ pub fn fetch_secmem(ctl: &mut EncloneControl) { let mut ref_pos = pos; let mut read_pos = 1; - let low = ch3[i].1.after(":").before("-").force_usize(); - let high = ch3[i].1.after("-").force_usize(); + let low = ch3i.1.after(":").before("-").force_usize(); + let high = ch3i.1.after("-").force_usize(); let mut ext = 0; let mut ext_seq = Vec::<u8>::new(); - for j in 0..cg.len() { - let x = cg[j][cg[j].len() - 1]; - let n = strme(&cg[j][0..cg[j].len() - 1]).force_usize(); + for cgj in cg { + let x = cgj[cgj.len() - 1]; + let n = strme(&cgj[..cgj.len() - 1]).force_usize(); if x == b'M' { - if ch3[i].0 == '-' { - if read_pos > 1 && ref_pos < high && ref_pos + n > low { - if read_pos + low > ref_pos + 1 { - ext = read_pos + low - ref_pos - 1; - ext_seq = seq.as_bytes()[0..ext].to_vec(); - reverse_complement(&mut ext_seq); - break; - } - } - } else { - if ref_pos <= high && ref_pos + n > high { - ext = ref_pos + n - high; - ext_seq = seq.as_bytes()[seq.len() - ext..].to_vec(); + if ch3i.0 == '-' { + if read_pos > 1 + && ref_pos < high + && ref_pos + n > low + && read_pos + low > ref_pos + 1 + { + ext = read_pos + low - ref_pos - 1; + ext_seq = seq.as_bytes()[0..ext].to_vec(); + reverse_complement(&mut ext_seq); break; } + } else if ref_pos <= high && ref_pos + n > high { + ext = ref_pos + n - high; + ext_seq = seq.as_bytes()[seq.len() - ext..].to_vec(); + break; } ref_pos += n; read_pos += n; - } else if x == b'N' { - ref_pos += n; - } else if x == b'S' { - read_pos += n; - } else if x == b'I' { - read_pos += n; - } else if x == b'D' { + } else if x == b'N' || x == b'S' || x == b'I' || x == b'D' { ref_pos += n; } else { - eprintln!("\nUnexpected character in cigar string.\n"); - std::process::exit(1); + return Err("\nUnexpected character in cigar 
string.\n".to_string()); } } @@ -185,18 +189,17 @@ pub fn fetch_secmem(ctl: &mut EncloneControl) { // Print. - let mut class; - if species == "human" { - class = stringme(&ext_seq[0..6]); + let mut class = if species == "human" { + strme(&ext_seq[0..6]) } else { - class = stringme(&ext_seq[0..9]); - } - for j in 0..fol.len() { - if strme(&ext_seq).starts_with(&fol[j].0) { - class = fol[j].1.to_string(); + strme(&ext_seq[0..9]) + }; + for &fj in fol { + if strme(&ext_seq).starts_with(fj.0) { + class = fj.1; } } - data.push((barcode, umi, class)); + data.push((barcode.to_string(), umi.to_string(), class.to_string())); } } @@ -219,10 +222,10 @@ pub fn fetch_secmem(ctl: &mut EncloneControl) { l += 1; } let (mut s, mut m) = (0, 0); - for z in k..l { - if data[z].2.starts_with('M') { + for dz in &data[k..l] { + if dz.2.starts_with('M') { m += 1; - } else if data[z].2.starts_with('S') { + } else if dz.2.starts_with('S') { s += 1; } } @@ -233,9 +236,10 @@ pub fn fetch_secmem(ctl: &mut EncloneControl) { } k = l; } - h.insert(data[i].0.clone(), (sec, mem)); + h.insert(data[i].0.to_string(), (sec, mem)); i = j; } ctl.origin_info.secmem.push(h); } + Ok(()) } diff --git a/enclone/src/subset_json.rs b/enclone/src/subset_json.rs index e2fd6d44a..c2ed059c8 100644 --- a/enclone/src/subset_json.rs +++ b/enclone/src/subset_json.rs @@ -1,18 +1,18 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // Extract the entries in a given all_contig_annotations.json file that corrrespond to barcodes // in a given sorted vector. 
-use io_utils::*; -use std::fs::File; -use std::io::{BufRead, BufReader}; -use string_utils::*; -use vector_utils::*; +use io_utils::open_userfile_for_read; +use std::fmt::Write; +use std::io::BufRead; +use string_utils::TextUtils; +use vector_utils::bin_member; -pub fn subset_all_contig_annotations_json(filename: &str, barcodes: &Vec<String>) -> String { +pub fn subset_all_contig_annotations_json(filename: &str, barcodes: &[String]) -> String { let mut x = "[\n".to_string(); let mut lines = Vec::<String>::new(); - let f = open_for_read![&filename]; + let f = open_userfile_for_read(filename); let mut keep = false; for line in f.lines() { let s = line.unwrap(); @@ -21,20 +21,21 @@ pub fn subset_all_contig_annotations_json(filename: &str, barcodes: &Vec<String> } if s == "]" { if keep { - for i in 0..lines.len() { - x += &format!("{}\n", lines[i]); + for line in &lines { + writeln!(x, "{line}").unwrap(); } } break; } - lines.push(s.clone()); + lines.push(s); + let s = lines.last().unwrap().as_str(); if s.starts_with(" \"barcode\": \"") { let t = s.between(" \"barcode\": \"", "\""); - keep = bin_member(&barcodes, &t.to_string()); + keep = bin_member(barcodes, &t.to_string()); } else if s.starts_with(" }") { if keep { - for i in 0..lines.len() { - x += &format!("{}\n", lines[i]); + for line in &lines { + writeln!(&mut x, "{line}").unwrap(); } } lines.clear(); diff --git a/enclone_args/Cargo.toml b/enclone_args/Cargo.toml new file mode 100644 index 000000000..d05c770ec --- /dev/null +++ b/enclone_args/Cargo.toml @@ -0,0 +1,51 @@ +[package] +name = "enclone_args" +version = "0.5.219" +authors = ["""David Jaffe <david.jaffe@10xgenomics.com>, + Nigel Delaney <nigel.delaney@10xgenomics.com>, + Keri Dockter <keri.dockter@10xgenomics.com>, + Jessica Hamel <jessica.hamel@10xgenomics.com>, + Lance Hepler <lance.hepler@10xgenomics.com>, + Shaun Jackman <shaun.jackman@10xgenomics.com>, + Sreenath Krishnan <sreenath.krishnan@10xgenomics.com>, + Meryl Lewis 
<meryl.lewis@10xgenomics.com>, + Alvin Liang <alvin.liang@10xgenomics.com>, + Patrick Marks <patrick.marks@10xgenomics.com>, + Wyatt McDonnell <wyatt.mcdonnell@10xgenomics.com>"""] +edition = "2021" +license-file = "LICENSE.txt" +publish = false + +# This crate is for preprocessing including argument processing and initial reading/testing of +# input files. + +# Please do not edit crate versions within this file. Instead edit the file master.toml +# in the root of the enclone repo. + +[dependencies] +debruijn = "0.3" +enclone_core = { path = "../enclone_core" } +enclone_vars = { path = "../enclone_vars" } +evalexpr = ">=7, <12" +expr_tools = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +io_utils = { version = "0.3", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +itertools.workspace = true +mirror_sparse_matrix = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +rand = "0.8" +rayon = "1" +regex = { version = "1", default-features = false, features = ["std", "perf"] } +serde_json = "1" +string_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vdj_ann = { version = "0.4", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vector_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } + +[target.'cfg(not(windows))'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" +features = ["conda"] +default-features = false + +[target.'cfg(windows)'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" +default-features = false diff --git a/enclone_args/LICENSE.txt b/enclone_args/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/enclone_args/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of 
file diff --git a/enclone_args/src/enclone.testdata.bcr.gex b/enclone_args/src/enclone.testdata.bcr.gex new file mode 100644 index 000000000..38c17997c --- /dev/null +++ b/enclone_args/src/enclone.testdata.bcr.gex @@ -0,0 +1,243 @@ +# These are mostly matched BCR/GEX datasets, all human except one mouse. +# Some do not have GEX, as noted by absence of GEX. +# Some of BCR, then GEX, then BCR lines. The first two are matched and the last has no GEX. +# +# These overlap other dataset lists, and many of the ids given here are not given in any +# other list. All are included in the big VDJ dataset collection that is internal to 10x, +# and not all are distributable. +# +# Note that in some cases, multiple donors etc. are included within individual sets. +# +# To see the entirety of false positives for the human datasets, one may do the following: +# enclone BIB=1-39 BUILT_IN MIN_DONORS=2 PLAIN SUMMARY +# +# original cells fps notes +# 144787 0 specificity test using only public data +# 638431 25 current specificity test +# 2016131 193 all the data (but not updated for a long time) + +DONOR=1 +NAME=B cells +BCR=140696-140711 +GEX=140712-140727 + +DONOR=2 +NAME=PBMC +BCR=135799-135830,140688-140695,140680-140687 +GEX=135767-135798,140612-140619,140620-140627 + +DONOR=3 +NAME=JWY545 +BCR=145309-145324,143550,179159,155246-155253,1016691-1016692,180025-180037,1009445-1009448,1009450-1009468,1016687-1016688,1018289-1018293 +GEX=145261-145276,143453,174145,155641-155648,1016669-1016670,180007-180019,1008841-1008844,1008846-1008864,1019932-1019933,1017640-1017644 +BCR=165808,1017974-1017977,1018095-1018098,1018288,1018296-1018298,1018301,1021358,1021360-1021362,1029474-1029477,1031844-1031848,1031850,1031851,1031853,1031854,1031856-1031858 +PUBLIC_BCR_SUBSET=165808,1017974-1017977,1018095-1018098,1018288,1018289,1018291,1018293,1018296-1018298,1018301,1021358,1021360-1021362,1029474-1029477,1031844-1031848,1031850,1031851,1031853-1031854,1031856-1031858 + +DONOR=4 +NAME=AGBT 
pre/post-vaccination (4 donors, 2 timepoints) PBMC +BCR=174919-174951,174953-174958,174960-174966,174999-175003,175005-175006,175008-175010,175012-175013 +GEX=173169-173201,173203-173208,173210-173216,179831-179835,173223-173224,173226-173228,173230-173231 + +DONOR=5 +NAME=CRC-3111 tumor plus normal (PBMC) +BCR=1005384,1005385,1005386,1005387 +GEX=1010615,1010616,1005229,1005230 + +DONOR=6 +NAME=melanoma +NOTE=excluded BCR=1021301/GEX=1019532 because appears to be mixed +BCR=1021300,1021302-1021303,1020665-1020668 +GEX=1019531,1019533-1019534,1020597-1020600 +BCR=116156-116159,116164-116170,118053,121352,132884,132885,132890,132891,132896,132897,132902,132903,132908,132909,132914,132915 + +DONOR=7 +NAME= +BCR=106236,106237,106240-106244,107518,107520,107522,107524,107525,107527,107529,107531-107534,107537,107539,107541,107542,107544-107551,107555-107557,107560,107574-107577,108161,108162,108165,108167,108168,109767,109768,109770,109772,110552-110583,110588,110592,110597,110601-110603,110607,110608,110610,114510,114512,114515,114517,114519,114521,114524 + +DONOR=8 +NAME=lung cancer +BCR=163919,1020669-1020676,1021304-1021311,1032722-1032729,1027521-1027536 +GEX=160549,1020601-1020608,1019535-1019542,1032690-1032697,1027481-1027496 + +DONOR=9 +NAME=ovarian cancer +BCR=123085-123086,123089-123090 +GEX=123749-123750,123753-123754 +PUBLIC_BCR_SUBSET=123085,123089,124547 (to sort out) + +DONOR=10 +NAME=MALT +BCR=83808-83809,86233-86234 +GEX=83216-83217,85651-85652 +BCR=86229-86230,89644-89645 +PUBLIC_BCR_SUBSET=83808,86233 + +DONOR=11 +NAME=PBMC +BCR=129476-129486,131256,131270,131275,134532-134538 +GEX=131092-131102,127966,127948,127953,133968-133974 +BCR=117458-117459,131235,131237,131240,131268 + +DONOR=12 +NAME=Lupus2 +BCR=86202-86208,86210-86216 +GEX=86518-86524,86218-86224 +BCR=86228,86237,86238,112741,112740,113526 +PUBLIC_BCR_SUBSET=86237 + +DONOR=13 +NAME= +BCR=40043 + +DONOR=14 +NAME= +BCR=40086 + +DONOR=15 +NAME= +BCR=83817,83819,83821 + +DONOR=16 +NAME= 
+BCR=91673 + +DONOR=17 +NAME=ChronicLymphLeuk +BCR=48620,48634 + +DONOR=18 +NAME=Lupus1 +BCR=48622 + +DONOR=19 +NAME=multiple sclerosis +BCR=48618 + +DONOR=20 +NAME= +BCR=48614 + +DONOR=21 +NAME= +BCR=45977,45987 +PUBLIC_BCR_SUBSET=45977 + +DONOR=22 +NAME=rheumatoid arthritis +BCR=86225,86226,89642,89643 + +DONOR=23 +NAME= +BCR=48612,48630 + +DONOR=24 +NAME=multiple myeloma 1 +BCR=48616,48632,83815,83816,86239,86240 + +DONOR=25 +NAME=multiple myeloma 2 (conceivably the same donor as previous) +BCR=48626 + +DONOR=26 +NAME= +BCR=129973,129974,129979,129980 + +DONOR=27 +NAME= +BCR=129517-129520 + +DONOR=28 +NAME= +BCR=165807 +PUBLIC_BCR_SUBSET=165807 + +DONOR=29 +NAME= +BCR=40935-40940,40943-40948,40951-40956,40959-40962,40964,42817,46032,47203,47204,47215,47216,99634-99645 +PUBLIC_BCR_SUBSET=40935-40938,40943-40946,40951-40954,40959-40962,46032,47203,47204,47215,47216 + +DONOR=30 +NAME= +BCR=90100,90103-90113 + +DONOR=31 +NAME= +BCR=129975,129976,131015,131036,131065,131066,132882,132883,132888,132889,132895,132900,132901,132906,132907,132910,132912,132913,134592,1202248,1202256 + +DONOR=32 +NAME= +BCR=79619-79622,79627-79630,85328-85333,85344-85349,85360-85365,86231,86232,87252-87259,87302,87483,88348-88364 +PUBLIC_BCR_SUBSET=85333,86231 + +DONOR=33 +NAME= +BCR=47199,47200,47211-47214 +PUBLIC_BCR_SUBSET=47199,47200,47211-47214 + +DONOR=34 +NAME= +BCR=44979-44982,44987,44988,43892-43899 +PUBLIC_BCR_SUBSET=44979-44982,44987,44988,43892-43899 + +DONOR=35 +NAME= +BCR=91295-91302,91312,91314,91316,91318,91320,91322,91324,92751,92758,92763,95455,106060,106062,114844,116087-116112,117417,117703,117704,117707,118175-118192,123137,123138,123141,123142,123176,123178,123179,123182,123183,123186,123187,123190,123191,124481-124490,128037,128040,128045,128048,140331-140364,140366-140369 +PUBLIC_BCR_SUBSET=124481-124490,128037,128040,128045,128048 + +DONOR=36 +NAME= +BCR=52177 +PUBLIC_BCR_SUBSET=52177 + +DONOR=37 +NAME= +BCR=47201 +PUBLIC_BCR_SUBSET=47201 + +DONOR=38 +NAME= 
+BCR=47202 +PUBLIC_BCR_SUBSET=47202 + +DONOR=39 +NAME= +BCR=42787-42789 +PUBLIC_BCR_SUBSET=42787-42789 + +DONOR=40 +NAME= +BCR=1092610-1092621,1095618-1095621,1095627-1095629,1100224-1100236,1100238-1100239,1100663-1100670,1100684-1100686,1123243-1123250,1123332-1123363,1125912-1125935,1133573-1133604,1134298-1134301,1228434-1228481,1266491-1266498,1266515-1266530 + +DONOR=41 +NAME=melanoma +BCR=1117088-1117110,1127527-1127558 + +DONOR=42 +NAME=LIBRA_seq/N45 +BCR=LIBRA_seq/N45 + +DONOR=43 +NAME=LIBRA_seq/N90 +BCR=LIBRA_seq/N90 + +DONOR=44 +NAME=test1 +BCR=1279053,1279061,1287192-1287195,1287200-1287203,1279050,1279058,1287196-1287197,1287204-1287205,1279051,1279059,1287198-1287199,1287206-1287207,1279052,1279060 + +DONOR=45 +NAME=test2 +BCR=1279049,1279057,1287176-1287179,1287184-1287187,1279054,1279062,1287180-1287181,1287188-1287189,1279055,1279063,1287182-1287183,1287190-1287191 + +DONOR=46 +NAME=test3 +BCR=1279065,1279073,1287144-1287147,1287152-1287155,1279066,1279074,1287156-1287157,1287148-1287149,1279067,1279075,1287150-1287151,1287158-1287159,1279068,1279076 + +DONOR=47 +NAME=test4 +BCR=1279069,1279077,1287160-1287163,1287168-1287171,1279070,1279078,1287164-1287165,1287172-1287173,1279071,1279079,1287166-1287167,1287174-1287175,1279072,1279080 + +DONOR=m1 +NAME=mouse +BCR=1023660-1023661,1023652-1023653,1023933-1023934,1023925-1023926 +GEX=1018214-1018215,1018198-1018199,1018206-1018207,1018190-1018191 +SPECIES=mouse diff --git a/enclone_args/src/lib.rs b/enclone_args/src/lib.rs new file mode 100644 index 000000000..45cf34085 --- /dev/null +++ b/enclone_args/src/lib.rs @@ -0,0 +1,51 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. 
+#![allow(clippy::needless_range_loop)] + +use io_utils::path_exists; + +pub mod load_gex; +pub mod load_gex_core; +pub mod load_gex_util; +pub mod proc_args; +pub mod proc_args2; +pub mod proc_args3; +pub mod proc_args_check; +pub mod proc_args_post; +pub mod process_special_arg1; +pub mod process_special_arg2; +pub mod read_json; + +// parse_csv_pure: same as parse_csv, but don't strip out quotes + +pub fn parse_csv_pure(x: &str) -> Vec<&str> { + let w = x.char_indices().collect::<Vec<_>>(); + let mut y = Vec::new(); + let (mut quotes, mut i) = (0, 0); + while i < w.len() { + let mut j = i; + while j < w.len() { + if quotes % 2 == 0 && w[j].1 == ',' { + break; + } + if w[j].1 == '"' { + quotes += 1; + } + j += 1; + } + let (start, stop) = (w[i].0, w.get(j).map_or(x.len(), |(ind, _)| *ind)); + y.push(&x[start..stop]); + i = j + 1; + } + if !w.is_empty() && w.last().unwrap().1 == ',' { + y.push(""); + } + y +} + +pub fn fnx(outs: &str, name: &str) -> String { + let mut file = format!("{outs}/../{name}"); + if !path_exists(&file) { + file = format!("{outs}/{name}"); + } + file +} diff --git a/enclone_args/src/load_gex.rs b/enclone_args/src/load_gex.rs new file mode 100644 index 000000000..d66f51022 --- /dev/null +++ b/enclone_args/src/load_gex.rs @@ -0,0 +1,195 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. +// +// Load gene expression and feature barcoding (antibody, antigen) data from +// Cell Ranger outputs. + +use crate::load_gex_core::load_gex; +use enclone_core::defs::{EncloneControl, GexInfo}; + +use hdf5::Dataset; +use mirror_sparse_matrix::MirrorSparseMatrix; +use rayon::prelude::*; +use std::fmt::Write; +use std::{collections::HashMap, time::Instant}; +use vector_utils::{bin_position, unique_sort}; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Get gene expression and feature barcoding counts. 
+ +pub fn get_gex_info(ctl: &mut EncloneControl) -> Result<GexInfo, String> { + let mut gex_features = Vec::<Vec<String>>::new(); + let mut gex_barcodes = Vec::<Vec<String>>::new(); + let mut gex_matrices = Vec::<MirrorSparseMatrix>::new(); + let mut fb_top_barcodes = Vec::<Vec<String>>::new(); + let mut fb_top_matrices = Vec::<MirrorSparseMatrix>::new(); + let mut fb_top_reads_barcodes = Vec::<Vec<String>>::new(); + let mut fb_top_reads_matrices = Vec::<MirrorSparseMatrix>::new(); + let mut fb_total_umis = Vec::<u64>::new(); + let mut fb_total_reads = Vec::<u64>::new(); + let mut fb_brn = Vec::<Vec<(String, u32, u32)>>::new(); + let mut fb_brnr = Vec::<Vec<(String, u32, u32)>>::new(); + let mut fb_bdcs = Vec::<Vec<(String, u32, u32, u32)>>::new(); + let mut feature_refs = Vec::<String>::new(); + let mut cluster = Vec::<HashMap<String, usize>>::new(); + let mut cell_type = Vec::<HashMap<String, String>>::new(); + let mut cell_type_specified = Vec::<bool>::new(); + let mut pca = Vec::<HashMap<String, Vec<f64>>>::new(); + let mut gex_mults = Vec::<f64>::new(); + let mut fb_mults = Vec::<f64>::new(); + let mut gex_cell_barcodes = Vec::<Vec<String>>::new(); + let mut have_gex = false; + let mut have_fb = false; + let mut h5_paths = Vec::<String>::new(); + let mut feature_metrics = Vec::<HashMap<(String, String), String>>::new(); + let mut json_metrics = Vec::<HashMap<String, f64>>::new(); + let mut metrics = Vec::<String>::new(); + load_gex( + ctl, + &mut gex_features, + &mut gex_barcodes, + &mut gex_matrices, + &mut fb_top_barcodes, + &mut fb_top_matrices, + &mut fb_top_reads_barcodes, + &mut fb_top_reads_matrices, + &mut fb_total_umis, + &mut fb_total_reads, + &mut fb_brn, + &mut fb_brnr, + &mut fb_bdcs, + &mut feature_refs, + &mut cluster, + &mut cell_type, + &mut cell_type_specified, + &mut pca, + &mut gex_mults, + &mut fb_mults, + &mut gex_cell_barcodes, + &mut have_gex, + &mut have_fb, + &mut h5_paths, + &mut feature_metrics, + &mut json_metrics, + &mut metrics, 
+ )?; + let t = Instant::now(); + if ctl.gen_opt.gene_scan_test.is_some() && !ctl.gen_opt.accept_inconsistent { + let mut allf = gex_features.clone(); + unique_sort(&mut allf); + if allf.len() != 1 { + let mut msg = format!( + "\nCurrently, SCAN requires that all datasets have identical \ + features, and they do not.\n\ + There are {} datasets and {} feature sets after removal of \ + duplicates.\nClassification of features sets:\n\n", + gex_features.len(), + allf.len() + ); + for (f, id) in gex_features.iter().zip(ctl.origin_info.dataset_id.iter()) { + let p = bin_position(&allf, f); + writeln!(msg, "{id} ==> {p}").unwrap(); + } + msg += "\n"; + return Err(msg); + } + } + let mut h5_data = Vec::<Option<Dataset>>::new(); + let mut h5_indices = Vec::<Option<Dataset>>::new(); + let mut h5_indptr = Vec::<Vec<u32>>::new(); + if ctl.gen_opt.h5 { + let gex_outs = &ctl.origin_info.gex_path; + for i in 0..ctl.origin_info.dataset_path.len() { + // let bin_file = format!("{}/feature_barcode_matrix.bin", gex_outs[i]); + if !gex_outs[i].is_empty() + /* && !(path_exists(&bin_file) && !ctl.gen_opt.force_h5) */ + { + let f = &h5_paths[i]; + + let h = hdf5::File::open(f).unwrap(); + + h5_data.push(Some(h.dataset("matrix/data").unwrap())); + h5_indices.push(Some(h.dataset("matrix/indices").unwrap())); + let indptr = h.dataset("matrix/indptr").unwrap(); + let x: Vec<u32> = indptr.as_reader().read().unwrap().to_vec(); + h5_indptr.push(x); + } else { + h5_data.push(None); + h5_indices.push(None); + h5_indptr.push(Vec::<u32>::new()); + } + } + } + fn compute_feature_id(gex_features: &[String]) -> HashMap<String, usize> { + let mut x = HashMap::<String, usize>::new(); + for (j, f) in gex_features.iter().enumerate() { + let ff = f.splitn(4, '\t').take(3).collect::<Vec<&str>>(); + for z in 0..2 { + if ff[2].starts_with("Antibody") { + x.insert(format!("{}_ab", ff[z]), j); + } else if ff[2].starts_with("CRISPR") { + x.insert(format!("{}_cr", ff[z]), j); + } else if 
ff[2].starts_with("CUSTOM") { + x.insert(format!("{}_cu", ff[z]), j); + } else if ff[2].starts_with("Gene") { + x.insert(format!("{}_g", ff[z]), j); + } else if ff[2].starts_with("Antigen") { + x.insert(format!("{}_ag", ff[z]), j); + } + } + } + x + } + let n = gex_features.len(); + let pi = (0..n).into_par_iter(); + let mut feature_id = Vec::<HashMap<String, usize>>::new(); + pi.map(|i| compute_feature_id(&gex_features[i])) + .collect_into_vec(&mut feature_id); + let is_gex = gex_features + .iter() + .map(|g| { + g.iter() + .map(|f| { + let ff = f.split('\t').nth(2).unwrap(); + ff.starts_with("Gene") + }) + .collect() + }) + .collect(); + ctl.perf_stats(&t, "after load_gex"); + + // Answer. + + Ok(GexInfo { + gex_features, + gex_barcodes, + gex_matrices, + fb_top_barcodes, + fb_top_matrices, + fb_top_reads_barcodes, + fb_top_reads_matrices, + fb_total_umis, + fb_total_reads, + fb_brn, + fb_brnr, + fb_bdcs, + feature_refs, + cluster, + cell_type, + cell_type_specified, + pca, + gex_cell_barcodes, + gex_mults, + fb_mults, + h5_data, + h5_indices, + h5_indptr, + is_gex, + feature_id, + have_gex, + have_fb, + feature_metrics, + json_metrics, + metrics, + }) +} diff --git a/enclone_args/src/load_gex_core.rs b/enclone_args/src/load_gex_core.rs new file mode 100644 index 000000000..05a3c869e --- /dev/null +++ b/enclone_args/src/load_gex_core.rs @@ -0,0 +1,928 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. +// +// Load gene expression and feature barcoding (antibody, antigen) data from Cell Ranger outputs. 
+ +use crate::load_gex_util::{ + find_cluster_file, find_feature_metrics_file, find_json_metrics_file, find_metrics_file, + find_pca_file, +}; +use crate::{fnx, parse_csv_pure}; +use enclone_core::defs::EncloneControl; +use enclone_core::slurp::slurp_h5; +use io_utils::{dir_list, open_for_read, open_userfile_for_read, path_exists}; +use itertools::Itertools; +use mirror_sparse_matrix::{ + get_code_version_from_file, read_from_file, write_to_file, MirrorSparseMatrix, +}; +use rayon::prelude::*; +use serde_json::Value; +use std::{ + collections::HashMap, + convert::TryInto, + fmt::Write, + fs::{read_to_string, remove_file, File}, + io::{BufRead, Read}, + time::Instant, +}; +use string_utils::{parse_csv, TextUtils}; +use vector_utils::{unique_sort, VecUtils}; + +pub fn load_gex( + ctl: &mut EncloneControl, + gex_features: &mut Vec<Vec<String>>, + gex_barcodes: &mut Vec<Vec<String>>, + gex_matrices: &mut Vec<MirrorSparseMatrix>, + fb_top_barcodes: &mut Vec<Vec<String>>, + fb_top_matrices: &mut Vec<MirrorSparseMatrix>, + fb_top_reads_barcodes: &mut Vec<Vec<String>>, + fb_top_reads_matrices: &mut Vec<MirrorSparseMatrix>, + fb_total_umis: &mut Vec<u64>, + fb_total_reads: &mut Vec<u64>, + fb_brn: &mut Vec<Vec<(String, u32, u32)>>, + fb_brnr: &mut Vec<Vec<(String, u32, u32)>>, + fb_bdcs: &mut Vec<Vec<(String, u32, u32, u32)>>, + feature_refs: &mut Vec<String>, + cluster: &mut Vec<HashMap<String, usize>>, + cell_type: &mut Vec<HashMap<String, String>>, + cell_type_specified: &mut Vec<bool>, + pca: &mut Vec<HashMap<String, Vec<f64>>>, + gex_mults: &mut Vec<f64>, + fb_mults: &mut Vec<f64>, + gex_cell_barcodes: &mut Vec<Vec<String>>, + have_gex: &mut bool, + have_fb: &mut bool, + h5_paths: &mut Vec<String>, + feature_metrics: &mut Vec<HashMap<(String, String), String>>, + json_metrics: &mut Vec<HashMap<String, f64>>, + metrics: &mut Vec<String>, +) -> Result<(), String> { + let t = Instant::now(); + let mut results = Vec::<( + usize, + Vec<String>, + Vec<String>, + 
MirrorSparseMatrix, + Option<f64>, + Option<f64>, + Vec<String>, + HashMap<String, usize>, + HashMap<String, String>, + HashMap<String, Vec<f64>>, + bool, + String, + String, + MirrorSparseMatrix, + Vec<String>, + Vec<String>, + HashMap<(String, String), String>, + HashMap<String, f64>, + String, + u64, + Vec<(String, u32, u32)>, + String, + (Vec<f32>, Vec<Vec<u8>>), + Vec<(String, u32, u32)>, + MirrorSparseMatrix, + Vec<String>, + u64, + Vec<(String, u32, u32, u32)>, + )>::new(); + for i in 0..ctl.origin_info.gex_path.len() { + results.push(( + i, + Vec::<String>::new(), + Vec::<String>::new(), + MirrorSparseMatrix::new(), + None, + None, + Vec::<String>::new(), + HashMap::<String, usize>::new(), + HashMap::<String, String>::new(), + HashMap::<String, Vec<f64>>::new(), + false, + String::new(), + String::new(), + MirrorSparseMatrix::new(), + Vec::<String>::new(), + Vec::<String>::new(), + HashMap::<(String, String), String>::new(), + HashMap::<String, f64>::new(), + String::new(), + 0, + Vec::new(), + String::new(), + (Vec::new(), Vec::new()), + Vec::new(), + MirrorSparseMatrix::new(), + Vec::<String>::new(), + 0, + Vec::new(), + )); + } + let gex_outs = &ctl.origin_info.gex_path; + // Here and in other places, where an error message can be printed in a parallel loop, it + // would be better if the thread could use a global lock to prevent multiple threads from + // issuing an error message. + // + // A lot of time is spent in this parallel loop. Some things are known about this: + // 1. When running it over a large number of datasets, the observed load average is ~2, so + // somehow the parallelism is not working. + // 2. We know where the time is spent in the loop, and this is marked below. + results.par_iter_mut().for_each(|r| { + let pathlist = &mut r.15; + let i = r.0; + if !gex_outs[i].is_empty() { + // First define the path where the GEX files should live, and make sure that the path + // exists. 
+ + let root = gex_outs[i].clone(); + let mut outs = root.clone(); + if root.ends_with("/outs") && path_exists(&root) { + outs = root; + } else if root.ends_with("/outs") { + outs = root.before("/outs").to_string(); + if !path_exists(&outs) { + r.11 = format!( + "\nThe directory\n{outs}\ndoes not exist. Something must be amiss with \ + the arguments to PRE and/or GEX and/or META.\n" + ); + return; + } + } + + // Define the file paths and test for their existence. + + let mut h5_path = String::new(); + let h5p = [ + "raw_feature_bc_matrix.h5", + "raw_gene_bc_matrices_h5.h5", + "multi/count/raw_feature_bc_matrix.h5", + ]; + for x in h5p.iter() { + let p = format!("{outs}/{x}"); + if path_exists(&p) { + pathlist.push(p.clone()); + h5_path = p; + break; + } + } + if h5_path.is_empty() { + r.11 = format!( + "\nThe file raw_feature_bc_matrix.h5 is not in the directory\n{outs}\n\ + and neither is the older-named version raw_gene_bc_matrices_h5.h5. Perhaps \ + something\nis amiss with the arguments to PRE and/or GEX and/or META.\n" + ); + return; + } + r.12 = h5_path.clone(); + let types_file = format!("{outs}/analysis_csv/celltypes/celltypes.csv"); + + // Define possible places for the analysis directory. + + let mut analysis = Vec::<String>::new(); + analysis.push(outs.to_string()); + analysis.push(format!("{outs}/analysis_csv")); + analysis.push(format!("{outs}/analysis")); + analysis.push(format!("{outs}/count/analysis")); + let pso1 = format!("{outs}/per_sample_outs"); + let pso2 = format!("{outs}/../per_sample_outs"); + for pso in [pso1, pso2].iter() { + if path_exists(pso) { + let samples = dir_list(pso); + if samples.solo() { + let a = format!("{pso}/{}/count/analysis", samples[0]); + analysis.push(a); + let a = format!("{pso}/{}/count/analysis_csv", samples[0]); + analysis.push(a); + } + } + } + + // Find files. 
+ + let pca_file = find_pca_file(ctl, &outs, &analysis, pathlist); + let json_metrics_file = find_json_metrics_file(ctl, &outs, &analysis, pathlist); + let feature_metrics_file = find_feature_metrics_file(ctl, &outs, &analysis, pathlist); + let metrics_file = find_metrics_file(ctl, &outs, &analysis, pathlist); + let cluster_file = find_cluster_file(ctl, &outs, &analysis, pathlist); + + // Proceed. + + let bin_file = format!("{outs}/feature_barcode_matrix.bin"); + for f in [pca_file.clone(), cluster_file.clone()].iter() { + if !path_exists(f) { + r.11 = format!( + "\nThe file\n{f}\ndoes not exist. \ + Perhaps one of your directories is missing some stuff.\n\n\ + One possibility is that you ran \"cellranger count\" using only \ + feature barcode (antibody) data,\nand you had less then ten antibodies. \ + Currently if you do this, cellranger will not run the\nsecondary \ + analyses, so you'll be missing some files. A workaround is to add \ + some \"fake\" antibodies\nto pad out the total number to ten.\n\n\ + Another possibility is that this is a multi run, and the path you \ + provided\nis to a subdirectory of the outs folder. In that case it may \ + work to provide the path to outs\nor (equivalently) the parent \ + directory.\n" + ); + return; + } else { + pathlist.push(f.to_string()); + } + } + + // Find metrics summary file. 
+ + let mut csv = String::new(); + let mut csvs = Vec::<String>::new(); + csvs.push(format!("{outs}/metrics_summary.csv")); + csvs.push(format!("{outs}/metrics_summary_csv.csv")); + let pso = format!("{outs}/per_sample_outs"); + if path_exists(&pso) { + let samples = dir_list(&pso); + if samples.solo() { + let a = format!("{pso}/{}/metrics_summary.csv", samples[0]); + csvs.push(a); + let a = format!("{pso}/{}/metrics_summary_csv.csv", samples[0]); + csvs.push(a); + } + } + for c in &csvs { + if path_exists(c) { + csv = c.clone(); + pathlist.push(c.to_string()); + break; + } + } + if csv.is_empty() { + r.11 = format!( + "\nSomething wrong with GEX or META argument:\ncan't find the file \ + metrics_summary.csv or metrics_summary_csv.csv in the directory\n\ + {outs}" + ); + return; + } + + // Determine the state of affairs of the bin file. We end with one of three outcomes: + // + // 1. We're not using the bin file at all. + // 2. We are reading the bin file. + // 3. We are writing the bin file. + + let mut bin_file_state = 1; + if !ctl.gen_opt.force_h5 { + let bin_file_exists = path_exists(&bin_file); + if !bin_file_exists { + if !ctl.gen_opt.h5 { + bin_file_state = 3; + } + } else { + pathlist.push(bin_file.clone()); + // THE FOLLOWING LINE HAS BEEN OBSERVED TO FAIL SPORADICALLY. THIS HAS + // HAPPENED MULTIPLE TIMES. THE FAIL WAS IN + // binary_read_to_ref::<u32>(&mut ff, &mut x[0], 11).unwrap(); + // WHERE THE unwrap() FAILED ON + // UnexpectedEof, error: "failed to fill whole buffer". + // + // 2/15/21: this should now be fixed. + + let v = get_code_version_from_file(&bin_file); + if v == 1 { + bin_file_state = 2; + } else { + bin_file_state = 3; + } + } + } + + // If we need to write feature_barcode_matrix.bin, make sure that's possible, before + // spending a lot of time reading other stuff. 
+ + if bin_file_state == 3 { + let f = File::create(&bin_file); + if f.is_err() { + r.11 = format!( + "\nenclone is trying to create the path\n{bin_file}\n\ + but that path cannot be created. This path is for the binary GEX \ + matrix file that enclone can read\n\ + faster than the hdf5 file. Your options are:\n\ + 1. Make that location writable (or fix the path, if it's wrong).\n\ + 2. Find a new location where you can write.\n\ + 3. Don't specify NH5 (if you specified it).\n" + ); + return; + } + remove_file(&bin_file).unwrap(); + } + + // Read cell types. + + if path_exists(&types_file) { + pathlist.push(types_file.clone()); + let f = open_userfile_for_read(&types_file); + let mut count = 0; + for line in f.lines() { + count += 1; + if count == 1 { + continue; + } + let s = line.unwrap(); + let barcode = s.before(","); + let cell_type = s.after(","); + r.8.insert(barcode.to_string(), cell_type.to_string()); + r.10 = true; + } + } else if ctl.gen_opt.mark_stats + || ctl.gen_opt.mark_stats2 + || ctl.clono_filt_opt_def.marked_b + { + r.11 = format!( + "\nIf you use MARK_STATS or MARK_STATS2 or MARKED_B, celltypes.csv has to \ + exist, and this file\n{types_file}\ndoes not exist.\n" + ); + return; + } + + // Read json metrics file. Note that we do not enforce the requirement of this + // file, so it may not be present. Also it is not present in the outs folder of CS + // pipelines, and a customer would have to rerun with --vdrmode=disable to avoid + // deleting the file, and then move it to outs so enclone could find it. + + if !json_metrics_file.is_empty() { + let m = std::fs::read_to_string(&json_metrics_file).unwrap(); + let v: Value = serde_json::from_str(&m).unwrap(); + let z = v.as_object().unwrap(); + for (var, value) in z.iter() { + if value.as_f64().is_some() { + let value = value.as_f64().unwrap(); + r.17.insert(var.to_string(), value); + } + } + } + + // Read and parse metrics file. Rewrite as metrics class, metric name, metric value. 
+ + if !metrics_file.is_empty() { + let m = std::fs::read_to_string(&metrics_file).unwrap(); + let fields = parse_csv_pure(m.before("\n")); + let (mut class, mut name, mut value) = (None, None, None); + for field in fields { + if field == "Library Type" { + class = Some(i); + } else if field == "Metric Name" { + name = Some(i); + } else if field == "Metric Value" { + value = Some(i); + } + } + let (class, name, value) = (class.unwrap(), name.unwrap(), value.unwrap()); + let mut lines = Vec::<String>::new(); + let mut first = true; + for line in m.lines() { + if first { + first = false; + } else { + let fields = parse_csv_pure(line); + lines.push(format!( + "{},{},{}", + fields[class], fields[name], fields[value] + )); + } + } + r.18 = format!("{}\n", lines.iter().format("\n")); + } + + // Read feature metrics file. Note that we do not enforce the requirement of this + // file, so it may not be present. + + if !feature_metrics_file.is_empty() { + let f = open_for_read![&feature_metrics_file]; + let mut feature_pos = HashMap::<String, usize>::new(); + let mut xfields = Vec::<String>::new(); + for (count, line) in f.lines().enumerate() { + let s = line.unwrap(); + let fields = parse_csv(&s); + if count == 0 { + for (j,field) in fields.iter().enumerate() { + feature_pos.insert(field.to_string(), j); + } + xfields = fields.clone(); + } else { + let feature_type = &fields[feature_pos["feature_type"]]; + let mut feature; + for pass in 1..=2 { + if pass == 1 { + feature = fields[feature_pos["feature_name"]].clone(); + } else { + feature = fields[feature_pos["feature_id"]].clone(); + } + if feature_type.starts_with("Antibody") { + feature += "_ab"; + } else if feature_type.starts_with("CRISPR") { + feature += "_cr"; + } else if feature_type.starts_with("CUSTOM") { + feature += "_cu"; + } else if feature_type.starts_with("Gene") { + feature += "_g"; + } else if feature_type.starts_with("Antigen") { + feature += "_ag"; + } + for j in 0..fields.len() { + if xfields[j] == 
"num_umis" + || xfields[j] == "num_reads" + || xfields[j] == "num_umis_cells" + || xfields[j] == "num_reads_cells" + { + r.16.insert( + (feature.clone(), xfields[j].clone()), + fields[j].clone(), + ); + } + } + } + } + } + } + + // Read PCA file. + + let f = open_userfile_for_read(&pca_file); + let mut count = 0; + for line in f.lines() { + count += 1; + if count == 1 { + continue; + } + let s = line.unwrap(); + let barcode = s.before(","); + let y = s.after(",").split(',').map(str::force_f64).collect(); + // This assert is turned off because in fact there are not always 10 components. + // assert_eq!(x.len(), 10); + r.9.insert(barcode.to_string(), y); + } + + // Read graph clusters, and also get the cell barcodes from that. + + let f = open_userfile_for_read(&cluster_file); + let mut count = 0; + for line in f.lines() { + count += 1; + if count == 1 { + continue; + } + let s = line.unwrap(); + let (barcode, cluster) = (s.before(","), s.after(",").force_usize()); + r.7.insert(barcode.to_string(), cluster); + r.6.push(barcode.to_string()); + } + + // Get the multipliers gene and feature barcode counts. 
+ + let (mut gene_mult, mut fb_mult) = (None, None); + let (mut rpc, mut fbrpc) = (None, None); + let mut lines = Vec::<String>::new(); + { + let f = open_userfile_for_read(&csv); + for line in f.lines() { + let s = line.unwrap(); + lines.push(s.to_string()); + } + } + if lines.is_empty() { + r.11 = format!("\nThe file\n{csv}\nis empty.\n"); + return; + } + let fields = parse_csv(&lines[0]); + if fields.contains(&"Metric Name".to_string()) + && fields.contains(&"Metric Value".to_string()) + && fields.contains(&"Library Type".to_string()) + { + let mut lib_field = 0; + let mut name_field = 0; + let mut value_field = 0; + for (i, field) in fields.iter().enumerate() { + if field == "Library Type" { + lib_field = i; + } else if field == "Metric Name" { + name_field = i; + } else if field == "Metric Value" { + value_field = i; + } + } + for (j,line) in lines.iter().enumerate().skip(1) { + let fields = parse_csv(line); + if fields.len() < lib_field + 1 + || fields.len() < name_field + 1 + || fields.len() < value_field + 1 + { + r.11 = format!( + "\nSomething appears to be wrong with the file\n{}:\n\ + line {} doesn't have enough fields.\n", + csv, + j + 1, + ); + return; + } + if fields[lib_field] == "Gene Expression" + && fields[name_field] == "Mean reads per cell" + { + let mut rpcx = fields[value_field].to_string(); + rpcx = rpcx.replace(',', ""); + rpcx = rpcx.replace('\"', ""); + if rpcx.parse::<usize>().is_err() { + r.11 = format!( + "\nSomething appears to be wrong with the file\n{csv}:\n\ + the Gene Expression Mean Reads per Cell value isn't an integer.\n" + ); + return; + } + rpc = Some(rpcx.force_usize() as isize); + // Note that where we have "Antibody Capture"/"Antigen Capture", we could hypothetically have + // "CRISPR Guide Capture" or "Custom Feature". 
+ } else if (fields[lib_field] == "Antibody Capture" || fields[lib_field] == "Antigen Capture") + && fields[name_field] == "Mean reads per cell" + { + let mut fbrpcx = fields[value_field].to_string(); + fbrpcx = fbrpcx.replace(',', ""); + fbrpcx = fbrpcx.replace('\"', ""); + if fbrpcx.parse::<usize>().is_err() { + r.11 = format!( + "\nSomething appears to be wrong with the file\n{csv}:\n\ + the Antibody/Antigen Capture Mean Reads per Cell value isn't an integer.\n" + ); + return; + } + fbrpc = Some(fbrpcx.force_usize() as isize); + } + } + if rpc.is_none() && fbrpc.is_none() { + r.11 = format!( + "\nGene expression or feature barcode data was expected, however the \ + CSV file\n{csv}\n\ + does not have values for Gene Expression Mean Reads per Cell or + Antibody/Antigen Capture Mean Reads per Cell.\n\ + This is puzzling.\n", + ); + return; + } + } else { + let (mut rpc_field, mut fbrpc_field) = (None, None); + for (line_no,line) in lines.iter().enumerate() { + let s = line; + let fields = parse_csv(s); + if line_no == 0 { + for (i,field) in fields.iter().enumerate() { + if field == "Mean Reads per Cell" { + rpc_field = Some(i); + } else if field == "Antibody: Mean Reads per Cell" || field == "Antigen: Mean Reads per Cell"{ + fbrpc_field = Some(i); + } + } + } else if line_no == 1 { + if rpc_field.is_some() && rpc_field.unwrap() >= fields.len() { + r.11 = format!( + "\nSomething appears to be wrong with the file\n{csv}:\n\ + the second line doesn't have enough fields.\n" + ); + return; + } else if rpc_field.is_some() { + let mut rpcx = fields[rpc_field.unwrap()].to_string(); + rpcx = rpcx.replace(',', ""); + rpcx = rpcx.replace('\"', ""); + if rpcx.parse::<usize>().is_err() { + r.11 = format!( + "\nSomething appears to be wrong with the file\n{csv}:\n\ + the Mean Reads per Cell field isn't an integer.\n" + ); + return; + } + rpc = Some(rpcx.force_usize() as isize); + } + if fbrpc_field.is_some() && fbrpc_field.unwrap() >= fields.len() { + r.11 = format!( + 
"\nSomething appears to be wrong with the file\n{csv}:\n\ + the second line doesn't have enough fields.\n" + ); + return; + } else if fbrpc_field.is_some() { + let mut fbrpcx = fields[fbrpc_field.unwrap()].to_string(); + fbrpcx = fbrpcx.replace(',', ""); + fbrpcx = fbrpcx.replace('\"', ""); + if fbrpcx.parse::<usize>().is_err() { + r.11 = format!( + "\nSomething appears to be wrong with the file\n{csv}:\n\ + the Antibody/Antigen: Mean Reads per Cell field isn't an integer.\n" + ); + return; + } + fbrpc = Some(fbrpcx.force_usize() as isize); + } + } + } + if rpc.is_none() && fbrpc.is_none() { + r.11 = format!( + "\nGene expression or feature barcode data was expected, however the \ + CSV file\n{csv}\n\ + does not have a field \"Mean Reads per Cell\" or \ + \"Antibody: Mean Reads per Cell\".\n\ + This is puzzling, and might be because a file within the Cell Ranger outs \ + directory has been moved\n\ + from its original location.\n", + ); + return; + } + } + if let Some(rpc) = rpc { + const RPC_EXPECTED: f64 = 20_000.0; + gene_mult = Some(RPC_EXPECTED / rpc as f64); + } + if let Some(fbrpc) = fbrpc { + const FB_RPC_EXPECTED: f64 = 5_000.0; + fb_mult = Some(FB_RPC_EXPECTED / fbrpc as f64); + } + r.4 = gene_mult; + r.5 = fb_mult; + + // Read the top feature barcode matrix, by UMIs. + + let top_file = fnx(&outs, "feature_barcode_matrix_top.bin"); + if path_exists(&top_file) { + pathlist.push(top_file.clone()); + read_from_file(&mut r.13, &top_file); + for i in 0..r.13.nrows() { + r.14.push(r.13.row_label(i)); + } + } + + // Read the top feature barcode matrix, by reads. + + let top_file = fnx(&outs, "feature_barcode_matrix_top_reads.bin"); + if path_exists(&top_file) { + pathlist.push(top_file.clone()); + read_from_file(&mut r.24, &top_file); + for i in 0..r.24.nrows() { + r.25.push(r.24.row_label(i)); + } + } + + // Read the total UMIs. 
+ + let top_file = fnx(&outs, "feature_barcode_matrix_top.total"); + if path_exists(&top_file) { + pathlist.push(top_file.clone()); + let mut f = open_for_read![&top_file]; + let mut bytes = Vec::<u8>::new(); + f.read_to_end(&mut bytes).unwrap(); + r.19 = u64::from_ne_bytes(bytes.try_into().unwrap()); + } + + // Read the total reads. + + let top_file = fnx(&outs, "feature_barcode_matrix_top.total_reads"); + if path_exists(&top_file) { + pathlist.push(top_file.clone()); + let mut f = open_for_read![&top_file]; + let mut bytes = Vec::<u8>::new(); + f.read_to_end(&mut bytes).unwrap(); + r.26 = u64::from_ne_bytes(bytes.try_into().unwrap()); + } + + // Read the barcode-ref-nonref UMI count file. + + let brn_file = fnx(&outs, "feature_barcode_matrix_top.brn"); + if path_exists(&brn_file) { + pathlist.push(brn_file.clone()); + let f = open_for_read![&brn_file]; + for line in f.lines() { + let s = line.unwrap(); + let fields = parse_csv(&s); + r.20.push(( + fields[0].to_string(), + fields[1].parse::<u32>().unwrap(), + fields[2].parse::<u32>().unwrap(), + )); + } + } + + // Read the barcode-ref-nonref read count file. + + let brnr_file = fnx(&outs, "feature_barcode_matrix_top.brnr"); + if path_exists(&brnr_file) { + pathlist.push(brnr_file.clone()); + let f = open_for_read![&brnr_file]; + for line in f.lines() { + let s = line.unwrap(); + let fields = parse_csv(&s); + r.23.push(( + fields[0].to_string(), + fields[1].parse::<u32>().unwrap(), + fields[2].parse::<u32>().unwrap(), + )); + } + } + + // Read the bdcs read count file. 
+ + let bdcs_file = fnx(&outs, "feature_barcode_matrix_top.bdcs"); + if path_exists(&bdcs_file) { + pathlist.push(bdcs_file.clone()); + let f = open_for_read![&bdcs_file]; + for line in f.lines() { + let s = line.unwrap(); + let fields = parse_csv(&s); + r.27.push(( + fields[0].to_string(), + fields[1].parse::<u32>().unwrap(), + fields[2].parse::<u32>().unwrap(), + fields[3].parse::<u32>().unwrap(), + )); + } + } + + // Read the feature reference file. + + let fref_file = fnx(&outs, "feature_reference.csv"); + if path_exists(&fref_file) { + pathlist.push(fref_file.clone()); + r.21 = read_to_string(&fref_file).unwrap(); + } + + // Read the binary matrix file if appropriate. + + if bin_file_state == 2 { + read_from_file(&mut r.3, &bin_file); + let (n, k) = (r.3.nrows(), r.3.ncols()); + for i in 0..n { + r.2.push(r.3.row_label(i)); + } + for j in 0..k { + r.1.push(r.3.col_label(j)); + } + + // Otherwise we have to get stuff from the h5 file. + } else { + let mut matrix = Vec::<Vec<(i32, i32)>>::new(); + let s = slurp_h5( + &h5_path, + bin_file_state == 3, + &mut r.2, + &mut r.1, + &mut matrix, + ); + if let Err(err) = s { + r.11 = err; + return; + } + if bin_file_state == 3 { + r.3 = MirrorSparseMatrix::build_from_vec(&matrix, &r.2, &r.1); + write_to_file(&r.3, &bin_file); + // Note that if the dataset archive was complete, we would not need to do this. + if ctl.gen_opt.internal_run { + let earth = &ctl.gen_opt.config["earth"]; + if !bin_file.starts_with(earth) { + let bin_file_alt = + format!("{earth}/current{}", bin_file.after("current")); + write_to_file(&r.3, &bin_file_alt); + } + } + } + } + } + unique_sort(&mut r.6); + }); + for r in &results { + ctl.pathlist.extend(r.15.iter().cloned()); + } + ctl.perf_stats(&t, "in load_gex main loop"); + + // Test for error. + + let t = Instant::now(); + for r in &results { + if !r.11.is_empty() { + return Err(r.11.clone()); + } + } + + // Set have_gex and have_fb. 
+ + for r in &results { + if r.4.is_some() { + *have_gex = true; + } + if r.5.is_some() { + *have_fb = true; + } + } + h5_paths.extend(results.iter().map(|r| r.12.clone())); + + // Add some metrics. + + let extras = [ + ( + "ANTIBODY_G_perfect_homopolymer_frac", + "Antibody Capture,G Homopolymer Frac", + ), + ( + "GRCh38_raw_rpc_20000_subsampled_filtered_bcs_median_unique_genes_detected", + "Gene Expression,GRCh38 Median genes per cell (20k raw reads per cell)", + ), + ( + "GRCh38_raw_rpc_20000_subsampled_filtered_bcs_median_counts", + "Gene Expression,GRCh38 Median UMI counts per cell (20k raw reads per cell)", + ), + ]; + for x in extras.iter() { + let metric_name = x.0.to_string(); + let metric_display_name = x.1.to_string(); + let mut have = false; + for result in &results { + if result.17.contains_key(&metric_name) { + have = true; + } + } + if have { + for result in results.iter_mut() { + let mut value = String::new(); + if result.17.contains_key(&metric_name) { + value = format!("{:.3}", result.17[&metric_name]); + } + writeln!(result.18, "{metric_display_name},{value}").unwrap(); + } + } + } + + // Save results. This avoids cloning, which saves a lot of time. 
+ + let n = results.len(); + for ( + _i, + ( + _x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + _x11, + _x12, + x13, + x14, + _x15, + x16, + x17, + x18, + x19, + x20, + x21, + _x22, + x23, + x24, + x25, + x26, + x27, + ), + ) in results.into_iter().take(n).enumerate() + { + gex_features.push(x1); + gex_barcodes.push(x2); + gex_matrices.push(x3); + fb_top_matrices.push(x13); + fb_top_barcodes.push(x14); + let mut gex_mult = 1.0; + if let Some(x4) = x4 { + gex_mult = x4; + } + gex_mults.push(gex_mult); + let mut fb_mult = 1.0; + if let Some(x5) = x5 { + fb_mult = x5; + } + fb_mults.push(fb_mult); + gex_cell_barcodes.push(x6); + cluster.push(x7); + cell_type.push(x8); + pca.push(x9); + cell_type_specified.push(x10); + feature_metrics.push(x16); + json_metrics.push(x17); + metrics.push(x18); + fb_total_umis.push(x19); + fb_brn.push(x20); + feature_refs.push(x21); + fb_brnr.push(x23); + fb_top_reads_matrices.push(x24); + fb_top_reads_barcodes.push(x25); + fb_total_reads.push(x26); + fb_bdcs.push(x27); + } + + // Done. + + ctl.perf_stats(&t, "in load_gex tail"); + Ok(()) +} diff --git a/enclone_args/src/load_gex_util.rs b/enclone_args/src/load_gex_util.rs new file mode 100644 index 000000000..c0cd64360 --- /dev/null +++ b/enclone_args/src/load_gex_util.rs @@ -0,0 +1,110 @@ +// Copyright (c) 2022 10X Genomics, Inc. All rights reserved. 
+ +use enclone_core::defs::EncloneControl; +use io_utils::{dir_list, path_exists}; +use vector_utils::VecUtils; + +pub fn find_pca_file( + _ctl: &EncloneControl, + _outs: &str, + analysis: &[String], + pathlist: &mut Vec<String>, +) -> String { + let mut pca_file = String::new(); + for x in analysis.iter() { + pca_file = format!("{x}/pca/10_components/projection.csv"); + if path_exists(&pca_file) { + pathlist.push(pca_file.clone()); + break; + } + pca_file = format!("{x}/pca/gene_expression_10_components/projection.csv"); + if path_exists(&pca_file) { + pathlist.push(pca_file.clone()); + break; + } + } + pca_file +} + +pub fn find_json_metrics_file( + ctl: &EncloneControl, + _outs: &str, + analysis: &[String], + pathlist: &mut Vec<String>, +) -> String { + let mut json_metrics_file = String::new(); + if !ctl.gen_opt.cellranger { + for x in analysis.iter() { + let f = format!("{x}/metrics_summary_json.json"); + if path_exists(&f) { + json_metrics_file = f.clone(); + pathlist.push(f); + break; + } + } + } + json_metrics_file +} + +pub fn find_feature_metrics_file( + ctl: &EncloneControl, + _outs: &str, + analysis: &[String], + pathlist: &mut Vec<String>, +) -> String { + let mut feature_metrics_file = String::new(); + if !ctl.gen_opt.cellranger { + for x in analysis.iter() { + let f = format!("{x}/per_feature_metrics.csv"); + if path_exists(&f) { + feature_metrics_file = f.clone(); + pathlist.push(f); + break; + } + } + } + feature_metrics_file +} + +pub fn find_metrics_file( + ctl: &EncloneControl, + outs: &str, + _analysis: &[String], + pathlist: &mut Vec<String>, +) -> String { + let mut metrics_file = String::new(); + if !ctl.gen_opt.cellranger { + let summary_dir = format!("{outs}/../multi_web_summary_json/metrics_summary_csv"); + if path_exists(&summary_dir) { + let list = dir_list(&summary_dir); + if list.solo() { + let path = format!("{summary_dir}/{}", list[0]); + pathlist.push(path.clone()); + metrics_file = path; + } + } + } + metrics_file +} + +pub fn 
find_cluster_file( + _ctl: &EncloneControl, + _outs: &str, + analysis: &[String], + pathlist: &mut Vec<String>, +) -> String { + let mut cluster_file = String::new(); + for x in analysis.iter() { + cluster_file = format!("{x}/clustering/graphclust/clusters.csv"); + if path_exists(&cluster_file) { + pathlist.push(cluster_file.clone()); + break; + } + cluster_file = format!("{x}/clustering/gene_expression_graphclust/clusters.csv"); + if path_exists(&cluster_file) { + pathlist.push(cluster_file.clone()); + break; + } + } + cluster_file +} diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs new file mode 100644 index 000000000..79ad45a81 --- /dev/null +++ b/enclone_args/src/proc_args.rs @@ -0,0 +1,978 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +use crate::proc_args2::{ + is_f64_arg, is_i32_arg, is_simple_arg, is_string_arg, is_usize_arg, test_writeable, +}; +use crate::proc_args_post::proc_args_post; +use crate::process_special_arg1::process_special_arg1; +use crate::process_special_arg2::process_special_arg2; +use enclone_core::defs::{ClonotypeHeuristics, EncloneControl}; +use enclone_core::test_def::replace_at_test; +use enclone_core::{require_readable_file, tilde_expand_me}; +use itertools::Itertools; +use std::fmt::Write; +use std::{process::Command, time::Instant}; +use string_utils::{strme, TextUtils}; + +// Process arguments. + +pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String> { + // + // Start. + + let targs = Instant::now(); + let evil_eye = ctl.gen_opt.evil_eye; + if evil_eye { + println!("processing args"); + } + + // Check for @test1,...,@test4 and @test. + + let mut args = args.to_owned(); + for i in 0..args.len() { + replace_at_test(&mut args[i]); + } + + // Knobs. 
+ let heur = ClonotypeHeuristics { + max_diffs: 1_000_000, + max_degradation: 2, + ref_v_trim: 15, + ref_j_trim: 15, + }; + ctl.heur = heur; + let mut args2 = Vec::<String>::new(); + for i in 0..args.len() { + if args[i].starts_with("BCR_GEX=") { + args2.push(format!("BCR={}", args[i].after("BCR_GEX="))); + args2.push(format!("GEX={}", args[i].after("BCR_GEX="))); + } else if args[i].starts_with("TCR_GEX=") { + args2.push(format!("TCR={}", args[i].after("TCR_GEX="))); + args2.push(format!("GEX={}", args[i].after("TCR_GEX="))); + } else if args[i].starts_with("GD_BC=") { + args2.push(format!( + "BC={}/outs/genetic_demux_results/clusters.tsv", + args[i].after("GD_BC=") + )); + } else { + args2.push(args[i].clone()); + } + } + args = args2; + + // Process special option SPLIT_COMMAND. + + if evil_eye { + println!("at split command"); + } + if ctl.gen_opt.split { + let (mut bcr, mut gex) = (Vec::<&str>::new(), Vec::<&str>::new()); + let mut args2 = Vec::<String>::new(); + for i in 1..args.len() { + if args[i] == "SPLIT_BY_COMMAND" { + } else if args[i].starts_with("BCR=") { + bcr = args[i].after("BCR=").split(',').collect::<Vec<&str>>(); + } else if args[i].starts_with("GEX=") { + gex = args[i].after("GEX=").split(',').collect::<Vec<&str>>(); + } else { + args2.push(args[i].to_string()); + } + } + for i in 0..bcr.len() { + let mut args = args2.clone(); + args.push(format!("BCR={}", bcr[i])); + args.push(format!("GEX={}", gex[i])); + println!("\nenclone {}\n", args.iter().format(" ")); + let o = Command::new("enclone") + .args(&args) + .output() + .expect("failed to execute enclone"); + print!("{}{}", strme(&o.stdout), strme(&o.stderr)); + if o.status.code() != Some(0) { + return Err("\nFAILED!\n".to_string()); + } + } + return Ok(()); + } + + // Set up general options. 
+ + if evil_eye { + println!("setting up general options"); + } + ctl.gen_opt.h5_pre = true; + ctl.gen_opt.min_cells_exact = 1; + ctl.gen_opt.min_chains_exact = 1; + ctl.gen_opt.exact = None; + ctl.gen_opt.full_counts = true; + ctl.gen_opt.color = "codon".to_string(); + ctl.silent = true; + ctl.gen_opt.peer_group_dist = "MFL".to_string(); + ctl.gen_opt.color_by_rarity_pc = -1.0; + ctl.gen_opt.jscore_match = 20; + ctl.gen_opt.jscore_mismatch = -20; + ctl.gen_opt.jscore_gap_open = -120; + ctl.gen_opt.jscore_gap_extend = -20; + ctl.gen_opt.jscore_bits_multiplier = 2.2; + ctl.gen_opt.max_heavies = 1000000; + ctl.gen_opt.subsample = -1.0; + + // Set up clonotyping control parameters. + + ctl.clono_filt_opt.ncells_low = 1; + ctl.clono_filt_opt.ncells_high = 1_000_000_000; + ctl.clono_filt_opt.min_umi = 0; + ctl.clono_filt_opt.max_chains = 1000000; + ctl.clono_filt_opt.qual_filter = true; + ctl.clono_filt_opt_def.signature = true; + ctl.clono_filt_opt_def.weak_chains = true; + ctl.clono_filt_opt_def.weak_onesies = true; + ctl.clono_filt_opt_def.weak_foursies = true; + ctl.clono_filt_opt_def.doublet = true; + ctl.clono_filt_opt_def.bc_dup = true; + ctl.clono_filt_opt.max_datasets = 1000000000; + ctl.clono_filt_opt_def.umi_filt = true; + ctl.clono_filt_opt_def.umi_ratio_filt = true; + ctl.clono_filt_opt.max_exacts = 1_000_000_000; + + ctl.clono_print_opt.amino = vec![ + "cdr3".to_string(), + "var".to_string(), + "share".to_string(), + "donor".to_string(), + ]; + ctl.clono_print_opt.cvars = vec!["u".to_string(), "const".to_string(), "notes".to_string()]; + ctl.clono_print_opt.lvars = vec!["datasets".to_string(), "n".to_string()]; + + ctl.clono_group_opt.min_group = 1; + ctl.clono_group_opt.min_group_donors = 1; + + ctl.allele_alg_opt.min_mult = 4; + ctl.allele_alg_opt.min_alt = 4; + + ctl.join_alg_opt.max_score = 100_000.0; + ctl.join_alg_opt.merge_onesies = true; // should just kill this as an option + ctl.join_alg_opt.merge_onesies_ctl = true; + 
ctl.join_alg_opt.max_cdr3_diffs = 1000; + ctl.join_alg_opt.cdr3_mult = 5.0; + ctl.join_alg_opt.mult_pow = 80.0; + ctl.join_alg_opt.join_cdr3_ident = 85.0; + ctl.join_alg_opt.fwr1_cdr12_delta = 20.0; + ctl.join_alg_opt.cdr3_normal_len = 42; + ctl.join_alg_opt.auto_share = 15; + ctl.join_alg_opt.comp_filt = 8; + ctl.join_alg_opt.comp_filt_bound = 80; + ctl.join_alg_opt.split_max_chains = usize::MAX; + + ctl.join_print_opt.pfreq = 1_000_000_000; + ctl.join_print_opt.quiet = true; + + ctl.parseable_opt.pchains = "4".to_string(); + + // Pretest for consistency amongst TCR, BCR, GEX and META. Also preparse GEX. + + let (mut have_tcr, mut have_bcr, mut have_tcrgd) = (false, false, false); + let mut have_gex = false; + let mut have_meta = false; + let mut gex = String::new(); + let mut bc = String::new(); + let mut metas = Vec::<String>::new(); + let mut metaxs = Vec::<String>::new(); + let mut xcrs = Vec::<String>::new(); + for i in 1..args.len() { + if args[i].starts_with("BI=") || args[i].starts_with("BIB=") || args[i].starts_with("BIP") { + have_bcr = true; + } else if args[i].starts_with("TCR=") { + have_tcr = true; + } else if args[i].starts_with("TCRGD=") { + have_tcrgd = true; + } else if args[i].starts_with("BCR=") { + have_bcr = true; + } else if args[i].starts_with("GEX=") { + have_gex = true; + } else if args[i].starts_with("META=") || args[i].starts_with("METAX=") { + have_meta = true; + } + if args[i].starts_with("GEX=") { + gex = args[i].after("GEX=").to_string(); + } + if args[i].starts_with("BC=") { + bc = args[i].after("BC=").to_string(); + } + if is_simple_arg(&args[i], "MARK_STATS")? { + ctl.gen_opt.mark_stats = true; + } + if is_simple_arg(&args[i], "MARK_STATS2")? { + ctl.gen_opt.mark_stats2 = true; + } + if is_simple_arg(&args[i], "MARKED_B")? 
{ + ctl.clono_filt_opt_def.marked_b = true; + } + } + if have_meta && (have_tcr || have_bcr || have_tcrgd || have_gex || !bc.is_empty()) { + return Err( + "\nIf META is specified, then none of TCR, TCRGD, BCR, GEX or BC can be specified.\n" + .to_string(), + ); + } + if have_tcr && have_bcr && have_tcrgd { + return Err( + "\nKindly please do not specify a combination of TCR, TCRGD, and BCR.\n".to_string(), + ); + } + let mut using_plot = false; + + // Preprocess BI and BIB and BIP arguments. + + for i in 1..args.len() { + if args[i].starts_with("BI=") || args[i].starts_with("BIB=") || args[i].starts_with("BIP") { + let bix = format!("{}=", args[i].before("=")); + if !ctl.gen_opt.internal_run && !args[i].starts_with("BIP=") { + return Err(format!("\nUnrecognized argument {}.\n", args[i])); + } + let x = args[i].after(&bix).split(',').collect::<Vec<&str>>(); + let mut y = Vec::<String>::new(); + for j in 0..x.len() { + if x[j].contains('-') { + let (start, stop) = (x[j].before("-"), x[j].after("-")); + if start.parse::<usize>().is_err() + || stop.parse::<usize>().is_err() + || start.force_usize() > stop.force_usize() + { + return Err("\nIllegal range in BI or BIB or BIP argument.\n".to_string()); + } + let (start, stop) = (start.force_usize(), stop.force_usize()); + for j in start..=stop { + y.push(format!("{j}")); + } + } else { + y.push(x[j].to_string()); + } + } + let mut args2 = Vec::<String>::new(); + let (mut bcrv, mut gexv) = (Vec::<String>::new(), Vec::<String>::new()); + for j in 0..i { + if args[j].starts_with("BCR=") { + bcrv.push(args[j].after("BCR=").to_string()); + } else if args[j].starts_with("GEX=") { + gexv.push(args[j].after("GEX=").to_string()); + } else { + args2.push(args[j].clone()); + } + } + const F: &str = include_str!["enclone.testdata.bcr.gex"]; + for n in y.iter() { + if *n != "m1" { + if n.parse::<usize>().is_err() || n.force_usize() < 1 || n.force_usize() > 47 { + return Err( + "\nBI and BIB and BIP only work for values n with if 1 <= n 
<= 47, \ + or n = m1.\n" + .to_string(), + ); + } + } else if y.len() > 1 { + return Err( + "\nFor BI and BIB and BIP, if you specify m, you can only specify m1.\n" + .to_string(), + ); + } + let mut found = false; + let mut bcr_seen = false; + for s in F.lines() { + if s == format!("DONOR={n}") { + found = true; + } else if found && s.starts_with("DONOR=") { + break; + } + if found { + if args[i].starts_with("BIP=") { + if s.starts_with("PUBLIC_BCR_SUBSET=") { + if bcrv.is_empty() { + bcrv.push(s.after("PUBLIC_BCR_SUBSET=").to_string()); + } else { + let n = bcrv.len(); + write!(bcrv[n - 1], ",{}", s.after("PUBLIC_BCR_SUBSET=")) + .unwrap(); + } + } + } else if s.starts_with("BCR=") { + if bcr_seen { + if args[i].starts_with("BIB=") { + let n = bcrv.len(); + write!(bcrv[n - 1], ",{}", s.after("BCR=")).unwrap(); + } + } else { + bcrv.push(s.after("BCR=").to_string()); + } + bcr_seen = true; + } + if s.starts_with("GEX=") { + gexv.push(s.after("GEX=").to_string()); + } + if s == "SPECIES=mouse" { + args2.push("MOUSE".to_string()); + } + } + } + } + for j in i + 1..args.len() { + if args[j].starts_with("BCR=") { + bcrv.push(args[j].after("BCR=").to_string()); + } else if args[j].starts_with("GEX=") { + gexv.push(args[j].after("GEX=").to_string()); + } else { + args2.push(args[j].clone()); + } + } + args2.push(format!("BCR={}", bcrv.iter().format(";"))); + if !gexv.is_empty() && args[i].starts_with("BI=") { + have_gex = true; + args2.push(format!("GEX={}", gexv.iter().format(";"))); + gex = format!("{}", gexv.iter().format(";")); + } + args = args2; + break; + } + } + + // Preprocess NALL and NALL_GEX. 
+ + for i in 1..args.len() { + if args[i] == *"NALL" || args[i] == "NALL_CELL" || args[i] == "NALL_GEX" { + let f = [ + "NCELL", + "NGEX", + "NCROSS", + "NDOUBLET", + "NUMI", + "NUMI_RATIO", + "NGRAPH_FILTER", + "NMAX", + "NQUAL", + "NWEAK_CHAINS", + "NWEAK_ONESIES", + "NFOURSIE_KILL", + "NWHITEF", + "NBC_DUP", + "MIX_DONORS", + "NIMPROPER", + "NSIG", + ]; + for j in 0..f.len() { + if f[j] == "NCELL" { + if args[i] != "NALL_CELL" { + args.push(f[j].to_string()); + } + } else if f[j] == "NGEX" { + if args[i] != "NALL_GEX" { + args.push(f[j].to_string()); + } + } else { + args.push(f[j].to_string()); + } + } + break; + } + } + + // Define arguments that set something to true. + + let mut set_true = vec![ + ("ACCEPT_BROKEN", &mut ctl.gen_opt.accept_broken), + ("ACCEPT_INCONSISTENT", &mut ctl.gen_opt.accept_inconsistent), + ("ACCEPT_REUSE", &mut ctl.gen_opt.accept_reuse), + ( + "ALIGN_JALIGN_CONSISTENCY", + &mut ctl.gen_opt.align_jun_align_consistency, + ), + ("ALLOW_INCONSISTENT", &mut ctl.gen_opt.allow_inconsistent), + ("ANN", &mut ctl.join_print_opt.ann), + ("ANN0", &mut ctl.join_print_opt.ann0), + ("BARCODES", &mut ctl.clono_print_opt.barcodes), + ("BASELINE", &mut ctl.gen_opt.baseline), + ("BASICX", &mut ctl.join_alg_opt.basicx), + ("BCJOIN", &mut ctl.join_alg_opt.bcjoin), + ("BUILT_IN", &mut ctl.gen_opt.built_in), + ("CDIFF", &mut ctl.clono_filt_opt.cdiff), + ("CHAIN_BRIEF", &mut ctl.clono_print_opt.chain_brief), + ("COMPLETE", &mut ctl.gen_opt.complete), + ("CON", &mut ctl.allele_print_opt.con), + ("CON_CON", &mut ctl.gen_opt.con_con), + ("CON_TRACE", &mut ctl.allele_print_opt.con_trace), + ("CONP", &mut ctl.clono_print_opt.conp), + ("CONX", &mut ctl.clono_print_opt.conx), + ("CURRENT_REF", &mut ctl.gen_opt.current_ref), + ("DEBUG_TABLE_PRINTING", &mut ctl.debug_table_printing), + ("DEL", &mut ctl.clono_filt_opt.del), + ("DESCRIP", &mut ctl.gen_opt.descrip), + ("D_INCONSISTENT", &mut ctl.clono_filt_opt.d_inconsistent), + ("D_NONE", &mut 
ctl.clono_filt_opt.d_none), + ("D_SECOND", &mut ctl.clono_filt_opt.d_second), + ("EASY", &mut ctl.join_alg_opt.easy), + ("ECHO", &mut ctl.gen_opt.echo), + ("ECHOC", &mut ctl.gen_opt.echoc), + ("FAILS_ONLY", &mut ctl.gen_opt.fails_only), + ("FOLD_HEADERS", &mut ctl.gen_opt.fold_headers), + ("FORCE", &mut ctl.force), + ("FULL_SEQC", &mut ctl.clono_print_opt.full_seqc), + ("GAMMA_DELTA", &mut ctl.gen_opt.gamma_delta), + ("GRAPH", &mut ctl.gen_opt.graph), + ( + "GROUP_CDR3H_LEN_VAR", + &mut ctl.clono_group_opt.cdr3h_len_var, + ), + ("GROUP_NAIVE", &mut ctl.clono_group_opt.naive), + ("GROUP_NO_NAIVE", &mut ctl.clono_group_opt.no_naive), + ("HAVE_ONESIE", &mut ctl.clono_filt_opt.have_onesie), + ("HEAVY_CHAIN_REUSE", &mut ctl.gen_opt.heavy_chain_reuse), + ("IMGT", &mut ctl.gen_opt.imgt), + ("IMGT_FIX", &mut ctl.gen_opt.imgt_fix), + ("INDELS", &mut ctl.gen_opt.indels), + ("INFO_RESOLVE", &mut ctl.gen_opt.info_resolve), + ("INKT", &mut ctl.clono_filt_opt.inkt), + ("INTERNAL", &mut ctl.gen_opt.internal_run), + ("JC1", &mut ctl.gen_opt.jc1), + ("JOIN_FULL_DIFF", &mut ctl.join_alg_opt.join_full_diff), + ("MAIT", &mut ctl.clono_filt_opt.mait), + ("MARKED", &mut ctl.clono_filt_opt.marked), + ("MEAN", &mut ctl.clono_print_opt.mean), + ("MIX_DONORS", &mut ctl.clono_filt_opt_def.donor), + ("MIX_ONLY", &mut ctl.gen_opt.mix_only), + ("MOUSE", &mut ctl.gen_opt.mouse), + ("NCELL", &mut ctl.gen_opt.ncell), + ("NCROSS", &mut ctl.clono_filt_opt_def.ncross), + ("NEWICK", &mut ctl.gen_opt.newick), + ("NGEX", &mut ctl.clono_filt_opt_def.ngex), + ("NOGRAY", &mut ctl.nogray), + ("NGRAPH_FILTER", &mut ctl.gen_opt.ngraph_filter), + ("NGROUP", &mut ctl.clono_group_opt.ngroup), + ("NIMPROPER", &mut ctl.merge_all_impropers), + ("NMAX", &mut ctl.clono_filt_opt_def.nmax), + ("NO_ALT_ALLELES", &mut ctl.gen_opt.no_alt_alleles), + ("NO_NEWLINE", &mut ctl.gen_opt.no_newline), + ("NO_UNCAP_SIM", &mut ctl.gen_opt.no_uncap_sim), + ("NON_CELL_MARK", &mut ctl.clono_filt_opt_def.non_cell_mark), + ("NOPRINT", 
&mut ctl.gen_opt.noprint), + ("NOPRINTX", &mut ctl.gen_opt.noprintx), + ("NOSPACES", &mut ctl.gen_opt.nospaces), + ("NOTE_SIMPLE", &mut ctl.clono_print_opt.note_simple), + ("NPLAIN", &mut ctl.pretty), + ("NWHITEF", &mut ctl.gen_opt.nwhitef), + ("NWARN", &mut ctl.gen_opt.nwarn), + ("OLD_LIGHT", &mut ctl.join_alg_opt.old_light), + ("OLD_MULT", &mut ctl.join_alg_opt.old_mult), + ("PCELL", &mut ctl.parseable_opt.pbarcode), + ("PG_READABLE", &mut ctl.gen_opt.peer_group_readable), + ("PER_CELL", &mut ctl.clono_print_opt.bu), + ("PNO_HEADER", &mut ctl.parseable_opt.pno_header), + ("PRE_EVAL", &mut ctl.gen_opt.pre_eval), + ("PRE_EVAL_SHOW", &mut ctl.gen_opt.pre_eval_show), + ("PROTECT_BADS", &mut ctl.clono_filt_opt.protect_bads), + ("QUAD_HIVE", &mut ctl.plot_opt.plot_quad), + ("RE", &mut ctl.gen_opt.reannotate), + ("REPROD", &mut ctl.gen_opt.reprod), + ("REQUIRE_UNBROKEN_OK", &mut ctl.gen_opt.require_unbroken_ok), + ("REUSE", &mut ctl.gen_opt.reuse), + ("ROW_FILL_VERBOSE", &mut ctl.gen_opt.row_fill_verbose), + ("SCAN_EXACT", &mut ctl.gen_opt.gene_scan_exact), + ("SEQC", &mut ctl.clono_print_opt.seqc), + ("SHOW_BC", &mut ctl.join_print_opt.show_bc), + ("STABLE_DOC", &mut ctl.gen_opt.stable_doc), + ( + "SPLIT_PLOT_BY_DATASET", + &mut ctl.plot_opt.split_plot_by_dataset, + ), + ( + "SPLIT_PLOT_BY_ORIGIN", + &mut ctl.plot_opt.split_plot_by_origin, + ), + ("SUM", &mut ctl.clono_print_opt.sum), + ("SUMMARY", &mut ctl.gen_opt.summary), + ("SUMMARY_CLEAN", &mut ctl.gen_opt.summary_clean), + ("SUMMARY_CSV", &mut ctl.gen_opt.summary_csv), + ( + "SUPPRESS_ISOTYPE_LEGEND", + &mut ctl.plot_opt.plot_by_isotype_nolegend, + ), + ("TOP_GENES", &mut ctl.gen_opt.top_genes), + ("TOY", &mut ctl.gen_opt.toy), + ("TOY_COM", &mut ctl.gen_opt.toy_com), + ("UMI_FILT_MARK", &mut ctl.clono_filt_opt_def.umi_filt_mark), + ( + "UMI_RATIO_FILT_MARK", + &mut ctl.clono_filt_opt_def.umi_ratio_filt_mark, + ), + ("UNACCOUNTED", &mut ctl.perf_opt.unaccounted), + ("UTR_CON", &mut ctl.gen_opt.utr_con), + 
("VDUP", &mut ctl.clono_filt_opt.vdup), + ("VIS_DUMP", &mut ctl.gen_opt.vis_dump), + ("VISUAL", &mut ctl.visual_mode), + ("WEAK", &mut ctl.gen_opt.weak), + ("WHITEF", &mut ctl.clono_filt_opt_def.whitef), + ]; + + // Define arguments that set something to false. + + let mut set_false = vec![ + ("H5_SLICE", &mut ctl.gen_opt.h5_pre), + ("NBC_DUP", &mut ctl.clono_filt_opt_def.bc_dup), + ("NDOUBLET", &mut ctl.clono_filt_opt_def.doublet), + ("NFOURSIE_KILL", &mut ctl.clono_filt_opt_def.weak_foursies), + ("NMERGE_ONESIES", &mut ctl.join_alg_opt.merge_onesies_ctl), + ("NQUAL", &mut ctl.clono_filt_opt.qual_filter), + ("NSIG", &mut ctl.clono_filt_opt_def.signature), + ("NSILENT", &mut ctl.silent), + ("NUMI", &mut ctl.clono_filt_opt_def.umi_filt), + ("NUMI_RATIO", &mut ctl.clono_filt_opt_def.umi_ratio_filt), + ("NWEAK_CHAINS", &mut ctl.clono_filt_opt_def.weak_chains), + ("NWEAK_ONESIES", &mut ctl.clono_filt_opt_def.weak_onesies), + ("PRINT_FAILED_JOINS", &mut ctl.join_print_opt.quiet), + ]; + + // Define arguments that set something to a usize. 
+ + let set_usize = [ + ("AUTO_SHARE", &mut ctl.join_alg_opt.auto_share), + ("CDR3_NORMAL_LEN", &mut ctl.join_alg_opt.cdr3_normal_len), + ("CHAINS_EXACT", &mut ctl.gen_opt.chains_exact), + ("JUN_SHARE", &mut ctl.join_alg_opt.comp_filt), + ("JUN_SHARE_BOUND", &mut ctl.join_alg_opt.comp_filt_bound), + ("MAX_CDR3_DIFFS", &mut ctl.join_alg_opt.max_cdr3_diffs), + ("MAX_DATASETS", &mut ctl.clono_filt_opt.max_datasets), + ("MAX_DEGRADATION", &mut ctl.heur.max_degradation), + ("MAX_DIFFS", &mut ctl.heur.max_diffs), + ("MAX_EXACTS", &mut ctl.clono_filt_opt.max_exacts), + ("MIN_ALT", &mut ctl.allele_alg_opt.min_alt), + ("MIN_CELLS_EXACT", &mut ctl.gen_opt.min_cells_exact), + ("MIN_CHAINS_EXACT", &mut ctl.gen_opt.min_chains_exact), + ( + "MIN_DATASET_RATIO", + &mut ctl.clono_filt_opt.min_dataset_ratio, + ), + ("MIN_DATASETS", &mut ctl.clono_filt_opt.min_datasets), + ("MIN_EXACTS", &mut ctl.clono_filt_opt.min_exacts), + ("MIN_GROUP", &mut ctl.clono_group_opt.min_group), + ( + "MIN_GROUP_DONORS", + &mut ctl.clono_group_opt.min_group_donors, + ), + ("MIN_MULT", &mut ctl.allele_alg_opt.min_mult), + ("MIN_ORIGINS", &mut ctl.clono_filt_opt.min_origins), + ("MIN_UMIS", &mut ctl.clono_filt_opt.min_umi), + ("PFREQ", &mut ctl.join_print_opt.pfreq), + ("SUPER_COMP_FILT", &mut ctl.join_alg_opt.super_comp_filt), + ("SPLIT_MAX_CHAINS", &mut ctl.join_alg_opt.split_max_chains), + ]; + + // Define arguments that set something to an i32. + + let set_i32 = [ + ("JSCORE_GAP_EXTEND", &mut ctl.gen_opt.jscore_gap_extend), + ("JSCORE_GAP_OPEN", &mut ctl.gen_opt.jscore_gap_open), + ("JSCORE_MATCH", &mut ctl.gen_opt.jscore_match), + ("JSCORE_MISMATCH", &mut ctl.gen_opt.jscore_mismatch), + ]; + + // Define arguments that set something to an f64. 
+ + let set_f64 = [ + ("CDR3_MULT", &mut ctl.join_alg_opt.cdr3_mult), + ("JSCORE_BITS_MULT", &mut ctl.gen_opt.jscore_bits_multiplier), + ("MULT_POW", &mut ctl.join_alg_opt.mult_pow), + ("SUBSAMPLE", &mut ctl.gen_opt.subsample), + ]; + + // Define arguments that set something to a string. + + let set_string = [ + ("AG_CENTER", &mut ctl.clono_group_opt.asymmetric_center), + ( + "AG_DIST_BOUND", + &mut ctl.clono_group_opt.asymmetric_dist_bound, + ), + ( + "AG_DIST_FORMULA", + &mut ctl.clono_group_opt.asymmetric_dist_formula, + ), + ("CLUSTAL_AA", &mut ctl.gen_opt.clustal_aa), + ("CLUSTAL_DNA", &mut ctl.gen_opt.clustal_dna), + ("CONFIG", &mut ctl.gen_opt.config_file), + ("EXT", &mut ctl.gen_opt.ext), + ("GROUP_CDR3", &mut ctl.clono_group_opt.cdr3), + ("PCHAINS", &mut ctl.parseable_opt.pchains), + ("SESSION_NAME", &mut ctl.gen_opt.session_name), + ("TRACE_BARCODE", &mut ctl.gen_opt.trace_barcode), + ]; + + // Define arguments that set something to a string that is an output file name. + + let set_string_writeable = [ + ("BINARY", &mut ctl.gen_opt.binary), + ("DONOR_REF_FILE", &mut ctl.gen_opt.dref_file), + ("FATE_FILE", &mut ctl.gen_opt.fate_file), + ("HONEY_OUT", &mut ctl.plot_opt.honey_out), + ("PROTO", &mut ctl.gen_opt.proto), + ("SUBSET_JSON", &mut ctl.gen_opt.subset_json), + ]; + + // Define arguments that set something to a string that is an output file name or stdout. + + let set_string_writeable_or_stdout = [ + ("PEER_GROUP", &mut ctl.gen_opt.peer_group_filename), + ("PHYLIP_AA", &mut ctl.gen_opt.phylip_aa), + ("PHYLIP_DNA", &mut ctl.gen_opt.phylip_dna), + ]; + + // Define arguments that set something to a string that is an input file name, represented + // as an option. 
+ + let set_string_readable = [ + ( + "CLONOTYPE_GROUP_NAMES", + &mut ctl.gen_opt.clonotype_group_names, + ), + ("HONEY_IN", &mut ctl.plot_opt.honey_in), + ("PROTO_METADATA", &mut ctl.gen_opt.proto_metadata), + ]; + + // Define arguments that set something to a string that is an input file name, not represented + // as an option. + + let set_string_readable_plain = [ + ("BC_JOINT", &mut ctl.gen_opt.bc_joint), + ("EXTERNAL_REF", &mut ctl.gen_opt.external_ref), + ("POST_FILTER", &mut ctl.gen_opt.post_filter), + ("REF", &mut ctl.gen_opt.refname), + ]; + + // Define arguments that do nothing (because already parsed), and which have no "= value" part. + + let set_nothing_simple = [ + "CELLRANGER", + "COMP", + "COMPE", + "COMP2", + "CTRLC", + "DUMP_INTERNAL_IDS", + "EVIL_EYE", + "FORCE_EXTERNAL", + "LONG_HELP", + "MARKED_B", + "MARK_STATS", + "MARK_STATS2", + "NALL", + "NALL_CELL", + "NALL_GEX", + "NO_KILL", + "NOPAGER", + "NOPRETTY", + "PLAIN", + "PRINT_CPU", + "PRINT_CPU_INFO", + "PROFILE", + "SVG", + ]; + + // Define arguments that do nothing (because already parsed), and which may have + // an "= value" part. + + let set_nothing = [ + "BC", + "BI", + "CONFIG_DEFAULT", + "EMAIL", + "GEX", + "HTML", + "INTERNAL", + "BUG_REPORTS", + "PRE", + "PREPOST", + "SOURCE", + "VERBOSE", + ]; + + // Define arguments that set something to a string that is an input CSV file name. + + let set_string_readable_csv = [("INFO", &mut ctl.gen_opt.info)]; + + // Traverse arguments. + + let mut processed = vec![true; args.len()]; + if evil_eye { + println!("starting main args loop"); + } + 'args_loop: for i in 1..args.len() { + let mut arg = args[i].to_string(); + if evil_eye { + println!("processing arg = {arg}"); + } + + // Replace deprecated option. + + if arg == *"KEEP_IMPROPER" { + arg = "NIMPROPER".to_string(); + } + + // Strip out certain quoted expressions. 
+ + if arg.contains("=\"") && arg.ends_with('\"') { + let mut quotes = 0; + for c in arg.chars() { + if c == '\"' { + quotes += 1; + } + } + if quotes == 2 { + arg = format!("{}={}", arg.before("="), arg.between("\"", "\"")); + } + } + args[i] = arg.clone(); + let arg = arg; + + // Check for weird case that might arise if testing code is screwed up. + + if arg.is_empty() { + return Err( + "\nYou've passed a null argument to enclone. Normally that isn't \ + possible.\nPlease take a detailed look at how you're invoking enclone.\n" + .to_string(), + ); + } + + // Process set_true arguments. + + for j in 0..set_true.len() { + if arg == *set_true[j].0 { + *(set_true[j].1) = true; + continue 'args_loop; + } + } + + // Process set_false arguments. + + for j in 0..set_false.len() { + if arg == *set_false[j].0 { + *(set_false[j].1) = false; + continue 'args_loop; + } + } + + // Process set_usize args. + + for j in 0..set_usize.len() { + if is_usize_arg(&arg, set_usize[j].0)? { + *(set_usize[j].1) = arg.after(&format!("{}=", set_usize[j].0)).force_usize(); + continue 'args_loop; + } + } + + // Process set_i32 args. + + for j in 0..set_i32.len() { + if is_i32_arg(&arg, set_i32[j].0)? { + *(set_i32[j].1) = arg.after(&format!("{}=", set_i32[j].0)).force_i32(); + continue 'args_loop; + } + } + + // Process set_f64 args. + + for j in 0..set_f64.len() { + if is_f64_arg(&arg, set_f64[j].0)? { + *(set_f64[j].1) = arg.after(&format!("{}=", set_f64[j].0)).force_f64(); + continue 'args_loop; + } + } + + // Process set_string args. + + for j in 0..set_string.len() { + if is_string_arg(&arg, set_string[j].0)? { + *(set_string[j].1) = arg.after(&format!("{}=", set_string[j].0)).to_string(); + continue 'args_loop; + } + } + + // Process set_string_writeable args. + + for j in 0..set_string_writeable.len() { + let var = &set_string_writeable[j].0; + if is_string_arg(&arg, var)? 
{ + *(set_string_writeable[j].1) = arg.after(&format!("{var}=")).to_string(); + tilde_expand_me(&mut *set_string_writeable[j].1); + let val = &(set_string_writeable[j].1); + if evil_eye { + println!("creating file {val} to test writability"); + } + test_writeable(val, evil_eye)?; + continue 'args_loop; + } + } + + // Process set_string_writeable_or_stdout args. + + for j in 0..set_string_writeable_or_stdout.len() { + let var = &set_string_writeable_or_stdout[j].0; + if is_string_arg(&arg, var)? { + *(set_string_writeable_or_stdout[j].1) = arg.after(&format!("{var}=")).to_string(); + tilde_expand_me(&mut *set_string_writeable_or_stdout[j].1); + let val = &(set_string_writeable_or_stdout[j].1); + if *val != "stdout" { + test_writeable(val, evil_eye)?; + } + continue 'args_loop; + } + } + + // Process set_string_readable args. + + for j in 0..set_string_readable.len() { + let var = &set_string_readable[j].0; + if is_string_arg(&arg, var)? { + let mut val = arg.after(&format!("{var}=")).to_string(); + if val.is_empty() { + return Err(format!("\nFilename input in {val} cannot be empty.\n")); + } + tilde_expand_me(&mut val); + *(set_string_readable[j].1) = Some(val.clone()); + if evil_eye { + println!("testing ability to open file {val}"); + } + require_readable_file(&val, &arg)?; + if evil_eye { + println!("file open complete"); + } + continue 'args_loop; + } + } + + // Process set_string_readable_plain args. + + for j in 0..set_string_readable_plain.len() { + let var = &set_string_readable_plain[j].0; + if is_string_arg(&arg, var)? 
{ + let mut val = arg.after(&format!("{var}=")).to_string(); + if val.is_empty() { + return Err(format!("\nFilename input in {val} cannot be empty.\n")); + } + tilde_expand_me(&mut val); + *(set_string_readable_plain[j].1) = val.clone(); + if evil_eye { + println!("testing ability to open file {val}"); + } + require_readable_file(&val, &arg)?; + if evil_eye { + println!("file open complete"); + } + continue 'args_loop; + } + } + + // Process set_string_readable_csv args. + + for j in 0..set_string_readable_csv.len() { + let var = &set_string_readable_csv[j].0; + if is_string_arg(&arg, var)? { + let mut val = arg.after(&format!("{var}=")).to_string(); + if val.is_empty() { + return Err(format!("\nFilename input in {val} cannot be empty.\n")); + } + tilde_expand_me(&mut val); + if !val.ends_with(".csv") { + return Err(format!( + "\nFilename input in {val} needs to end with .csv.\n" + )); + } + *(set_string_readable_csv[j].1) = Some(val.clone()); + require_readable_file(&val, &arg)?; + continue 'args_loop; + } + } + + // Process set_nothing_simple args. + + for j in 0..set_nothing_simple.len() { + if arg == *set_nothing_simple[j] { + continue 'args_loop; + } + } + + // Process set_nothing args. + + for j in 0..set_nothing.len() { + if arg == *set_nothing[j] || arg.starts_with(&format!("{}=", set_nothing[j])) { + continue 'args_loop; + } + } + + // Otherwise mark as not processed. + + processed[i] = false; + } + + // Process remaining args. + + if evil_eye { + println!("processing remaining args"); + } + for i in 1..args.len() { + if evil_eye { + println!("processing {}", args[i]); + } + if processed[i] { + continue; + } + if !process_special_arg1( + &args[i], + ctl, + &mut metas, + &mut metaxs, + &mut xcrs, + &mut using_plot, + )? { + process_special_arg2( + &args[i], + ctl, + &mut metas, + &mut metaxs, + &mut xcrs, + &mut using_plot, + )?; + } + } + + // Force visual mode if plot file is gui or if VIS_DUMP was invoked. 
+ + if (ctl.plot_opt.plot_file == "gui" || ctl.plot_opt.plot_file == "gui_stdout") + && !ctl.gen_opt.vis_dump + { + ctl.visual_mode = true; + } + + // Record time. + + ctl.perf_stats(&targs, "in main args loop"); + + // Do residual argument processing. + + if ctl.gen_opt.internal_run && ctl.gen_opt.config.is_empty() { + return Err( + "\nYou need to set up your configuration file, please ask for help.\n".to_string(), + ); + } + if ctl.gen_opt.gamma_delta && !have_tcrgd || !ctl.gen_opt.gamma_delta && have_tcrgd { + return Err( + "\n. GAMMA_DELTA flag has to be enabled for using TCRGD= and vice versa.\n".to_string(), + ); + } + if ctl.gen_opt.gamma_delta && (have_bcr || have_gex || have_meta || have_tcr) { + return Err( + "\n. Unsupported input type in GAMMA_DELTA mode. Only TCRGD= input is supported.\n" + .to_string(), + ); + } + proc_args_post( + ctl, &args, &metas, &metaxs, &xcrs, have_gex, &gex, &bc, using_plot, + )?; + Ok(()) +} diff --git a/enclone_args/src/proc_args2.rs b/enclone_args/src/proc_args2.rs new file mode 100644 index 000000000..80d2d0ba3 --- /dev/null +++ b/enclone_args/src/proc_args2.rs @@ -0,0 +1,264 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +use enclone_core::defs::EncloneControl; +use io_utils::{open_userfile_for_read, path_exists}; +use rayon::prelude::*; +use std::fmt::Write; +use std::fs::{remove_file, File}; +use std::{io::BufRead, time::Instant}; +use string_utils::TextUtils; +use vector_utils::next_diff; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Test a file for writeability by writing and then deleting it. 
/// Test a file for writeability by creating it and then deleting it.
///
/// * `val` - path of the prospective output file.
/// * `evil_eye` - when true, progress messages are printed to stdout.
///
/// Returns `Err` carrying a user-facing message if the file cannot be created (when `val`
/// contains a `/`, the message also says whether the part before the last `/` exists), or if
/// the probe file cannot be removed afterwards.  Note that a file already present at `val`
/// is truncated by the probe and then deleted.
pub fn test_writeable(val: &str, evil_eye: bool) -> Result<(), String> {
    if evil_eye {
        println!("creating file {val} to test writability");
    }
    // The handle is a temporary, so the file is closed again before we try to remove it.
    if File::create(val).is_err() {
        let mut msgx =
            format!("\nYou've specified an output file\n{val}\nthat cannot be written.\n");
        if let Some((dir, _)) = val.rsplit_once('/') {
            let msg = if std::path::Path::new(dir).exists() {
                "exists"
            } else {
                "does not exist"
            };
            writeln!(msgx, "Note that the path {dir} {msg}.").unwrap();
        }
        return Err(msgx);
    }
    if evil_eye {
        println!("removing file {val}");
    }
    // This function already reports problems through its Result, so a failed removal is
    // returned to the caller instead of aborting the process with a panic.
    remove_file(val).map_err(|e| format!("\nCould not remove file {val}: {e}.\n"))?;
    if evil_eye {
        println!("removal of file {val} complete");
    }
    Ok(())
}

// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓

/// Simple arguments.  We test for e.g. PLAIN or PLAIN=, the latter to allow for the case
/// where the argument has been set by an environment variable.
///
/// Returns `Ok(true)` if `arg` is exactly `x` or `x=`, `Err` with a user-facing message if
/// `arg` is `x=` followed by anything else, and `Ok(false)` otherwise.
pub fn is_simple_arg(arg: &str, x: &str) -> Result<bool, String> {
    match arg.strip_prefix(x) {
        // Bare name, or name with an empty right hand side.
        Some("") | Some("=") => Ok(true),
        // Name followed by "=<something>": a simple argument cannot carry a value.
        Some(rest) if rest.starts_with('=') => Err(format!(
            "\nYour command line includes \"{arg}\", which is not a valid argument.\n\
             Perhaps you meant \"{x}\".\n"
        )),
        _ => Ok(false),
    }
}

// Usize arguments.  We require that these are nonnegative integers.
+ +pub fn is_usize_arg(arg: &str, x: &str) -> Result<bool, String> { + if arg == x { + return Err(format!( + "\nYour command line includes \"{arg}\", which is not a valid argument.\n\ + Perhaps you meant \"{x}=n\", where n >= 0 is an integer.\n" + )); + } else if arg.starts_with(&format!("{x}=")) { + let val = arg.after(&format!("{x}=")).parse::<usize>(); + if val.is_ok() { + return Ok(true); + } else { + return Err(format!( + "\nYour command line includes \"{arg}\", which is not a valid argument.\n\ + Perhaps you meant \"{x}=n\", where n >= 0 is an integer.\n" + )); + } + } + Ok(false) +} + +// Usize arguments. We require that these are nonnegative integers. + +pub fn is_i32_arg(arg: &str, x: &str) -> Result<bool, String> { + if arg == x { + return Err(format!( + "\nYour command line includes \"{arg}\", which is not a valid argument.\n\ + Perhaps you meant \"{x}=n\", where n >= 0 is an integer.\n" + )); + } else if arg.starts_with(&format!("{x}=")) { + let val = arg.after(&format!("{x}=")).parse::<i32>(); + if val.is_ok() { + return Ok(true); + } else { + return Err(format!( + "\nYour command line includes \"{arg}\", which is not a valid argument.\n\ + Perhaps you meant \"{x}=n\", where n is an integer.\n" + )); + } + } + Ok(false) +} + +pub fn is_f64_arg(arg: &str, x: &str) -> Result<bool, String> { + if arg == x { + return Err(format!( + "\nYour command line includes \"{arg}\", which is not a valid argument.\n\ + Perhaps you meant \"{x}=n\", where n is a floating point number.\n" + )); + } else if arg.starts_with(&format!("{x}=")) { + let val = arg.after(&format!("{x}=")).parse::<f64>(); + if val.is_ok() { + return Ok(true); + } else { + return Err(format!( + "\nYour command line includes \"{arg}\", which is not a valid argument.\n\ + Perhaps you meant \"{x}=n\", where n is a floating point number.\n" + )); + } + } + Ok(false) +} + +pub fn is_string_arg(arg: &str, x: &str) -> Result<bool, String> { + if arg == x { + return Err(format!( + "\nYour command line 
includes \"{arg}\", which is not a valid argument.\n\ + Perhaps you meant \"{x}=s\" for some string s.\n" + )); + } else if arg.starts_with(&format!("{x}=")) { + return Ok(true); + } + Ok(false) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn proc_args_tail(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String> { + let tall = Instant::now(); + let mut lvars_specified = false; + for arg in args.iter().skip(1) { + if arg.starts_with("LVARS=") { + lvars_specified = true; + } + } + if !ctl.clono_print_opt.amino.is_empty() { + ctl.clono_print_opt.cvars.insert(0, "amino".to_string()); + } + if ctl.gen_opt.mouse && !ctl.gen_opt.refname.is_empty() { + return Err( + "\nIf you specify REF, please do not also specify MOUSE. It is enough to\n\ + set REF to a mouse reference sequence.\n" + .to_string(), + ); + } + + // Remove "datasets" from lvars if there is only one dataset and LVARS not specified. + + if !lvars_specified && ctl.origin_info.dataset_path.len() == 1 { + ctl.clono_print_opt.lvars.remove(0); + } + + // Print command line arguments and dataset summary. + + if !ctl.silent { + println!(); + for i in 0..args.len() { + let mut x = args[i].clone(); + if i == 0 && x.contains('/') { + x = x.rev_after("/").to_string(); + } + if i > 0 { + print!(" "); + } + print!("{x}"); + } + println!(); + println!( + "\nThere are {} datasets from {} donors.", + ctl.origin_info.dataset_path.len(), + ctl.origin_info.donors + ); + } + + // Check for duplicated directory paths. + + let mut dp = ctl.origin_info.dataset_path.clone(); + dp.sort(); + let mut i = 0; + while i < dp.len() { + let j = next_diff(&dp, i); + if j - i > 1 { + return Err(format!("\nInput dataset path {} is duplicated.\n", dp[i])); + } + i = j; + } + if !ctl.silent { + println!(); + } + + // Get origin descriptions. Flaky and particularly flaky when internal origin args are paths, + // since it will look in outs for the file. 
+ + if ctl.gen_opt.internal_run || ctl.gen_opt.descrip || ctl.visual_mode || ctl.gen_opt.vis_dump { + ctl.origin_info.descrips.clear(); + let mut results = vec![(0, "".to_string()); ctl.origin_info.n()]; + for i in 0..ctl.origin_info.n() { + results[i].0 = i; + } + results.par_iter_mut().for_each(|res| { + let i = res.0; + let mut d = ctl.origin_info.dataset_id[i].clone(); + let mut dir = ctl.origin_info.dataset_path[i].clone(); + if dir.ends_with("/outs") { + dir = dir.rev_before("/outs").to_string(); + } + let mut invo = format!("{dir}/_invocation"); + if !path_exists(&invo) { + invo = format!("{dir}/../../_invocation"); + } + if !path_exists(&invo) { + invo = format!("{dir}/../../../_invocation"); + } + if path_exists(&invo) { + let f = open_userfile_for_read(&invo); + for line in f.lines() { + let s = line.unwrap(); + // Leave sample_desc alone for internal architecture! + if s.contains("sample_desc ") { + d = s.between("\"", "\"").to_string(); + } + } + } + res.1 = d; + }); + for i in 0..ctl.origin_info.dataset_path.len() { + ctl.origin_info.descrips.push(results[i].1.clone()); + } + if ctl.gen_opt.descrip { + println!(); + for i in 0..ctl.origin_info.n() { + if i > 0 { + println!(); + } + println!( + "dataset {} ==> origin {} ==> donor {} ==> dataset descrip = {}", + ctl.origin_info.dataset_id[i], + // origin_id and donor_id don't make sense if bc specified in META + ctl.origin_info.origin_id[i], + ctl.origin_info.donor_id[i], + ctl.origin_info.descrips[i] + ); + println!("vdj path = {}", ctl.origin_info.dataset_path[i]); + if !ctl.origin_info.gex_path.is_empty() { + println!("gex path = {}", ctl.origin_info.gex_path[i]); + } + } + } + } + ctl.perf_stats(&tall, "in proc_args_tail"); + Ok(()) +} diff --git a/enclone_args/src/proc_args3.rs b/enclone_args/src/proc_args3.rs new file mode 100644 index 000000000..0b6f7c78a --- /dev/null +++ b/enclone_args/src/proc_args3.rs @@ -0,0 +1,903 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +// This file contains the two functions proc_xcr and proc_meta. + +use enclone_core::defs::{EncloneControl, OriginInfo}; +use enclone_core::{expand_integer_ranges, fetch_url, tilde_expand_me}; +use io_utils::{dir_list, open_for_read, open_for_write_new, open_userfile_for_read, path_exists}; +use itertools::Itertools; +use rayon::prelude::*; +use std::collections::HashMap; +use std::fmt::Write as _; +use std::fs::File; +use std::io::{BufRead, BufReader, Read, Write}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::thread; +use std::time; +use std::time::Instant; +use string_utils::TextUtils; +use vector_utils::unique_sort; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +fn expand_analysis_sets(x: &str, ctl: &EncloneControl) -> Result<String, String> { + let mut tokens = Vec::<String>::new(); + let mut token = String::new(); + for c in x.chars() { + if c == ',' || c == ':' || c == ';' { + if !token.is_empty() { + tokens.push(token.clone()); + token.clear(); + } + tokens.push(c.to_string()); + } else { + token.push(c); + } + } + if !token.is_empty() { + tokens.push(token); + } + let mut tokens2 = Vec::<String>::new(); + for token in tokens { + if let Some(setid) = token.strip_prefix('S') { + if ctl.gen_opt.internal_run { + let url = format!("{}/{setid}", ctl.gen_opt.config["sets"]); + let m = fetch_url(&url)?; + if m.contains("\"analysis_ids\":[") { + let ids = m.between("\"analysis_ids\":[", "]"); + let mut ids = ids.replace(' ', ""); + ids = ids.replace('\n', ""); + let ids = ids.split(','); + let mut ids2 = Vec::<&str>::new(); + + // Remove wiped analysis IDs. + + for id in ids { + let url = format!("{}/{id}", ctl.gen_opt.config["ones"]); + let m = fetch_url(&url)?; + if m.contains("502 Bad Gateway") { + return Err(format!( + "\nWell, this is sad. The URL \ + {url} returned a 502 Bad Gateway \ + message. 
Please try again later or ask someone for help.\n" + )); + } + if !m.contains("\"wiped\"") { + ids2.push(id); + } + } + let mut enclone = "~/enclone".to_string(); + tilde_expand_me(&mut enclone); + if path_exists(&enclone) { + let mut sets = "~/enclone/sets".to_string(); + tilde_expand_me(&mut sets); + if !path_exists(&sets) { + std::fs::create_dir(&sets).unwrap(); + let mut setid = format!("~/enclone/sets/{setid}"); + tilde_expand_me(&mut setid); + if !path_exists(&setid) { + let mut f = open_for_write_new![&setid]; + let s = format!("{}\n", ids2.iter().format(",")); + f.write_all(s.as_bytes()).unwrap(); + } + } + } + + // Proceed. + + for (j, id) in ids2.into_iter().enumerate() { + if j > 0 { + tokens2.push(",".to_string()); + } + tokens2.push(id.to_string()); + } + continue; + } else { + return Err(format!( + "\nIt looks like you've provided an incorrect analysis set ID {setid}.\n" + )); + } + } else if setid.parse::<usize>().is_ok() { + let mut set_file = format!("~/enclone/sets/{setid}"); + tilde_expand_me(&mut set_file); + if path_exists(&set_file) { + let mut f = open_for_read![&set_file]; + let mut s = String::new(); + f.read_to_string(&mut s).unwrap(); + s = s.before("\n").to_string(); + let ids2 = s.split(','); + for (j, id) in ids2.enumerate() { + if j > 0 { + tokens2.push(",".to_string()); + } + tokens2.push(id.to_string()); + } + continue; + } + } + } + tokens2.push(token); + } + let mut y = String::new(); + for t in tokens2 { + y += t.as_str(); + } + Ok(y) +} + +// Functions to find the path to data. + +pub fn get_path_fail(p: &str, ctl: &EncloneControl, source: &str) -> Result<String, String> { + for x in ctl.gen_opt.pre.iter() { + let pp = format!("{x}/{p}"); + if path_exists(&pp) { + return Ok(pp); + } + } + if !path_exists(p) { + if ctl.gen_opt.pre.is_empty() { + let path = std::env::current_dir().unwrap(); + return Err(format!( + "\nIn directory {}, unable to find the path {}. 
This came from the {} argument.\n", + path.display(), + p, + source + )); + } else { + let path = std::env::current_dir().unwrap(); + let mut pre_msg = + "Here are the number of entries in your PRE directories:\n".to_string(); + for x in ctl.gen_opt.pre.iter() { + let mut count = "(does not exist)".to_string(); + if path_exists(x) { + count = dir_list(x).len().to_string(); + } + writeln!(pre_msg, "{x}: {count}").unwrap(); + } + return Err(format!( + "\nIn directory {}, unable to find the\npath {},\n\ + even if prepended by any of the directories \ + in\nPRE={}.\nThis came from the {} argument.\n{}", + path.display(), + p, + ctl.gen_opt.pre.iter().format(","), + source, + pre_msg + )); + } + } + Ok(p.to_string()) +} + +fn get_path(p: &str, ctl: &EncloneControl, ok: &mut bool) -> String { + *ok = false; + for x in ctl.gen_opt.pre.iter() { + let mut pp = format!("{x}/{p}"); + if pp.starts_with('~') { + tilde_expand_me(&mut pp); + } + if path_exists(&pp) { + *ok = true; + return pp; + } + } + let mut pp = p.to_string(); + if pp.starts_with('~') { + tilde_expand_me(&mut pp); + } + *ok = path_exists(&pp); + pp +} + +fn get_path_or_internal_id( + p: &str, + ctl: &EncloneControl, + source: &str, + spinlock: &Arc<AtomicUsize>, +) -> Result<String, String> { + if ctl.gen_opt.evil_eye { + println!("getting path for {p}"); + } + let mut ok = false; + let mut pp = get_path(p, ctl, &mut ok); + if !ok { + if !ctl.gen_opt.internal_run { + get_path_fail(&pp, ctl, source)?; + } else { + // For internal runs, try much harder. This is so that internal users can + // just type an internal numerical id for a dataset and have it always + // work. The code that's used here should be placed somewhere else. + + let mut q = p.to_string(); + if q.contains('/') { + q = q.before("/").to_string(); + } + if q.parse::<usize>().is_ok() { + if !ctl.gen_opt.config.contains_key("ones") { + let mut msg = "\nSomething is wrong. 
This is an internal run, but \ + the configuration\nvariable \"ones\" is undefined.\n" + .to_string(); + if ctl.gen_opt.config.is_empty() { + msg += "In fact, there are no configuration variables.\n"; + } else { + msg += "Here are the configuration variables that are defined:\n\n"; + for (key, value) in ctl.gen_opt.config.iter() { + write!(msg, "{key} = {value}").unwrap(); + } + msg += "\n"; + } + return Err(msg); + } + let url = format!("{}/{q}", ctl.gen_opt.config["ones"]); + // We force single threading around the https access because we observed + // intermittently very slow access without it. + while spinlock.load(Ordering::SeqCst) != 0 {} + spinlock.store(1, Ordering::SeqCst); + let m = fetch_url(&url)?; + spinlock.store(0, Ordering::SeqCst); + if m.contains("502 Bad Gateway") { + return Err(format!( + "\nWell this is sad. The URL \ + {url} yielded a 502 Bad Gateway \ + message. Please try again later or ask someone for help.\n" + )); + } + if m.contains("\"path\":\"") { + let path = m.between("\"path\":\"", "\""); + if !p.contains('/') { + pp = format!("{path}/outs"); + } else { + pp = format!("{path}/{}", p.after("/")); + } + if !path_exists(&pp) { + thread::sleep(time::Duration::from_millis(100)); + if path_exists(&pp) { + return Err(format!( + "\nYou are experiencing unstable filesystem access: \ + 100 milliseconds ago, \ + the path\n\ + {pp}\nwas not visible, but now it is. You might consider posting \ + this problem on an appropriate \ + the slack channel.\nOr retry again. 
enclone is \ + giving up because \ + if filesystem access blinks in and out of existence,\n\ + other more cryptic events are likely to occur.\n" + )); + } else { + return Err(format!( + "\nIt looks like you've provided an analysis ID for \ + which the pipeline outs folder\n{p}\nhas not yet been generated.\n\ + This path did not exist:\n{pp}\n\n\ + Here is the stdout:\n{m}\n" + )); + } + } + } else { + return Err(format!( + "\nIt looks like you've provided either an incorrect \ + analysis ID {p} or else one for which\n\ + the pipeline outs folder has not yet been generated.\n\ + This URL\n{url}\ndid not provide a path.\n" + )); + } + } else { + return Err(format!( + "\nAfter searching high and low, your path\n{p}\nfor {source} \ + cannot be found.\nPlease check its value and also the value \ + for PRE if you provided that.\n" + )); + } + } + } + if !pp.ends_with("/outs") && path_exists(format!("{pp}/outs")) { + pp = format!("{pp}/outs"); + } + if ctl.gen_opt.evil_eye { + println!("path found"); + } + Ok(pp) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Parse barcode-level information file. 
+ +fn parse_bc(mut bc: String, ctl: &mut EncloneControl, call_type: &str) -> Result<(), String> { + let delimiter; + let file_type; + if bc.ends_with(".tsv") { + delimiter = '\t'; + file_type = "TSV"; + } else { + delimiter = ','; + file_type = "CSV"; + } + let mut origin_for_bc = HashMap::<String, String>::new(); + let mut donor_for_bc = HashMap::<String, String>::new(); + let mut tag = HashMap::<String, String>::new(); + let mut barcode_color = HashMap::<String, String>::new(); + let mut alt_bc_fields = Vec::<(String, HashMap<String, String>)>::new(); + let spinlock: Arc<AtomicUsize> = Arc::new(AtomicUsize::new(0)); + if !bc.is_empty() { + bc = get_path_or_internal_id(&bc, ctl, call_type, &spinlock)?; + let f = open_userfile_for_read(&bc); + let mut first = true; + let mut fieldnames = Vec::<String>::new(); + let mut barcode_pos = 0; + let (mut origin_pos, mut donor_pos, mut tag_pos, mut color_pos) = (None, None, None, None); + let mut to_alt = Vec::<isize>::new(); + for line in f.lines() { + let s = line.unwrap(); + if first { + let fields = s.split(delimiter).collect::<Vec<&str>>(); + to_alt = vec![-1_isize; fields.len()]; + if !fields.contains(&"barcode") { + let mut origin = "from the bc field used in META"; + if call_type == "BC" { + origin = "from the BC argument"; + } + return Err(format!( + "\nThe file\n{bc}\n{origin}\nis missing the barcode field.\n", + )); + } + for x in fields.iter() { + fieldnames.push(x.to_string()); + } + for i in 0..fields.len() { + if fields[i] == "color" { + color_pos = Some(i); + } + if fields[i] == "barcode" { + barcode_pos = i; + } else if fields[i] == "origin" { + origin_pos = Some(i); + } else if fields[i] == "donor" { + donor_pos = Some(i); + } else if fields[i] == "tag" { + tag_pos = Some(i); + } else { + to_alt[i] = alt_bc_fields.len() as isize; + alt_bc_fields + .push((fields[i].to_string(), HashMap::<String, String>::new())); + } + } + first = false; + } else { + let fields = s.split(delimiter).collect::<Vec<&str>>(); + 
if fields.len() != fieldnames.len() { + let mut origin = "bc in META"; + if call_type == "BC" { + origin = "BC"; + } + return Err(format!( + "\nThere is a line\n{}\nin a {} file defined by {}\n\ + that has {} fields, which isn't right, because the header line \ + has {} fields. This is for the file\n{}.\n", + s, + file_type, + origin, + fields.len(), + fieldnames.len(), + bc, + )); + } + for i in 0..fields.len() { + if to_alt[i] >= 0 { + alt_bc_fields[to_alt[i] as usize] + .1 + .insert(fields[barcode_pos].to_string(), fields[i].to_string()); + } + } + if !fields[barcode_pos].contains('-') { + let mut origin = "bc in META"; + if call_type == "BC" { + origin = "BC"; + } + return Err(format!( + "\nThe barcode \"{}\" appears in the file\n{bc}\ndefined \ + by {origin}. That doesn't make sense because a barcode\n\ + should include a hyphen.\n", + fields[barcode_pos] + )); + } + if let Some(origin_pos) = origin_pos { + origin_for_bc.insert( + fields[barcode_pos].to_string(), + fields[origin_pos].to_string(), + ); + } + if let Some(donor_pos) = donor_pos { + donor_for_bc.insert( + fields[barcode_pos].to_string(), + fields[donor_pos].to_string(), + ); + } + if let Some(tag_pos) = tag_pos { + tag.insert(fields[barcode_pos].to_string(), fields[tag_pos].to_string()); + } + if let Some(color_pos) = color_pos { + barcode_color.insert( + fields[barcode_pos].to_string(), + fields[color_pos].to_string(), + ); + } + } + } + } + ctl.origin_info.origin_for_bc.push(origin_for_bc); + ctl.origin_info.donor_for_bc.push(donor_for_bc); + ctl.origin_info.tag.push(tag); + ctl.origin_info.barcode_color.push(barcode_color); + ctl.origin_info.alt_bc_fields.push(alt_bc_fields); + Ok(()) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn proc_xcr( + f: &str, + gex: &str, + bc: &str, + have_gex: bool, + ctl: &mut EncloneControl, +) -> Result<(), String> { + ctl.origin_info = OriginInfo::default(); + if ((ctl.gen_opt.tcr || 
ctl.gen_opt.tcrgd) && f.starts_with("BCR=")) + || ((ctl.gen_opt.bcr || ctl.gen_opt.tcr) && f.starts_with("TCRGD=")) + || ((ctl.gen_opt.bcr || ctl.gen_opt.tcrgd) && f.starts_with("TCR=")) + { + return Err("\nOnly one of TCR, BCR, or TCRGD can be specified.\n".to_string()); + } + let t = Instant::now(); + ctl.gen_opt.tcr = f.starts_with("TCR="); + ctl.gen_opt.tcrgd = f.starts_with("TCRGD="); + ctl.gen_opt.bcr = f.starts_with("BCR="); + let val = if ctl.gen_opt.tcr { + f.after("TCR=") + } else if ctl.gen_opt.bcr { + f.after("BCR=") + } else if ctl.gen_opt.tcrgd { + f.after("TCRGD=") + } else { + f + }; + if val.is_empty() { + return Err(format!( + "\nYou can't write {f} with no value on the right hand side.\n\ + Perhaps you need to remove some white space from your command line.\n" + )); + } + let val = expand_integer_ranges(val); + let val = expand_analysis_sets(&val, ctl)?; + let donor_groups = if ctl.gen_opt.cellranger { + vec![&val[..]] + } else { + val.split(';').collect::<Vec<&str>>() + }; + let mut gex2 = expand_integer_ranges(gex); + gex2 = expand_analysis_sets(&gex2, ctl)?; + let donor_groups_gex = if ctl.gen_opt.cellranger { + vec![&gex2[..]] + } else { + gex2.split(';').collect::<Vec<&str>>() + }; + let donor_groups_bc = bc.split(';').collect::<Vec<&str>>(); + let xcr = if ctl.gen_opt.bcr { + "BCR" + } else if ctl.gen_opt.tcrgd { + "TCRGD" + } else { + "TCR" + }; + if have_gex && donor_groups_gex.len() != donor_groups.len() { + return Err(format!( + "\nThere are {} {} donor groups and {} GEX donor groups, so \ + the {} and GEX arguments do not exactly mirror each \ + other's structure.\n", + xcr, + donor_groups.len(), + donor_groups_gex.len(), + xcr + )); + } + if !bc.is_empty() && donor_groups_bc.len() != donor_groups.len() { + return Err(format!( + "\nThe {xcr} and BC arguments do not exactly mirror each \ + other's structure.\n" + )); + } + ctl.perf_stats(&t, "in proc_xcr 1"); + let t = Instant::now(); + for (id, d) in donor_groups.iter().enumerate() { + 
let origin_groups = if ctl.gen_opt.cellranger { + vec![&d[..]] + } else { + (*d).split(':').collect::<Vec<&str>>() + }; + let mut origin_groups_gex = Vec::<&str>::new(); + if have_gex { + if ctl.gen_opt.cellranger { + origin_groups_gex = vec![donor_groups_gex[id]]; + } else { + origin_groups_gex = donor_groups_gex[id].split(':').collect::<Vec<&str>>(); + } + if origin_groups_gex.len() != origin_groups.len() { + return Err(format!( + "\nFor donor {}, there are {} {} origin groups and {} GEX origin groups, so \ + the {} and GEX arguments do not exactly mirror each \ + other's structure.\n", + id + 1, + xcr, + origin_groups.len(), + origin_groups_gex.len(), + xcr + )); + } + } + let mut origin_groups_bc = Vec::<&str>::new(); + if !bc.is_empty() { + origin_groups_bc = donor_groups_bc[id].split(':').collect::<Vec<&str>>(); + if origin_groups_bc.len() != origin_groups.len() { + return Err(format!( + "\nThe {xcr} and BC arguments do not exactly mirror each \ + other's structure.\n" + )); + } + } + for (is, s) in origin_groups.iter().enumerate() { + let mut datasets = if ctl.gen_opt.cellranger { + vec![&s[..]] + } else { + (*s).split(',').collect::<Vec<&str>>() + }; + for ds in datasets.iter_mut() { + if ds.ends_with('/') { + *ds = ds.rev_before("/"); + } + } + let datasets_gex: Vec<&str>; + let mut datasets_bc = Vec::<&str>::new(); + if have_gex { + if ctl.gen_opt.cellranger { + datasets_gex = vec![origin_groups_gex[is]]; + } else { + datasets_gex = origin_groups_gex[is].split(',').collect::<Vec<&str>>(); + } + if datasets_gex.len() != datasets.len() { + return Err(format!( + "\nSee {} {} datasets and {} GEX datasets, so \ + the {} and GEX arguments do not exactly mirror each \ + other's structure.\n", + xcr, + datasets.len(), + datasets_gex.len(), + xcr + )); + } + } + if !bc.is_empty() { + datasets_bc = origin_groups_bc[is].split(',').collect::<Vec<&str>>(); + if datasets_bc.len() != datasets.len() { + return Err(format!( + "\nThe {xcr} and BC arguments do not exactly 
mirror each \ + other's structure.\n" + )); + } + } + for (ix, x) in datasets.iter().enumerate() { + ctl.origin_info.color.push("".to_string()); + ctl.origin_info.tag.push(HashMap::<String, String>::new()); + let donor_name = format!("d{}", id + 1); + let origin_name = format!("s{}", is + 1); + ctl.origin_info.donor_id.push(donor_name); + ctl.origin_info.origin_id.push(origin_name); + let mut dataset_name = (*x).to_string(); + if dataset_name.contains('/') { + dataset_name = dataset_name.rev_after("/").to_string(); + } + ctl.origin_info.descrips.push(dataset_name.clone()); + ctl.origin_info.dataset_id.push(dataset_name.clone()); + + // Now work on the BC path. + + let mut bcx = String::new(); + if !bc.is_empty() { + bcx = datasets_bc[ix].to_string(); + } + parse_bc(bcx, ctl, "BC")?; + } + } + } + ctl.perf_stats(&t, "in proc_xcr 2"); + + // Get paths. This will need to change when cellranger switches to multi. This code is + // parallelized because this code can indirectly make many calls to path_exists, and the wall + // clock time for these can add up. There should be a way to do this that does not involve + // multithreading. 
+ + let t = Instant::now(); + let source = if f.contains('=') { f.before("=") } else { f }; + let mut results = Vec::<(String, String, bool, String)>::new(); + for (id, d) in donor_groups.iter().enumerate() { + let origin_groups = (*d).split(':').collect::<Vec<&str>>(); + let mut origin_groups_gex = Vec::<&str>::new(); + if have_gex { + origin_groups_gex = donor_groups_gex[id].split(':').collect::<Vec<&str>>(); + } + for (is, s) in origin_groups.iter().enumerate() { + let datasets = (*s).split(',').collect::<Vec<&str>>(); + let mut datasets_gex = Vec::<&str>::new(); + if have_gex { + datasets_gex = origin_groups_gex[is].split(',').collect::<Vec<&str>>(); + } + for (ix, x) in datasets.iter().enumerate() { + let p = (*x).to_string(); + let mut pg = String::new(); + if have_gex { + pg = datasets_gex[ix].to_string(); + } + results.push((p, pg, false, String::new())); + } + } + } + ctl.perf_stats(&t, "in proc_xcr 3"); + let t = Instant::now(); + let spinlock: Arc<AtomicUsize> = Arc::new(AtomicUsize::new(0)); + results.par_iter_mut().for_each(|res| { + let (p, pg) = (&mut res.0, &mut res.1); + let resx = get_path_or_internal_id(p, ctl, source, &spinlock); + match resx { + Err(resx) => res.3 = resx, + Ok(resx) => { + *p = resx; + if ctl.gen_opt.bcr && path_exists(format!("{p}/vdj_b")) { + *p = format!("{p}/vdj_b"); + } + if ctl.gen_opt.bcr && path_exists(format!("{p}/multi/vdj_b")) { + *p = format!("{p}/multi/vdj_b"); + } + if ctl.gen_opt.tcr && path_exists(format!("{p}/vdj_t")) { + *p = format!("{p}/vdj_t"); + } + if ctl.gen_opt.tcr && path_exists(format!("{p}/multi/vdj_t")) { + *p = format!("{p}/multi/vdj_t"); + } + if have_gex { + let resx = get_path_or_internal_id(pg, ctl, "GEX", &spinlock); + match resx { + Err(resx) => res.3 = resx, + Ok(resx) => { + *pg = resx; + if path_exists(format!("{pg}/count")) { + *pg = format!("{pg}/count"); + } + if path_exists(format!("{pg}/count_pd")) { + *pg = format!("{pg}/count_pd"); + } + } + } + } + } + } + }); + for result in 
results { + if !result.3.is_empty() { + return Err(result.3); + } + ctl.origin_info.dataset_path.push(result.0); + ctl.origin_info.gex_path.push(result.1); + } + ctl.perf_stats(&t, "in proc_xcr 4"); + Ok(()) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn proc_meta_core(lines: &[String], ctl: &mut EncloneControl) -> Result<(), String> { + let mut fields = Vec::<String>::new(); + let mut donors = Vec::<String>::new(); + for (count, s) in lines.iter().enumerate() { + if count == 0 { + fields.extend(s.split(',').map(str::to_string)); + let mut fields_sorted = fields.clone(); + unique_sort(&mut fields_sorted); + if fields_sorted.len() < fields.len() { + return Err( + "\nThe CSV file that you specified using the META or METAX argument \ + has duplicate field names\nin its first line.\n" + .to_string(), + ); + } + let allowed_fields = vec![ + "bc".to_string(), + "bcr".to_string(), + "donor".to_string(), + "gex".to_string(), + "origin".to_string(), + "tcr".to_string(), + "tcrgd".to_string(), + "color".to_string(), + ]; + for x in fields.iter() { + if !allowed_fields.contains(x) { + return Err(format!( + "\nThe CSV file that you specified using the META or METAX argument \ + has an illegal field name ({x}) in its first line.\n" + )); + } + } + ctl.gen_opt.tcr = fields.contains(&"tcr".to_string()); + ctl.gen_opt.tcrgd = fields.contains(&"tcrgd".to_string()); + ctl.gen_opt.bcr = fields.contains(&"bcr".to_string()); + if !ctl.gen_opt.tcr && !ctl.gen_opt.bcr && !ctl.gen_opt.tcrgd { + return Err( + "\nThe CSV file that you specified using the META or METAX argument \ + has neither the field tcr, tcrgd, or bcr in its first line.\n" + .to_string(), + ); + } + if ctl.gen_opt.tcr && ctl.gen_opt.bcr { + return Err( + "\nThe CSV file that you specified using the META or METAX argument \ + has both the fields tcr and bcr in its first line.\n" + .to_string(), + ); + } + if ctl.gen_opt.tcr && ctl.gen_opt.tcrgd { + 
return Err( + "\nThe CSV file that you specified using the META or METAX argument \ + has both the fields tcr and tcrgd in its first line.\n" + .to_string(), + ); + } + if ctl.gen_opt.bcr && ctl.gen_opt.tcrgd { + return Err( + "\nThe CSV file that you specified using the META or METAX argument \ + has both the fields tcrgd and bcr in its first line.\n" + .to_string(), + ); + } + } else if !s.starts_with('#') && !s.is_empty() { + let val = s.split(',').collect::<Vec<&str>>(); + if val.len() != fields.len() { + return Err(format!( + "\nMETA or METAX file line {} has a different number of fields than the \ + first line of the file.\n", + count + 1 + )); + } + let mut path = String::new(); + let mut abbr = String::new(); + let mut gpath = String::new(); + let mut origin = "s1".to_string(); + let mut donor = "d1".to_string(); + let mut color = "".to_string(); + let mut bc = "".to_string(); + for i in 0..fields.len() { + let x = &fields[i]; + let mut y = val[i].to_string(); + if y.starts_with('"') && y.ends_with('"') { + y = y.after("\"").rev_before("\"").to_string(); + } + if *x == "tcr" || *x == "bcr" || *x == "tcrgd" { + if y.contains(':') { + path = y.after(":").to_string(); + abbr = y.before(":").to_string(); + } else { + path = y.to_string(); + if path.contains('/') { + abbr = path.rev_after("/").to_string(); + } else { + abbr = path.clone(); + } + } + } else if *x == "gex" { + gpath = y.to_string(); + } else if *x == "origin" { + origin = y.to_string(); + } else if *x == "donor" { + donor = y.to_string(); + } else if *x == "color" { + color = y.to_string(); + } else if *x == "bc" && !y.is_empty() { + bc = y.to_string(); + } + } + + // Parse bc and finish up. 
+ + parse_bc(bc.clone(), ctl, "META")?; + let current_ref = false; + let spinlock: Arc<AtomicUsize> = Arc::new(AtomicUsize::new(0)); + path = get_path_or_internal_id(&path, ctl, "META", &spinlock)?; + if ctl.gen_opt.bcr && path_exists(format!("{path}/vdj_b")) { + path = format!("{path}/vdj_b"); + } + if ctl.gen_opt.bcr && path_exists(format!("{path}/multi/vdj_b")) { + path = format!("{path}/multi/vdj_b"); + } + if ctl.gen_opt.tcr && path_exists(format!("{path}/vdj_t")) { + path = format!("{path}/vdj_t"); + } + if ctl.gen_opt.tcr && path_exists(format!("{path}/multi/vdj_t")) { + path = format!("{path}/multi/vdj_t"); + } + if ctl.gen_opt.tcrgd && path_exists(format!("{path}/vdj_t_gd")) { + path = format!("{path}/vdj_t_gd"); + } + if ctl.gen_opt.tcrgd && path_exists(format!("{path}/multi/vdj_t_gd")) { + path = format!("{path}/multi/vdj_t_gd"); + } + if !gpath.is_empty() { + gpath = get_path_or_internal_id(&gpath, ctl, "META", &spinlock)?; + if path_exists(format!("{gpath}/count")) { + gpath = format!("{gpath}/count"); + } + if path_exists(format!("{gpath}/count_pd")) { + gpath = format!("{gpath}/count_pd"); + } + } + if current_ref { + ctl.gen_opt.current_ref = true; + } + let dp = donors + .iter() + .enumerate() + .filter_map(|(j, dj)| if donor == *dj { Some(j) } else { None }) + .next(); + if dp.is_none() { + donors.push(donor.clone()); + } + ctl.origin_info.descrips.push(abbr.clone()); + ctl.origin_info.dataset_path.push(path); + ctl.origin_info.gex_path.push(gpath); + ctl.origin_info.dataset_id.push(abbr); + ctl.origin_info.donor_id.push(donor); + ctl.origin_info.origin_id.push(origin); + ctl.origin_info.color.push(color); + } + } + Ok(()) +} + +pub fn proc_meta(v: &[String], ctl: &mut EncloneControl) -> Result<(), String> { + let mut lines_all = Vec::<Vec<String>>::new(); + for f in v.iter() { + if !path_exists(f) { + return Err(format!( + "\nCan't find the file {f} referenced by your META argument.\n" + )); + } + let fx = File::open(f); + if fx.is_err() { + 
return Err(format!( + "\nProblem with META: unable to read from the file\n\ + \"{f}\".\nPlease check that that path makes sense and that you have read \ + permission for it.\n" + )); + } + let f = BufReader::new(fx.unwrap()); + let mut lines = Vec::<String>::new(); + for line in f.lines() { + let s = line.unwrap(); + lines.push(s); + } + lines_all.push(lines); + } + let mut lines = Vec::<String>::new(); + for j in 0..lines_all.len() { + if lines_all[j].is_empty() || lines_all[j][0] != lines_all[0][0] { + return Err( + "\nMETA files having different header lines have been specified.\n".to_string(), + ); + } + if j == 0 { + lines.push(lines_all[0][0].clone()); + } + for k in 1..lines_all[j].len() { + lines.push(lines_all[j][k].clone()); + } + } + proc_meta_core(&lines, ctl) +} diff --git a/enclone_args/src/proc_args_check.rs b/enclone_args/src/proc_args_check.rs new file mode 100644 index 000000000..f18b319fb --- /dev/null +++ b/enclone_args/src/proc_args_check.rs @@ -0,0 +1,808 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Check lvars, cvars, and pcols. + +use enclone_core::allowed_vars::{ + CVARS_ALLOWED, CVARS_ALLOWED_PCELL, GVARS_ALLOWED, LVARS_ALLOWED, PCVARS_ALLOWED, + PLVARS_ALLOWED, +}; +use enclone_core::defs::{EncloneControl, GexInfo}; +use itertools::Itertools; +use rayon::prelude::*; +use regex::Regex; +use std::time::Instant; +use string_utils::{strme, TextUtils}; +use vector_utils::{bin_member, unique_sort}; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Get known features. This code is inefficient. 
+ +pub fn get_known_features(gex_info: &GexInfo) -> Result<Vec<String>, String> { + let mut known_features = Vec::<String>::new(); + let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; + let suffixes_g = ["", "_min", "_max", "_μ", "_Σ", "_%"]; + let mut results = Vec::<(usize, Vec<String>, String)>::new(); + for i in 0..gex_info.gex_features.len() { + results.push((i, Vec::<String>::new(), String::new())); + } + results.par_iter_mut().for_each(|res| { + let i = res.0; + for j in 0..gex_info.gex_features[i].len() { + let f = &gex_info.gex_features[i][j]; + let ff = f.split('\t').collect::<Vec<&str>>(); + if ff.len() != 3 { + res.2 = format!( + "\nUnexpected structure of features file, at this line\n{f}\n\ + Giving up.\n" + ); + return; + } + for z in 0..2 { + if ff[2].starts_with("Antibody") { + for s in suffixes.iter() { + res.1.push(format!("{}_ab{s}", ff[z])); + } + } else if ff[2].starts_with("CRISPR") { + for s in suffixes.iter() { + res.1.push(format!("{}_cr{s}", ff[z])); + } + } else if ff[2].starts_with("CUSTOM") { + for s in suffixes.iter() { + res.1.push(format!("{}_cu{s}", ff[z])); + } + } else if ff[2].starts_with("Antigen") { + for s in suffixes.iter() { + res.1.push(format!("{}_ag{s}", ff[z])); + } + } else { + for s in suffixes_g.iter() { + res.1.push(format!("{}_g{s}", ff[z])); + } + } + } + } + }); + for result in &results { + if !result.2.is_empty() { + return Err(result.2.clone()); + } + } + for result in &results { + known_features.extend(result.1.iter().cloned()); + } + known_features.par_sort(); + known_features.dedup(); + Ok(known_features) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn involves_gex_fb(x: &str) -> bool { + let ends0 = [ + "_g", "_ab", "_ag", "_cr", "_cu", "_g_μ", "_ab_μ", "_ag_μ", "_cr_μ", "_cu_μ", "_g_%", + ]; + let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; + let mut ends = Vec::<String>::new(); + for z in ends0.iter() { + for y in suffixes.iter() { + 
ends.push(format!("{z}{y}")); + } + } + let x = { + let x = if x.contains(':') { x.rev_after(":") } else { x }; + if x.ends_with("_cell") { + x.rev_before("_cell") + } else { + x + } + }; + ends.iter().any(|y| x.ends_with(y)) + || x == "gex" + || x.starts_with("gex_") + || x == "n_gex" + || x == "clust" + || x == "type" + || x == "entropy" + || x == "cred" + || x == "cred_cell" +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn is_pattern(x: &str, parseable: bool) -> bool { + let ends0 = [ + "_g", "_ab", "_ag", "_cr", "_cu", "_g_μ", "_ab_μ", "_ag_μ", "_cr_μ", "_cu_μ", "_g_%", + ]; + let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; + let x = { + let mut x = x; + if x.contains(':') { + x = x.rev_after(":"); + } + if parseable && x.ends_with("_cell") { + x = x.rev_before("_cell"); + } + x + }; + let mut pat = false; + for y in ends0 + .into_iter() + .flat_map(|z| suffixes.iter().map(move |&y| format!("{z}{y}"))) + { + if x.ends_with(&y) { + let p = x.rev_before(&y); + if !p.is_empty() && Regex::new(p).is_ok() { + let mut ok = true; + let mut special = false; + let p = p.as_bytes(); + for &pi in p { + if !(pi.is_ascii_uppercase() + || pi.is_ascii_lowercase() + || pi.is_ascii_digit() + || b".-_[]()|*".contains(&pi)) + { + ok = false; + break; + } + if b"[]()|*".contains(&pi) { + special = true; + } + } + if ok && special { + pat = true; + break; + } + } + } + } + pat +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +fn check_gene_fb( + ctl: &EncloneControl, + gex_info: &GexInfo, + to_check: &[String], + category: &str, +) -> Result<(), String> { + let g_ends0 = ["_g"]; + let fb_ends0 = ["_ab", "_cr", "_cu", "_ag"]; + let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; + let suffixes_g = ["", "_min", "_max", "_μ", "_Σ", "_%"]; + let g_ends = g_ends0 + .iter() + .flat_map(|&x| suffixes_g.iter().map(move |&y| format!("{x}{y}"))) + .collect::<Vec<_>>(); + 
for x in to_check { + let x = if x.contains(':') { + x.after(":") + } else { + x.as_str() + }; + if !gex_info.have_gex && !gex_info.have_fb && (x == "n_gex" || x == "n_gex_cell") { + if category == "parseable" { + return Err(format!( + "\nParseable field {x} does not make sense because neither gene expression \ + nor feature barcode data\nwere provided as input.\n" + )); + } else { + return Err(format!( + "\nLead variable {x} does not make sense because neither gene expression \ + not feature barcode data\nwere provided as input.\n" + )); + } + } + if !gex_info.have_gex { + let mut problem = false; + for y in g_ends.iter() { + if x.ends_with(y) { + problem = true; + } + } + if problem + || x == "gex" + || x.starts_with("gex_") + || x == "clust" + || x == "type" + || x == "entropy" + || x == "cred" + || x == "cred_cell" + { + if category == "parseable" { + return Err(format!( + "\nParseable field {x} does not make sense because gene expression \ + data\nwere not provided as input.\n" + )); + } else { + return Err(format!( + "\nLead variable {x} does not make sense because gene expression \ + data\nwere not provided as input.\n" + )); + } + } + } + if !gex_info.have_fb { + for y in fb_ends0 + .into_iter() + .flat_map(|x| suffixes.iter().map(move |&y| format!("{x}{y}"))) + { + if x.ends_with(&y) { + if category == "parseable" { + return Err(format!( + "\nParseable field {x} does not make sense because feature \ + barcode data\nwere not provided as input.\n" + )); + } else { + return Err(format!( + "\nLead variable {x} does not make sense because feature barcode \ + data\nwere not provided as input.\n" + )); + } + } + } + } + } + + // Get known features. This code is inefficient. + + let known_features = get_known_features(gex_info)?; + + // Do the check. 
+ + for ci in to_check { + let mut x = ci.as_str(); + if x.contains(':') { + x = x.after(":"); + } + let mut y = x; + if category == "parseable" && y.ends_with("_cell") { + y = y.before("_cell"); + } + if !bin_member(&known_features, &y.to_string()) { + let mut n_var = false; + if x.starts_with("n_") { + n_var = true; + let mut is_dataset_name = false; + let mut is_origin_name = false; + let mut is_donor_name = false; + let mut is_tag_name = false; + let name = x.after("n_").to_string(); + let s = ctl.origin_info.n(); + for j in 0..s { + if ctl.origin_info.dataset_id[j] == name { + is_dataset_name = true; + } + } + for j in 0..ctl.origin_info.origin_list.len() { + if ctl.origin_info.origin_list[j] == name { + is_origin_name = true; + } + } + for j in 0..ctl.origin_info.donor_list.len() { + if ctl.origin_info.donor_list[j] == name { + is_donor_name = true; + } + } + for j in 0..ctl.origin_info.tag_list.len() { + if ctl.origin_info.tag_list[j] == name { + is_tag_name = true; + } + } + let msg = "\nSuggested reading: \"enclone help input\" and \ + \"enclone help glossary\".\n"; + if !is_dataset_name && !is_origin_name && !is_donor_name && !is_tag_name { + return Err(format!( + "\nYou've used the {category} variable {x}, and yet {name} \ + does not name a dataset, nor an origin,\nnor a donor, nor a tag.\n{msg}" + )); + } + let mut types = 0; + if is_dataset_name { + types += 1; + } + if is_origin_name { + types += 1; + } + if is_donor_name { + types += 1; + } + if is_tag_name { + types += 1; + } + if is_dataset_name && is_origin_name && is_donor_name { + return Err(format!( + "\nYou've used the {category} variable {x}, and yet {name} \ + names a dataset, an origin, and a donor. That's ambiguous.\n{msg}" + )); + } + if is_dataset_name && is_origin_name { + return Err(format!( + "\nYou've used the {category} variable {x}, and yet {name} \ + names a dataset and an origin. 
That's ambiguous.\n{msg}" + )); + } + if is_dataset_name && is_donor_name { + return Err(format!( + "\nYou've used the {category} variable {x}, and yet {name} \ + names a dataset and a donor. That's ambiguous.\n{msg}" + )); + } + if is_origin_name && is_donor_name { + return Err(format!( + "\nYou've used the {category} variable {x}, and yet {name} \ + names an origin and a donor. That's ambiguous.\n{msg}" + )); + } + if types != 1 { + return Err(format!( + "\nYou've used the {category} variable {x}, and yet {name} \ + names a tag and also a dataset, origin or donor.\n\ + That's ambiguous.\n{msg}" + )); + } + } + if !n_var { + let mut alts = Vec::<&str>::new(); + for y in known_features.iter() { + if x.eq_ignore_ascii_case(y) { + alts.push(y.as_str()); + } + } + if category == "lead" { + if x.is_empty() { + continue; + } + if !alts.is_empty() { + return Err(format!( + "\nThe variable {} for LVARS is unrecognized. Might you have \ + meant {}?\nPlease type \"enclone help lvars\".\n", + x, + alts.iter().format(" or "), + )); + } + return Err(format!( + "\nThe variable {x} for LVARS is unrecognized. Please type \ + \"enclone help lvars\".\n" + )); + } else { + if !alts.is_empty() { + return Err(format!( + "\nUnrecognized parseable variable {}. Might you have meant {}?\n\ + Please type \ + \"enclone help parseable\".\nIf the variable is a chain variable \ + (cvar), please make sure it is suffixed with the chain index.\n", + x, + alts.iter().format(" or "), + )); + } + return Err(format!( + "\nUnrecognized parseable variable {x}. Please type \ + \"enclone help parseable\".\nIf the variable is a chain variable (cvar), \ + please make sure it is suffixed with the chain index.\n" + )); + } + } + } + } + Ok(()) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Check pcols args. 
+ +pub fn check_pcols( + ctl: &EncloneControl, + gex_info: &GexInfo, + cols: &[String], + allow_cell: bool, +) -> Result<(), String> { + let mut alt_bcs = Vec::<String>::new(); + for li in 0..ctl.origin_info.alt_bc_fields.len() { + for i in 0..ctl.origin_info.alt_bc_fields[li].len() { + alt_bcs.push(ctl.origin_info.alt_bc_fields[li][i].0.clone()); + } + } + unique_sort(&mut alt_bcs); + let mut to_check = Vec::<String>::new(); + let pchains = &ctl.parseable_opt.pchains; + let ends = build_ends(); + let mut nd_used = false; + for x in cols.iter() { + let mut x = x.to_string(); + if x.contains(':') { + x = x.after(":").to_string(); + } + let mut ok = false; + // Note that the following test is probably redundant with some of the testing below. + if check_one_lvar(&x, ctl, gex_info, &mut nd_used, &ends, false)? { + ok = true; + } + for i in 0..ctl.gen_opt.info_fields.len() { + if *x == ctl.gen_opt.info_fields[i] { + ok = true; + } + } + if bin_member(&alt_bcs, &x) { + ok = true; + } + for y in ctl.clono_print_opt.lvars.iter() { + if y.contains(':') { + let y = y.before(":"); + if x == y { + ok = true; + } + } + } + for y in PLVARS_ALLOWED.iter() { + if x == *y { + ok = true; + } + } + for y in ctl.origin_info.dataset_list.iter() { + if *x == format!("{y}_barcodes") { + ok = true; + } + } + if ctl.parseable_opt.pbarcode { + if x == "barcode" { + ok = true; + } + for y in ctl.origin_info.dataset_list.iter() { + if *x == format!("{y}_barcode") { + ok = true; + } + } + } + let gpvar = x.starts_with('g') && x.after("g").parse::<usize>().is_ok(); + + if !gex_info.have_gex && !gex_info.have_fb && x.starts_with("n_gex") { + return Err(format!( + "\nCan't use parseable variable {x} without having gene \ + expression or feature barcode data.\n" + )); + } + if !gex_info.have_gex && (x.starts_with("gex") || x == "clust") || x == "type" { + return Err(format!( + "\nCan't use parseable variable {x} without having gene \ + expression data.\n" + )); + } + if 
LVARS_ALLOWED.contains(&x.as_str()) || gpvar || is_pattern(&x, true) { + ok = true; + } else { + let mut y = Vec::<u8>::new(); + for c in x.chars().rev() { + if c.is_ascii_digit() { + y.push(c as u8); + } else { + break; + } + } + y.reverse(); + let ps = strme(&y); + if !ps.is_empty() + && (pchains == "max" + || (ps.force_usize() > 0 && ps.force_usize() <= pchains.force_usize())) + { + let y = x.rev_before(ps); + if CVARS_ALLOWED.contains(&y) + || (allow_cell && CVARS_ALLOWED_PCELL.contains(&y)) + || PCVARS_ALLOWED.contains(&y) + || y.starts_with("ndiff") + && y.ends_with("vj") + && y.between("ndiff", "vj").parse::<usize>().is_ok() + && y.between("ndiff", "vj").force_usize() >= 1 + || (y.starts_with("cdr1_aa_") + || y.starts_with("cdr2_aa_") + || y.starts_with("cdr3_aa_")) + && y.after("aa_").contains('_') + && y.between("aa_", "_").parse::<isize>().is_ok() + && y.after("aa_").after("_").ends_with("_ext") + && y.after("aa_").between("_", "_ext").parse::<isize>().is_ok() + { + ok = true; + } + } + } + if !ok { + to_check.push(x.to_string()); + } + } + if !to_check.is_empty() { + check_gene_fb(ctl, gex_info, &to_check, "parseable")?; + } + Ok(()) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Check cvars args. 
+ +pub fn check_cvars(ctl: &EncloneControl) -> Result<(), String> { + for x in ctl.clono_print_opt.cvars.iter() { + let mut x = x.to_string(); + if x.contains(':') { + x = x.after(":").to_string(); + } + let ok = CVARS_ALLOWED.contains(&x.as_str()) + || x.starts_with("ndiff") + && x.ends_with("vj") + && x.between("ndiff", "vj").parse::<usize>().is_ok() + && x.between("ndiff", "vj").force_usize() >= 1 + || (x.starts_with("cdr1_aa_") + || x.starts_with("cdr2_aa_") + || x.starts_with("cdr3_aa_")) + && x.after("aa_").contains('_') + && x.between("aa_", "_").parse::<usize>().is_ok() + && x.after("aa_").after("_").ends_with("_ext") + && x.after("aa_").between("_", "_ext").parse::<usize>().is_ok() + || x.starts_with('q') + && x.ends_with('_') + && x.after("q").rev_before("_").parse::<usize>().is_ok(); + if !ok { + return Err(format!( + "\nUnrecognized variable {x} for CVARS or CVARSP. \ + Please type \"enclone help cvars\".\n" + )); + } + } + Ok(()) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn check_one_lvar( + x: &str, + ctl: &EncloneControl, + gex_info: &GexInfo, + nd_used: &mut bool, + ends: &[String], + is_lvar: bool, +) -> Result<bool, String> { + for i in 0..ctl.gen_opt.info_fields.len() { + if *x == ctl.gen_opt.info_fields[i] { + return Ok(true); + } + } + let mut x = x.to_string(); + if x.contains(':') { + x = x.after(":").to_string(); + } + + // See if type is ok. + + if x == "type" { + let mut specified = false; + for i in 0..gex_info.cell_type_specified.len() { + if gex_info.cell_type_specified[i] { + specified = true; + } + } + if !ctl.gen_opt.internal_run && !x.is_empty() { + return Err(format!( + "\nUnrecognized variable {x} for LVARS or PCOLS. 
Please type \ + \"enclone help lvars\".\n" + )); + } + if !specified { + return Err( + "\nYou've used the lead or parseable variable \"type\", but the file \ + cell_types.csv was not found.\n\ + This could be because you're using a GEX pipestance that was \ + run using too old a version of Cell Ranger.\n\ + Or it might have been generated using the CS pipeline.\n\ + Or you might have copied the pipestance outs but not included \ + that file.\n" + .to_string(), + ); + } + } + + // Check alt_bc_fields. + + for li in 0..ctl.origin_info.alt_bc_fields.len() { + for i in 0..ctl.origin_info.alt_bc_fields[li].len() { + if ctl.origin_info.alt_bc_fields[li][i].0 == x { + return Ok(true); + } + } + } + + // Check names defined by VAR_DEF. + + for i in 0..ctl.gen_opt.var_def.len() { + if x == ctl.gen_opt.var_def[i].0 { + return Ok(true); + } + } + + // Check for fb<n> and fb<n>_n, and _cell versions. + + if x.starts_with("fb") { + let mut y = x.after("fb").to_string(); + if y.ends_with("_cell") { + y = y.rev_before("_cell").to_string(); + } + if y.ends_with("_n") { + y = y.rev_before("_n").to_string(); + } + if y.parse::<usize>().is_ok() && y.force_usize() >= 1 { + if ctl.origin_info.n() != 1 { + return Err( + "\nThe variables fb<n> and fb<n>_n can only be used if there is just one \ + dataset.\n" + .to_string(), + ); + } + if !gex_info.fb_top_matrices[0].initialized() { + return Err( + "\nThe variables fb<n> and fb<n>_n can only be used if the file \ + feature_barcode_matrix_top.bin was generated.\n" + .to_string(), + ); + } + return Ok(true); + } + } + + // Check for nd<k>. + + if x.starts_with("nd") + && x.after("nd").parse::<usize>().is_ok() + && x.after("nd").force_usize() >= 1 + { + if *nd_used { + return Err("\nOnly one instance of the lead variable nd<k> is allowed.\n".to_string()); + } + *nd_used = true; + return Ok(true); + } + + // Check for [abbr:]count_<regex> and similar. 
+ + if x.starts_with("count_") || x.contains(":count_") { + let mut z = x.to_string(); + if x.contains(":count_") { + z = x.after(":").to_string(); + } + let mut class = "count_".to_string(); + if z.starts_with("count_cdr1_") + || z.starts_with("count_cdr2_") + || z.starts_with("count_cdr3_") + || z.starts_with("count_fwr1_") + || z.starts_with("count_fwr2_") + || z.starts_with("count_fwr3_") + || z.starts_with("count_fwr4_") + || z.starts_with("count_cdr_") + || z.starts_with("count_fwr_") + { + class = format!("count_{}_", z.between("_", "_")); + } + let y = z.after(&class); + let reg = Regex::new(y); + if reg.is_err() || y.contains('_') { + return Err(format!( + "\nThe string after {class} in your lead or parseable variable {x} is not a valid \ + regular expression for amino acids.\n" + )); + } + return Ok(true); + } + + // Check for pe<n> and npe<n> and ppe<n>. + + if x.starts_with("pe") && x.after("pe").parse::<usize>().is_ok() { + return Ok(true); + } + if x.starts_with("npe") && x.after("npe").parse::<usize>().is_ok() { + return Ok(true); + } + if x.starts_with("ppe") && x.after("ppe").parse::<usize>().is_ok() { + return Ok(true); + } + + // Check for patterns. + + if is_pattern(&x, false) { + return Ok(true); + } + + // The rest. 
+ + if !gex_info.have_gex && !gex_info.have_fb && x.starts_with("n_gex") { + return Err(format!( + "\nCan't use LVARS or LVARSP or PCOLS variable {x} without having gene \ + expression or feature barcode data.\n" + )); + } + if !gex_info.have_gex && (x.starts_with("gex") || x == "clust" || x == "type") { + return Err(format!( + "\nCan't use LVARS or LVARSP or PCOLS variable {x} without having gene \ + expression data.\n" + )); + } + let gpvar = x.starts_with('g') && x.after("g").parse::<usize>().is_ok(); + if gpvar { + return Ok(true); + } + if !LVARS_ALLOWED.contains(&x.as_str()) { + let mut end_ok = false; + for end in ends { + if x.ends_with(end) { + end_ok = true; + } + } + if end_ok { + return Ok(false); + } + if is_lvar && !x.starts_with("n_") && !x.is_empty() { + return Err(format!( + "\nUnrecognized variable {x} for LVARS. Please type \ + \"enclone help lvars\".\n" + )); + } else { + return Ok(false); + } + } + Ok(true) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn build_ends() -> Vec<String> { + let mut ends = Vec::<String>::new(); + let ends0 = [ + "_g", "_ab", "_ag", "_cr", "_cu", "_g_μ", "_ab_μ", "_ag_μ", "_cr_μ", "_cu_μ", "_g_%", + ]; + let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; + for x in ends0.iter() { + for y in suffixes.iter() { + ends.push(format!("{x}{y}")); + } + } + ends +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Check lvars args. + +pub fn check_lvars(ctl: &EncloneControl, gex_info: &GexInfo) -> Result<(), String> { + let t = Instant::now(); + let mut to_check = Vec::<String>::new(); + let ends = build_ends(); + let mut nd_used = false; + for x in ctl.clono_print_opt.lvars.iter() { + if x.ends_with("_cell") { + return Err( + "\nFields ending with _cell cannot be used in LVARS or LVARSP.\n".to_string(), + ); + } + if !check_one_lvar(x, ctl, gex_info, &mut nd_used, &ends, true)? 
{ + to_check.push(x.clone()); + } + } + ctl.perf_stats(&t, "checking lvars top"); + let t = Instant::now(); + if !to_check.is_empty() { + check_gene_fb(ctl, gex_info, &to_check, "lead")?; + } + ctl.perf_stats(&t, "checking gene"); + Ok(()) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Check gvars args. + +pub fn check_gvars(ctl: &EncloneControl) -> Result<(), String> { + for x in ctl.gen_opt.gvars.iter() { + if !GVARS_ALLOWED.contains(&x.as_str()) { + return Err(format!("\nUnknown global variable {x}.\n")); + } + } + Ok(()) +} diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs new file mode 100644 index 000000000..2eecc22a9 --- /dev/null +++ b/enclone_args/src/proc_args_post.rs @@ -0,0 +1,711 @@ +// Copyright (c) 2022 10X Genomics, Inc. All rights reserved. + +use crate::proc_args2::proc_args_tail; +use crate::proc_args3::{get_path_fail, proc_meta, proc_meta_core, proc_xcr}; +use crate::proc_args_check::check_cvars; +use enclone_core::defs::EncloneControl; +use enclone_core::tilde_expand_me; +use enclone_vars::encode_arith; +use evalexpr::build_operator_tree; +use expr_tools::vars_of_node; +use io_utils::{open_for_read, open_userfile_for_read, path_exists}; +use std::collections::HashMap; +use std::io::BufRead; +use std::time::Instant; +use string_utils::{parse_csv, TextUtils}; +use vector_utils::{bin_member, next_diff, sort_sync2, unique_sort}; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Parse joint barcode-level information file from BC_JOINT. 
+ +fn parse_bc_joint(ctl: &mut EncloneControl) -> Result<(), String> { + let bc = &ctl.gen_opt.bc_joint; + let delimiter = if bc.ends_with(".tsv") { '\t' } else { ',' }; + let n = ctl.origin_info.n(); + let mut origin_for_bc = vec![HashMap::<String, String>::new(); n]; + let mut donor_for_bc = vec![HashMap::<String, String>::new(); n]; + let mut tag = vec![HashMap::<String, String>::new(); n]; + let mut barcode_color = vec![HashMap::<String, String>::new(); n]; + let mut alt_bc_fields = vec![Vec::<(String, HashMap<String, String>)>::new(); n]; + let f = open_userfile_for_read(bc); + let mut first = true; + let mut fieldnames = Vec::<String>::new(); + let mut dataset_pos = 0; + let mut barcode_pos = 0; + let (mut origin_pos, mut donor_pos, mut tag_pos, mut color_pos) = (None, None, None, None); + let mut to_alt = Vec::<isize>::new(); + let mut to_origin_pos = HashMap::<String, usize>::new(); + for i in 0..ctl.origin_info.n() { + to_origin_pos.insert(ctl.origin_info.dataset_id[i].clone(), i); + } + for line in f.lines() { + let s = line.unwrap(); + if first { + let fields = s.split(delimiter).collect::<Vec<&str>>(); + to_alt = vec![-1_isize; fields.len()]; + if !fields.contains(&"dataset") { + return Err(format!("\nThe file\n{bc}\nis missing the dataset field.\n",)); + } + if !fields.contains(&"barcode") { + return Err(format!("\nThe file\n{bc}\nis missing the barcode field.\n",)); + } + for x in fields.iter() { + fieldnames.push(x.to_string()); + } + for (i, field) in fields.into_iter().enumerate() { + if field == "color" { + color_pos = Some(i); + } + if field == "barcode" { + barcode_pos = i; + } else if field == "dataset" { + dataset_pos = i; + } else if field == "origin" { + origin_pos = Some(i); + } else if field == "donor" { + donor_pos = Some(i); + } else if field == "tag" { + tag_pos = Some(i); + } else { + to_alt[i] = alt_bc_fields[0].len() as isize; + for li in alt_bc_fields.iter_mut().take(ctl.origin_info.n()) { + li.push((field.to_string(), 
HashMap::<String, String>::new())); + } + } + } + first = false; + } else { + let fields = s.split(delimiter).collect::<Vec<&str>>(); + if fields.len() != fieldnames.len() { + return Err(format!( + "\nThere is a line\n{}\nin {}\n\ + that has {} fields, which isn't right, because the header line \ + has {} fields.\n", + s, + bc, + fields.len(), + fieldnames.len(), + )); + } + let dataset = fields[dataset_pos].to_string(); + if !to_origin_pos.contains_key(&dataset) { + return Err(format!( + "\nIn the file\n{bc},\nthe value\n{dataset}\nis found for dataset, however that is \ + not an abbreviated dataset name.\n", + )); + } + let li = to_origin_pos[&dataset]; + for i in 0..fields.len() { + if to_alt[i] >= 0 { + alt_bc_fields[li][to_alt[i] as usize] + .1 + .insert(fields[barcode_pos].to_string(), fields[i].to_string()); + } + } + if !fields[barcode_pos].contains('-') { + return Err(format!( + "\nThe barcode \"{}\" appears in the file\n{bc}.\n\ + That doesn't make sense because a barcode\nshould include a hyphen.\n", + fields[barcode_pos], + )); + } + + if let Some(origin_pos) = origin_pos { + origin_for_bc[li].insert( + fields[barcode_pos].to_string(), + fields[origin_pos].to_string(), + ); + } + if let Some(donor_pos) = donor_pos { + donor_for_bc[li].insert( + fields[barcode_pos].to_string(), + fields[donor_pos].to_string(), + ); + } + if let Some(tag_pos) = tag_pos { + tag[li].insert(fields[barcode_pos].to_string(), fields[tag_pos].to_string()); + } + if let Some(color_pos) = color_pos { + barcode_color[li].insert( + fields[barcode_pos].to_string(), + fields[color_pos].to_string(), + ); + } + } + } + ctl.origin_info.origin_for_bc = origin_for_bc; + ctl.origin_info.donor_for_bc = donor_for_bc; + ctl.origin_info.tag = tag; + ctl.origin_info.barcode_color = barcode_color; + ctl.origin_info.alt_bc_fields = alt_bc_fields; + Ok(()) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn proc_args_post( + ctl: &mut 
EncloneControl, + args: &[String], + metas: &[String], + metaxs: &[String], + xcrs: &[String], + have_gex: bool, + gex: &str, + bc: &str, + using_plot: bool, +) -> Result<(), String> { + // Process INFO. + + let t = Instant::now(); + if ctl.gen_opt.info.is_some() { + let f = open_for_read![&ctl.gen_opt.info.as_ref().unwrap()]; + let mut lines = Vec::<String>::new(); + for line in f.lines() { + let s = line.unwrap(); + lines.push(s); + } + if lines.is_empty() { + return Err(format!( + "\nThe file {} is empty.\n", + ctl.gen_opt.info.as_ref().unwrap() + )); + } + let fields = lines[0].split(',').collect::<Vec<&str>>(); + if !fields.contains(&"vj_seq1") || !fields.contains(&"vj_seq2") { + return Err(format!( + "\nThe CSV file {} needs to have fields vj_seq1 and vj_seq2.\n", + ctl.gen_opt.info.as_ref().unwrap() + )); + } + for &field in &fields { + if field != "vj_seq1" && field != "vj_seq2" { + ctl.gen_opt.info_fields.push(field.to_string()); + ctl.gen_opt.info_fields.push(format!("log10({field})")); + } + } + let mut tags = Vec::<String>::new(); + for (i, line) in lines.iter().enumerate().skip(1) { + let vals = parse_csv(line); + if vals.len() != fields.len() { + eprintln!( + "\nINFO file line {} has length {} whereas the file has {} fields. 
\ + The line is\n{}\n", + i + 1, + vals.len(), + fields.len(), + line + ); + } + let (mut vj1, mut vj2) = (String::new(), String::new()); + let mut other = Vec::<String>::new(); + for i in 0..vals.len() { + if fields[i] == "vj_seq1" { + vj1 = vals[i].to_string(); + } else if fields[i] == "vj_seq2" { + vj2 = vals[i].to_string(); + } else { + other.push(vals[i].to_string()); + let mut log10_val = "".to_string(); + if vals[i].parse::<f64>().is_ok() { + let val = vals[i].force_f64(); + if val > 0.0 { + log10_val = format!("{:.2}", val.log10()); + } + } + other.push(log10_val); + } + } + let tag = format!("{vj1}_{vj2}"); + if ctl.gen_opt.info_resolve && ctl.gen_opt.info_data.contains_key(&tag) { + continue; + } + tags.push(tag.clone()); + sort_sync2(&mut ctl.gen_opt.info_fields, &mut other); + ctl.gen_opt.info_data.insert(tag, other); + } + tags.sort(); + let mut i = 0; + while i < tags.len() { + let j = next_diff(&tags, i); + if j - i > 1 { + return Err(format!( + "\nThe immune receptor sequence pair\n{},\n {}\nappears more than once \ + in the file {}.\n", + tags[i].before("_"), + tags[i].after("_"), + ctl.gen_opt.info.as_ref().unwrap(), + )); + } + i = j; + } + } + + // Expand ~ and ~user in output file names. + + let mut files = [ + &mut ctl.plot_opt.plot_file, + &mut ctl.gen_opt.fasta_filename, + &mut ctl.gen_opt.fasta_aa_filename, + &mut ctl.gen_opt.dref_file, + &mut ctl.parseable_opt.pout, + ]; + for f in files.iter_mut() { + tilde_expand_me(f); + } + + // Test VAR_DEF arguments for circularity. 
+ + let mut var_def_vars = Vec::<Vec<String>>::new(); + let n = ctl.gen_opt.var_def.len(); + for i in 0..n { + let x = &ctl.gen_opt.var_def[i].2; + var_def_vars.push(vars_of_node(x)); + } + let mut edges = Vec::<(usize, usize)>::new(); + for (i, vari) in ctl.gen_opt.var_def.iter().take(n).enumerate() { + for (j, varj) in var_def_vars.iter().take(n).enumerate() { + if bin_member(varj, &vari.0) { + edges.push((i, j)); + } + } + } + let mut reach = vec![vec![false; n]; n]; + loop { + let mut progress = false; + for &(i, j) in &edges { + if !reach[i][j] { + reach[i][j] = true; + progress = true; + } + for l in 0..n { + if reach[l][i] && !reach[l][j] { + reach[l][j] = true; + progress = true; + } + if reach[j][l] && !reach[i][l] { + reach[i][l] = true; + progress = true; + } + } + } + if !progress { + break; + } + } + for (i, r) in reach.into_iter().enumerate().take(n) { + if r[i] { + return Err( + "\nVAR_DEF arguments define a circular chain of dependencies.\n".to_string(), + ); + } + } + + // Substitute VAR_DEF into VAR_DEF. + + loop { + let mut progress = false; + for i in 0..n { + for (j, var_def_j) in var_def_vars.iter_mut().enumerate().take(n) { + if bin_member(var_def_j, &ctl.gen_opt.var_def[i].0) { + let sub = encode_arith(&ctl.gen_opt.var_def[i].0); + ctl.gen_opt.var_def[j].1 = ctl.gen_opt.var_def[j] + .1 + .replace(&sub, &format!("({})", ctl.gen_opt.var_def[i].1)); + ctl.gen_opt.var_def[j].2 = + build_operator_tree(&ctl.gen_opt.var_def[j].1).unwrap(); + let x = &ctl.gen_opt.var_def[j].2; + *var_def_j = vars_of_node(x); + progress = true; + } + } + } + if !progress { + break; + } + } + + // Substitute VAR_DEF into ALL_BC. + + for i in 0..ctl.gen_opt.all_bc_fields.len() { + for j in 0..ctl.gen_opt.var_def.len() { + if ctl.gen_opt.all_bc_fields[i] == ctl.gen_opt.var_def[j].0 { + ctl.gen_opt.all_bc_fields[i] = ctl.gen_opt.var_def[j].3.clone(); + break; + } + } + } + + // Sanity check grouping arguments. 
+ + if ctl.clono_group_opt.style == "asymmetric" + && (ctl.clono_group_opt.asymmetric_center.is_empty() + || ctl.clono_group_opt.asymmetric_dist_formula.is_empty() + || ctl.clono_group_opt.asymmetric_dist_bound.is_empty()) + { + return Err( + "\nIf the AGROUP option is used to specify asymmetric grouping, then all\n\ + of the options AG_CENTER, AG_DIST_FORMULA and AG_DIST_BOUND must also be \ + specified.\n" + .to_string(), + ); + } + if (!ctl.clono_group_opt.asymmetric_center.is_empty() + || !ctl.clono_group_opt.asymmetric_dist_formula.is_empty() + || !ctl.clono_group_opt.asymmetric_dist_bound.is_empty()) + && ctl.clono_group_opt.style == "symmetric" + { + return Err("\nIf any of the asymmetric grouping options AG_CENTER or \ + AG_DIST_FORMULA or\nAG_DIST_BOUND are specified, then the option AGROUP \ + must also be specified, to turn on asymmetric grouping.\n" + .to_string()); + } + if ctl.clono_group_opt.style == "asymmetric" { + if ctl.clono_group_opt.asymmetric_center != "from_filters" + && ctl.clono_group_opt.asymmetric_center != "copy_filters" + { + return Err( + "\nThe only allowed forms for AG_CENTER are AG_CENTER=from_filters\n\ + and AG_CENTER=copy_filters.\n" + .to_string(), + ); + } + if ctl.clono_group_opt.asymmetric_dist_formula != "cdr3_edit_distance" { + return Err( + "\nThe only allowed form for AG_DIST_FORMULA is cdr3_edit_distance.\n".to_string(), + ); + } + let ok1 = ctl + .clono_group_opt + .asymmetric_dist_bound + .starts_with("top=") + && ctl + .clono_group_opt + .asymmetric_dist_bound + .after("top=") + .parse::<usize>() + .is_ok(); + let ok2 = ctl + .clono_group_opt + .asymmetric_dist_bound + .starts_with("max=") + && ctl + .clono_group_opt + .asymmetric_dist_bound + .after("max=") + .parse::<f64>() + .is_ok(); + if !ok1 && !ok2 { + return Err( + "\nThe only allowed forms for AG_DIST_BOUND are top=n, where n is an\n\ + integer, and max=d, where d is a number.\n" + .to_string(), + ); + } + } + + // Sanity check other arguments (and more 
below). + + if !ctl.parseable_opt.pcols_show.is_empty() + && ctl.parseable_opt.pcols_show.len() != ctl.parseable_opt.pcols.len() + { + return Err( + "\nThe number of fields provided to PCOLS_SHOW has to match that for PCOLS.\n" + .to_string(), + ); + } + if ctl.plot_opt.split_plot_by_dataset && ctl.plot_opt.split_plot_by_origin { + return Err( + "\nOnly one of SPLIT_PLOT_BY_DATASET and SPLIT_PLOT_BY_ORIGIN can be specified.\n" + .to_string(), + ); + } + if ctl.clono_print_opt.amino.is_empty() && ctl.clono_print_opt.cvars.is_empty() { + return Err( + "\nSorry, use of both CVARS= and AMINO= (setting both to null) is not \ + supported.\n" + .to_string(), + ); + } + if ctl.parseable_opt.pchains.parse::<usize>().is_err() && ctl.parseable_opt.pchains != "max" { + return Err( + "\nThe only allowed values for PCHAINS are a positive integer and max.\n".to_string(), + ); + } + if ctl.gen_opt.align_jun_align_consistency && ctl.pretty { + return Err( + "\nIf you use ALIGN_JALIGN_CONSISTENCY, you should also use PLAIN.\n".to_string(), + ); + } + if ctl.gen_opt.gene_scan_exact && ctl.gen_opt.gene_scan_test.is_none() { + return Err( + "\nIt doesn't make sense to specify SCAN_EXIT unless SCAN is also specified.\n" + .to_string(), + ); + } + if ctl.clono_print_opt.conx && ctl.clono_print_opt.conp { + return Err("\nPlease specify at most one of CONX and CONP.\n".to_string()); + } + if ctl.clono_filt_opt.cdr3.is_some() && !ctl.clono_filt_opt.cdr3_lev.is_empty() { + return Err( + "\nPlease use the CDR3 argument to specify either a regular expression or a\n\ + Levenshtein distance pattern, but not both.\n" + .to_string(), + ); + } + if ctl.gen_opt.clustal_aa != *"" + && ctl.gen_opt.clustal_aa != *"stdout" + && !ctl.gen_opt.clustal_aa.ends_with(".tar") + { + return Err( + "\nIf the value of CLUSTAL_AA is not stdout, it must end in .tar.\n".to_string(), + ); + } + if ctl.gen_opt.clustal_dna != *"" + && ctl.gen_opt.clustal_dna != *"stdout" + && !ctl.gen_opt.clustal_dna.ends_with(".tar") + 
{ + return Err( + "\nIf the value of CLUSTAL_DNA is not stdout, it must end in .tar.\n".to_string(), + ); + } + if ctl.gen_opt.phylip_aa != *"" + && ctl.gen_opt.phylip_aa != *"stdout" + && !ctl.gen_opt.phylip_aa.ends_with(".tar") + { + return Err( + "\nIf the value of PHYLIP_AA is not stdout, it must end in .tar.\n".to_string(), + ); + } + if ctl.gen_opt.phylip_dna != *"" + && ctl.gen_opt.phylip_dna != *"stdout" + && !ctl.gen_opt.phylip_dna.ends_with(".tar") + { + return Err( + "\nIf the value of PHYLIP_DNA is not stdout, it must end in .tar.\n".to_string(), + ); + } + if ctl.clono_filt_opt_def.umi_filt && ctl.clono_filt_opt_def.umi_filt_mark { + return Err( + "\nIf you use UMI_FILT_MARK, you should also use NUMI, to turn off \ + the filter,\nas otherwise nothing will be marked.\n" + .to_string(), + ); + } + if ctl.clono_filt_opt_def.umi_ratio_filt && ctl.clono_filt_opt_def.umi_ratio_filt_mark { + return Err( + "\nIf you use UMI_RATIO_FILT_MARK, you should also use NUMI_RATIO, to turn off \ + the filter,\nas otherwise nothing will be marked.\n" + .to_string(), + ); + } + ctl.perf_stats(&t, "after main args loop 1"); + + // Process TCR, BCR and META. + + let t = Instant::now(); + check_cvars(ctl)?; + if !metas.is_empty() { + let mut v = Vec::<String>::with_capacity(metas.len()); + for meta in metas { + let f = get_path_fail(meta, ctl, "META")?; + if f.contains('/') { + let d = f.rev_before("/").to_string(); + if !ctl.gen_opt.pre.contains(&d) { + ctl.gen_opt.pre.push(d); + } + } + v.push(f); + } + proc_meta(&v, ctl)?; + } + if !metaxs.is_empty() { + let lines: Vec<_> = metaxs[metaxs.len() - 1] + .split(';') + .map(str::to_string) + .collect(); + proc_meta_core(&lines, ctl)?; + } + ctl.perf_stats(&t, "in proc_meta"); + if !xcrs.is_empty() { + let arg = &xcrs[xcrs.len() - 1]; + proc_xcr(arg, gex, bc, have_gex, ctl)?; + } + + // Process BC_JOINT. + + if !ctl.gen_opt.bc_joint.is_empty() { + parse_bc_joint(ctl)?; + } + + // More argument sanity checking. 
+ + let t = Instant::now(); + if ctl.clono_filt_opt.dataset.is_some() { + let d = &ctl.clono_filt_opt.dataset.as_ref().unwrap(); + for x in d.iter() { + if !ctl.origin_info.dataset_id.contains(x) { + return Err(format!( + "\nDATASET argument has {} in it, which is not a known \ + dataset name.\n", + *x + )); + } + } + } + let bcr_only = [ + "PEER_GROUP", + "PG_READABLE", + "PG_DIST", + "COLOR=peer", + "CONST_IGH", + "CONST_IGL", + ]; + if !ctl.gen_opt.bcr { + for arg in &args[1..] { + for x in bcr_only.iter() { + if arg == x || arg.starts_with(&format!("{x}=")) { + return Err(format!("\nThe option {x} does not make sense for TCR.\n")); + } + } + } + } + + // Proceed. + + for i in 0..ctl.origin_info.n() { + let (mut cells_cr, mut rpc_cr) = (None, None); + if ctl.gen_opt.internal_run { + let p = &ctl.origin_info.dataset_path[i]; + let mut f = format!("{p}/metrics_summary_csv.csv"); + if !path_exists(&f) { + f = format!("{p}/metrics_summary.csv"); + } + if path_exists(&f) { + let f = open_userfile_for_read(&f); + let mut count = 0; + let (mut cells_field, mut rpc_field) = (None, None); + for line in f.lines() { + count += 1; + let s = line.unwrap(); + let fields = parse_csv(&s); + for (i, x) in fields.iter().enumerate() { + if count == 1 { + if *x == "Estimated Number of Cells" { + cells_field = Some(i); + } else if *x == "Mean Read Pairs per Cell" { + rpc_field = Some(i); + } + } else if count == 2 { + if Some(i) == cells_field { + let mut n = x.to_string(); + if n.contains('\"') { + n = n.between("\"", "\"").to_string(); + } + n = n.replace(',', ""); + cells_cr = Some(n.force_usize()); + } else if Some(i) == rpc_field { + let mut n = x.to_string(); + if n.contains('\"') { + n = n.between("\"", "\"").to_string(); + } + n = n.replace(',', ""); + rpc_cr = Some(n.force_usize()); + } + } + } + } + } + } + ctl.origin_info.cells_cellranger.push(cells_cr); + ctl.origin_info + .mean_read_pairs_per_cell_cellranger + .push(rpc_cr); + } + if ctl.plot_opt.plot_by_isotype { + if 
using_plot || ctl.plot_opt.use_legend { + return Err("\nPLOT_BY_ISOTYPE cannot be used with PLOT or LEGEND.\n".to_string()); + } + if !ctl.gen_opt.bcr { + return Err("\nPLOT_BY_ISOTYPE can only be used with BCR data.\n".to_string()); + } + if ctl.plot_opt.plot_by_mark { + return Err( + "\nPLOT_BY_ISOTYPE and PLOT_BY_MARK cannot be used together.\n".to_string(), + ); + } + } + if ctl.plot_opt.plot_by_mark && (using_plot || ctl.plot_opt.use_legend) { + return Err("\nPLOT_BY_MARK cannot be used with PLOT or LEGEND.\n".to_string()); + } + if ctl.parseable_opt.pbarcode && ctl.parseable_opt.pout.is_empty() { + return Err( + "\nIt does not make sense to specify PCELL unless POUT is also specified.\n" + .to_string(), + ); + } + let mut donors = Vec::<String>::new(); + let mut origins = Vec::<String>::new(); + let mut tags = Vec::<String>::new(); + let mut origin_for_bc = Vec::<String>::new(); + let mut donor_for_bc = Vec::<String>::new(); + for i in 0..ctl.origin_info.n() { + for x in ctl.origin_info.origin_for_bc[i].iter() { + origins.push(x.1.clone()); + origin_for_bc.push(x.1.clone()); + } + for x in ctl.origin_info.donor_for_bc[i].iter() { + donors.push(x.1.clone()); + donor_for_bc.push(x.1.clone()); + } + for x in ctl.origin_info.tag[i].iter() { + tags.push((x.1).clone()); + } + donors.push(ctl.origin_info.donor_id[i].clone()); + origins.push(ctl.origin_info.origin_id[i].clone()); + } + unique_sort(&mut donors); + unique_sort(&mut origins); + unique_sort(&mut tags); + unique_sort(&mut origin_for_bc); + unique_sort(&mut donor_for_bc); + ctl.origin_info.donors = donors.len(); + ctl.origin_info.dataset_list = ctl.origin_info.dataset_id.clone(); + unique_sort(&mut ctl.origin_info.dataset_list); + ctl.origin_info.origin_list = origins.clone(); + ctl.origin_info.donor_list = donors.clone(); + ctl.origin_info.tag_list = tags; + for i in 0..ctl.origin_info.donor_for_bc.len() { + if !ctl.origin_info.donor_for_bc[i].is_empty() { + ctl.clono_filt_opt_def.donor = true; + } + } + 
ctl.perf_stats(&t, "after main args loop 2"); + proc_args_tail(ctl, args)?; + + // Sort chains_to_align. + + unique_sort(&mut ctl.gen_opt.chains_to_align); + unique_sort(&mut ctl.gen_opt.chains_to_align2); + unique_sort(&mut ctl.gen_opt.chains_to_jun_align); + unique_sort(&mut ctl.gen_opt.chains_to_jun_align2); + + // Check for invalid variables in linear conditions. + + for i in 0..ctl.clono_filt_opt.bounds.len() { + ctl.clono_filt_opt.bounds[i].require_valid_variables(ctl)?; + } + if ctl.gen_opt.gene_scan_test.is_some() { + ctl.gen_opt + .gene_scan_test + .as_ref() + .unwrap() + .require_valid_variables(ctl)?; + ctl.gen_opt + .gene_scan_control + .as_ref() + .unwrap() + .require_valid_variables(ctl)?; + } + Ok(()) +} diff --git a/enclone_args/src/process_special_arg1.rs b/enclone_args/src/process_special_arg1.rs new file mode 100644 index 000000000..a4e9eba98 --- /dev/null +++ b/enclone_args/src/process_special_arg1.rs @@ -0,0 +1,586 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Process a special argument, i.e. one that does not fit into a neat bucket. + +use crate::proc_args2::test_writeable; +use crate::proc_args2::{is_simple_arg, is_usize_arg}; +use enclone_core::cell_color::{ + CellColor, ColorByCategoricalVariableValue, ColorByDataset, ColorByVariableValue, +}; +use enclone_core::defs::EncloneControl; +use enclone_core::tilde_expand_me; +use enclone_vars::encode_arith; +use evalexpr::build_operator_tree; +use expr_tools::test_functions_in_node; +use io_utils::path_exists; +use itertools::Itertools; +use std::fmt::Write; +use std::fs::{read_to_string, remove_file, File}; +use string_utils::TextUtils; +use vector_utils::{unique_sort, VecUtils}; + +pub fn process_special_arg1( + arg: &str, + ctl: &mut EncloneControl, + _metas: &mut [String], + _metaxs: &mut [String], + _xcrs: &mut [String], + _using_plot: &mut bool, +) -> Result<bool, String> { + // Process the argument. + + if is_simple_arg(arg, "SEQ")? 
{ + ctl.join_print_opt.seq = true; + + // Not movable. + } else if arg.starts_with("PG_DIST=") { + let dist = arg.after("PG_DIST="); + if dist != "MFL" { + return Err("\nCurrently the only allowed value for PG_DIST is MFL.\n".to_string()); + } + ctl.gen_opt.peer_group_dist = dist.to_string(); + } else if is_simple_arg(arg, "H5")? { + ctl.gen_opt.force_h5 = true; + } else if is_simple_arg(arg, "NH5")? { + ctl.gen_opt.force_h5 = false; + } else if arg == "LEGEND" { + ctl.plot_opt.use_legend = true; + } else if arg == "MAX_HEAVIES=1" { + ctl.gen_opt.max_heavies = 1; + } else if arg.starts_with("ALIGN_2ND") { + let n = arg.after("ALIGN_2ND"); + if n.parse::<usize>().is_err() || n.force_usize() == 0 { + return Err(format!("\nArgument {arg} is not properly specified.\n")); + } + ctl.gen_opt.chains_to_align2.push(n.force_usize()); + } else if arg.starts_with("ALIGN") { + let n = arg.after("ALIGN"); + if n.parse::<usize>().is_err() || n.force_usize() == 0 { + return Err(format!("\nArgument {arg} is not properly specified.\n")); + } + ctl.gen_opt.chains_to_align.push(n.force_usize()); + } else if arg.starts_with("GROUP_DONOR=") { + ctl.clono_group_opt + .donor + .push(arg.after("GROUP_DONOR=").to_string()); + } else if arg.starts_with("JALIGN_2ND") { + let n = arg.after("JALIGN_2ND"); + if n.parse::<usize>().is_err() || n.force_usize() == 0 { + return Err(format!("\nArgument {arg} is not properly specified.\n")); + } + ctl.gen_opt.chains_to_jun_align2.push(n.force_usize()); + } else if arg.starts_with("ALL_BC=") || arg.starts_with("ALL_BCH=") { + let parts; + if arg.starts_with("ALL_BC=") { + parts = arg.after("ALL_BC=").split(',').collect::<Vec<&str>>(); + } else { + parts = arg.after("ALL_BCH=").split(',').collect::<Vec<&str>>(); + ctl.gen_opt.all_bc_human = true; + } + if parts.is_empty() || parts[0].is_empty() { + return Err( + "\nFor ALL_BC/ALL_BCH, at a minimum, a filename must be provided.\n".to_string(), + ); + } + if !ctl.gen_opt.all_bc_filename.is_empty() { + 
return Err("\nThe argument ALL_BC/ALL_BCH may only be used once.\n".to_string()); + } + ctl.gen_opt.all_bc_filename = parts[0].to_string(); + test_writeable(&ctl.gen_opt.all_bc_filename, ctl.gen_opt.evil_eye)?; + ctl.gen_opt + .all_bc_fields + .extend(parts.into_iter().skip(1).map(str::to_string)); + ctl.gen_opt.all_bc_fields_orig = ctl.gen_opt.all_bc_fields.clone(); + } else if arg.starts_with("STATE_NARRATIVE=") { + let mut narrative = arg.after("STATE_NARRATIVE=").to_string(); + if narrative.starts_with('@') { + let filename = narrative.after("@"); + if !path_exists(filename) { + return Err( + "\nThe file referenced by your STATE_NARRATIVE argument could not be found.\n" + .to_string(), + ); + } + narrative = read_to_string(filename).unwrap(); + ctl.gen_opt.state_narrative = narrative; + } + } else if arg.starts_with("SESSION_NARRATIVE=") { + let mut narrative = arg.after("SESSION_NARRATIVE=").to_string(); + if narrative.starts_with('@') { + let filename = narrative.after("@"); + if !path_exists(filename) { + return Err( + "\nThe file referenced by your SESSION_NARRATIVE argument could not be found.\n" + .to_string() + ); + } + narrative = read_to_string(filename).unwrap(); + ctl.gen_opt.session_narrative = narrative; + } + } else if arg.starts_with("JOIN_BASIC=") { + let val = arg.after("JOIN_BASIC="); + if val.parse::<f64>().is_err() || val.force_f64() < 0.0 || val.force_f64() > 100.0 { + return Err( + "\nArgument to JOIN_BASIC needs to be a number between 0 and 100.\n".to_string(), + ); + } + ctl.join_alg_opt.basic = Some(val.force_f64()); + } else if arg.starts_with("JOIN_BASIC_H=") { + let val = arg.after("JOIN_BASIC_H="); + if val.parse::<f64>().is_err() || val.force_f64() < 0.0 || val.force_f64() > 100.0 { + return Err( + "\nArgument to JOIN_BASIC_H needs to be a number between 0 and 100.\n".to_string(), + ); + } + ctl.join_alg_opt.basic_h = Some(val.force_f64()); + } else if arg.starts_with("JOIN_CDR3_IDENT=") { + let val = arg.after("JOIN_CDR3_IDENT="); 
+ if val.parse::<f64>().is_err() || val.force_f64() < 0.0 || val.force_f64() > 100.0 { + return Err( + "\nArgument to JOIN_CDR3_IDENT needs to be a number between 0 and 100.\n" + .to_string(), + ); + } + ctl.join_alg_opt.join_cdr3_ident = val.force_f64(); + } else if arg.starts_with("FWR1_CDR12_DELTA=") { + let val = arg.after("FWR1_CDR12_DELTA="); + if val.parse::<f64>().is_err() || val.force_f64() < 0.0 || val.force_f64() > 100.0 { + return Err( + "\nArgument to FWR1_CDR12_DELTA needs to be a number between 0 and 100.\n" + .to_string(), + ); + } + ctl.join_alg_opt.fwr1_cdr12_delta = val.force_f64(); + } else if arg.starts_with("DATASET=") { + let d = arg + .after("DATASET=") + .split('|') + .map(str::to_owned) + .collect(); + ctl.clono_filt_opt.dataset = Some(d); + } else if arg.starts_with("HONEY=") { + let parts = { + let mut parts = Vec::<Vec<&str>>::new(); + let subparts = arg.after("HONEY=").split(',').collect::<Vec<&str>>(); + if subparts.is_empty() || !subparts[0].contains('=') { + return Err("\nSyntax for HONEY=... 
is incorrect.\n".to_string()); + } + let mut part = Vec::<&str>::new(); + for subpart in subparts { + if subpart.contains('=') && !part.is_empty() { + parts.push(part.clone()); + part.clear(); + } + part.push(subpart); + } + if !part.is_empty() { + parts.push(part); + } + parts + }; + ctl.plot_opt.use_legend = true; + let mut out_count = 0; + let mut legend_count = 0; + let mut color_count = 0; + let (mut min, mut max) = (None, None); + let (mut var, mut display_var) = (String::new(), String::new()); + let mut schema = String::new(); + for p in parts { + let mut p = p.clone(); + let part_name = p[0].before("="); + p[0] = p[0].after("="); + let err = format!( + "\nUnrecognized {} specification {}.\n", + part_name, + p.iter().format(",") + ); + if part_name == "out" { + if p.len() > 2 { + return Err(err); + } + let filename = p[0]; + if p.len() == 2 { + if let Ok(pi) = p[1].parse::<usize>() { + ctl.plot_opt.png_width = Some(pi); + if !filename.ends_with(".png") { + return Err("\nWidth specification for the HONEY argument only \ + makes sense if the filename ends with .png.\n" + .to_string()); + } + } + } + if filename != "stdout" + && filename != "stdout.png" + && filename != "gui" + && !filename.ends_with(".svg") + && !filename.ends_with(".png") + { + return Err( + "\nHONEY out filename needs to end with .svg or .png.\n".to_string() + ); + } + ctl.plot_opt.plot_file = filename.to_string(); + out_count += 1; + } else if part_name == "legend" { + if p.solo() && p[0] == "none" { + ctl.plot_opt.use_legend = false; + legend_count += 1; + } else { + return Err(err); + } + } else if part_name == "color" { + color_count += 1; + if p.len() == 1 && p[0] == "dataset" { + schema = "dataset".to_string(); + let v = ColorByDataset {}; + let cc = CellColor::ByDataset(v); + ctl.plot_opt.cell_color = cc; + } else if p[0] == "catvar" { + if p.len() != 3 { + return Err(err); + } + let vars = p[1].split('+').map(str::to_owned).collect(); + if !p[2].starts_with("maxcat:") + || 
p[2].after("maxcat:").parse::<usize>().is_err() + || p[2].after("maxcat:").force_usize() == 0 + { + return Err(err); + } + let n = p[2].after("maxcat:").force_usize(); + + let v = ColorByCategoricalVariableValue { vars, maxcat: n }; + let cc = CellColor::ByCategoricalVariableValue(v); + ctl.plot_opt.cell_color = cc; + } else { + if p[0] != "var" || p.len() < 2 { + return Err(err); + } + schema = "variable".to_string(); + var = p[1].to_string(); + display_var = var.clone(); + if var.contains(':') { + display_var = var.before(":").to_string(); + var = var.after(":").to_string(); + } + if p.len() >= 3 && !p[2].is_empty() && p[2] != "turbo" { + return Err(err); + } + if p.len() >= 4 { + let scale = &p[3..]; + if !scale.is_empty() && scale[0] != "minmax" { + return Err(err); + } + if scale.len() >= 2 { + if scale[1].parse::<f64>().is_err() { + return Err(err); + } + min = Some(scale[1].force_f64()); + } + if scale.len() >= 3 { + if scale[2].parse::<f64>().is_err() { + return Err(err); + } + max = Some(scale[2].force_f64()); + } + if min.is_some() && max.is_some() && min >= max { + return Err(err); + } + } + } + } else { + return Err(format!("\nUnrecognized specification {part_name}=....\n")); + } + } + if out_count == 0 { + return Err("\nHONEY=... must specify out=....\n".to_string()); + } + if out_count > 1 { + return Err("\nHONEY=... must specify out=... only once.\n".to_string()); + } + if legend_count > 1 { + return Err("\nHONEY=... may specify legend=... only once.\n".to_string()); + } + if color_count == 0 { + return Err("\nHONEY=... must specify color=....\n".to_string()); + } + if color_count > 1 { + return Err("\nHONEY=... must specify color=... 
only once.\n".to_string()); + } + if schema == "dataset" { + let v = ColorByDataset {}; + let cc = CellColor::ByDataset(v); + ctl.plot_opt.cell_color = cc; + } else if schema == "variable" { + let v = ColorByVariableValue { + var, + display_var, + min, + max, + }; + let cc = CellColor::ByVariableValue(v); + ctl.plot_opt.cell_color = cc; + } + } else if arg.starts_with("VAR_DEF=") { + let val = arg.after("VAR_DEF="); + if !val.contains(':') { + return Err(format!("\nCould not find : in {arg}.\n")); + } + let name = val.before(":"); + let expr = val.after(":"); + let eval = encode_arith(expr); + let compiled = build_operator_tree(&eval); + if compiled.is_err() { + return Err(format!( + "\nUnable to represent \"{expr}\" as a valid expression. You might \ + check the following:\n\ + • arithmetic operators + - * / must have a blank on both sides\n\ + • parentheses must be balanced\n", + )); + } + let compiled = compiled.unwrap(); + let res = test_functions_in_node(&compiled); + if res.is_err() { + let err = res.as_ref().err().unwrap(); + return Err(format!( + "\n{err}\nYou might check the following:\n\ + • arithmetic operators + - * / must have a blank on both sides\n", + )); + } + ctl.gen_opt + .var_def + .push((name.to_string(), eval, compiled, expr.to_string())); + } else if arg.starts_with("MIN_DONORS=") { + let n = arg.after("MIN_DONORS="); + if n.parse::<usize>().is_err() || n.force_usize() == 0 { + return Err(format!("\nArgument {arg} is not properly specified.\n")); + } + let n = n.force_usize(); + ctl.clono_filt_opt.min_donors = n; + if n >= 2 { + ctl.clono_filt_opt_def.donor = true; + } + } else if arg.starts_with("JALIGN") { + let n = arg.after("JALIGN"); + if n.parse::<usize>().is_err() || n.force_usize() == 0 { + return Err(format!("\nArgument {arg} is not properly specified.\n")); + } + ctl.gen_opt.chains_to_jun_align.push(n.force_usize()); + } else if arg.starts_with("FB_SHOW=") { + let fields = arg.after("FB_SHOW=").split(','); + let mut found_k = false; 
+ let mut ok = true; + for field in fields { + if field.parse::<usize>().is_ok() { + if found_k { + return Err("\nFB_SHOW argument contains more than one integer.\n".to_string()); + } + found_k = true; + } else { + if field.len() != 15 { + ok = false; + } + for c in field.chars() { + if c != 'A' && c != 'C' && c != 'G' && c != 'T' { + ok = false; + } + } + } + } + if !ok { + return Err("\nFB_SHOW argument must be a comma-separated list \ + containing at most one nonnegative integer and zero or more DNA \ + sequences of length 15 (in the alphabet A,C,G,T).\n" + .to_string()); + } + ctl.gen_opt.fb_show = arg.after("FB_SHOW=").to_string(); + } else if arg.starts_with("POUT=") { + let val = arg.after("POUT="); + ctl.parseable_opt.pout = val.to_string(); + tilde_expand_me(&mut ctl.parseable_opt.pout); + if val != "stdout" && val != "stdouth" && val != "/dev/null" { + test_writeable(val, ctl.gen_opt.evil_eye)?; + } + } else if arg.starts_with("SIM_MAT_PLOT=") { + let fields = arg.after("SIM_MAT_PLOT=").split(',').collect::<Vec<&str>>(); + if fields.len() < 2 { + return Err( + "\nSIM_MAT_PLOT requires at least two comma-separated arguments.\n".to_string(), + ); + } + let mut val = fields[0].to_string(); + tilde_expand_me(&mut val); + ctl.plot_opt.sim_mat_plot_file = val.clone(); + if val != "stdout" && val != "stdouth" && val != "gui" { + let f = File::create(&val); + if f.is_err() { + let mut emsg = + format!("\nYou've specified an output file\n{val}\nthat cannot be written.\n"); + if val.contains('/') { + let dir = val.rev_before("/"); + let msg = if path_exists(dir) { + "exists" + } else { + "does not exist" + }; + writeln!(emsg, "Note that the path {dir} {msg}.").unwrap(); + } + return Err(emsg); + } + remove_file(&val).unwrap_or_else(|_| panic!("could not remove file {val}")); + } + ctl.plot_opt.sim_mat_plot_vars.clear(); + ctl.plot_opt + .sim_mat_plot_vars + .extend(fields.into_iter().skip(1).map(str::to_string)); + } else if arg.starts_with("G=") { + let mut x = 
Vec::<usize>::new(); + if arg != "G=all" { + let s = arg.after("G=").split(','); + let mut ok = false; + for si in s { + if si.parse::<usize>().is_ok() { + let n = si.force_usize(); + if n >= 1 { + x.push(n); + ok = true; + } + } else if let Some((a, b)) = si.split_once('-') { + if a.parse::<usize>().is_ok() && b.parse::<usize>().is_ok() { + let (a, b) = (a.force_usize(), b.force_usize()); + if 1 <= a && a <= b { + for j in a..=b { + x.push(j); + } + ok = true; + } + } + } + if !ok { + return Err( + "\nArgument to G= must be a comma separated list of positive integers or \ + hyphenated rangers of positive integers or all.\n" + .to_string(), + ); + } + } + unique_sort(&mut x); + } + ctl.gen_opt.group_post_filter = Some(x); + } else if arg.starts_with("PLOTXY_EXACT=") { + let fields = arg.after("PLOTXY_EXACT=").split(',').collect::<Vec<&str>>(); + if fields.len() != 3 && fields.len() != 4 { + return Err( + "\nPLOTXY_EXACT requires three or four comma-separated arguments.\n".to_string(), + ); + } + if fields.len() == 4 && fields[3] != "sym" { + return Err( + "\nIf four arguments are supplied to PLOTXY_EXACT, then the fourth argument \ + must be sym.\n" + .to_string(), + ); + } + ctl.plot_opt.plot_xy_sym = fields.len() == 4; + if fields[0].is_empty() || fields[1].is_empty() || fields[2].is_empty() { + return Err("\nArguments to PLOTXY_EXACT must be non-null.\n".to_string()); + } + let mut xvar = fields[0].to_string(); + let mut yvar = fields[1].to_string(); + if xvar.starts_with("log10(") && xvar.ends_with(')') { + xvar = xvar.between("log10(", ")").to_string(); + ctl.plot_opt.plot_xy_x_log10 = true; + } + if yvar.starts_with("log10(") && yvar.ends_with(')') { + yvar = yvar.between("log10(", ")").to_string(); + ctl.plot_opt.plot_xy_y_log10 = true; + } + ctl.plot_opt.plot_xy_xvar = xvar; + ctl.plot_opt.plot_xy_yvar = yvar; + let mut val = fields[2].to_string(); + tilde_expand_me(&mut val); + ctl.plot_opt.plot_xy_filename = val.clone(); + if val != "stdout" && val != 
"stdouth" && val != "gui" { + let f = File::create(&val); + if f.is_err() { + let mut emsg = + format!("\nYou've specified an output file\n{val}\nthat cannot be written.\n"); + if val.contains('/') { + let dir = val.rev_before("/"); + let msg = if path_exists(dir) { + "exists" + } else { + "does not exist" + }; + writeln!(emsg, "Note that the path {dir} {msg}.").unwrap(); + } + return Err(emsg); + } + remove_file(&val).unwrap_or_else(|_| panic!("could not remove file {val}")); + } + } else if is_usize_arg(arg, "REQUIRED_FPS")? { + ctl.gen_opt.required_fps = Some(arg.after("REQUIRED_FPS=").force_usize()); + } else if is_usize_arg(arg, "REQUIRED_CELLS")? { + ctl.gen_opt.required_cells = Some(arg.after("REQUIRED_CELLS=").force_usize()); + } else if is_usize_arg(arg, "REQUIRED_DONORS")? { + ctl.gen_opt.required_donors = Some(arg.after("REQUIRED_DONORS=").force_usize()); + } else if is_usize_arg(arg, "REQUIRED_CLONOTYPES")? { + ctl.gen_opt.required_clonotypes = Some(arg.after("REQUIRED_CLONOTYPES=").force_usize()); + } else if is_usize_arg(arg, "REQUIRED_TWO_CELL_CLONOTYPES")? { + ctl.gen_opt.required_two_cell_clonotypes = + Some(arg.after("REQUIRED_TWO_CELL_CLONOTYPES=").force_usize()); + } else if is_usize_arg(arg, "REQUIRED_TWO_CHAIN_CLONOTYPES")? { + ctl.gen_opt.required_two_chain_clonotypes = + Some(arg.after("REQUIRED_TWO_CHAIN_CLONOTYPES=").force_usize()); + } else if is_usize_arg(arg, "REQUIRED_THREE_CHAIN_CLONOTYPES")? { + ctl.gen_opt.required_three_chain_clonotypes = + Some(arg.after("REQUIRED_THREE_CHAIN_CLONOTYPES=").force_usize()); + } else if is_usize_arg(arg, "REQUIRED_FOUR_CHAIN_CLONOTYPES")? { + ctl.gen_opt.required_four_chain_clonotypes = + Some(arg.after("REQUIRED_FOUR_CHAIN_CLONOTYPES=").force_usize()); + } else if is_usize_arg(arg, "REQUIRED_DATASETS")? { + ctl.gen_opt.required_datasets = Some(arg.after("REQUIRED_DATASETS=").force_usize()); + } else if is_usize_arg(arg, "EXACT")? 
{ + ctl.gen_opt.exact = Some(arg.after("EXACT=").force_usize()); + } else if is_usize_arg(arg, "MIN_CHAINS")? { + ctl.clono_filt_opt.min_chains = arg.after("MIN_CHAINS=").force_usize(); + } else if is_usize_arg(arg, "MAX_CHAINS")? { + ctl.clono_filt_opt.max_chains = arg.after("MAX_CHAINS=").force_usize(); + } else if is_usize_arg(arg, "MIN_CELLS")? { + ctl.clono_filt_opt.ncells_low = arg.after("MIN_CELLS=").force_usize(); + } else if is_usize_arg(arg, "MAX_CELLS")? { + ctl.clono_filt_opt.ncells_high = arg.after("MAX_CELLS=").force_usize(); + } else if arg.starts_with("EXFASTA=") { + ctl.gen_opt.fasta = arg.after("EXFASTA=").to_string(); + } else if arg.starts_with("FASTA=") { + ctl.gen_opt.fasta_filename = arg.after("FASTA=").to_string(); + } else if arg.starts_with("FASTA_AA=") { + ctl.gen_opt.fasta_aa_filename = arg.after("FASTA_AA=").to_string(); + + // Other. + } else if arg == "AGROUP" { + if ctl.clono_group_opt.style == "symmetric" { + return Err( + "\nSymmetric and asymmetric grouping options cannot both be specified.\n" + .to_string(), + ); + } + ctl.clono_group_opt.style = "asymmetric".to_string(); + } else if arg == "GROUP_VJ_REFNAME" { + ctl.clono_group_opt.style = "symmetric".to_string(); + ctl.clono_group_opt.vj_refname = true; + } else if arg == "GROUP_VJ_REFNAME_HEAVY" { + ctl.clono_group_opt.style = "symmetric".to_string(); + ctl.clono_group_opt.vj_heavy_refname = true; + } else if arg == "GROUP_VDJ_REFNAME_HEAVY" { + ctl.clono_group_opt.style = "symmetric".to_string(); + ctl.clono_group_opt.vdj_heavy_refname = true; + } else if arg == "GROUP_VJ_REFNAME_STRONG" { + ctl.clono_group_opt.style = "symmetric".to_string(); + ctl.clono_group_opt.vj_refname = true; + ctl.clono_group_opt.vj_len = true; + ctl.clono_group_opt.cdr3_len = true; + } else { + return Ok(false); + } + Ok(true) +} diff --git a/enclone_args/src/process_special_arg2.rs b/enclone_args/src/process_special_arg2.rs new file mode 100644 index 000000000..65ef7b695 --- /dev/null +++ 
b/enclone_args/src/process_special_arg2.rs @@ -0,0 +1,687 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Process a special argument, i.e. one that does not fit into a neat bucket. + +use crate::proc_args2::{is_f64_arg, is_usize_arg}; +use enclone_core::defs::EncloneControl; +use enclone_core::linear_condition::LinearCondition; +use enclone_core::{require_readable_file, tilde_expand_me}; +use evalexpr::build_operator_tree; +use io_utils::open_for_read; +use regex::Regex; +use std::io::BufRead; +use string_utils::{parse_csv, TextUtils}; +use vector_utils::unique_sort; + +pub fn process_special_arg2( + arg: &str, + ctl: &mut EncloneControl, + metas: &mut Vec<String>, + metaxs: &mut Vec<String>, + xcrs: &mut Vec<String>, + using_plot: &mut bool, +) -> Result<(), String> { + // Process the argument. + + if arg.starts_with("GROUP=") { + if ctl.clono_group_opt.style == "asymmetric" { + return Err( + "\nSymmetric and asymmetric grouping options cannot both be specified.\n" + .to_string(), + ); + } + ctl.clono_group_opt.style = "symmetric".to_string(); + let c = arg.after("GROUP=").split(','); + for x in c { + if x == "vj_refname" { + ctl.clono_group_opt.vj_refname = true; + } else if x == "v_heavy_refname" { + ctl.clono_group_opt.v_heavy_refname = true; + } else if x == "vj_heavy_refname" { + ctl.clono_group_opt.vj_heavy_refname = true; + } else if x == "vdj_refname" { + ctl.clono_group_opt.vdj_refname = true; + } else if x == "vdj_heavy_refname" { + ctl.clono_group_opt.vdj_heavy_refname = true; + } else if x == "len" { + ctl.clono_group_opt.vj_len = true; + } else if x == "cdr3_len" { + ctl.clono_group_opt.cdr3_len = true; + } else if x == "cdr3_heavy_len" { + ctl.clono_group_opt.cdr3_heavy_len = true; + } else if x == "cdr3_light_len" { + ctl.clono_group_opt.cdr3_light_len = true; + } else if x.starts_with("cdr3_aa_heavy≥") && x.contains('@') { + let x = x.after("cdr3_aa_heavy≥"); + if !x.contains("%:h:@") || 
x.before("%:h:@").parse::<f64>().is_err() { + return Err("\nIllegal cdr3_aa_heavy≥n%:h:@f argument in GROUP.\n".to_string()); + } + let val = x.before("%:h:@").force_f64(); + let f = x.after("%:h:@"); + require_readable_file(f, "GROUP")?; + let mut m = Vec::<Vec<f64>>::new(); + let ff = open_for_read![&f]; + for line in ff.lines() { + let mut s = line.unwrap(); + let sb = s.replace(' ', ""); + if sb == "ACDEFGHIKLMNPQRSTVWY" { + continue; + } + if s.len() > 2 && s.as_bytes()[0] >= b'A' { + s = s[2..].to_string(); + } + let fields = s.split(' ').collect::<Vec<&str>>(); + if fields.len() != 20 { + return Err("\nIllegal cdr3_aa_heavy≥n%:h:@f argument in GROUP: \ + file does not meet requirements.\n" + .to_string()); + } + let row = fields + .into_iter() + .map(|field| { + field.parse::<f64>().map_err(|_| { + "\nIllegal cdr3_aa_heavy≥n%:h:@f argument in GROUP: \ + file does not meet requirements.\n" + .to_string() + }) + }) + .collect::<Result<Vec<_>, _>>()?; + m.push(row); + } + if m.len() != 20 { + return Err("\nIllegal cdr3_aa_heavy≥n%:h:@f argument in GROUP: \ + file does not meet requirements.\n" + .to_string()); + } + ctl.clono_group_opt.cdr3_heavy_pc_hf = Some((val, m)); + } else if x.starts_with("≥light") && x.ends_with('%') { + let val = x.after("≥").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.light_pc = Some(val.force_f64()); + } else if x.starts_with("≥aa_light") && x.ends_with('%') { + let val = x.after("≥").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for aa_light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.aa_light_pc = Some(val.force_f64()); + } else if x.starts_with("light>=") && x.ends_with('%') { + let val = x.after(">=").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.light_pc = Some(val.force_f64()); + } else 
if x.starts_with("aa_light>=") && x.ends_with('%') { + let val = x.after(">=").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for aa_light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.aa_light_pc = Some(val.force_f64()); + } else if x.starts_with("light⩾") && x.ends_with('%') { + let val = x.after("⩾").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.light_pc = Some(val.force_f64()); + } else if x.starts_with("aa_light⩾") && x.ends_with('%') { + let val = x.after("⩾").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for aa_light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.aa_light_pc = Some(val.force_f64()); + } else if x.starts_with("heavy≥") && x.ends_with('%') { + let val = x.after("≥").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.heavy_pc = Some(val.force_f64()); + } else if x.starts_with("aa_heavy≥") && x.ends_with('%') { + let val = x.after("≥").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for aa_heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.aa_heavy_pc = Some(val.force_f64()); + } else if x.starts_with("heavy>=") && x.ends_with('%') { + let val = x.after(">=").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.heavy_pc = Some(val.force_f64()); + } else if x.starts_with("aa_heavy>=") && x.ends_with('%') { + let val = x.after(">=").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for aa_heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.aa_heavy_pc = Some(val.force_f64()); + } else if x.starts_with("heavy⩾") && x.ends_with('%') { + let val = x.after("⩾").rev_before("%"); + if val.parse::<f64>().is_err() 
{ + return Err("\nIllegal value for heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.heavy_pc = Some(val.force_f64()); + } else if x.starts_with("aa_heavy⩾") && x.ends_with('%') { + let val = x.after("⩾").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for aa_heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.aa_heavy_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_light≥") && x.ends_with('%') { + let val = x.after("≥").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_light_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_aa_light≥") && x.ends_with('%') { + let val = x.after("≥").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_aa_light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_aa_light_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_light>=") && x.ends_with('%') { + let val = x.after(">=").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_light_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_aa_light>=") && x.ends_with('%') { + let val = x.after(">=").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_aa_light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_aa_light_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_light⩾") && x.ends_with('%') { + let val = x.after("⩾").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_light in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_light_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_aa_light⩾") && x.ends_with('%') { + let val = x.after("⩾").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_aa_light 
in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_aa_light_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_heavy≥") && x.ends_with('%') { + let val = x.after("≥").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_heavy_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_aa_heavy≥") && x.ends_with('%') { + let val = x.after("≥").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_aa_heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_aa_heavy_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_heavy>=") && x.ends_with('%') { + let val = x.after(">=").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_heavy_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_aa_heavy>=") && x.ends_with('%') { + let val = x.after(">=").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_aa_heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_aa_heavy_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_heavy⩾") && x.ends_with('%') { + let val = x.after("⩾").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_heavy_pc = Some(val.force_f64()); + } else if x.starts_with("cdr3_aa_heavy⩾") && x.ends_with('%') { + let val = x.after("⩾").rev_before("%"); + if val.parse::<f64>().is_err() { + return Err("\nIllegal value for cdr3_aa_heavy in GROUP.\n".to_string()); + } + ctl.clono_group_opt.cdr3_aa_heavy_pc = Some(val.force_f64()); + } else { + return Err(format!("\nUnrecognized condition {x} in GROUP argument.\n")); + } + } + } else if arg.starts_with("DIFF_STYLE=") { + ctl.gen_opt.diff_style = arg.after("=").to_string(); + if 
ctl.gen_opt.diff_style != "C1" && ctl.gen_opt.diff_style != "C2" { + return Err("\nThe only allowed values for DIFF_STYLE are C1 and C2.\n".to_string()); + } + } else if arg.starts_with("COLOR=") { + ctl.gen_opt.color = arg.after("COLOR=").to_string(); + if ctl.gen_opt.color != *"codon" + && ctl.gen_opt.color != *"codon-diffs" + && ctl.gen_opt.color != *"property" + { + let mut ok = false; + if arg.starts_with("COLOR=peer.") { + let pc = arg.after("COLOR=peer."); + if pc.parse::<f64>().is_ok() { + let pc = pc.force_f64(); + if (0.0..=100.0).contains(&pc) { + ok = true; + ctl.gen_opt.color_by_rarity_pc = pc; + } + } + } + if !ok { + return Err( + "\nThe specified value for COLOR is not allowed. Please see \ + \"enclone help color\".\n" + .to_string(), + ); + } + } + } else if arg == "TREE" { + ctl.gen_opt.tree_on = true; + } else if arg == "TREE=const" { + // this is for backward compatibility + ctl.gen_opt.tree_on = true; + ctl.gen_opt.tree.push("const1".to_string()); + } else if arg.starts_with("TREE=") { + ctl.gen_opt.tree_on = true; + let p = arg.after("TREE=").split(','); + for pi in p { + ctl.gen_opt.tree.push(pi.to_string()); + } + } else if arg.starts_with("FCELL=") // FCELL retained for backward compatibility + || arg.starts_with("KEEP_CELL_IF") + { + let condition = if arg.starts_with("FCELL") { + arg.after("FCELL=") + } else { + arg.after("KEEP_CELL_IF=") + }; + let con = condition.as_bytes(); + for i in 0..con.len() { + if i > 0 + && i < con.len() - 1 + && con[i] == b'=' + && con[i - 1] != b'=' + && con[i - 1] != b'<' + && con[i - 1] != b'>' + && con[i + 1] != b'=' + { + return Err(format!( + "\nConstraints for {} cannot use =. 
Please use == instead.\n", + arg.before("="), + )); + } + } + let condition = condition.replace('\'', "\""); + let compiled = build_operator_tree(&condition); + if compiled.is_err() { + return Err(format!("\n{} usage incorrect.\n", arg.before("="))); + } + ctl.clono_filt_opt_def.fcell.push(compiled.unwrap()); + } else if arg.starts_with("LEGEND=") { + let x = parse_csv(arg.after("LEGEND=")); + if x.is_empty() || x.len() % 2 != 0 { + return Err("\nValue of LEGEND doesn't make sense.\n".to_string()); + } + ctl.plot_opt.use_legend = true; + for i in 0..x.len() / 2 { + ctl.plot_opt + .legend + .push((x[2 * i].clone(), x[2 * i + 1].clone())); + } + } else if arg.starts_with("BARCODE=") { + let bcs = arg.after("BARCODE=").split(','); + let x = bcs + .map(|bcj| { + if !bcj.contains('-') { + return Err( + "\nValue for a barcode in BARCODE argument is invalid, must contain -.\n" + .to_string(), + ); + } + Ok(bcj.to_string()) + }) + .collect::<Result<Vec<_>, String>>()?; + ctl.clono_filt_opt.barcode = x; + } else if arg.starts_with("F=") { + // deprecated but retained for backward compatibility + let filt = arg.after("F=").to_string(); + ctl.clono_filt_opt.bounds.push(LinearCondition::new(&filt)?); + ctl.clono_filt_opt.bound_type.push("mean".to_string()); + } else if arg.starts_with("KEEP_CLONO_IF_CELL_MEAN=") { + let filt = arg.after("KEEP_CLONO_IF_CELL_MEAN=").to_string(); + ctl.clono_filt_opt.bounds.push(LinearCondition::new(&filt)?); + ctl.clono_filt_opt.bound_type.push("mean".to_string()); + } else if arg.starts_with("KEEP_CLONO_IF_CELL_MIN=") { + let filt = arg.after("KEEP_CLONO_IF_CELL_MIN=").to_string(); + ctl.clono_filt_opt.bounds.push(LinearCondition::new(&filt)?); + ctl.clono_filt_opt.bound_type.push("min".to_string()); + } else if arg.starts_with("KEEP_CLONO_IF_CELL_MAX=") { + let filt = arg.after("KEEP_CLONO_IF_CELL_MAX=").to_string(); + ctl.clono_filt_opt.bounds.push(LinearCondition::new(&filt)?); + ctl.clono_filt_opt.bound_type.push("max".to_string()); + } 
else if arg.starts_with("SCAN=") { + let mut x = arg.after("SCAN=").to_string(); + x = x.replace(' ', ""); + let x = x.split(',').collect::<Vec<&str>>(); + if x.len() != 3 { + return Err("\nArgument to SCAN must have three components.\n".to_string()); + } + ctl.gen_opt.gene_scan_test = Some(LinearCondition::new(x[0])?); + ctl.gen_opt.gene_scan_control = Some(LinearCondition::new(x[1])?); + let threshold = LinearCondition::new(x[2])?; + for i in 0..threshold.var.len() { + if threshold.var[i] != *"t" && threshold.var[i] != *"c" { + return Err("\nIllegal variable in threshold for scan.\n".to_string()); + } + } + ctl.gen_opt.gene_scan_threshold = Some(threshold); + } else if arg.starts_with("PLOT=") { + *using_plot = true; + let x = arg.after("PLOT=").split(',').collect::<Vec<&str>>(); + if x.is_empty() { + return Err("\nArgument to PLOT is invalid.\n".to_string()); + } + ctl.plot_opt.plot_file = x[0].to_string(); + for &xj in &x[1..] { + if !xj.contains("->") { + return Err("\nArgument to PLOT is invalid.\n".to_string()); + } + ctl.gen_opt + .origin_color_map + .insert(xj.before("->").to_string(), xj.after("->").to_string()); + } + } else if arg.starts_with("PLOT2=") { + *using_plot = true; + let x = arg.after("PLOT2=").split(',').collect::<Vec<&str>>(); + if x.is_empty() { + return Err("\nArgument to PLOT is invalid.\n".to_string()); + } + if x.len() % 2 != 1 { + return Err("\nArgument to PLOT is invalid.\n".to_string()); + } + ctl.plot_opt.plot_file = x[0].to_string(); + for j in (1..x.len()).step_by(2) { + let condition = x[j].to_string(); + let color = x[j + 1].to_string(); + if !condition.contains('=') { + return Err("\nArgument to PLOT is invalid.\n".to_string()); + } + ctl.plot_opt.plot_conditions.push(condition); + ctl.plot_opt.plot_colors.push(color); + } + } else if arg.starts_with("PLOT_BY_ISOTYPE=") { + ctl.plot_opt.plot_by_isotype = true; + ctl.plot_opt.plot_file = arg.after("PLOT_BY_ISOTYPE=").to_string(); + if ctl.plot_opt.plot_file.is_empty() { + 
return Err("\nFilename value needs to be supplied to PLOT_BY_ISOTYPE.\n".to_string()); + } + } else if arg.starts_with("PLOT_BY_ISOTYPE_COLOR=") { + if arg.after("PLOT_BY_ISOTYPE_COLOR=").is_empty() { + return Err( + "\nA value needs to be specified for the PLOT_BY_ISOTYPE_COLOR \ + argument.\n" + .to_string(), + ); + } + ctl.plot_opt.plot_by_isotype_color.extend( + arg.after("PLOT_BY_ISOTYPE_COLOR=") + .split(',') + .map(str::to_string), + ); + } else if arg.starts_with("PLOT_BY_MARK=") { + ctl.plot_opt.plot_by_mark = true; + ctl.plot_opt.plot_file = arg.after("PLOT_BY_MARK=").to_string(); + if ctl.plot_opt.plot_file.is_empty() { + return Err("\nFilename value needs to be supplied to PLOT_BY_MARK.\n".to_string()); + } + } else if is_usize_arg(arg, "MAX_CORES")? { + let nthreads = arg.after("MAX_CORES=").force_usize(); + let _ = rayon::ThreadPoolBuilder::new() + .num_threads(nthreads) + .build_global(); + } else if arg.starts_with("PCOLS=") { + ctl.parseable_opt.pcols.clear(); + for pi in arg.after("PCOLS=").split(',') { + let mut x = pi.replace("_sum", "_Σ"); + x = x.replace("_mean", "_μ"); + ctl.parseable_opt.pcols.push(x); + ctl.parseable_opt.pcols_sort = ctl.parseable_opt.pcols.clone(); + ctl.parseable_opt.pcols_sortx = ctl + .parseable_opt + .pcols + .iter() + .map(|cj| { + if cj.contains(':') { + cj.before(":").to_string() + } else { + cj.clone() + } + }) + .collect(); + unique_sort(&mut ctl.parseable_opt.pcols_sort); + unique_sort(&mut ctl.parseable_opt.pcols_sortx); + } + } else if arg.starts_with("PCOLS_SHOW=") { + ctl.parseable_opt.pcols_show.clear(); + ctl.parseable_opt + .pcols_show + .extend(arg.after("PCOLS_SHOW=").split(',').map(str::to_string)); + } else if arg.starts_with("VJ=") { + ctl.clono_filt_opt.vj = arg.after("VJ=").as_bytes().to_vec(); + for &c in ctl.clono_filt_opt.vj.iter() { + if !(c == b'A' || c == b'C' || c == b'G' || c == b'T') { + return Err("\nIllegal value for VJ, must be over alphabet ACGT.\n".to_string()); + } + } + } else if 
arg.starts_with("AMINO=") { + ctl.clono_print_opt.amino.clear(); + for x in arg.after("AMINO=").split(',') { + if !x.is_empty() { + ctl.clono_print_opt.amino.push(x.to_string()); + } + } + for x in ctl.clono_print_opt.amino.iter() { + let mut ok = false; + if *x == "cdr1" + || *x == "cdr2" + || *x == "cdr3" + || *x == "fwr1" + || *x == "fwr2" + || *x == "fwr3" + || *x == "fwr4" + || *x == "var" + || *x == "share" + || *x == "donor" + || *x == "donorn" + { + ok = true; + } else if x.contains('-') { + let (start, stop) = (x.before("-"), x.after("-")); + if start.parse::<usize>().is_ok() + && stop.parse::<usize>().is_ok() + && start.force_usize() <= stop.force_usize() + { + ok = true; + } + } + if !ok { + return Err(format!( + "\nUnrecognized variable {x} for AMINO. Please type \ + \"enclone help amino\".\n" + )); + } + } + } else if arg.starts_with("CVARS=") { + ctl.clono_print_opt.cvars.clear(); + for x in arg.after("CVARS=").split(',') { + if !x.is_empty() { + ctl.clono_print_opt.cvars.push(x.to_string()); + } + } + for x in ctl.clono_print_opt.cvars.iter_mut() { + *x = x.replace("_sum", "_Σ"); + *x = x.replace("_mean", "_μ"); + } + } else if arg.starts_with("CVARSP=") { + for x in arg.after("CVARSP=").split(',') { + if !x.is_empty() { + ctl.clono_print_opt.cvars.push(x.to_string()); + } + } + for x in ctl.clono_print_opt.cvars.iter_mut() { + *x = x.replace("_sum", "_Σ"); + *x = x.replace("_mean", "_μ"); + } + } else if arg.starts_with("LVARS=") { + ctl.clono_print_opt.lvars.clear(); + for x in arg.after("LVARS=").split(',') { + ctl.clono_print_opt.lvars.push(x.to_string()); + } + for x in ctl.clono_print_opt.lvars.iter_mut() { + *x = x.replace("_sum", "_Σ"); + *x = x.replace("_mean", "_μ"); + } + } else if arg.starts_with("LVARSP=") { + let lvarsp = arg.after("LVARSP=").split(','); + for x in lvarsp { + ctl.clono_print_opt.lvars.push(x.to_string()); + } + for x in ctl.clono_print_opt.lvars.iter_mut() { + *x = x.replace("_sum", "_Σ"); + *x = x.replace("_mean", 
"_μ"); + } + } else if arg.starts_with("DVARS=") { + ctl.gen_opt.dvars.clear(); + for x in arg.after("DVARS=").split(',') { + ctl.gen_opt.dvars.push(x.to_string()); + } + } else if arg.starts_with("GVARS=") { + ctl.gen_opt.gvars.clear(); + for x in arg.after("GVARS=").split(',') { + ctl.gen_opt.gvars.push(x.to_string()); + } + } else if is_f64_arg(arg, "MAX_SCORE")? { + ctl.join_alg_opt.max_score = arg.after("MAX_SCORE=").force_f64(); + } else if is_f64_arg(arg, "MAX_LOG_SCORE")? { + let x = arg.after("MAX_LOG_SCORE=").force_f64(); + ctl.join_alg_opt.max_score = 10.0_f64.powf(x); + } else if arg.starts_with("CONST_IGH=") { + let reg = Regex::new(&format!("^{}$", arg.after("CONST_IGH="))); + if reg.is_err() { + return Err(format!( + "\nYour CONST_IGH value {} could not be parsed as a regular expression.\n", + arg.after("CONST_IGH=") + )); + } + ctl.clono_filt_opt.const_igh = Some(reg.unwrap()); + } else if arg.starts_with("CONST_IGKL=") { + let reg = Regex::new(&format!("^{}$", arg.after("CONST_IGKL="))); + if reg.is_err() { + return Err(format!( + "\nYour CONST_IGKL value {} could not be parsed as a regular expression.\n", + arg.after("CONST_IGKL=") + )); + } + ctl.clono_filt_opt.const_igkl = Some(reg.unwrap()); + } else if arg.starts_with("CDR3=") { + let mut lev = true; + let re = Regex::new(r"[A-Z]+~[0-9]+").unwrap(); + for field in arg.split('|') { + if !re.is_match(field) { + lev = false; + } + } + if lev { + let mut ok = true; + for field in arg.split('|') { + if !field.contains('~') { + ok = false; + } else { + let _f1 = field.before("~"); + let f2 = field.after("~"); + if f2.parse::<usize>().is_err() { + ok = false; + } + } + } + if !ok { + return Err(format!( + "\nLooks like your CDR3 value {} is trying to be an Levenshtein distance\n\ + pattern, but it is not.\n", + arg.after("CDR3=") + )); + } + ctl.clono_filt_opt.cdr3_lev = arg.after("=").to_string(); + } else { + let reg = Regex::new(&format!("^{}$", arg.after("CDR3="))); + if reg.is_err() { + return 
Err(format!( + "\nYour CDR3 value {} could not be parsed as a regular expression.\n", + arg.after("CDR3=") + )); + } + ctl.clono_filt_opt.cdr3 = Some(reg.unwrap()); + } + } else if is_usize_arg(arg, "CHAINS")? { + ctl.clono_filt_opt.min_chains = arg.after("CHAINS=").force_usize(); + ctl.clono_filt_opt.max_chains = arg.after("CHAINS=").force_usize(); + } else if arg.starts_with("SEG=") { + let mut y = arg + .after("SEG=") + .split('|') + .map(str::to_string) + .collect::<Vec<_>>(); + y.sort(); + ctl.clono_filt_opt.seg.push(y); + } else if arg.starts_with("SEGN=") { + let fields = arg.after("SEGN=").split('|'); + let mut y = Vec::<String>::new(); + for x in fields { + if x.parse::<i32>().is_err() { + return Err("\nInvalid argument to SEGN.\n".to_string()); + } + y.push(x.to_string()); + } + y.sort(); + ctl.clono_filt_opt.segn.push(y); + } else if arg.starts_with("NSEG=") { + let fields = arg.after("NSEG=").split('|'); + let mut y = Vec::<String>::new(); + for x in fields { + y.push(x.to_string()); + } + y.sort(); + ctl.clono_filt_opt.nseg.push(y); + } else if arg.starts_with("NSEGN=") { + let fields = arg.after("NSEGN=").split('|'); + let mut y = Vec::<String>::new(); + for x in fields { + if x.parse::<i32>().is_err() { + return Err("\nInvalid argument to NSEGN.\n".to_string()); + } + y.push(x.to_string()); + } + y.sort(); + ctl.clono_filt_opt.nsegn.push(y); + } else if is_usize_arg(arg, "CELLS")? 
{ + ctl.clono_filt_opt.ncells_low = arg.after("CELLS=").force_usize(); + ctl.clono_filt_opt.ncells_high = ctl.clono_filt_opt.ncells_low; + } else if arg.starts_with("META=") { + let v = arg.after("META=").split(','); + for f in v { + let mut f = f.to_string(); + tilde_expand_me(&mut f); + metas.push(f); + } + } else if arg.starts_with("METAX=") { + let f = arg.after("METAX="); + let f = f.chars().filter(|c| !c.is_whitespace()).collect(); + metaxs.push(f); + } else if arg.starts_with("TCR=") + || arg.starts_with("BCR=") + || arg.starts_with("TCRGD=") + || (!arg.is_empty() && arg.as_bytes()[0] >= b'0' && arg.as_bytes()[0] <= b'9') + { + xcrs.push(arg.to_string()); + } else if arg != "--help" { + return Err(format!("\nUnrecognized argument {arg}.\n")); + } + Ok(()) +} diff --git a/enclone_args/src/read_json.rs b/enclone_args/src/read_json.rs new file mode 100644 index 000000000..b5a376765 --- /dev/null +++ b/enclone_args/src/read_json.rs @@ -0,0 +1,974 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Fields that are used in all_contig_annotations.json: +// • barcode +// • is_cell and is_asm_cell -- both are optional, but at least one needs to be present and +// true for a cell called by the VDJ pipeline +// • is_gex_cell -- optional +// • productive -- optional but should be true for contigs to be used +// • high_confidence -- optional but should be true for contigs to be used +// • contig_name +// • sequence +// • version -- optional +// • validated_umis -- optional +// • non_validated_umis -- optional +// • invalidated_umis -- optional +// • fraction_of_reads_for_this_barcode_provided_as_input_to_assembly -- optional +// • quals +// • umi_count +// • read_count +// • cdr3, unless in reannotate mode +// • cdr3_seq, unless in reannotate mode +// • cdr3_start, unless in reannotate mode +// • annotations, unless in reannotate mode. 
+
+use self::annotate::{annotate_seq, get_cdr3_using_ann, print_some_annotations};
+use self::refx::RefData;
+use self::transcript::is_valid;
+use debruijn::dna_string::DnaString;
+use enclone_core::barcode_fate::BarcodeFate;
+use enclone_core::defs::{EncloneControl, OriginInfo, TigData};
+use io_utils::{open_maybe_compressed, path_exists, read_vector_entry_from_json};
+use rand::Rng;
+use rayon::prelude::*;
+use serde_json::Value;
+use std::fmt::Write;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::{collections::HashMap, io::BufReader};
+use string_utils::{stringme, strme, TextUtils};
+use vdj_ann::{annotate, refx, transcript};
+use vector_utils::{bin_position, erase_if, unique_sort};
+
+// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
+
+// Build the "something is wrong with the contig annotations" error.
+//
+// This function always returns Err.  The first caller to flip `exiting` from false to true
+// gets the full explanatory message; every later caller gets an Err carrying an empty string.
+//
+// json:    path of the offending annotations file, appended to the message when present.
+// ctl:     only ctl.gen_opt.internal_run is read; it enables the extra internal-user text.
+// exiting: shared flag used to make sure the long message is produced only once.
+// msg:     diagnostic detail, included only when ctl.gen_opt.internal_run is set.
+fn json_error(
+    json: Option<&str>,
+    ctl: &EncloneControl,
+    exiting: &AtomicBool,
+    msg: &str,
+) -> Result<(), String> {
+    // The following line prevents error messages from this function from being
+    // printed multiple times.
+    let mut msgx = String::new();
+    if !exiting.swap(true, Ordering::Relaxed) {
+        msgx = "\nThere is something wrong with the contig annotations in the cellranger output \
+                file"
+            .to_string();
+        if json.is_some() {
+            write!(msgx, "\n{}.", json.unwrap()).unwrap();
+        } else {
+            msgx += ".";
+        }
+        if ctl.gen_opt.internal_run {
+            writeln!(msgx, "\n\npossibly relevant internal data: {msg}").unwrap();
+        }
+        if ctl.gen_opt.internal_run {
+            msgx += "\n\nATTENTION INTERNAL 10X USERS!\n\
+                Quite possibly you are using data from a cellranger run carried out using a \
+                version\n\
+                between 3.1 and 4.0. For certain of these versions, it is necessary to add the\n\
+                argument CURRENT_REF to your command line. If that doesn't work, \
+                please see below.\n";
+        }
+        msgx += "\n\nHere is what you should do:\n\n\
+            1. If you used cellranger version ≥ 4.0, the problem is very likely\n\
+            that the directory outs/vdj_reference was not retained, so enclone\n\
+            didn't see it, and had to guess what the reference sequence was.\n\
+            Fix this and everything should be fine.\n\n\
+            2. If you used cellranger version 3.1, then you need to add a command-line\n\
+            argument REF=<vdj_reference_fasta_file_name>, or if you already did that,\n\
+            make sure it is the *same* as that which you gave cellranger.\n\n\
+            3. If you used cellranger version < 3.1 (the only other possibility), then\n\
+            you have options:\n\
+            • rerun cellranger using the current version\n\
+            • or provide an argument REF= as above and RE to force reannotation\n\
+            • or provide the argument BUILT_IN to use the current reference and force\n \
+            reannotation (and MOUSE if you used mouse); only works with human and mouse.\n\n\
+            Note that one way to get the error is to specify TCR when you meant BCR, or the\n\
+            other way.\n\n\
+            If you're stuck, please write to us at enclone@10xgenomics.com.\n";
+    }
+    Err(msgx)
+}
+
+// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
+
+// Parse one contig entry (the raw JSON bytes `x`) and, if it passes all filters, append a
+// TigData for it to `tigs`.
+//
+// Side outputs, written regardless of whether a TigData is produced:
+// • gex_cells_specified is set to true if the entry has an is_gex_cell field
+// • gex_cells receives the barcode if is_gex_cell is true
+// • vdj_cells receives the barcode if is_cell or is_asm_cell is true
+// • cr_version is overwritten if the entry has a version field (only reached for entries
+//   that pass the cell/productive/high_confidence filters).
+//
+// Two annotation paths exist: if `reannotate` or ctl.gen_opt.reprod is set, the contig is
+// annotated from scratch with annotate_seq; otherwise the annotations stored in the JSON
+// entry are used.
+//
+// Returns Ok(()) both when a TigData was pushed and when the contig was silently skipped.
+// Returns Err(message) for unparseable JSON, missing annotations, a reference inconsistency,
+// or via json_error.  Several `unwrap()` calls and one `panic!` below abort instead of
+// returning Err if the JSON lacks an expected field or is internally inconsistent.
+fn parse_vector_entry_from_json(
+    x: &[u8],
+    json: &str,
+    accept_inconsistent: bool,
+    origin_info: &OriginInfo,
+    li: usize,
+    refdata: &RefData,
+    to_ref_index: &HashMap<usize, usize>,
+    reannotate: bool,
+    ctl: &EncloneControl,
+    vdj_cells: &mut Vec<String>,
+    gex_cells: &mut Vec<String>,
+    gex_cells_specified: &mut bool,
+    cr_version: &mut String,
+    tigs: &mut Vec<TigData>,
+    exiting: &AtomicBool,
+) -> Result<(), String> {
+    let v: Value = match serde_json::from_slice(x) {
+        Err(_) => {
+            return Err(format!(
+                "\nInternal error, failed to parse a value from a string. The string is:\n{}\n",
+                strme(x)
+            ));
+        }
+        Ok(v) => v,
+    };
+    // Value::to_string() yields the JSON rendering, so string fields come back wrapped in
+    // double quotes; between("\"", "\"") is used throughout to strip them.
+    let barcode = v["barcode"].to_string().between("\"", "\"").to_string();
+
+    // Get cell status. Sometime after CR 4.0 was released, and before 4.1 was released,
+    // we added new fields is_asm_cell and is_gex_cell to the json file. The value of
+    // is_asm_cell is the original determination of "cell" in the VDJ pipeline, whereas the
+    // value of is_gex_cell is that for the GEX pipeline.
+
+    let mut is_cell = v["is_cell"].as_bool().unwrap_or(false);
+    let is_asm_cell = v["is_asm_cell"].as_bool().unwrap_or(false);
+    if is_asm_cell {
+        is_cell = true;
+    }
+
+    let is_gex_cell = v["is_gex_cell"].as_bool();
+    if is_gex_cell.is_some() {
+        *gex_cells_specified = true;
+    }
+    if is_gex_cell == Some(true) {
+        gex_cells.push(barcode.clone());
+    }
+
+    // Non-cells are dropped unless the ncell option is set.
+    if !ctl.gen_opt.ncell && !is_cell {
+        return Ok(());
+    }
+    if is_cell {
+        vdj_cells.push(barcode.clone());
+    }
+
+    // Proceed.
+
+    // Unless reprod is set, require productive; additionally require high_confidence unless
+    // ncell is set.  Missing fields count as false.
+    if !ctl.gen_opt.reprod && !v["productive"].as_bool().unwrap_or(false) {
+        return Ok(());
+    }
+    if !ctl.gen_opt.reprod && !ctl.gen_opt.ncell && !v["high_confidence"].as_bool().unwrap_or(false)
+    {
+        return Ok(());
+    }
+    let tigname = v["contig_name"].to_string().between("\"", "\"").to_string();
+    let full_seq = &v["sequence"].to_string().between("\"", "\"").to_string();
+    let mut left = false;
+    // v_ref_id == 1000000 is the "no V segment found" sentinel tested further down.
+    let (mut v_ref_id, mut j_ref_id) = (1000000, 0);
+    let mut d_ref_id: Option<usize> = None;
+    let mut c_ref_id = None;
+    let mut chain_type = String::new();
+    let mut u_ref_id = None;
+    // -1 means "not yet found"; both are checked for negativity before being used as indices.
+    let (mut tig_start, mut tig_stop) = (-1_isize, -1_isize);
+    let mut v_stop = 0;
+    let mut v_stop_ref = 0;
+    let mut d_start = None;
+    let mut j_start = 0;
+    let mut j_start_ref = 0;
+    let mut c_start = None;
+    // As used below: .0 = start on contig, .1 = match length, .2 = reference index,
+    // .3 = start on reference; the last entry is the mismatch entry mentioned further down.
+    let mut annv = Vec::<(i32, i32, i32, i32, i32)>::new();
+    let mut cdr3_aa: String;
+    let mut cdr3_dna: String;
+    let mut cdr3_start: usize;
+    if v.get("version").is_some() {
+        *cr_version = v["version"].to_string().between("\"", "\"").to_string();
+    }
+
+    // Read validated and non-validated UMIs.
+
+    // For each of the three optional UMI arrays, the *_present flag records whether the field
+    // existed, so that an absent field becomes None and an empty array becomes Some(vec![]).
+    let mut validated_umis = Vec::<String>::new();
+    let mut validated_umis_present = false;
+    let val = v["validated_umis"].as_array();
+    if let Some(val) = val {
+        validated_umis_present = true;
+        for vi in val {
+            validated_umis.push(vi.to_string().between("\"", "\"").to_string());
+        }
+    }
+    let mut non_validated_umis = Vec::<String>::new();
+    let mut non_validated_umis_present = false;
+    let non_val = v["non_validated_umis"].as_array();
+    if let Some(non_val) = non_val {
+        non_validated_umis_present = true;
+        for nv in non_val {
+            non_validated_umis.push(nv.to_string().between("\"", "\"").to_string());
+        }
+    }
+    let mut invalidated_umis = Vec::<String>::new();
+    let mut invalidated_umis_present = false;
+    let inval = v["invalidated_umis"].as_array();
+    if let Some(inval) = inval {
+        invalidated_umis_present = true;
+        for inv in inval {
+            invalidated_umis.push(inv.to_string().between("\"", "\"").to_string());
+        }
+    }
+
+    // Read fraction_of_reads_for_this_barcode_provided_as_input_to_assembly.
+
+    // Stored as an integer number of millionths, rounded to nearest.
+    let mut frac_reads_used = None;
+    let f = v["fraction_of_reads_for_this_barcode_provided_as_input_to_assembly"].as_f64();
+    if let Some(f) = f {
+        frac_reads_used = Some((f * 1_000_000.0).round() as u32);
+    }
+
+    // Reannotate.
+
+    if reannotate || ctl.gen_opt.reprod {
+        let x = DnaString::from_dna_string(full_seq);
+        let mut ann = Vec::<(i32, i32, i32, i32, i32)>::new();
+        annotate_seq(&x, refdata, &mut ann, true, false, true);
+
+        // If there are multiple V segment alignments, possibly reduce to just one.
+
+        // Walk maximal runs [j, k) of consecutive annotations having the same segment type.
+        // For a V run of length > 1 keep only the first entry, or the first two when the
+        // second is on the same reference and the pair is contiguous on the contig with a gap
+        // on the reference, or contiguous on the reference with a gap on the contig.
+        let mut ann2 = Vec::<(i32, i32, i32, i32, i32)>::new();
+        let mut j = 0;
+        while j < ann.len() {
+            let t = ann[j].2 as usize;
+            let mut k = j + 1;
+            while k < ann.len() {
+                if refdata.segtype[ann[k].2 as usize] != refdata.segtype[t] {
+                    break;
+                }
+                k += 1;
+            }
+            if refdata.segtype[t] == "V" && k - j > 1 {
+                let mut entries = 1;
+                if j < ann.len() - 1
+                    && ann[j + 1].2 as usize == t
+                    && ((ann[j].0 + ann[j].1 == ann[j + 1].0 && ann[j].3 + ann[j].1 < ann[j + 1].3)
+                        || (ann[j].0 + ann[j].1 < ann[j + 1].0
+                            && ann[j].3 + ann[j].1 == ann[j + 1].3))
+                {
+                    entries = 2;
+                }
+                ann2.extend(&ann[j..j + entries]);
+            } else {
+                ann2.extend(&ann[j..k]);
+            }
+            j = k;
+        }
+        ann = ann2;
+
+        // Proceed.
+
+        // For the traced barcode, print the annotations and run is_valid with its fourth
+        // argument set to true, printing the log on failure; otherwise run it with false.
+        if ctl.gen_opt.trace_barcode == *barcode {
+            let mut log = Vec::<u8>::new();
+            print_some_annotations(refdata, &ann, &mut log, false);
+            print!("\n{}", strme(&log));
+        }
+        let mut log = Vec::<u8>::new();
+        if ctl.gen_opt.trace_barcode == *barcode {
+            if !is_valid(
+                &x,
+                refdata,
+                &ann,
+                true,
+                &mut log,
+                Some(ctl.gen_opt.gamma_delta),
+            ) {
+                print!("{}", strme(&log));
+                println!("invalid");
+                return Ok(());
+            }
+        } else if !is_valid(
+            &x,
+            refdata,
+            &ann,
+            false,
+            &mut log,
+            Some(ctl.gen_opt.gamma_delta),
+        ) {
+            return Ok(());
+        }
+        let mut cdr3 = Vec::<(usize, Vec<u8>, usize, usize)>::new();
+        get_cdr3_using_ann(&x, refdata, &ann, &mut cdr3);
+        // NOTE(review): cdr3[0] is indexed without an emptiness check here, whereas the JSON
+        // branch below returns early when cdr3 is empty.  Presumably is_valid guarantees that a
+        // CDR3 exists -- confirm, otherwise this can panic.
+        cdr3_aa = stringme(&cdr3[0].1);
+        cdr3_start = cdr3[0].0;
+        cdr3_dna = x
+            .slice(cdr3_start, cdr3_start + 3 * cdr3_aa.len())
+            .to_string();
+        // Once a J annotation has been seen, later V annotations are ignored.
+        let mut seen_j = false;
+        for anni in ann {
+            let t = anni.2 as usize;
+            if refdata.is_u(t) {
+                u_ref_id = Some(t);
+            } else if refdata.is_v(t) && !seen_j {
+                v_ref_id = t;
+                annv.push(anni);
+                // Chain type is taken to be the first three characters of the reference name.
+                chain_type = refdata.name[t][0..3].to_string();
+                if chain_type == *"IGH"
+                    || chain_type == *"TRB"
+                    || (chain_type == *"TRD" && ctl.gen_opt.gamma_delta)
+                {
+                    left = true;
+                }
+                // Only a V alignment that begins at reference position 0 defines tig_start;
+                // cdr3_start is then made relative to tig_start.
+                if anni.3 == 0 {
+                    tig_start = anni.0 as isize;
+                    if tig_start > cdr3_start as isize {
+                        panic!(
+                            "Something is wrong with the CDR3 start for this contig:\n\n{}.",
+                            &full_seq
+                        );
+                    }
+                    cdr3_start -= tig_start as usize;
+                }
+                v_stop = (anni.0 + anni.1) as usize;
+                v_stop_ref = (anni.3 + anni.1) as usize;
+            } else if refdata.is_d(t) {
+                d_start = Some(anni.0 as usize);
+                d_ref_id = Some(t);
+            } else if refdata.is_j(t) {
+                j_ref_id = t;
+                tig_stop = (anni.0 + anni.1) as isize;
+                j_start = anni.0 as usize;
+                j_start_ref = anni.3 as usize;
+                seen_j = true;
+            } else if refdata.is_c(t) {
+                c_ref_id = Some(t);
+                c_start = Some(anni.0 as usize);
+            }
+        }
+        // Make the V annotation starts relative to the first V annotation.  The loop runs in
+        // reverse so that annv[0].0 is still the original offset when the others are shifted.
+        for i in (0..annv.len()).rev() {
+            annv[i].0 -= annv[0].0;
+        }
+    } else {
+        // Use annotations from json file.
+
+        cdr3_aa = v["cdr3"].to_string().between("\"", "\"").to_string();
+        cdr3_dna = v["cdr3_seq"].to_string().between("\"", "\"").to_string();
+        // Panics if cdr3_start is absent or not an unsigned integer.
+        cdr3_start = v["cdr3_start"].as_u64().unwrap() as usize;
+        let ann = v["annotations"].as_array();
+        if ann.is_none() {
+            return Err(format!(
+                "\nThe file\n{json}\ndoes not contain annotations. To use enclone with it, \
+                 please specify the argument BUILT_IN\nto force use of the internal \
+                 reference and recompute annotations.\n"
+            ));
+        }
+        let ann = ann.unwrap();
+        let mut cigarv = String::new(); // cigar for V segment
+        for a in ann {
+            let region_type = &a["feature"]["region_type"];
+            let feature_id = a["feature"]["feature_id"].as_u64().unwrap() as usize;
+            // Annotations whose feature id is unknown to the current reference are ignored.
+            if !to_ref_index.contains_key(&feature_id) {
+                continue;
+            }
+            let feature_idx = to_ref_index[&feature_id];
+            let ref_start = a["annotation_match_start"].as_u64().unwrap() as usize;
+            if region_type == "L-REGION+V-REGION" {
+                v_stop = a["contig_match_end"].as_i64().unwrap() as usize;
+                v_stop_ref = a["annotation_match_end"].as_i64().unwrap() as usize;
+            }
+            let gene_name = a["feature"]["gene_name"]
+                .to_string()
+                .between("\"", "\"")
+                .to_string();
+            // The swap in the condition means only the first thread to detect a mismatch
+            // returns the error; if `exiting` was already set, processing simply continues.
+            if refdata.name[feature_idx] != gene_name
+                && !accept_inconsistent
+                && !exiting.swap(true, Ordering::Relaxed)
+            {
+                return Err(format!(
+                    "\nThere is an inconsistency between the reference \
+                     file used to create the Cell Ranger output files in\n{}\nand the \
+                     reference that enclone is using.\n\nFor example, the feature \
+                     numbered {} is\nthe gene {} in one and the gene {} in the other.\n\n\
+                     As far as we know, this type of error can only occur with Cell Ranger \
+                     versions before 4.0.\n\n\
+                     If this is mouse data, please use the argument MOUSE, and that may \
+                     solve the problem.\n\n\
+                     If this is human or mouse data, and you are OK with using the current \
+                     built-in reference that\nenclone has, \
+                     you can instead add the argument BUILT_IN to the command line. This \
+                     forces\nrecomputation of annotations and may be somewhat slower.\n\n\
+                     A solution that should always work is to supply\n\
+                     REF=vdj_reference_fasta_filename as an argument to enclone.\n",
+                    json.rev_before("/"),
+                    feature_id,
+                    gene_name,
+                    refdata.name[feature_idx]
+                ));
+            }
+            // A V annotation starting at reference position 0 defines tig_start, the chain
+            // type, the V reference id and the V cigar string.
+            if region_type == "L-REGION+V-REGION" && ref_start == 0 {
+                let chain = a["feature"]["chain"]
+                    .to_string()
+                    .between("\"", "\"")
+                    .to_string();
+                // if !chain.starts_with("IG") { continue; } // *******************
+                tig_start = a["contig_match_start"].as_i64().unwrap() as isize;
+                // NOTE(review): unlike the reannotate branch, there is no check here that
+                // tig_start <= cdr3_start before the unsigned subtraction -- confirm the JSON
+                // guarantees it.
+                cdr3_start -= tig_start as usize;
+                chain_type = chain.clone();
+                if chain == *"IGH"
+                    || chain == *"TRB"
+                    || (chain == *"TRD" && ctl.gen_opt.gamma_delta)
+                {
+                    left = true;
+                }
+                v_ref_id = feature_idx;
+                cigarv = a["cigar"].to_string().between("\"", "\"").to_string();
+            } else {
+                // also check for IG chain?????????????????????????????????????????
+                let ref_stop = a["annotation_match_end"].as_u64().unwrap() as usize;
+                let ref_len = a["annotation_length"].as_u64().unwrap() as usize;
+                // A J annotation reaching the end of its reference defines tig_stop.
+                if region_type == "J-REGION" && ref_stop == ref_len {
+                    tig_stop = a["contig_match_end"].as_i64().unwrap() as isize;
+                    j_ref_id = feature_idx;
+                    j_start = a["contig_match_start"].as_i64().unwrap() as usize;
+                    j_start_ref = a["annotation_match_start"].as_i64().unwrap() as usize;
+                }
+                if region_type == "5'UTR" {
+                    u_ref_id = Some(feature_idx);
+                }
+                if region_type == "D-REGION" {
+                    d_start = Some(a["contig_match_start"].as_i64().unwrap() as usize);
+                    d_ref_id = Some(feature_idx);
+                }
+                if region_type == "C-REGION" {
+                    c_ref_id = Some(feature_idx);
+                    c_start = Some(a["contig_match_start"].as_i64().unwrap() as usize);
+                }
+            }
+        }
+        // No V annotation starting at reference position 0 was found: skip this contig.
+        if v_ref_id == 1000000 {
+            return Ok(());
+        }
+
+        // Compute annv from cigarv. We don't compute the mismatch entry.
+
+        // Split the cigar string into pieces, each ending at an ASCII letter.
+        let mut cg = Vec::<Vec<u8>>::new(); // pieces of cigar string
+        let mut piece = Vec::<u8>::new();
+        for c in cigarv.chars() {
+            piece.push(c as u8);
+            if c.is_ascii_alphabetic() {
+                cg.push(piece.clone());
+                piece.clear();
+            }
+        }
+        let t = v_ref_id as i32;
+        // len1/len2 are the first two M lengths; ins/del hold the last I and D lengths seen.
+        // A third M piece resets both lengths to zero and stops the scan.
+        let (mut len1, mut len2) = (0, 0);
+        let (mut ins, mut del) = (0, 0);
+        for cgi in cg {
+            let x = strme(&cgi[0..cgi.len() - 1]).force_i32();
+            if cgi[cgi.len() - 1] == b'M' {
+                if len1 == 0 {
+                    len1 = x;
+                } else if len2 == 0 {
+                    len2 = x;
+                } else {
+                    // probably can't happen
+                    len1 = 0;
+                    len2 = 0;
+                    break;
+                }
+            }
+            if cgi[cgi.len() - 1] == b'I' {
+                ins = x;
+            }
+            if cgi[cgi.len() - 1] == b'D' {
+                del = x;
+            }
+        }
+        // One entry for the first match block, plus a second entry only for a single indel
+        // whose length is a multiple of three and that is followed by a second match block.
+        annv.push((0_i32, len1, t, 0, 0));
+        if ins > 0 && ins % 3 == 0 && del == 0 && len2 > 0 {
+            let start = len1 + ins;
+            annv.push((start, len2, t, len1, 0));
+        } else if del > 0 && del % 3 == 0 && ins == 0 && len2 > 0 {
+            annv.push((len1, len2, t, len1 + del, 0));
+        }
+        let rt = &refdata.refs[v_ref_id];
+        if annv.len() == 2 && annv[0].1 as usize > rt.len() {
+            let msg = format!("annv[0].1 = {}, rt.len() = {}", annv[0].1, rt.len());
+            // json_error always returns Err, so `?` always returns from this function here.
+            json_error(None, ctl, exiting, &msg)?;
+        }
+
+        // Check to see if the CDR3 sequence has changed. This could happen if the cellranger
+        // version for all_contig_annotations.json used an older version of the CDR3 calculation
+        // than is used in the current version of enclone. This could result in internal
+        // inconsistencies, leading to an assert somewhere downstream.
+
+        let mut cdr3 = Vec::<(usize, Vec<u8>, usize, usize)>::new();
+        let x = DnaString::from_dna_string(full_seq);
+        get_cdr3_using_ann(&x, refdata, &annv, &mut cdr3);
+        if cdr3.is_empty() {
+            return Ok(());
+        }
+        let cdr3_aa_alt = stringme(&cdr3[0].1);
+        if cdr3_aa != cdr3_aa_alt {
+            // This is particularly pathological and rare:
+
+            if tig_start as usize > cdr3[0].0 {
+                return Ok(());
+            }
+
+            // Define start.
+
+            cdr3_start = cdr3[0].0 - tig_start as usize;
+
+            // Define cdr3.
+
+            cdr3_aa = cdr3_aa_alt;
+            cdr3_dna = x
+                .slice(cdr3_start, cdr3_start + 3 * cdr3_aa.len())
+                .to_string();
+        }
+    }
+
+    // Test for two very rare conditions where the CDR3 is busted. This could be confusing to
+    // users if they hit one of these.
+    // Case 1: seen on 47680, barcode CGCCAAGTCCATGAAC-1.
+    // Case 2: seen on 99640, barcode CAGTAACCATGTCGAT-1.
+    // It is not known if these correspond to bugs in cellranger that were subsequently fixed.
+
+    if cdr3_aa.contains('*') {
+        return Ok(());
+    }
+    // NOTE(review): tig_start and tig_stop may still be -1 here; the negativity check only
+    // comes afterwards.  Casting -1 to usize and subtracting can overflow (a panic when
+    // overflow checks are enabled) -- confirm whether the check below should come first.
+    if cdr3_start + 3 * cdr3_aa.len() > tig_stop as usize - tig_start as usize {
+        return Ok(());
+    }
+
+    // Keep going.
+
+    if tig_start < 0 || tig_stop < 0 {
+        let msg = format!("tig_start = {tig_start}, tig_stop = {tig_stop}");
+        // json_error always returns Err, so `?` always returns from this function here.
+        json_error(Some(json), ctl, exiting, &msg)?;
+    }
+    let (tig_start, tig_stop) = (tig_start as usize, tig_stop as usize);
+    // Decode the quality string from its JSON rendering: drop the opening quote (after) and
+    // the final byte (take(len - 1)), and remove one backslash from each escape pair.
+    let quals0 = v["quals"].to_string();
+    let quals0 = quals0.after("\"").as_bytes();
+    let mut quals = Vec::<u8>::new();
+    let mut slashed = false;
+    for &qual in quals0.iter().take(quals0.len() - 1) {
+        if !slashed && qual == b'\\'
+        /* && ( i == 0 || quals0[i-1] != b'\\' ) */
+        {
+            slashed = true;
+            continue;
+        }
+        slashed = false;
+        quals.push(qual);
+    }
+    assert_eq!(full_seq.len(), quals.len());
+    let seq = &full_seq[tig_start..tig_stop].to_string();
+    // Subtract the ASCII offset 33 from every quality byte (presumably Phred+33 encoding --
+    // confirm).  A byte below 33 would underflow.
+    for qual in quals.iter_mut() {
+        *qual -= 33_u8;
+    }
+    let full_quals = quals;
+    let quals = full_quals[tig_start..tig_stop].to_vec();
+    let umi_count = v["umi_count"].as_i64().unwrap() as usize;
+    let read_count = v["read_count"].as_i64().unwrap() as usize;
+    // Origin: a per-barcode assignment if there is one, else the dataset-level origin id
+    // subject to the "s1" condition below.
+    let origin = origin_info.origin_for_bc[li].get(&barcode).or_else(|| {
+        // the way we use s1 here is flaky
+        if !origin_info.origin_id[li].is_empty()
+            && (origin_info.origin_id[li] != *"s1" || origin_info.origin_for_bc[li].is_empty())
+        {
+            Some(&origin_info.origin_id[li])
+        } else {
+            None
+        }
+    });
+    let donor = origin_info.donor_for_bc[li].get(&barcode).or_else(|| {
+        // the way we use d1 here is flaky
+        // NOTE(review): the emptiness test below is on origin_id, not donor_id, unlike the
+        // parallel origin code above -- possibly a copy/paste slip; confirm intent.
+        if !origin_info.origin_id[li].is_empty()
+            && (origin_info.donor_id[li] != *"d1" || origin_info.donor_for_bc[li].is_empty())
+        {
+            Some(&origin_info.donor_id[li])
+        } else {
+            None
+        }
+    });
+    let tag = origin_info.tag[li].get(&barcode);
+    let mut origin_index = None;
+    let mut donor_index = None;
+    let mut tag_index = None;
+    // The donor index is only computed when an origin is present.  The bin_position results
+    // are cast to usize without a sign check, so the values are assumed to be present in the
+    // corresponding lists -- TODO confirm.
+    if let Some(origin) = origin {
+        origin_index = Some(bin_position(&origin_info.origin_list, origin) as usize);
+        if let Some(donor) = donor {
+            donor_index = Some(bin_position(&origin_info.donor_list, donor) as usize);
+        }
+    }
+    if let Some(tag) = tag {
+        tag_index = Some(bin_position(&origin_info.tag_list, tag) as usize);
+    }
+    let mut valu = None;
+    if validated_umis_present {
+        valu = Some(validated_umis);
+    }
+    let mut non_valu = None;
+    if non_validated_umis_present {
+        non_valu = Some(non_validated_umis);
+    }
+    let mut invalu = None;
+    if invalidated_umis_present {
+        invalu = Some(invalidated_umis);
+    }
+    // `quals` and `len` describe the tig_start..tig_stop window; full_seq and full_quals keep
+    // the whole contig.  The fr*/cdr1/cdr2 starts are left unset here.
+    tigs.push(TigData {
+        cdr3_dna,
+        len: seq.len(),
+        v_start: tig_start,
+        v_stop,
+        v_stop_ref,
+        d_start,
+        j_start,
+        j_start_ref,
+        j_stop: tig_stop,
+        c_start,
+        full_seq: full_seq.as_bytes().to_vec(),
+        v_ref_id,
+        d_ref_id,
+        j_ref_id,
+        c_ref_id,
+        u_ref_id,
+        fr1_start: 0,
+        cdr1_start: None,
+        fr2_start: None,
+        cdr2_start: None,
+        fr3_start: None,
+        cdr3_aa,
+        cdr3_start,
+        quals,
+        full_quals,
+        barcode,
+        tigname,
+        left,
+        dataset_index: li,
+        origin_index,
+        donor_index,
+        tag_index,
+        umi_count,
+        read_count,
+        chain_type,
+        annv,
+        validated_umis: valu,
+        non_validated_umis: non_valu,
+        invalidated_umis: invalu,
+        frac_reads_used,
+    });
+    Ok(())
+}
+
+// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
+
+// Parse the JSON annotations file.
+//
+// In the future could be converted to LazyWrite:
+// https://martian-lang.github.io/martian-rust/doc/martian_filetypes/json_file/
+// index.html#lazy-readwrite-example.
+//
+// Tracking contigs using bc_cdr3_aa; could improve later.
+//
+// This section requires 3.1. If you want to avoid that, do something to make tig_start
+// and tig_stop always nonnegative. Or use the RE option.
+//
+// Computational performance. It would appear that nearly all the time here is spent in
+// two lines:
+//
+// read_vector_entry_from_json(&mut f) {
+// let v: Value = serde_json::from_str(strme(&x)).unwrap();
+// (Should retest.)
+//
+// and simply reading the file lines is several times faster. So the way we parse the
+// files is suboptimal. If we want to make this faster, one option would be to speed up
+// this code. Another would be to write out a binary version of the JSON file that contains
+// only the information that we need.
+
+// Read one annotations file and return its contigs grouped by barcode.
+//
+// The file at `json` is used if it exists, otherwise `json` with ".lz4" appended.  Entries
+// are first read sequentially into memory and then parsed in parallel by
+// parse_vector_entry_from_json.
+//
+// Outputs besides the return value:
+// • vdj_cells, gex_cells: barcodes appended, then sorted and deduplicated with unique_sort
+// • gex_cells_specified: reset to false, then set if any entry had an is_gex_cell field
+// • cr_version: overwritten by each entry that reports a non-empty version.
+//
+// Returns Err(message) if the file or its directory is missing, if an entry cannot be read,
+// or if any entry's parse reported a non-empty error message.
+pub fn read_json(
+    accept_inconsistent: bool,
+    origin_info: &OriginInfo,
+    li: usize,
+    json: &String,
+    refdata: &RefData,
+    to_ref_index: &HashMap<usize, usize>,
+    reannotate: bool,
+    cr_version: &mut String,
+    ctl: &EncloneControl,
+    vdj_cells: &mut Vec<String>,
+    gex_cells: &mut Vec<String>,
+    gex_cells_specified: &mut bool,
+) -> Result<Vec<Vec<TigData>>, String> {
+    *gex_cells_specified = false;
+    let mut tigs = Vec::<TigData>::new();
+    // Fall back to the .lz4 name when the plain path does not exist.
+    let mut jsonx = json.clone();
+    if !path_exists(json) {
+        jsonx = format!("{json}.lz4");
+    }
+    // Check the containing directory first so that the error message can be more specific.
+    if jsonx.contains('/') {
+        let p = jsonx.rev_before("/");
+        if !path_exists(p) {
+            return Err(format!(
+                "\nThere should be a directory\n\
+                 \"{p}\"\n\
+                 but it does not exist. Please check how you have specified the\n\
+                 input files to enclone, including the PRE argument.\n"
+            ));
+        }
+    }
+    if !path_exists(&jsonx) {
+        return Err(format!(
+            "\nThe path\n\
+             \"{jsonx}\"\n\
+             does not exist. Please check how you have specified the\n\
+             input files to enclone, including the PRE argument.\n"
+        ));
+    }
+    let mut f = BufReader::new(open_maybe_compressed(&jsonx));
+    // ◼ This loop could be speeded up, see comments above.
+    // Collect the raw bytes of every entry; Ok(None) from the reader ends the loop.
+    let mut xs = Vec::<Vec<u8>>::new();
+    loop {
+        let x = read_vector_entry_from_json(&mut f);
+        if x.is_err() {
+            eprintln!("\nProblem reading {jsonx}.\n");
+            return Err(x.err().unwrap());
+        }
+        match x.unwrap() {
+            None => break,
+            Some(x) => {
+                xs.push(x);
+            }
+        }
+    }
+    // One result slot per entry, so that the parallel loop needs no shared mutable state:
+    // (entry index, vdj cells, gex cells, gex_cells_specified, cr_version, tigs, error).
+    let mut results = Vec::<(
+        usize,
+        Vec<String>,
+        Vec<String>,
+        bool,
+        String,
+        Vec<TigData>,
+        String,
+    )>::new();
+    for i in 0..xs.len() {
+        results.push((
+            i,
+            Vec::<String>::new(),
+            Vec::<String>::new(),
+            false,
+            String::new(),
+            Vec::<TigData>::new(),
+            String::new(),
+        ));
+    }
+    let exiting = AtomicBool::new(false);
+    results.par_iter_mut().for_each(|res| {
+        let i = res.0;
+        let resx = parse_vector_entry_from_json(
+            &xs[i],
+            json,
+            accept_inconsistent,
+            origin_info,
+            li,
+            refdata,
+            to_ref_index,
+            reannotate,
+            ctl,
+            &mut res.1,
+            &mut res.2,
+            &mut res.3,
+            &mut res.4,
+            &mut res.5,
+            &exiting,
+        );
+        if let Err(resx) = resx {
+            res.6 = resx;
+        }
+    });
+    // An empty string in slot 6 means "no error to report"; the first non-empty one, in
+    // entry order, is returned.
+    for result in &results {
+        if !result.6.is_empty() {
+            return Err(result.6.clone());
+        }
+    }
+    // Merge the per-entry outputs in entry order.
+    for result in results.iter_mut().take(xs.len()) {
+        vdj_cells.append(&mut result.1);
+        gex_cells.append(&mut result.2);
+        if result.3 {
+            *gex_cells_specified = true;
+        }
+        if !result.4.is_empty() {
+            *cr_version = result.4.clone();
+        }
+        tigs.append(&mut result.5);
+    }
+    unique_sort(gex_cells);
+    // Group consecutive tigs [r, s) sharing a barcode.  This relies on the contigs of one
+    // barcode being adjacent in the file -- TODO confirm.
+    let mut tig_bc = Vec::<Vec<TigData>>::new();
+    let mut r = 0;
+    while r < tigs.len() {
+        let mut s = r + 1;
+        while s < tigs.len() {
+            if tigs[s].barcode != tigs[r].barcode {
+                break;
+            }
+            s += 1;
+        }
+
+        // For now we require at most four contigs (but we don't yet merge foursies).
+
+        if s - r <= 4 || ctl.clono_filt_opt_def.nmax {
+            let mut bc_tigs = tigs[r..s].to_vec();
+            bc_tigs.sort();
+            tig_bc.push(bc_tigs);
+        }
+        r = s;
+    }
+    unique_sort(vdj_cells);
+
+    // Subsample.
+
+    // Each barcode group is dropped when a uniform random draw falls below 1 - subsample; a
+    // dropped barcode is also removed from vdj_cells and gex_cells.  bin_position is treated
+    // as returning a negative value when the barcode is absent (hence the p >= 0 guards).
+    // Uses thread_rng, so the selection is not reproducible between runs.
+    if ctl.gen_opt.subsample >= 0.0 {
+        let mut rng = rand::thread_rng();
+        let mut to_delete1 = vec![false; tig_bc.len()];
+        let mut to_delete2 = vec![false; vdj_cells.len()];
+        let mut to_delete3 = vec![false; gex_cells.len()];
+        for (bc, del) in tig_bc.iter().zip(to_delete1.iter_mut()) {
+            let y: f64 = rng.gen();
+            if y < 1.0 - ctl.gen_opt.subsample {
+                *del = true;
+                let bc = &bc[0].barcode;
+                let p = bin_position(vdj_cells, bc);
+                if p >= 0 {
+                    to_delete2[p as usize] = true;
+                }
+                let p = bin_position(gex_cells, bc);
+                if p >= 0 {
+                    to_delete3[p as usize] = true;
+                }
+            }
+        }
+        erase_if(&mut tig_bc, &to_delete1);
+        erase_if(vdj_cells, &to_delete2);
+        erase_if(gex_cells, &to_delete3);
+    }
+
+    // Done.
+
+    Ok(tig_bc)
+}
+
+// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
+
+// Parse the JSON annotations file(s).
+//
+// Runs read_json in parallel, once per entry of ctl.origin_info.dataset_path, then merges
+// the results in dataset order:
+// • tig_bc receives the barcode groups of every dataset, concatenated
+// • vdj_cells, gex_cells, gex_cells_specified each receive one element per dataset
+// • fate[i] gains a BarcodeFate::NonProductive entry for every VDJ cell barcode of dataset i
+//   that has no barcode group in the read_json output.
+//
+// Returns Err(message) for the first dataset, in dataset order, that reported an error.
+
+pub fn parse_json_annotations_files(
+    ctl: &EncloneControl,
+    tig_bc: &mut Vec<Vec<TigData>>,
+    refdata: &RefData,
+    to_ref_index: &HashMap<usize, usize>,
+    vdj_cells: &mut Vec<Vec<String>>,
+    gex_cells: &mut Vec<Vec<String>>,
+    gex_cells_specified: &mut Vec<bool>,
+    fate: &mut [HashMap<String, BarcodeFate>],
+) -> Result<(), String> {
+    // (origin index, contig name, V..J length): (?)
+    // Slots as used below: 0 = dataset index, 2 = barcode groups, 4 = cellranger version,
+    // 5 = vdj cells, 6 = gex cells, 7 = gex_cells_specified, 8 = error message (empty = none).
+    // Slots 1 and 3 are never written in this function.
+    let mut results = Vec::<(
+        usize,
+        Vec<(String, usize)>,
+        Vec<Vec<TigData>>,
+        Vec<Vec<u8>>, // logs
+        String,
+        Vec<String>,
+        Vec<String>,
+        bool,
+        String,
+    )>::new();
+    for i in 0..ctl.origin_info.dataset_path.len() {
+        results.push((
+            i,
+            Vec::<(String, usize)>::new(),
+            Vec::<Vec<TigData>>::new(),
+            Vec::<Vec<u8>>::new(),
+            String::new(),
+            Vec::<String>::new(),
+            Vec::<String>::new(),
+            false,
+            String::new(),
+        ));
+    }
+    // Note: only tracking truncated seq and quals initially
+    // File name to look for inside each dataset directory.
+    let ann = if !ctl.gen_opt.cellranger {
+        "all_contig_annotations.json"
+    } else {
+        "contig_annotations.json"
+    };
+    results.par_iter_mut().for_each(|res| {
+        let li = res.0;
+        let json = format!("{}/{ann}", ctl.origin_info.dataset_path[li]);
+        let json_lz4 = format!("{}/{ann}.lz4", ctl.origin_info.dataset_path[li]);
+        if !path_exists(&json) && !path_exists(&json_lz4) {
+            res.8 = format!("\ncan't find {json} or {json_lz4}\n");
+            return;
+        }
+        let resx = read_json(
+            ctl.gen_opt.accept_inconsistent,
+            &ctl.origin_info,
+            li,
+            &json,
+            refdata,
+            to_ref_index,
+            ctl.gen_opt.reannotate,
+            &mut res.4,
+            ctl,
+            &mut res.5,
+            &mut res.6,
+            &mut res.7,
+        );
+        if let Ok(resx) = resx {
+            let tig_bc: Vec<Vec<TigData>> = resx;
+            // Keep the cell list sorted; bin_position is applied to it further down.
+            res.5.sort();
+            res.2 = tig_bc;
+        } else {
+            res.8 = resx.err().unwrap();
+        }
+    });
+    for result in &results {
+        if !result.8.is_empty() {
+            return Err(result.8.clone());
+        }
+    }
+    // `versions` is only consumed by the commented-out check at the end of this function.
+    let mut versions = Vec::<String>::new();
+    for i in 0..results.len() {
+        // A clone is appended because results[i].2 is read again below.
+        tig_bc.append(&mut results[i].2.clone());
+        // ctl.gen_opt.cr_version = results[i].4.clone();
+        if results[i].4.is_empty() {
+            versions.push("≤3.1".to_string());
+        } else {
+            versions.push(results[i].4.clone());
+        }
+        vdj_cells.push(results[i].5.clone());
+        gex_cells.push(results[i].6.clone());
+        gex_cells_specified.push(results[i].7);
+
+        // Mark which VDJ cells have at least one barcode group; the rest get the
+        // NonProductive fate.
+        let cells = &results[i].5;
+        let mut found = vec![false; cells.len()];
+        let tigs = &results[i].2;
+        for tig in tigs {
+            let p = bin_position(cells, &tig[0].barcode);
+            if p >= 0 {
+                found[p as usize] = true;
+            }
+        }
+        for j in 0..found.len() {
+            if !found[j] {
+                fate[i].insert(cells[j].clone(), BarcodeFate::NonProductive);
+            }
+        }
+    }
+    /*
+    if !ctl.gen_opt.internal_run {
+        unique_sort(&mut versions);
+        if versions.len() > 1
+            && versions != vec!["4.0".to_string(), "4009.52.0-82-g2244c685a".to_string()]
+        {
+            let args: Vec<String> = env::args().collect();
+            return Err(format!(
+                "\nYou're using output from multiple Cell Ranger versions = {},\n\
+                 which is not allowed. Your command was:\n{}\n",
+                versions.iter().format(", "),
+                args.iter().format(","),
+            ));
+        }
+    }
+    */
+    Ok(())
+}
diff --git a/enclone_core/Cargo.toml b/enclone_core/Cargo.toml
index 692d93f2b..00c348eb1 100644
--- a/enclone_core/Cargo.toml
+++ b/enclone_core/Cargo.toml
@@ -1,44 +1,60 @@
 [package]
 name = "enclone_core"
-version = "0.4.49"
+version = "0.5.219"
 authors = ["""David Jaffe <david.jaffe@10xgenomics.com>,
+              Nigel Delaney <nigel.delaney@10xgenomics.com>,
               Keri Dockter <keri.dockter@10xgenomics.com>,
+              Jessica Hamel <jessica.hamel@10xgenomics.com>,
+              Lance Hepler <lance.hepler@10xgenomics.com>,
               Shaun Jackman <shaun.jackman@10xgenomics.com>,
               Sreenath Krishnan <sreenath.krishnan@10xgenomics.com>,
               Meryl Lewis <meryl.lewis@10xgenomics.com>,
+              Alvin Liang <alvin.liang@10xgenomics.com>,
               Patrick Marks <patrick.marks@10xgenomics.com>,
               Wyatt McDonnell <wyatt.mcdonnell@10xgenomics.com>"""]
-edition = "2018"
-build = "build.rs"
+edition = "2021"
+license-file = "LICENSE.txt"
 publish = false
+include = ["src/*.rs", "LICENSE.txt", "src/mammalian_fixed_len.table"]
 
 # Please do not edit crate versions within this file. Instead edit the file master.toml
 # in the root of the enclone repo.
[dependencies] -ansi_escape = "0.1.0" -bio = "0.31.0" -bytes = "0.5.5" -debruijn = "0.3.2" -io_utils = "0.2" -mirror_sparse_matrix = "0.1.4" -perf_stats = "0.1.2" -regex = "1.3.1" -serde = "1.0.90" -serde_derive = "1.0.102" -serde_json = "*" -string_utils = "0.1.1" -vector_utils = "0.1.3" - -[dependencies.hdf5] +amino = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +ansi_escape = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +attohttpc = { version = ">=0.19, <0.27", default-features = false, features = ["compress", "tls-rustls"] } +bio_edit = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +debruijn = "0.3" +enclone_proto = { path = "../enclone_proto" } +evalexpr = ">=7, <12" +io_utils = { version = "0.3", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +itertools.workspace = true +lazy_static = "1" +mirror_sparse_matrix = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +perf_stats = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +qd = { git = "https://github.com/Barandis/qd" } +rayon = "1" +regex = { version = "1", default-features = false, features = ["std", "perf"] } +stats_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +string_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +superslice = "1" +tables = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vdj_ann = { version = "0.4", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vector_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +zstd = ">=0.10, <0.13" +serde = "1.0" + 
+[target.'cfg(not(windows))'.dependencies] +tilde-expand = "0.1" + +[target.'cfg(not(windows))'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" features = ["conda"] -git = "https://github.com/pmarks/hdf5-rs.git" -rev = "0c98e57b2af1f4247708c198b324ba3a8bc18dba" - -[build-dependencies] -chrono = "0.4.11" -string_utils = "0.1.1" - - - +default-features = false +[target.'cfg(windows)'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" +default-features = false diff --git a/enclone_core/LICENSE.txt b/enclone_core/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/enclone_core/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/enclone_core/build.rs b/enclone_core/build.rs deleted file mode 100644 index 98259c2d3..000000000 --- a/enclone_core/build.rs +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// The purpose of this file is to make some version information available so that it can be -// printed out at appropriate points by enclone. This files is a slightly modified version -// of https://vallentin.dev/2019/06/06/versioning. 
- -extern crate chrono; -extern crate string_utils; - -use chrono::prelude::*; -use std::env::consts::{ARCH, OS}; -use std::process::Command; -use string_utils::*; - -#[cfg(debug_assertions)] -const BUILD_TYPE: &'static str = "debug"; -#[cfg(not(debug_assertions))] -const BUILD_TYPE: &'static str = "release"; - -fn main() { - let version_string = format!( - "{} : {}{} : {} : {} : {} : {}", - get_branch_name(), - get_commit_hash(), - if is_working_tree_clean() { "" } else { "+" }, - get_commit_date(), - BUILD_TYPE, - OS, - ARCH - ); - println!("cargo:rustc-env=VERSION_STRING={}", version_string); -} - -fn get_commit_hash() -> String { - match std::env::var("GITHUB_SHA") { - Ok(v) => return v[0..7].to_string(), - _ => (), - } - - let output = Command::new("git") - .arg("log") - .arg("-1") - .arg("--pretty=format:%h") // Abbreviated commit hash - .current_dir(env!("CARGO_MANIFEST_DIR")) - .output() - .unwrap(); - assert!(output.status.success()); - String::from_utf8_lossy(&output.stdout).to_string() -} - -fn is_github() -> bool { - match std::env::var("GITHUB_SHA") { - Ok(_) => true, - _ => false, - } -} - -// We used to have the commit date here but this is easier and serves the same purpose for -// the version string. 
- -fn get_commit_date() -> String { - Local::now().to_string().before(" ").to_string() -} - -fn get_branch_name() -> String { - if is_github() { - match std::env::var("GITHUB_REF") { - Ok(v) => return v, - _ => return "master".to_string(), - } - } - - let output = Command::new("git") - .arg("rev-parse") - .arg("--abbrev-ref") - .arg("HEAD") - .current_dir(env!("CARGO_MANIFEST_DIR")) - .output() - .unwrap(); - assert!(output.status.success()); - String::from_utf8_lossy(&output.stdout) - .trim_end() - .to_string() -} - -fn is_working_tree_clean() -> bool { - match std::env::var("GITHUB_SHA") { - Ok(_) => return true, - _ => (), - } - - let status = Command::new("git") - .arg("diff") - .arg("--quiet") - .arg("--exit-code") - .current_dir(env!("CARGO_MANIFEST_DIR")) - .status() - .unwrap(); - status.code().unwrap() == 0 -} diff --git a/enclone_core/src/align_to_vdj_ref.rs b/enclone_core/src/align_to_vdj_ref.rs new file mode 100644 index 000000000..cdfe7a654 --- /dev/null +++ b/enclone_core/src/align_to_vdj_ref.rs @@ -0,0 +1,432 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. +// +// Align a sequence to a concatenated V(D)J reference, encouraging insertions exactly at +// junction points and deletions that bridge junction points. +// +// ASSESSMENT +// +// enclone BI=1-4,9 BUILT_IN SUBSET_JSON=subset/outs/all_contig_annotations.json +// MIN_EXACTS=2 MAX_EXACTS=10 NOPRINT +// +// (slow) +// +// enclone BCR=subset GVARS=d_inconsistent_%,d_inconsistent_n NOPRINT +// +// (fast) +// +// inconsistency rate from this = 13.33% +// sample size = 53,373 +// +// If you mess with this, you can test your changes with "cargo t test_enclone_d" and +// "merge_html BUILD" and then manually examine the D gene page. Note carefully that we do not +// want to worsen the placement of indels. Also run the above big test. 
+ +use bio_edit::alignment::pairwise::{Aligner, Scoring, MIN_SCORE}; +use bio_edit::alignment::AlignmentMode; +use bio_edit::alignment::AlignmentOperation::{Del, Ins, Match, Subst}; +use std::fmt::Write; +use string_utils::strme; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Create zero-one vectors corresponding to indel-free aligned parts of the D gene; a zero denotes +// a mismatch. + +pub fn zero_one( + ops: &Vec<bio_edit::alignment::AlignmentOperation>, + start: usize, + stop: usize, +) -> Vec<Vec<u8>> { + let mut zos = Vec::<Vec<u8>>::new(); + { + let mut rpos = 0; + let mut zo = Vec::<u8>::new(); + for &om in ops { + match om { + Match => { + if rpos >= start && rpos < stop { + zo.push(1); + } + rpos += 1; + } + Subst => { + if rpos >= start && rpos < stop { + zo.push(0); + } + rpos += 1; + } + Del => { + if !zo.is_empty() { + zos.push(zo.clone()); + zo.clear(); + } + rpos += 1; + } + Ins => { + if !zo.is_empty() { + zos.push(zo.clone()); + zo.clear(); + } + } + _ => {} + }; + } + if !zo.is_empty() { + zos.push(zo.clone()); + } + } + zos +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Compute a match bit score. + +pub fn match_bit_score(zos: &Vec<Vec<u8>>) -> f64 { + let mut bits = 0.0_f64; + for zo in zos { + for start in 0..zo.len() { + for stop in start + 1..=zo.len() { + let b = &zo[start..stop]; + let n = b.len(); + let mut k = 0; // number of mismatches + for &bz in b { + if bz == 0 { + k += 1; + } + } + + // Let p be the probability that a random DNA sequence of length n will match a + // given DNA sequence with ≤ k mismatches = sum{l=0..=k} (n choose l) * 3^l / 4^n. + + let mut sum = 0.0; + let mut choose = 1.0; + for l in 0..=k { + sum += choose; + choose *= (n - l) as f64; + choose /= (l + 1) as f64; + choose *= 3.0; + } + let p = sum / 4.0_f64.powi(n as i32); + + // Update bits. 
+ + bits = bits.max(-p.log2()); + } + } + } + bits +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// This is the same as bio::Alignment::cigar(&self, hard_clip: bool), but works on a vector +// of alignment operations, and assumes hard_clip is false. Code copied from bio source. + +pub fn cigar( + ops: &Vec<bio_edit::alignment::AlignmentOperation>, + xstart: usize, + xend: usize, + xlen: usize, +) -> String { + use bio_edit::alignment::AlignmentOperation; + let clip_str = "S"; + let add_op = |op: AlignmentOperation, k, cigar: &mut String| match op { + AlignmentOperation::Match => write!(cigar, "{k}=").unwrap(), + AlignmentOperation::Subst => write!(cigar, "{k}X").unwrap(), + AlignmentOperation::Del => write!(cigar, "{k}D").unwrap(), + AlignmentOperation::Ins => write!(cigar, "{k}I").unwrap(), + _ => {} + }; + + let mut cigar = "".to_owned(); + if ops.is_empty() { + return cigar; + } + + let mut last = ops[0]; + if xstart > 0 { + write!(cigar, "{xstart}{clip_str}").unwrap(); + } + let mut k = 1; + for &op in ops[1..].iter() { + if op == last { + k += 1; + } else { + add_op(last, k, &mut cigar); + k = 1; + } + last = op; + } + add_op(last, k, &mut cigar); + if xlen > xend { + write!(cigar, "{}{clip_str}", xlen - xend).unwrap() + } + cigar +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn align_to_vdj_ref( + seq: &[u8], + vref: &[u8], + dref: &[u8], + d2ref: &[u8], + jref: &[u8], + drefname: &str, // useful for debugging + left: bool, + jscore_match: i32, + jscore_mismatch: i32, + jscore_gap_open: i32, + jscore_gap_extend: i32, + jscore_bits_multiplier: f64, +) -> (Vec<bio_edit::alignment::AlignmentOperation>, f64) { + // Define penalties. 
+ + let matchp = jscore_match; + let mismatch = jscore_mismatch; + let gap_open = jscore_gap_open; + let gap_extend = jscore_gap_extend; + let gap_open_at_boundary = -40_i32; + let gap_extend_at_boundary = -10_i32; + let del_gap_extend_at_boundary = -20_i32; + let align_div = 10.0; + let bits_multiplier = jscore_bits_multiplier; + const MIN_BITS_FOR_D2: f64 = 14.0; + const D2_PENALTY: f64 = -15.0; + + // Define scoring function. It does not appear that the aligner is scoring in exactly the + // intended fashion in all cases. This is likely more of an issue for alv and alj. The + // problem has to do with the interpretation of being at the boundary. This is still somewhat + // of a problem since although we're rescoring to "fix" the problem, the aligner might have + // chosen a suboptimal placement in the first place. + // + // Note handling of deletions that bridge boundaries. + + let rescore = |ops: &Vec<bio_edit::alignment::AlignmentOperation>| -> f64 { + let mut score = 0_i32; + let mut i = 0; + let mut rpos = 0; + let b1 = vref.len(); + let b2 = vref.len() + dref.len(); + let b3 = vref.len() + dref.len() + d2ref.len(); + while i < ops.len() { + if ops[i] == Match { + rpos += 1; + score += matchp; + i += 1; + } else if ops[i] == Subst { + rpos += 1; + score += mismatch; + i += 1; + } else if ops[i] == Ins { + let mut j = i + 1; + while j < ops.len() && ops[j] == Ins { + j += 1; + } + if (rpos == vref.len() + dref.len() + d2ref.len()) + || (rpos == vref.len() || rpos == vref.len() + dref.len()) + { + score += gap_open_at_boundary + (j - i - 1) as i32 * gap_extend_at_boundary; + } else { + score += gap_open + (j - i - 1) as i32 * gap_extend; + } + i = j; + } else if ops[i] == Del { + let mut j = i + 1; + while j < ops.len() && ops[j] == Del { + j += 1; + } + if (rpos <= b1 && rpos + j - i >= b1) + || (rpos <= b2 && rpos + j - i >= b2) + || (rpos <= b3 && rpos + j - i >= b3) + { + score += gap_open_at_boundary + (j - i - 1) as i32 * del_gap_extend_at_boundary; + } 
else { + score += gap_open + (j - i - 1) as i32 * gap_extend; + } + rpos += j - i; + i = j; + } + } + score as f64 / align_div + }; + + // Build concatenation. + + let mut concat = Vec::<u8>::with_capacity(vref.len() + dref.len() + d2ref.len() + jref.len()); + concat.extend(vref); + concat.extend(dref); + concat.extend(d2ref); + concat.extend(jref); + + // Set clip penalties. Note that yclip_suffix was set to zero. This was + // accompanied by a change to bio_edit in commit ccabb0dd1768738bdeee5b62458048d74f6dcfab, + // and the entire commit is very flaky. The alignment of these two sequences illustrates + // the problem if one does not make the commit: + // TTACTGTAAAGTCATGCTCTATGATAGTCGTGGTTCTGACTACTACTACGTTATGGACGTCTGGGGC + // TTACTGTGCGAGACAGTATTACTATGATAGTAGTGGTTATTACTACATTACTACTACTACTACGGTATGGACGTCTGGGGC. + + // Make alignment. + + let mut scoring = Scoring::from_scores(gap_open, gap_extend, matchp, mismatch); + scoring.xclip_prefix = MIN_SCORE; + scoring.xclip_suffix = MIN_SCORE; + scoring.yclip_prefix = MIN_SCORE; + scoring.yclip_suffix = 0; + let mut aligner = Aligner::with_scoring(scoring); + let mut gap_open_fn = vec![0_i32; concat.len() + 1]; + for (j, gap) in gap_open_fn.iter_mut().enumerate().skip(1) { + if j == vref.len() + || j == vref.len() + dref.len() + || j == vref.len() + dref.len() + d2ref.len() + { + *gap = gap_open_at_boundary; + } else { + *gap = gap_open; + } + } + let mut gap_extend_fn = vec![0_i32; concat.len() + 1]; + for (j, gap) in gap_extend_fn.iter_mut().enumerate().skip(1) { + if j == vref.len() + || j == vref.len() + dref.len() + || j == vref.len() + dref.len() + d2ref.len() + { + *gap = gap_extend_at_boundary; + } else { + *gap = gap_extend; + } + } + let mut al = aligner.custom_with_gap_fns(seq, &concat, &gap_open_fn, &gap_extend_fn); + al.mode = AlignmentMode::Semiglobal; + let mut ops = al.operations; + + // Fix alignments. 
+ + let mut edits = Vec::<(usize, bio_edit::alignment::AlignmentOperation)>::new(); + let mut i = 0; + let mut pos = 0; + let mut rpos = 0; + let b1 = vref.len(); + let b2 = vref.len() + dref.len(); + let b3 = vref.len() + dref.len() + d2ref.len(); + let mut edited = vec![false; ops.len()]; + while i < ops.len() { + if ops[i] == Match || ops[i] == Subst { + pos += 1; + rpos += 1; + i += 1; + } else if ops[i] == Ins { + let mut j = i + 1; + while j < ops.len() && ops[j] == Ins { + j += 1; + } + pos += j - i; + i = j; + } else if ops[i] == Del { + let mut j = i + 1; + while j < ops.len() && ops[j] == Del { + j += 1; + } + let k = j - i; + for bi in [b1, b2, b3].iter() { + let bi = *bi; + + // Maybe can shift right one. + + if rpos < bi + && rpos + k >= bi + && j < ops.len() + && pos < seq.len() + && rpos < concat.len() + && ops[j] == Subst + && seq[pos] == concat[rpos] + { + edits.push((i, Match)); + edits.push((j, Del)); + edited[i] = true; + edited[j] = true; + break; + + // Maybe can shift left one. + } else if rpos + k > bi + && i > 0 + && ops[i - 1] == Subst + && seq[pos - 1] == concat[rpos + k - 1] + && !edited[i - 1] + { + edits.push((i - 1, Del)); + edits.push((j - 1, Match)); + edited[i - 1] = true; + edited[j - 1] = true; + break; + } + } + rpos += j - i; + i = j; + } + } + for x in edits.iter() { + ops[x.0] = x.1; + } + + // Create zero-one vectors corresponding to indel-free aligned parts of the D gene; a zero + // denotes a mismatch. Then compute a match bit score. + + let zos1 = zero_one(&ops, vref.len(), vref.len() + dref.len()); + let zos2 = zero_one( + &ops, + vref.len() + dref.len(), + vref.len() + dref.len() + d2ref.len(), + ); + let bits1 = match_bit_score(&zos1); + let bits2 = match_bit_score(&zos2); + let mut bits = bits1.max(bits2); + if !d2ref.is_empty() && bits1.min(bits2) < MIN_BITS_FOR_D2 { + bits = 0.0; + } + + // Possibly emit verbose logging. 
+ + let verbose = false; + if verbose { + let full_score = rescore(&ops) + bits_multiplier * bits; + println!( + "\n{} ==> score = {:.1}, bits = {:.1}, full_score = {:.1}", + drefname, + rescore(&ops), + bits, + full_score, + ); + println!("seq = {}", strme(seq)); + println!("ref = {}", strme(&concat)); + use itertools::Itertools; + for zo in zos1.iter() { + print!("{}", zo.iter().format("")); + } + println!(); + println!("ops = {:?}", ops.iter().format(",")); + } + + // Add a constant times bits to the alignment score (null case handled differently). + // + // Note that we do not allow the null case if there is an insertion in the alignment. In + // an earlier version, we allowed many more null cases, and we believe that this was distorting + // our inconsistency scoring. This is because calling null makes it easier to be consistent. + + if left && dref.is_empty() { + if !ops.contains(&Ins) { + bits = 10.0; + } else { + bits = -1000.0; + } + } + let mut full_score = rescore(&ops) + bits_multiplier * bits; + if drefname.contains(':') { + full_score += D2_PENALTY; + } + + // Return the alignment and score. + + (ops, full_score) +} diff --git a/enclone_core/src/allowed_vars.rs b/enclone_core/src/allowed_vars.rs new file mode 100644 index 000000000..68c2ea10e --- /dev/null +++ b/enclone_core/src/allowed_vars.rs @@ -0,0 +1,177 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. +// +// Field (variable) names. + +// Lead variables for exact subclonotypes and cells.
+ +pub const LVARS_ALLOWED: [&str; 42] = [ + "datasets", + "origins", + "donors", + "datasets_cell", + "origins_cell", + "donors_cell", + "n", + "gex", + "gex_min", + "gex_max", + "gex_μ", + "gex_Σ", + "gex_cell", + "n_gex_cell", + "n_gex", + "n_b", + "clust", + "cred", + "cred_cell", + "type", + "entropy", + "entropy_cell", + "near", + "far", + "dref", + "dref_aa", + "dref_max", + "ext", + "mark", + "inkt", + "mait", + "sec", + "mem", + "filter", + "nchains", + "nchains_present", + "clonotype_ncells", + "nbc", + "hcomp", + "jun_ins", + "jun_mat", + "jun_sub", +]; + +// Chain variables that can be used for contigs and chains + +pub const CVARS_ALLOWED: [&str; 95] = [ + "var", + "u", + "u_min", + "u_max", + "u_Σ", + "u_μ", + "comp", + "edit", + "cigar", + "r", + "r_min", + "r_max", + "r_Σ", + "r_μ", + "const", + "white", + "cdr1_dna", + "cdr1_dna_ref", + "cdr2_dna", + "cdr2_dna_ref", + "cdr3_dna", + "cdr1_len", + "cdr2_len", + "cdr3_len", + "cdr1_aa", + "cdr1_aa_north", + "cdr1_aa_ref", + "cdr2_aa", + "cdr2_aa_north", + "cdr2_aa_ref", + "cdr3_aa", + "cdr3_aa_north", + "cdr3_aa_conx", + "cdr3_aa_conp", + "fwr1_dna", + "fwr1_dna_ref", + "fwr2_dna", + "fwr2_dna_ref", + "fwr3_dna", + "fwr3_dna_ref", + "fwr4_dna", + "fwr4_dna_ref", + "fwr1_len", + "fwr2_len", + "fwr3_len", + "fwr4_len", + "fwr1_aa", + "fwr1_aa_ref", + "fwr2_aa", + "fwr2_aa_ref", + "fwr3_aa", + "fwr3_aa_ref", + "fwr4_aa", + "fwr4_aa_ref", + "ulen", + "vjlen", + "clen", + "cdiff", + "udiff", + "notes", + "d_univ", + "d_donor", + "aa%", + "dna%", + "nval", + "nival", + "nnval", + "valumis", + "nvalumis", + "ivalumis", + "valbcumis", + "nvalbcumis", + "ivalbcumis", + "d_frame", + "d_start", + "v_name_orig", + "v_name", + "d_name", + "j_name", + "v_id", + "d_id", + "j_id", + "const_id", + "utr_id", + "utr_name", + "cdr3_start", + "v_start", + "d1_name", + "d2_name", + "d1_score", + "d2_score", + "d_delta", + "d_Δ", + "allele", + "allele_d", +]; + +// We should not have to specify this. 
+pub const CVARS_ALLOWED_PCELL: [&str; 3] = ["u_cell", "r_cell", "v_name_orig_cell"]; + +pub const PLVARS_ALLOWED: [&str; 5] = [ + "group_id", + "group_ncells", + "clonotype_id", + "exact_subclonotype_id", + "barcodes", +]; + +pub const PCVARS_ALLOWED: [&str; 11] = [ + "var_indices_dna", + "var_indices_aa", + "share_indices_dna", + "share_indices_aa", + "cdr3_aa", + "seq", + "vj_seq", + "vj_seq_nl", + "vj_aa", + "vj_aa_nl", + "var_aa", +]; + +pub const GVARS_ALLOWED: [&str; 2] = ["d_inconsistent_%", "d_inconsistent_n"]; diff --git a/enclone_core/src/barcode_fate.rs b/enclone_core/src/barcode_fate.rs new file mode 100644 index 000000000..0f6f91b64 --- /dev/null +++ b/enclone_core/src/barcode_fate.rs @@ -0,0 +1,88 @@ +use serde::{Deserialize, Serialize}; + +/// Different reasons why a barcode which have productive contig(s) +/// are not called as cells by enclone +/// +/// For more explanation, see <https://10xgenomics.github.io/enclone/pages/auto/default_filters.html> +/// and <https://10xgenomics.github.io/enclone/pages/auto/help.special.html> +#[derive(Serialize, Deserialize, Clone)] +pub enum BarcodeFate { + Doublet, + WeakChains, + /// The barcode was not called as cell by the assembler + NotAsmCell, + FoursieKill, + NotGexCell, + Signature, + /// Find and mark for deletion exact subclonotypes having a variant base in V..J that, + /// accounting for all the cells in all the exact subclonotypes, never occurs as Q60 + /// doesn't occur as Q40 twice, and disagrees with the reference. + Qual, + Umi, + UmiRatio, + /// Filter out putative gel bead contamination. We look for cases where inside a + /// given exact subclonotype, the same first or last half of a barcode is reused, and one + /// instance has at least 10-fold higher UMI count. If the fraction of the "bad" + /// clones is at least 20%, delete them. + GelBeadContamination, + /// Delete duplicated barcodes within an exact subclonotype. 
+ /// Should not happen when enclone is invoked via cellranger + DuplicatedBarcode, + /// If a V..J segment appears in exactly one dataset, with frequency n, let x be the total + /// number of productive pairs for that dataset, and let y be the total number of productive + /// pairs for all datasets from the same origin. If (x/y)^n <= 10^-6, i.e. the probability + /// that assuming even distribution, all instances of that V..J ended up in that one dataset, + /// delete all the productive pairs for that V..J segment that do not have at least 100 + /// supporting UMIs. (Note no attempt to do Bonferroni correction.) + /// + /// For the case of two datasets for one origin, with equal numbers of productive pairs in + /// each, this corresponds roughly to the case n = 20. + /// + /// Note that we could modify this to allow *some* occurrences in other datasets. + /// + /// There are only certain ways that these misdistribution events could happen: + /// + /// 1. A cell (and particularly a plasma cell or plasmablast) bursts after drawing cells to + /// make libraries, leaving behind cell fragments that seed separate GEMs + /// (probably most likely). + /// 2. Multiple gel beads end up in one GEM. + /// 3. Something involving like cells sticking together and subsequently separating. + /// 4. Physical contamination of libraries. + /// 5. Informatic mixup of libraries. + /// 6. Nothing other than a low probability event (unlikely). + /// + /// Note that in case 1, we have evidence that a plasma cell or plasmablast existed in the + /// original cells that were drawn (perhaps breaking up in the process of drawing), and was + /// subsequently disintegrated. + Cross, + /// Filter out exact subclonotypes having more than one chain, but all of the same type. + /// For example, the filter removes all exact subclonotypes having two TRA chains and + /// no other chains + Improper, + GraphFilter, + /// No productive contigs for this barcode.
This will only happen + /// when certain default filters are turned off + NonProductive, +} + +impl BarcodeFate { + pub fn label(&self) -> &'static str { + match self { + BarcodeFate::Doublet => "DOUBLET", + BarcodeFate::WeakChains => "WEAK_CHAINS", + BarcodeFate::NotAsmCell => "CELL", + BarcodeFate::FoursieKill => "FOURSIE_KILL", + BarcodeFate::NotGexCell => "GEX", + BarcodeFate::Signature => "SIGNATURE", + BarcodeFate::Qual => "QUAL", + BarcodeFate::Umi => "UMI", + BarcodeFate::UmiRatio => "UMI_RATIO", + BarcodeFate::GelBeadContamination => "WHITEF", + BarcodeFate::DuplicatedBarcode => "BC_DUP", + BarcodeFate::Cross => "CROSS", + BarcodeFate::Improper => "IMPROPER", + BarcodeFate::GraphFilter => "GRAPH_FILTER", + BarcodeFate::NonProductive => "PRODUCTIVE", + } + } +} diff --git a/enclone_core/src/cell_color.rs b/enclone_core/src/cell_color.rs new file mode 100644 index 000000000..2289f2eb6 --- /dev/null +++ b/enclone_core/src/cell_color.rs @@ -0,0 +1,74 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. +// +// Define the coloring scheme for cells. +// +// color_spec: +// - const,color all cells are assigned the given color (default/black) +// - iso,color1,...,colorn by isotype; uses default colors if none provided +// - var,name,minmax,min,max by values of given variable +// - catvar,varlist,n by categorical variables +// - dataset by dataset, using the color field in META +// - origin,origin1,color1,...,originn,colorn by origin, using the given assignment +// - bc by color to barcode assignment, via BC or META/bc +// - mark (internal) +// +// Related, doesn't really belong here: +// +// HONEY=file:color-spec:legend-spec +// none +// origin,blue,123085,red,123089 +// +// The default for :color-spec is const,black. The :legend-spec would usually be omitted. +// +// Multiple HONEY commands can be supplied (but only one for enclone visual). 
+ +use std::collections::HashMap; + +#[derive(Clone)] +pub struct ColorByIsotype { + pub color: Vec<String>, + pub show_legend: bool, +} + +// For coloring by variable value, the value of a variable is first truncated to the range +// [min, max], then scaled to [0, 1], then converted to 0,...,255, and then converted to a color +// using the turbo color scheme. If the value of a variable is unspecified for a given cell, +// or not convertible to a number, black is used. + +#[derive(Clone)] +pub struct ColorByVariableValue { + pub var: String, + pub display_var: String, + pub min: Option<f64>, + pub max: Option<f64>, +} + +#[derive(Clone)] +pub struct ColorByCategoricalVariableValue { + pub vars: Vec<String>, + pub maxcat: usize, +} + +#[derive(Clone)] +pub struct ColorByDataset {} + +#[derive(Clone)] +pub struct ColorBySample { + pub by_meta: bool, + pub specification: HashMap<String, String>, +} + +#[derive(Clone)] +pub struct ColorByBarcodeSpecification {} + +#[derive(Clone, Default)] +pub enum CellColor { + #[default] + Unspecified, + ByIsotype(ColorByIsotype), + ByVariableValue(ColorByVariableValue), + ByCategoricalVariableValue(ColorByCategoricalVariableValue), + BySample(ColorBySample), + ByBarcodeSpecification(ColorByBarcodeSpecification), + ByDataset(ColorByDataset), +} diff --git a/enclone_core/src/combine_group_pics.rs b/enclone_core/src/combine_group_pics.rs new file mode 100644 index 000000000..579a1960f --- /dev/null +++ b/enclone_core/src/combine_group_pics.rs @@ -0,0 +1,86 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +use crate::defs::justification; +use ansi_escape::{emit_eight_bit_color_escape, emit_end_escape}; +use io_utils::{fwrite, fwriteln}; +use std::io::Write; +use string_utils::{stringme, strme}; +use tables::print_tabular; + +pub fn combine_group_pics( + group_pics: &[String], + last_widths: &[u32], + parseable_stdouth: bool, + noprint: bool, + noprintx: bool, + html: bool, + ngroup: bool, + pretty: bool, +) -> String { + let mut glog = Vec::<u8>::new(); + let mut done = false; + if noprint && parseable_stdouth && !group_pics.is_empty() { + let mut rows = Vec::<Vec<String>>::new(); + for pic in group_pics { + let mut r = pic.split('\n'); + r.next_back(); // Skip the last element + for rj in r { + let s = rj.split('\t').map(str::to_owned).collect(); + rows.push(s); + } + } + let mut same = true; + let n = rows[0].len(); + for row in rows.iter().skip(1) { + if row.len() != n { + same = false; + } + } + if same { + let justify = rows[0] + .iter() + .map(String::as_str) + .map(justification) + .collect(); + print_tabular(&mut glog, &rows, 2, Some(justify)); + done = true; + } + } + + if !done { + // Getting the newlines right is tricky, so they're marked. + + for i in 0..group_pics.len() { + if !noprint { + if !html && !ngroup && (!noprintx || i > 0) { + fwriteln!(glog, ""); // NEWLINE 1 + } + + // If we just printed a clonotype box, output a bar.
+ + if i > 0 && last_widths[i - 1] > 0 { + if ngroup || html { + fwriteln!(glog, ""); // NEWLINE 2 + } + if pretty { + let mut log = Vec::<u8>::new(); + emit_eight_bit_color_escape(&mut log, 44); + fwrite!(glog, "{}", strme(&log)); + } + fwrite!(glog, "╺{}╸", "━".repeat((last_widths[i - 1] - 2) as usize)); + if !ngroup { + fwriteln!(glog, ""); // NEWLINE 3 + } + fwriteln!(glog, ""); // NEWLINE 4 + if pretty { + let mut log = Vec::<u8>::new(); + emit_end_escape(&mut log); + fwrite!(glog, "{}", strme(&log)); + } + } + } + glog.append(&mut group_pics[i].as_bytes().to_vec()); + } + } + stringme(&glog) +} diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index fe3d4bad5..1ab19cd0a 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -1,93 +1,59 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. -use debruijn::dna_string::*; +use crate::cell_color::CellColor; +use crate::linear_condition::LinearCondition; +use debruijn::dna_string::DnaString; +use evalexpr::Node; use hdf5::Dataset; -use mirror_sparse_matrix::*; -use perf_stats::*; + +use io_utils::{open_for_read, path_exists}; +use mirror_sparse_matrix::MirrorSparseMatrix; +use perf_stats::elapsed; + +#[cfg(not(target_os = "windows"))] +use perf_stats::peak_mem_usage_gb; + use regex::Regex; use std::cmp::max; use std::collections::HashMap; -use std::time::Instant; -use string_utils::*; -use vector_utils::*; - -// Field (variable) names. -// Lead variables for exact subclonotypes and cells. 
-pub const LVARS_ALLOWED: [&str; 28] = [ - "datasets", - "origins", - "donors", - "n", - "gex", - "gex_min", - "gex_max", - "gex_μ", - "gex_Σ", - "gex_cell", - "n_gex_cell", - "n_gex", - "n_b", - "clust", - "cred", - "cred_cell", - "type", - "entropy", - "entropy_cell", - "near", - "far", - "dref", - "ext", - "mark", - "inkt", - "mait", - "sec", - "mem", -]; -// Chain variables that can be used for contigs and chains -pub const CVARS_ALLOWED: [&str; 25] = [ - "var", "u", "u_min", "u_max", "u_Σ", "u_μ", "comp", "edit", "r", "r_min", "r_max", "r_Σ", - "r_μ", "const", "white", "cdr3_dna", "cdr3_len", "ulen", "vjlen", "clen", "cdiff", "udiff", - "notes", "d_univ", "d_donor", +use std::io::BufRead; +use std::sync::atomic::AtomicBool; +use std::time::{Instant, SystemTime}; +use string_utils::TextUtils; +use vector_utils::unique_sort; + +pub static FAILED: AtomicBool = AtomicBool::new(false); + +pub const HELP_PAGES: [&str; 21] = [ + "all", + "amino", + "color", + "command", + "cvars", + "display", + "example1", + "example2", + "faq", + "filter", + "glossary", + "how", + "indels", + "input", + "input_tech", + "lvars", + "main", + "parseable", + "quick", + "setup", + "special", ]; -pub const CVARS_ALLOWED_PCELL: [&str; 2] = ["u_cell", "r_cell"]; - -pub const PLVARS_ALLOWED: [&str; 7] = [ - "group_id", - "group_ncells", - "clonotype_id", - "clonotype_ncells", - "nchains", - "exact_subclonotype_id", - "barcodes", -]; - -pub const PCVARS_ALLOWED: [&str; 19] = [ - "v_name", - "d_name", - "j_name", - "v_id", - "d_id", - "j_id", - "var_indices_dna", - "var_indices_aa", - "share_indices_dna", - "share_indices_aa", - "v_start", - "const_id", - "utr_id", - "utr_name", - "cdr3_start", - "cdr3_aa", - "seq", - "vj_seq", - "var_aa", -]; +pub const MAX_CDR3_DIFFS_TO_JOIN: usize = 5; // Clonotyping algorithm heuristics. 
-#[derive(Default)] +#[derive(Default, PartialEq, Eq)] pub struct ClonotypeHeuristics { pub max_diffs: usize, pub max_degradation: usize, @@ -95,195 +61,9 @@ pub struct ClonotypeHeuristics { pub ref_j_trim: usize, } -#[derive(Clone)] -pub struct LinearCondition { - pub coeff: Vec<f64>, // left hand side (lhs) coefficients - pub var: Vec<String>, // left hand side variables (parallel to coefficients) - pub rhs: f64, // right hand side; sum of lhs must exceed rhs - pub sense: String, // le, ge, lt, gt -} - -impl LinearCondition { - pub fn n(&self) -> usize { - self.coeff.len() - } - - pub fn new(x: &str) -> LinearCondition { - let y = x.replace(" ", ""); - let lhs: String; - let mut rhs: String; - let sense: String; - if y.contains(">=") { - lhs = y.before(">=").to_string(); - rhs = y.after(">=").to_string(); - sense = "ge".to_string(); - } else if y.contains('≥') { - lhs = y.before("≥").to_string(); - rhs = y.after("≥").to_string(); - sense = "ge".to_string(); - } else if y.contains("<=") { - lhs = y.before("<=").to_string(); - rhs = y.after("<=").to_string(); - sense = "le".to_string(); - } else if y.contains('≤') { - lhs = y.before("≤").to_string(); - rhs = y.after("≤").to_string(); - sense = "le".to_string(); - } else if y.contains('<') { - lhs = y.before("<").to_string(); - rhs = y.after("<").to_string(); - sense = "lt".to_string(); - } else if y.contains('>') { - lhs = y.before(">").to_string(); - rhs = y.after(">").to_string(); - sense = "gt".to_string(); - } else { - eprintln!( - "\nImproperly formatted condition, no inequality symbol, \ - please type \"enclone help display\": {}.\n", - x - ); - std::process::exit(1); - } - if !rhs.contains('.') { - rhs += ".0"; - } - if !rhs.parse::<f64>().is_ok() { - eprintln!( - "\nImproperly formatted condition, right-hand side invalid: {}.\n", - x - ); - std::process::exit(1); - } - let rhs = rhs.force_f64(); - let mut parts = Vec::<String>::new(); - let mut last = 0; - let lhsx = lhs.as_bytes(); - let mut parens = 0 as 
isize; - for i in 0..lhsx.len() { - if i > 0 && parens == 0 && (lhsx[i] == b'+' || lhsx[i] == b'-') { - if lhsx[last] != b'+' { - parts.push(stringme(&lhsx[last..i])); - } else { - parts.push(stringme(&lhsx[last + 1..i])); - } - last = i; - } - if lhsx[i] == b'(' { - parens += 1; - } else if lhsx[i] == b')' { - parens -= 1; - } - } - let mut coeff = Vec::<f64>::new(); - let mut var = Vec::<String>::new(); - parts.push(lhs[last..].to_string()); - for i in 0..parts.len() { - parts[i] = parts[i].replace("(", ""); - parts[i] = parts[i].replace(")", ""); - if parts[i].contains('*') { - let mut coeffi = parts[i].before("*").to_string(); - let vari = parts[i].after("*"); - if !coeffi.contains('.') { - coeffi += ".0"; - } - if !coeffi.parse::<f64>().is_ok() { - eprintln!( - "\nImproperly formatted condition, coefficient {} is invalid: {}.\n", - coeffi, x - ); - std::process::exit(1); - } - coeff.push(coeffi.force_f64()); - var.push(vari.to_string()); - } else { - let mut coeffi = 1.0; - let mut start = 0; - if parts[i].starts_with('-') { - coeffi = -1.0; - start = 1; - } - coeff.push(coeffi); - var.push(parts[i][start..].to_string()); - } - } - LinearCondition { - coeff: coeff, - var: var, - rhs: rhs, - sense: sense, - } - } - - pub fn satisfied(&self, val: &Vec<f64>) -> bool { - let mut lhs = 0.0; - for i in 0..self.coeff.len() { - lhs += self.coeff[i] * val[i]; - } - if self.sense == "lt".to_string() { - return lhs < self.rhs; - } else if self.sense == "gt".to_string() { - return lhs > self.rhs; - } else if self.sense == "le".to_string() { - return lhs <= self.rhs; - } else { - return lhs >= self.rhs; - } - } - - pub fn require_valid_variables(&self, ctl: &EncloneControl) { - let lvars = &ctl.clono_print_opt.lvars; - let mut lvars0 = Vec::<String>::new(); - let exclude = vec![ - "datasets", - "donors", - "near", - "far", - "dref", - "n_gex_cell", - "n_gex", - "n_b", - "clust", - "cred", - "type", - "gex", - "gex_min", - "gex_max", - "gex_mean", - "gex_sum", - "entropy", 
- "ext", - ]; - for j in 0..lvars.len() { - let mut ok = true; - for m in 0..exclude.len() { - if lvars[j] == exclude[m] { - ok = false; - } - } - if lvars[j].starts_with("g") && lvars[j].after("g").parse::<usize>().is_ok() { - ok = false; - } - if ok { - lvars0.push(lvars[j].clone()); - } - } - unique_sort(&mut lvars0); - for i in 0..self.var.len() { - if !bin_member(&lvars0, &self.var[i]) { - eprintln!( - "\nFound invalid variable {} in linear condition.\n", - self.var[i] - ); - std::process::exit(1); - } - } - } -} - // Origin info data structure. -#[derive(Default)] +#[derive(Default, PartialEq, Eq)] pub struct OriginInfo { // parallel vectors pub descrips: Vec<String>, // map dataset index to dataset long name @@ -310,7 +90,7 @@ pub struct OriginInfo { pub alt_bc_fields: Vec<Vec<(String, HashMap<String, String>)>>, pub cells_cellranger: Vec<Option<usize>>, pub mean_read_pairs_per_cell_cellranger: Vec<Option<usize>>, - // map dataset index to a map of barcode to (secreted, membrane) UMI counts + // map dataset index to a map of barcode to (secreted, membrane) UMI counts: pub secmem: Vec<HashMap<String, (usize, usize)>>, } @@ -323,10 +103,9 @@ impl OriginInfo { // Miscellaneous general options. 
-#[derive(Default)] +#[derive(Default, PartialEq)] pub struct GeneralOpt { pub pre: Vec<String>, - pub insertions: bool, pub indels: bool, pub reannotate: bool, pub heavy_chain_reuse: bool, @@ -338,7 +117,7 @@ pub struct GeneralOpt { pub weak: bool, pub tcr: bool, pub bcr: bool, - pub exp: bool, + pub tcrgd: bool, pub reuse: bool, pub fasta: String, pub fasta_filename: String, @@ -354,6 +133,7 @@ pub struct GeneralOpt { pub exact: Option<usize>, pub binary: String, pub proto: String, + pub fate_file: String, // Optional path to a json file containing metadata pub proto_metadata: Option<String>, pub h5: bool, @@ -367,7 +147,16 @@ pub struct GeneralOpt { pub mouse: bool, pub refname: String, pub noprint: bool, + pub noprintx: bool, pub required_fps: Option<usize>, + pub required_cells: Option<usize>, + pub required_clonotypes: Option<usize>, + pub required_donors: Option<usize>, + pub required_two_cell_clonotypes: Option<usize>, + pub required_two_chain_clonotypes: Option<usize>, + pub required_three_chain_clonotypes: Option<usize>, + pub required_four_chain_clonotypes: Option<usize>, + pub required_datasets: Option<usize>, pub cellranger: bool, pub summary: bool, pub summary_clean: bool, @@ -377,12 +166,9 @@ pub struct GeneralOpt { pub gene_scan_test: Option<LinearCondition>, pub gene_scan_control: Option<LinearCondition>, pub gene_scan_threshold: Option<LinearCondition>, - pub plot_file: String, - pub plot_by_isotype: bool, - pub plot_by_mark: bool, + pub gene_scan_exact: bool, + pub clonotype_group_names: Option<String>, pub origin_color_map: HashMap<String, String>, - pub use_legend: bool, - pub legend: Vec<(String, String)>, pub accept_inconsistent: bool, // TEMPORARY! pub current_ref: bool, // TEMPORARY! 
pub internal_run: bool, @@ -394,27 +180,123 @@ pub struct GeneralOpt { pub stable_doc: bool, pub imgt: bool, pub imgt_fix: bool, - pub ngroup: bool, pub jc1: bool, pub trace_barcode: String, pub ncell: bool, pub baseline: bool, pub echo: bool, + pub echoc: bool, pub mark_stats: bool, pub mark_stats2: bool, pub print_cpu: bool, pub print_cpu_info: bool, pub newick: bool, - pub tree: String, + pub tree_on: bool, + pub tree: Vec<String>, pub allow_inconsistent: bool, pub color: String, + pub color_by_rarity_pc: f64, pub species: String, // human or mouse or unknown, determined from the reference sequence pub using_secmem: bool, + pub diff_style: String, + pub accept_broken: bool, + pub require_unbroken_ok: bool, + pub built_in: bool, + pub reprod: bool, + pub peer_group_filename: String, + pub peer_group_dist: String, + pub peer_group_readable: bool, + pub subset_json: String, + pub fold_headers: bool, + pub no_uncap_sim: bool, + pub profile: bool, + pub nopager: bool, + pub info: Option<String>, + pub info_fields: Vec<String>, + pub info_data: HashMap<String, Vec<String>>, + pub info_resolve: bool, + pub internal_data_dir: String, + pub row_fill_verbose: bool, + pub config_file: String, + pub config: HashMap<String, String>, + pub top_genes: bool, + pub toy_com: bool, + pub chains_to_align: Vec<usize>, + pub chains_to_align2: Vec<usize>, + pub chains_to_jun_align: Vec<usize>, + pub chains_to_jun_align2: Vec<usize>, + pub align_jun_align_consistency: bool, + pub dvars: Vec<String>, // per dataset variables + pub gvars: Vec<String>, // per run variables + pub jscore_match: i32, + pub jscore_mismatch: i32, + pub jscore_bits_multiplier: f64, + pub jscore_gap_open: i32, + pub jscore_gap_extend: i32, + pub split: bool, + pub max_heavies: usize, + pub cpu_all_start: usize, + pub cpu_this_start: usize, + pub evil_eye: bool, // extra printing to try to trace hangs + pub toy: bool, // toy with phylogeny + pub group_post_filter: Option<Vec<usize>>, + pub no_newline: bool, + pub 
fb_show: String, + pub var_def: Vec<(String, String, Node, String)>, // {(variable, value, compiled value, expr)} + pub nospaces: bool, + pub subsample: f64, + pub all_bc_filename: String, + pub all_bc_human: bool, + pub all_bc_fields: Vec<String>, + pub all_bc_fields_orig: Vec<String>, + pub gamma_delta: bool, + pub pre_eval: bool, + pub pre_eval_show: bool, + pub external_ref: String, + pub fails_only: bool, + pub bc_joint: String, + pub post_filter: String, + pub mix_only: bool, + pub no_alt_alleles: bool, + pub vis_dump: bool, + pub session_name: String, + pub state_narrative: String, + pub session_narrative: String, +} + +// Some plot options. Note that plot options are not allowed to affect intermediate computation. + +#[derive(Clone, Default)] +pub struct PlotOpt { + pub cell_color: CellColor, + pub plot_xy_filename: String, + pub plot_xy_xvar: String, + pub plot_xy_yvar: String, + pub plot_xy_x_log10: bool, + pub plot_xy_y_log10: bool, + pub plot_xy_sym: bool, + pub plot_conditions: Vec<String>, + pub plot_colors: Vec<String>, + pub plot_file: String, + pub plot_by_isotype: bool, + pub plot_by_isotype_nolegend: bool, + pub plot_by_isotype_color: Vec<String>, + pub plot_by_mark: bool, + pub plot_quad: bool, + pub use_legend: bool, + pub legend: Vec<(String, String)>, + pub sim_mat_plot_file: String, + pub sim_mat_plot_vars: Vec<String>, + pub honey_in: Option<String>, + pub honey_out: String, + pub split_plot_by_dataset: bool, + pub split_plot_by_origin: bool, + pub png_width: Option<usize>, } // Allele-finding algorithmic options. -#[derive(Default)] +#[derive(Default, PartialEq, Eq)] pub struct AlleleAlgOpt { pub min_mult: usize, pub min_alt: usize, @@ -422,37 +304,88 @@ pub struct AlleleAlgOpt { // Allele-finding print options. 
-#[derive(Default)] +#[derive(Default, PartialEq, Eq)] pub struct AllelePrintOpt { pub con: bool, // print alternate consensus sequences pub con_trace: bool, // tracing for con } +// Data about alleles + +#[derive(Clone, Default)] +pub struct AlleleData { + pub alt_refs: Vec<(usize, usize, DnaString, usize, bool)>, + pub var_pos: Vec<Vec<usize>>, + pub var_bases: Vec<Vec<Vec<u8>>>, +} + // Join printing options. -#[derive(Default)] +#[derive(Default, PartialEq, Eq)] pub struct JoinPrintOpt { pub seq: bool, // print sequences of contigs, before truncation to V..J pub ann: bool, // print annotations of contigs pub ann0: bool, // print annotations of contigs, after truncation to V..J pub show_bc: bool, // show barcodes - pub quiet: bool, // don't print join events + pub quiet: bool, // do not print join events pub pfreq: usize, // show data for 1/n joins even if correct } // Join algorithmic options. -#[derive(Default)] +#[derive(Default, PartialEq)] pub struct JoinAlgOpt { - pub max_score: f64, // max score for join - pub easy: bool, // make joins even if core condition violated - pub merge_onesies: bool, // create and merge onesies where completely unambiguous - pub bcjoin: bool, // join only by barcode identity + pub max_score: f64, // max score for join + pub easy: bool, // make joins even if core condition violated + pub merge_onesies: bool, // create and merge onesies where completely unambiguous + pub merge_onesies_ctl: bool, // restriction on onesie merger + pub bcjoin: bool, // join only by barcode identity pub max_cdr3_diffs: usize, + pub cdr3_mult: f64, // multiplier for checking CDR3 SHM concentration + pub old_mult: bool, + pub mult_pow: f64, + pub old_light: bool, + pub basic_h: Option<f64>, + pub basic: Option<f64>, + pub basicx: bool, + pub join_full_diff: bool, + pub join_cdr3_ident: f64, + pub join_cdr12h_ident: f64, + pub fwr1_cdr12_delta: f64, + pub cdr3_normal_len: usize, + pub auto_share: usize, + pub comp_filt: usize, + pub comp_filt_bound: 
usize, + pub super_comp_filt: usize, + /// Break up clonotypes than have `split_max_chains` chains or more + pub split_max_chains: usize, } // Clonotype filtering options. // These fall into 2 categories: 1) on by default and 2) user-specified. +// Note that ClonoFiltOpt options are not allowed to affect intermediate computation. + +#[derive(Default, PartialEq)] +pub struct ClonoFiltOptDefault { + pub marked_b: bool, // only print clonotypes having a mark and which are typed as B cells + pub donor: bool, // allow cells from different donors to be placed in the same clonotype + pub weak_foursies: bool, // filter weak foursies + pub ngex: bool, // turn off gex filtering, + pub non_cell_mark: bool, + pub weak_onesies: bool, // filter weak onesies + pub doublet: bool, // filter putative doublets + pub fcell: Vec<Node>, // constraints from FCELL + pub umi_filt: bool, // umi count filter + pub umi_filt_mark: bool, // umi count filter (but only mark) + pub umi_ratio_filt: bool, // umi ratio filter + pub umi_ratio_filt_mark: bool, // umi ratio filter (but only mark) + pub weak_chains: bool, // filter weak chains from clonotypes + pub whitef: bool, // only show clonotypes exhibiting whitelist contamination + pub ncross: bool, // turn off cross filtering, + pub bc_dup: bool, // filter duplicated barcodes within an exact subclonotype + pub signature: bool, // signature filtering + pub nmax: bool, // turn off max contigs filter +} #[derive(Default)] pub struct ClonoFiltOpt { @@ -462,45 +395,43 @@ pub struct ClonoFiltOpt { pub min_datasets: usize, // only show clonotypes involving at least this many datasets pub max_datasets: usize, // only show clonotypes involving at most this many datasets pub min_dataset_ratio: usize, // see "enclone help filter" + pub min_donors: usize, // only show clonotypes having at least this many donors + pub min_origins: usize, // only show clonotypes involving at least this many origins pub min_chains: usize, // only show clonotypes with at least 
this many chains pub max_chains: usize, // only show clonotypes with at most this many chains - pub ngex: bool, // turn off gex filtering, - pub ncross: bool, // turn off cross filtering, - pub cdr3: Option<Regex>, // only show clonotypes having one of these CDR3_AA sequences - pub whitef: bool, // only show clonotypes exhibiting whitelist contamination + pub cdr3: Option<Regex>, // only show clonotypes whose CDR3_AA matches regular expression + pub cdr3_lev: String, // only show clonotypes whose CDR3_AA matches Levenshtein dist pattern pub protect_bads: bool, // protect bads from deletion pub fail_only: bool, // only print fails pub seg: Vec<Vec<String>>, // only show clonotypes using one of these VDJ segment names pub segn: Vec<Vec<String>>, // only show clonotypes using one of these VDJ segment numbers + pub nseg: Vec<Vec<String>>, // do not show clonotypes using one of these VDJ segment names + pub nsegn: Vec<Vec<String>>, // do not show clonotypes using one of these VDJ segment numbers pub min_exacts: usize, // only show clonotypes having at least this many exact subclonotypes - pub vj: Vec<u8>, // only show clonotypes having exactly this full length V..J sequence - pub vdup: bool, // only show clonotypes having a same V segment in two chains - pub have_onesie: bool, // only show clonotypes including a onesie exact subclonotype - pub cdiff: bool, // only show clonotypes having a constant region difference - pub del: bool, // only show clonotypes exhibiting a deletion - pub qual_filter: bool, // filter out exact subclonotypes having a weak base - pub weak_chains: bool, // filter weak chains from clonotypes - pub weak_onesies: bool, // filter weak onesies - pub weak_foursies: bool, // filter weak foursies - pub bc_dup: bool, // filter duplicated barcodes within an exact subclonotype - pub donor: bool, // allow cells from different donors to be placed in the same clonotype + pub max_exacts: usize, + pub vj: Vec<u8>, // only show clonotypes having exactly this 
full length V..J sequence + pub vdup: bool, // only show clonotypes having a same V segment in two chains + pub have_onesie: bool, // only show clonotypes including a onesie exact subclonotype + pub cdiff: bool, // only show clonotypes having a constant region difference + pub del: bool, // only show clonotypes exhibiting a deletion + pub qual_filter: bool, // filter out exact subclonotypes having a weak base pub bounds: Vec<LinearCondition>, // bounds on certain variables + pub bound_type: Vec<String>, // types of those bounds pub barcode: Vec<String>, // requires one of these barcodes - pub umi_filt: bool, // umi count filter - pub umi_filt_mark: bool, // umi count filter (but only mark) - pub non_cell_mark: bool, - pub marked: bool, // only print clonotypes having a mark - pub marked_b: bool, // only print clonotypes having a mark and which are typed as B cells - pub umi_ratio_filt: bool, // umi ratio filter - pub umi_ratio_filt_mark: bool, // umi ratio filter (but only mark) - pub fcell: Vec<(String, String)>, // constaints from FCELL + pub marked: bool, // only print clonotypes having a mark pub inkt: bool, pub mait: bool, + pub d_inconsistent: bool, + pub d_none: bool, + pub d_second: bool, + pub const_igh: Option<Regex>, + pub const_igkl: Option<Regex>, + pub dataset: Option<Vec<String>>, // only show clonotypes having one of the listed dataset names } // Clonotype printing options. -#[derive(Default)] +#[derive(Default, PartialEq, Eq)] pub struct ClonoPrintOpt { pub bu: bool, // print barcodes and UMI counts pub seqc: bool, // print V..J sequence for each chain if constant across clonotype @@ -514,72 +445,167 @@ pub struct ClonoPrintOpt { pub chain_brief: bool, // show abbreviated chain headers pub sum: bool, // print sum row pub mean: bool, // print mean row + pub conx: bool, + pub conp: bool, } // Clonotype grouping options. 
-#[derive(Default)] +#[derive(Default, PartialEq)] pub struct ClonoGroupOpt { - pub heavy_cdr3_aa: bool, // group by perfect identity of cdr3_aa IGH or TRB - pub vj_refname: bool, // group by having the same VJ reference names + // SYMMETRIC AND ASYMMETRIC + pub ngroup: bool, // do not print group headers + pub min_group: usize, // minimum number of clonotypes in group to print + pub min_group_donors: usize, // minimum number of donors in a group to print + pub cdr3h_len_var: bool, // requires different heavy chain CDR3 lengths + pub style: String, // symmetric or unsymmetric or unspecified + pub cdr3: String, // only print groups having this CDR3 + pub donor: Vec<String>, // only print groups having all these donors + pub naive: bool, // only print groups having an exact subclonotype with dref = 0 + pub no_naive: bool, // only print groups lacking an exact subclonotype with dref = 0 + // SYMMETRIC GROUPING CONTROLS + pub vj_refname: bool, // group by having the same VJ reference names + pub v_heavy_refname: bool, // group by having the same heavy V reference name + pub vj_heavy_refname: bool, // group by having the same heavy VJ reference names + pub vdj_refname: bool, // group by having the same VDJ reference names + pub vdj_heavy_refname: bool, // group by having the same heavy VDJ reference names + pub vj_len: bool, // group by V..J of same length + pub cdr3_len: bool, // group by CDR3 of same length + pub cdr3_heavy_len: bool, // group by heavy chain CDR3 of same length + pub cdr3_light_len: bool, // group by light chain CDR3 of same length + pub cdr3_heavy_pc: Option<f64>, // group if nuke aa identity >= given percent on heavy chain + pub cdr3_light_pc: Option<f64>, // group if nuke aa identity >= given percent on light chain + pub cdr3_aa_heavy_pc: Option<f64>, // group if CDR3 aa identity >= given percent on heavy chain + pub cdr3_aa_light_pc: Option<f64>, // group if CDR3 aa identity >= given percent on light chain + pub heavy_pc: Option<f64>, // group if 
nucleotide identity >= given percent on heavy chain + pub light_pc: Option<f64>, // group if nucleotide identity >= given percent on light chain + pub aa_heavy_pc: Option<f64>, // group if amino acid identity >= given percent on heavy chain + pub aa_light_pc: Option<f64>, // group if amino acid identity >= given percent on light chain + pub cdr3_heavy_pc_hf: Option<(f64, Vec<Vec<f64>>)>, // implement cdr3_aa_heavy≥n%:h:@f + + // ASYMMETRIC GROUPING CONTROLS + pub asymmetric_center: String, // definition of center for asymmetric grouping + pub asymmetric_dist_formula: String, // definition of distance formula for asymmetric grouping + pub asymmetric_dist_bound: String, // definition of distance bound for asymmetric grouping + // DEPRECATED pub vj_refname_strong: bool, // group by having the same VJ reference names, but stronger - pub min_group: usize, // minimum number of clonotypes in group to print } // Parseable output options. -#[derive(Default)] +#[derive(Default, PartialEq, Eq)] pub struct ParseableOpt { pub pout: String, // name of parseable output file - pub pchains: usize, // number of chains to show in parseable output + pub pchains: String, // number of chains to show in parseable output pub pcols: Vec<String>, // column names to show in parseable output + pub pcols_show: Vec<String>, // replacement column names for the actual header line pub pcols_sort: Vec<String>, // sorted column names to show in parseable output pub pcols_sortx: Vec<String>, // same but before colon if present pub pbarcode: bool, // generate output per barcode rather than per exact subclonotype + pub pno_header: bool, // suppress header line +} + +// Computational performance options. 
+ +#[derive(Default, PartialEq, Eq)] +pub struct PerfOpt { + pub comp: bool, // print computational performance stats + pub comp2: bool, // print more detailed computational performance stats + pub unaccounted: bool, // show unaccounted time at each step + pub comp_enforce: bool, // comp plus enforce no unaccounted time } // Set up control datastructure (EncloneControl). This is stuff that is constant for a given -// run of enclone. +// run of enclone. If you add something to this, be sure to update the "changed" section in +// enclone_server.rs, if needed. #[derive(Default)] pub struct EncloneControl { - pub gen_opt: GeneralOpt, // miscellaneous general options - pub pretty: bool, // use escape characters to enhance view - pub silent: bool, // turn off extra logging - pub force: bool, // make joins even if redundant - pub comp: bool, // print computational performance stats - pub comp2: bool, // print more detailed computational performance stats - pub debug_table_printing: bool, // turn on debugging for table printing - pub onesie_mult: usize, // see main.rs - pub merge_all_impropers: bool, // merge all improper exact subclonotypes - pub heur: ClonotypeHeuristics, // algorithmic heuristics - pub origin_info: OriginInfo, // origin (sample) info - pub allele_alg_opt: AlleleAlgOpt, // algorithmic options for allele finding - pub allele_print_opt: AllelePrintOpt, // print options for allele finding - pub join_alg_opt: JoinAlgOpt, // algorithmic options for join - pub join_print_opt: JoinPrintOpt, // printing options for join operations - pub clono_filt_opt: ClonoFiltOpt, // filtering options for clonotypes - pub clono_print_opt: ClonoPrintOpt, // printing options for clonotypes - pub clono_group_opt: ClonoGroupOpt, // grouping options for clonotypes - pub parseable_opt: ParseableOpt, // parseable output options - pub toy: bool, // toy with phylogeny + pub visual_mode: bool, // running as enclone visual + pub perf_opt: PerfOpt, // computational performance options + 
pub start_time: Option<Instant>, // enclone start time + pub gen_opt: GeneralOpt, // miscellaneous general options + pub plot_opt: PlotOpt, // plot options + pub pretty: bool, // use escape characters to enhance view + pub nogray: bool, // don't gray in per cell lines + pub silent: bool, // turn off extra logging + pub force: bool, // make joins even if redundant + pub debug_table_printing: bool, // turn on debugging for table printing + pub merge_all_impropers: bool, // merge all improper exact subclonotypes + pub heur: ClonotypeHeuristics, // algorithmic heuristics + pub origin_info: OriginInfo, // origin (sample) info + pub allele_alg_opt: AlleleAlgOpt, // algorithmic options for allele finding + pub allele_print_opt: AllelePrintOpt, // print options for allele finding + pub join_alg_opt: JoinAlgOpt, // algorithmic options for join + pub join_print_opt: JoinPrintOpt, // printing options for join operations + pub clono_filt_opt_def: ClonoFiltOptDefault, // default filtering options for clonotypes + pub clono_filt_opt: ClonoFiltOpt, // filtering options for clonotypes + pub clono_print_opt: ClonoPrintOpt, // printing options for clonotypes + pub clono_group_opt: ClonoGroupOpt, // grouping options for clonotypes + pub parseable_opt: ParseableOpt, // parseable output options + pub pathlist: Vec<String>, // list of input files + pub last_modified: Vec<SystemTime>, // last modified for pathlist } pub static mut WALLCLOCK: f64 = 0.0; +pub static mut LAST_IPEAK: f64 = -0.0; impl EncloneControl { pub fn perf_stats(&self, t: &Instant, msg: &str) { - let used = elapsed(&t); - if self.comp { - println!( - "used {:.2} seconds {}, peak mem = {:.2} GB", - used, - msg, - peak_mem_usage_gb() - ); + let used = elapsed(t); + let t2 = Instant::now(); + #[allow(unused_mut)] + let mut usedx = String::new(); + #[cfg(not(target_os = "windows"))] + { + if self.perf_opt.comp { + let peak = peak_mem_usage_gb(); + let ipeak = (100.0 * peak).round(); + let peak_mem = format!("peak mem = 
{peak:.2} GB"); + usedx = format!("{used:.2}"); + let mut ipeak_changed = false; + unsafe { + if ipeak != LAST_IPEAK { + ipeak_changed = true; + LAST_IPEAK = ipeak; + } + } + if usedx != "0.00" || ipeak_changed { + println!("used {usedx} seconds {msg}, {peak_mem}"); + } + } } + + // Check for time used in the above computation, which could otherwise introduce a + // discrepancy into the time accounting stats. Surprisingly, the time spent in that + // section can be nontrivial. + + let used2 = elapsed(&t2); + let used2x = format!("{used2:.2}"); + if self.perf_opt.comp && used2x != "0.00" { + println!("used {used2x} seconds computing perf stats for {msg}"); + } + + // Update total time used. + unsafe { - WALLCLOCK += used; + WALLCLOCK += used + used2; + } + + // Report unaccounted time. + + if self.perf_opt.comp && self.perf_opt.unaccounted && msg != "total" { + let delta; + unsafe { + delta = elapsed(&self.start_time.unwrap()) - WALLCLOCK; + } + let deltas = format!("{delta:.2}"); + if deltas != "0.00" { + if usedx == "0.00" { + println!("used 0.00 seconds {msg}"); + } + println!("used {deltas} seconds unaccounted for"); + } } } } @@ -589,41 +615,52 @@ impl EncloneControl { #[derive(Eq, Ord, PartialEq, PartialOrd, Default, Clone)] // not sure these are all needed pub struct TigData { - pub cdr3_dna: String, // CDR3 DNA sequence - pub len: usize, // length of V..J sequence - pub seq: Vec<u8>, // V..J contig subsequence - pub v_start: usize, // start of V on full contig sequence - pub v_stop: usize, // stop of aligned V on full contig sequence - pub v_stop_ref: usize, // stop of aligned V on reference V - pub j_start: usize, // start of aligned J on full contig sequence - pub j_start_ref: usize, // start of aligned J on reference J - pub j_stop: usize, // stop of J on full contig sequence - pub c_start: Option<usize>, // start of C on full contig sequence - pub full_seq: Vec<u8>, // full contig sequence - pub u_ref_id: Option<usize>, // index of 5'-UTR in ref file if 
found - pub v_ref_id: usize, // index of V segment reference sequence in ref file - pub d_ref_id: Option<usize>, // index of D segment reference sequence in ref file - pub j_ref_id: usize, // index of J segment reference sequence in ref file - pub c_ref_id: Option<usize>, // index of C segment reference sequence in ref file - pub cdr1_aa: String, // CDR1 amino acid sequence - pub cdr1_start: Option<usize>, // start position in bases of CDR1 on V..J - pub cdr2_aa: String, // CDR2 amino acid sequence - pub cdr2_start: Option<usize>, // start position in bases of CDR2 on V..J - pub cdr3_aa: String, // CDR3 amino acid sequence - pub cdr3_start: usize, // start position in bases of CDR3 on V..J - pub quals: Vec<u8>, // quality scores, truncated to V..J - pub full_quals: Vec<u8>, // quality scores - pub barcode: String, // barcode - pub tigname: String, // name of contig - pub left: bool, // true if this is IGH or TRB - pub dataset_index: usize, // index of dataset - pub origin_index: Option<usize>, // index of origin (sample) - pub donor_index: Option<usize>, // index of donor - pub tag_index: Option<usize>, // index of tag - pub umi_count: usize, // number of UMIs supporting contig - pub read_count: usize, // number of reads supporting contig - pub chain_type: String, // e.g. 
IGH + pub cdr3_dna: String, // CDR3 DNA sequence + pub len: usize, // length of V..J sequence + pub v_start: usize, // start of V on full contig sequence + pub v_stop: usize, // stop of aligned V on full contig sequence + pub v_stop_ref: usize, // stop of aligned V on reference V + pub d_start: Option<usize>, // start of aligned D on full contig sequence + pub j_start: usize, // start of aligned J on full contig sequence + pub j_start_ref: usize, // start of aligned J on reference J + pub j_stop: usize, // stop of J on full contig sequence + pub c_start: Option<usize>, // start of C on full contig sequence + pub full_seq: Vec<u8>, // full contig sequence + pub u_ref_id: Option<usize>, // index of 5'-UTR in ref file if found + pub v_ref_id: usize, // index of V segment reference sequence in ref file + pub d_ref_id: Option<usize>, // index of D segment reference sequence in ref file + pub j_ref_id: usize, // index of J segment reference sequence in ref file + pub c_ref_id: Option<usize>, // index of C segment reference sequence in ref file + pub fr1_start: usize, // start position in bases of FWR1 on V..J + pub cdr1_start: Option<usize>, // start position in bases of CDR1 on V..J + pub fr2_start: Option<usize>, // start position in bases of FWR2 on V..J + pub cdr2_start: Option<usize>, // start position in bases of CDR2 on V..J + pub fr3_start: Option<usize>, // start position in bases of FWR3 on V..J + pub cdr3_aa: String, // CDR3 amino acid sequence + pub cdr3_start: usize, // start position in bases of CDR3 on V..J + pub quals: Vec<u8>, // quality scores, truncated to V..J + pub full_quals: Vec<u8>, // quality scores + pub barcode: String, // barcode + pub tigname: String, // name of contig + pub left: bool, // true if this is IGH or TRB (or TRD in gamma/delta mode) + pub dataset_index: usize, // index of dataset + pub origin_index: Option<usize>, // index of origin (sample) + pub donor_index: Option<usize>, // index of donor + pub tag_index: Option<usize>, // 
index of tag + pub umi_count: usize, // number of UMIs supporting contig + pub read_count: usize, // number of reads supporting contig + pub chain_type: String, // e.g. IGH pub annv: Vec<(i32, i32, i32, i32, i32)>, // V annotation (one or two entries), for V..J + pub validated_umis: Option<Vec<String>>, // validated UMIs + pub non_validated_umis: Option<Vec<String>>, // non-validated UMIs + pub invalidated_umis: Option<Vec<String>>, // invalidated UMIs + pub frac_reads_used: Option<u32>, // fraction of reads passed to assembly stage in CR +} + +impl TigData { + pub fn seq(&self) -> &[u8] { + &self.full_seq[self.v_start..self.j_stop] + } } // The ExactClonotype data structure stores information that could be exhibited as a @@ -634,55 +671,76 @@ pub struct TigData { #[derive(Clone)] pub struct TigData0 { - pub quals: Vec<u8>, // quality scores, truncated to V..J - pub v_start: usize, // start of V on full contig sequence - pub j_stop: usize, // stop of J on full contig sequence - pub c_start: Option<usize>, // start of C on full contig sequence - pub full_seq: Vec<u8>, // full contig sequence - pub barcode: String, // barcode - pub tigname: String, // name of contig - pub dataset_index: usize, // index of dataset - pub origin_index: Option<usize>, // index of origin (sample) - pub donor_index: Option<usize>, // index of donor - pub tag_index: Option<usize>, // index of tag - pub umi_count: usize, // number of UMIs supporting contig - pub read_count: usize, // number of reads supporting contig - pub marked: bool, // if marked for possible deletion + pub quals: Vec<u8>, // quality scores, truncated to V..J + pub v_start: usize, // start of V on full contig sequence + pub j_stop: usize, // stop of J on full contig sequence + pub c_start: Option<usize>, // start of C on full contig sequence + pub full_seq: Vec<u8>, // full contig sequence + pub barcode: String, // barcode + pub tigname: String, // name of contig + pub dataset_index: usize, // index of dataset + pub 
origin_index: Option<usize>, // index of origin (sample) + pub donor_index: Option<usize>, // index of donor + pub tag_index: Option<usize>, // index of tag + pub umi_count: usize, // number of UMIs supporting contig + pub read_count: usize, // number of reads supporting contig + pub marked: bool, // if marked for possible deletion + pub validated_umis: Option<Vec<String>>, // validated UMIs + pub non_validated_umis: Option<Vec<String>>, // non-validated UMIs + pub invalidated_umis: Option<Vec<String>>, // invalidated UMIs + pub frac_reads_used: Option<u32>, // fraction of reads passed to assembly stage in CR + pub v_ref_id: usize, // index of V segment reference sequence in ref file +} + +#[derive(Clone, Default)] +pub struct Junction { + pub hcomp: usize, // junction alignment complexity + pub matches: usize, // matches + pub mismatches: usize, // mismatches + pub jun_ins: usize, // inserted bases in junction + pub d: Vec<usize>, // D gene ids + pub vstart: usize, // start of junction alignment on tig + pub indels: Vec<(usize, isize)>, // indel tig start, size (+ ins, - del) } #[derive(Clone)] pub struct TigData1 { - pub cdr3_dna: String, // CDR3 DNA sequence - pub seq: Vec<u8>, // V..J contig subsequence - pub seq_del: Vec<u8>, // V..J, possibly with mod 3 del - pub seq_del_amino: Vec<u8>, // V..J, possibly with mod 3 del at mod 3 start - pub full_seq: Vec<u8>, // full contig sequence (consensus) - pub v_start: usize, // start of V on full contig sequence - pub v_stop: usize, // stop of aligned V on full contig sequence - pub v_stop_ref: usize, // stop of aligned V on reference V - pub j_start: usize, // start of aligned J on full contig sequence - pub j_start_ref: usize, // start of aligned J on reference J - pub j_stop: usize, // stop of J on full contig sequence - pub u_ref_id: Option<usize>, // index of 5'-UTR in ref file if found - pub v_ref_id: usize, // index of V segment reference sequence in ref file - pub v_ref_id_donor: Option<usize>, // optional 
index into alt_refs - pub v_ref_id_donor_donor: Option<usize>, // donor id for v_ref_id_donor + pub cdr3_dna: String, // CDR3 DNA sequence + pub seq: Vec<u8>, // V..J contig subsequence + pub seq_del: Vec<u8>, // V..J, possibly with mod 3 del + pub seq_del_amino: Vec<u8>, // V..J, possibly with mod 3 del at mod 3 start + pub aa_mod_indel: Vec<u8>, // amino acid sequence, after removing indel if present + pub ins: Vec<(usize, Vec<u8>)>, // insertions in V..J (currently at most one) = {(pos, seq)} + // **before** the given position + pub full_seq: Vec<u8>, // full contig sequence (consensus) + pub v_start: usize, // start of V on full contig sequence + pub v_stop: usize, // stop of aligned V on full contig sequence + pub v_stop_ref: usize, // stop of aligned V on reference V + pub d_start: Option<usize>, // start of aligned D on full contig sequence + pub j_start: usize, // start of aligned J on full contig sequence + pub j_start_ref: usize, // start of aligned J on reference J + pub j_stop: usize, // stop of J on full contig sequence + pub u_ref_id: Option<usize>, // index of 5'-UTR in ref file if found + pub v_ref_id: usize, // index of V segment reference sequence in ref file + pub v_ref_id_donor: Option<usize>, // optional index into alt_refs + pub v_ref_id_donor_donor: Option<usize>, // donor id for v_ref_id_donor pub v_ref_id_donor_alt_id: Option<usize>, // alt ref id for donor id for v_ref_id_donor - pub d_ref_id: Option<usize>, // index of D segment reference sequence in ref file - pub j_ref_id: usize, // index of J segment reference sequence in ref file - pub c_ref_id: Option<usize>, // index of C segment reference sequence in ref file - pub cdr1_aa: String, // CDR1 amino acid sequence - pub cdr1_start: Option<usize>, // start position in bases of CDR1 on V..J - pub cdr2_aa: String, // CDR2 amino acid sequence - pub cdr2_start: Option<usize>, // start position in bases of CDR2 on V..J - pub cdr3_aa: String, // CDR3 amino acid sequence - pub cdr3_start: 
usize, // start position in bases of CDR3 on V..J - pub left: bool, // true if this is IGH or TRB - pub chain_type: String, // e.g. IGH + pub d_ref_id: Option<usize>, // index of D segment reference sequence in ref file + pub j_ref_id: usize, // index of J segment reference sequence in ref file + pub c_ref_id: Option<usize>, // index of C segment reference sequence in ref file + pub fr1_start: usize, // start position in bases of FWR1 on V..J + pub cdr1_start: Option<usize>, // start position in bases of CDR1 on V..J + pub fr2_start: Option<usize>, // start position in bases of FWR2 on V..J + pub cdr2_start: Option<usize>, // start position in bases of CDR2 on V..J + pub fr3_start: Option<usize>, // start position in bases of FWR3 on V..J + pub cdr3_aa: String, // CDR3 amino acid sequence + pub cdr3_start: usize, // start position in bases of CDR3 on V..J + pub left: bool, // true if this is IGH or TRB (or TRD in gamma/delta mode) + pub chain_type: String, // e.g. IGH pub annv: Vec<(i32, i32, i32, i32, i32)>, // V annotation (one or two entries), for V..J - pub vs: DnaString, // reference V segment (possibly donor allele) - pub vs_notesx: String, // notes on reference V segment (probably to be replaced) - pub js: DnaString, // reference J segment + pub vs: DnaString, // reference V segment (possibly donor allele) + pub vs_notesx: String, // notes on reference V segment (probably to be replaced) + pub js: DnaString, // reference J segment pub inkt_alpha_chain_gene_match: bool, pub inkt_alpha_chain_junction_match: bool, pub inkt_beta_chain_gene_match: bool, @@ -691,6 +749,17 @@ pub struct TigData1 { pub mait_alpha_chain_junction_match: bool, pub mait_beta_chain_gene_match: bool, pub mait_beta_chain_junction_match: bool, + pub jun: Junction, +} + +impl TigData1 { + pub fn ins_len(&self) -> usize { + let mut x = 0; + for j in 0..self.ins.len() { + x += self.ins[j].1.len(); + } + x + } } #[derive(Clone)] @@ -726,7 +795,7 @@ impl ExactClonotype { } // Define clonotype 
info data structure. The fact that we have multiple data structures -// encapsulating the same info is legacy and to be cleaned up. +// encapsulating the same info is legacy and should be cleaned up. // // The vectors in a CloneInfo object mostly have length two. The exceptions are in // improper clones (not having chains of both types). @@ -740,7 +809,6 @@ pub struct CloneInfo { // if there is one (rare, so wasteful, should be Option) pub tigsp: Vec<DnaString>, // contigs, truncated to V..J, packed (doesn't show - chars) pub has_del: Vec<bool>, // if - chars inserted to represent deletion - pub orig_tigs: Vec<DnaString>, // untruncated contigs pub clonotype_id: usize, // index into exact_clonotypes pub exact_cols: Vec<usize>, // the columns of the exact_clonotype that were extracted (used?) pub clonotype_index: usize, // index into vector of all exact subclonotypes (across origins) @@ -763,6 +831,16 @@ pub struct GexInfo { pub gex_features: Vec<Vec<String>>, pub gex_barcodes: Vec<Vec<String>>, pub gex_matrices: Vec<MirrorSparseMatrix>, + pub fb_top_matrices: Vec<MirrorSparseMatrix>, + pub fb_top_barcodes: Vec<Vec<String>>, + pub fb_top_reads_matrices: Vec<MirrorSparseMatrix>, + pub fb_top_reads_barcodes: Vec<Vec<String>>, + pub fb_total_umis: Vec<u64>, + pub fb_total_reads: Vec<u64>, + pub fb_brn: Vec<Vec<(String, u32, u32)>>, + pub fb_brnr: Vec<Vec<(String, u32, u32)>>, + pub fb_bdcs: Vec<Vec<(String, u32, u32, u32)>>, + pub feature_refs: Vec<String>, pub gex_cell_barcodes: Vec<Vec<String>>, pub cluster: Vec<HashMap<String, usize>>, pub cell_type: Vec<HashMap<String, String>>, @@ -777,6 +855,9 @@ pub struct GexInfo { pub feature_id: Vec<HashMap<String, usize>>, pub have_gex: bool, pub have_fb: bool, + pub feature_metrics: Vec<HashMap<(String, String), String>>, + pub json_metrics: Vec<HashMap<String, f64>>, + pub metrics: Vec<String>, } // Every entry in a ColInfo is a vector whose number of entries is the number of chains @@ -784,16 +865,18 @@ pub struct GexInfo 
{ #[derive(Clone, Default)] pub struct ColInfo { + pub left: Vec<bool>, pub uids: Vec<Option<usize>>, pub vids: Vec<usize>, pub vpids: Vec<Option<usize>>, pub dids: Vec<Option<usize>>, pub jids: Vec<usize>, pub cids: Vec<Option<usize>>, + pub fr1_starts: Vec<usize>, pub cdr1_starts: Vec<Option<usize>>, - pub cdr1_lens: Vec<Option<usize>>, + pub fr2_starts: Vec<Option<usize>>, pub cdr2_starts: Vec<Option<usize>>, - pub cdr2_lens: Vec<Option<usize>>, + pub fr3_starts: Vec<Option<usize>>, pub cdr3_starts: Vec<usize>, pub cdr3_lens: Vec<usize>, pub seq_lens: Vec<usize>, // not sure we should be computing or using this @@ -811,159 +894,93 @@ pub fn justification(x: &str) -> u8 { if x == "amino" || x == "var" || x == "const" - || x == "cdr3_dna" + || (x.ends_with("_aa") && x != "dref_aa") + || x.ends_with("_dna") + || x.ends_with("_name") + || x.ends_with("_indices") || x == "cdiff" || x == "notes" || x == "edit" || x == "datasets" || x == "donors" + || x == "origins" || x == "ext" || x == "barcode" || x == "barcodes" - || x.starts_with("v_name") - || x.starts_with("d_name") - || x.starts_with("j_name") - || x.starts_with("utr_name") + || x == "filter" || x.starts_with("vj_seq") + || x.starts_with("vj_seq_nl") + || x.starts_with("vj_aa_nl") || x.starts_with("seq") - || x.starts_with("q") - || x.starts_with("cdr3_aa") - || x.starts_with("var_aa") - || x.starts_with("var_indices") - || x.starts_with("share_indices") + || x.starts_with('q') || x.ends_with("_barcode") || x.ends_with("_barcodes") + || (x.starts_with("cdr") && !x.ends_with("len")) + || (x.starts_with("fwr") && !x.ends_with("len")) + || x.starts_with("d1_name") + || x.starts_with("d2_name") + || x.starts_with("fb") && !x.ends_with("_n") + || x == "cigar" + || x.contains("valumis") + || x.contains("valbcumis") + || x == "nbc" + || x == "allele" + || x == "allele_d" { - return b'l'; + b'l' } else { - return b'r'; + b'r' } } // 
▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -// Define the set "parseable_fields" of fields that could occur in parseable output. -// -// The overlap with code in proc_args_check.rs is not nice. +// The POUT separator character used to be a semicolon, but because semicolons could appear in the +// fields, that was broken (and there is a test for the associated problem). We substituted a +// character (the bell character) that would not be generated by enclone and should not be allowed +// in input, although we do not enforce that currently. -pub fn set_speakers(ctl: &EncloneControl, parseable_fields: &mut Vec<String>) { - // Make some abbreviations. +pub const POUT_SEP: &str = "\x07"; - let lvars = &ctl.clono_print_opt.lvars; +// Potential join structure. - // Define parseable output columns. The entire machinery for parseable output is controlled - // by macros that begin with "speak". +#[derive(Default)] +pub struct PotentialJoin<'a> { + pub k1: usize, + pub k2: usize, + pub nrefs: usize, + pub cd: isize, + pub diffs: usize, + pub bcs1: Vec<&'a str>, + pub bcs2: Vec<&'a str>, + pub shares: Vec<isize>, + pub indeps: Vec<isize>, + pub shares_details: Vec<Vec<usize>>, + pub share_pos_v: Vec<Vec<usize>>, + pub share_pos_j: Vec<Vec<usize>>, + pub score: f64, + pub err: bool, + pub p1: f64, + pub mult: f64, + pub k: isize, + pub d: isize, + pub n: usize, +} - let pcols_sort = &ctl.parseable_opt.pcols_sort; - macro_rules! speaker { - ($var:expr) => { - if ctl.parseable_opt.pcols.is_empty() || bin_member(&pcols_sort, &$var.to_string()) { - parseable_fields.push($var.to_string()); - } - }; - } - macro_rules! 
speakerc { - ($col:expr, $var:expr) => { - let varc = format!("{}{}", $var, $col + 1); - if ctl.parseable_opt.pcols.is_empty() || bin_member(&pcols_sort, &varc) { - parseable_fields.push(format!("{}{}", $var, $col + 1)); - } - }; - } - let mut have_gex = false; - for i in 0..ctl.origin_info.gex_path.len() { - if ctl.origin_info.gex_path[i].len() > 0 { - have_gex = true; - } - } - let mut all_lvars = lvars.clone(); - for i in 0..LVARS_ALLOWED.len() { - let x = &LVARS_ALLOWED[i]; - if !have_gex { - if *x == "gex".to_string() - || x.starts_with("gex_") - || x.ends_with("_g") - || x.ends_with("_g_μ") - || *x == "n_gex_cell".to_string() - || *x == "n_gex".to_string() - || *x == "n_b".to_string() - || *x == "clust".to_string() - || *x == "type".to_string() - || *x == "entropy".to_string() - || *x == "cred".to_string() - || *x == "cred_cell".to_string() - { - continue; - } - } - if !lvars.contains(&x.to_string()) { - all_lvars.push(x.to_string()); +pub fn get_config(config_file: &str, config: &mut HashMap<String, String>) -> bool { + if !config_file.is_empty() { + let mut cf = config_file.to_string(); + if cf.contains(':') { + cf = cf.after(":").to_string(); } - } - for x in all_lvars.iter() { - if (*x == "sec" || *x == "mem") && !ctl.gen_opt.using_secmem { - continue; - } - speaker!(x); - } - for col in 0..ctl.parseable_opt.pchains { - for x in CVARS_ALLOWED.iter() { - speakerc!(col, x); - } - if ctl.parseable_opt.pbarcode { - for x in CVARS_ALLOWED_PCELL.iter() { - speakerc!(col, x); + if path_exists(&cf) { + let f = open_for_read![&cf]; + for line in f.lines() { + let s = line.unwrap(); + config.insert(s.before("=").to_string(), s.after("=").to_string()); } - } - for x in &["v_name", "d_name", "j_name", "v_id", "d_id", "j_id"] { - speakerc!(col, x); - } - for x in &[ - "var_indices_dna", - "var_indices_aa", - "share_indices_dna", - "share_indices_aa", - ] { - speakerc!(col, x); - } - for x in &[ - "v_start", - "const_id", - "utr_id", - "utr_name", - "cdr3_start", - 
"cdr3_aa", - ] { - speakerc!(col, x); - } - for x in &["seq", "vj_seq", "var_aa"] { - speakerc!(col, x); - } - for i in 0..pcols_sort.len() { - if pcols_sort[i].starts_with('q') && pcols_sort[i].ends_with(&format!("_{}", col + 1)) { - let x = pcols_sort[i].after("q").rev_before("_"); - if x.parse::<usize>().is_ok() { - parseable_fields.push(pcols_sort[i].clone()); - } - } - } - } - speaker!("group_id"); - speaker!("group_ncells"); - speaker!("clonotype_id"); - speaker!("clonotype_ncells"); - speaker!("nchains"); - speaker!("exact_subclonotype_id"); - speaker!("barcodes"); - for x in ctl.origin_info.dataset_list.iter() { - if x.len() > 0 { - speaker!(&format!("{}_barcodes", x)); - } - } - if ctl.parseable_opt.pbarcode { - speaker!("barcode"); - for x in ctl.origin_info.dataset_list.iter() { - speaker!(&format!("{}_barcode", x)); + return true; } } + false } diff --git a/enclone_core/src/enclone.testdata b/enclone_core/src/enclone.testdata index f9022eb70..836aee887 100644 --- a/enclone_core/src/enclone.testdata +++ b/enclone_core/src/enclone.testdata @@ -108,16 +108,21 @@ # Excluded 118193 as contaminated. What we actually see is overlap between it and # 99638, 99645. Impossible to sort out with certainty. # -# Excluded 140365 as contaminated. What we actually see is overlap between it and -# 117417. Impossible to sort out with certainty. +# At one point we excluded 140365 based on putative contamination. That was because it overlapped +# 117417, and at the time, that was assigned to another sample. We could now try bringing back +# 140365, but would need to condense the data. 
-91295-91302,91312,91314,91316,91318,91320,91322,91324,92751,92758,92763,95455,106060,106062,114844,116087-116112,117703,117704,117707,118175-118192,123137,123138,123141,123142,123176,123178,123179,123182,123183,123186,123187,123190,123191,140331-140364,140366-140369 +91295-91302,91312,91314,91316,91318,91320,91322,91324,92751,92758,92763,95455,106060,106062,114844,116087-116112,117417,117703,117704,117707,118175-118192,123137,123138,123141,123142,123176,123178,123179,123182,123183,123186,123187,123190,123191,140331-140364,140366-140369 -# Below now supplanted by BI=11. +# This is a superset of BI=11, probably some without matched GEX: -131240,131256,129483,129480,129481,131270,129486,131235,131268,131237,131275,129476,129477,129478,134532,134538,134537,134536,134535,134534,134533,117458,117459 +117458-117459,129476-129486,131235,131237,131240,131256,131268,131270,131275,134532-134538 -117417,86202,86216,86215,86214,86213,86212,86211,86210,86208,86207,86206,86205,86204,86203,112741,112740,113526 +86202,86216,86215,86214,86213,86212,86211,86210,86208,86207,86206,86205,86204,86203,112741,112740,113526 # 113808 should be in above but com'ed out because needs to be rerun through marsoc with 3.1 52177 + +# This is BI=1: + +140696-140711 diff --git a/enclone_core/src/enclone_structs.rs b/enclone_core/src/enclone_structs.rs new file mode 100644 index 000000000..6eec61a06 --- /dev/null +++ b/enclone_core/src/enclone_structs.rs @@ -0,0 +1,66 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +use self::refx::RefData; +use crate::{ + barcode_fate::BarcodeFate, + defs::{AlleleData, CloneInfo, EncloneControl, ExactClonotype, GexInfo}, +}; +use enclone_proto::types::DonorReferenceItem; +use qd::Double; +use std::{collections::HashMap, time::Instant}; +use vdj_ann::refx; + +#[derive(Clone, Debug, Default)] +pub struct MainEncloneOutput { + pub pics: Vec<String>, // clonotype tables + pub last_widths: Vec<u32>, + pub svgs: Vec<String>, // SVG objects + pub summary: String, // summary + pub metrics: Vec<String>, + pub dataset_names: Vec<String>, + pub parseable_stdouth: bool, + pub noprint: bool, + pub noprintx: bool, + pub html: bool, + pub ngroup: bool, + pub pretty: bool, +} + +#[derive(Default)] +pub struct EncloneState { + pub inter: EncloneIntermediates, + pub outs: MainEncloneOutput, +} + +#[derive(Default)] +pub struct EncloneSetup { + pub ctl: EncloneControl, + pub ann: String, + pub gex_info: GexInfo, + pub tall: Option<Instant>, + pub refdata: RefData, + pub is_bcr: bool, + pub to_ref_index: HashMap<usize, usize>, +} + +#[derive(Default)] +pub struct EncloneIntermediates { + pub setup: EncloneSetup, + pub ex: EncloneExacts, +} + +#[derive(Default, Clone)] +pub struct EncloneExacts { + pub to_bc: HashMap<(usize, usize), Vec<String>>, + pub exact_clonotypes: Vec<ExactClonotype>, + pub raw_joins: Vec<Vec<usize>>, + pub info: Vec<CloneInfo>, + pub orbits: Vec<Vec<i32>>, + pub vdj_cells: Vec<Vec<String>>, + pub join_info: Vec<(usize, usize, bool, Vec<u8>)>, + pub drefs: Vec<DonorReferenceItem>, + pub sr: Vec<Vec<Double>>, + pub fate: Vec<HashMap<String, BarcodeFate>>, // GETS MODIFIED SUBSEQUENTLY + pub is_bcr: bool, + pub allele_data: AlleleData, +} diff --git a/enclone_core/src/hcomp.rs b/enclone_core/src/hcomp.rs new file mode 100644 index 000000000..19cb13f0d --- /dev/null +++ b/enclone_core/src/hcomp.rs @@ -0,0 +1,186 @@ +// Copyright (c) 2022 10X Genomics, Inc. All rights reserved. 
+ +use crate::align_to_vdj_ref::align_to_vdj_ref; +use crate::defs::{EncloneControl, ExactClonotype, Junction}; +use crate::opt_d::{jflank, opt_d}; +use bio_edit::alignment::AlignmentOperation::{Del, Ins, Match, Subst}; +use enclone_proto::types::DonorReferenceItem; +use rayon::prelude::*; +use vdj_ann::refx::RefData; + +// This is largely copied from align_n. + +pub fn heavy_complexity( + refdata: &RefData, + exact_clonotypes: &[ExactClonotype], + ctl: &EncloneControl, + dref: &[DonorReferenceItem], +) -> Vec<Junction> { + let mut results = Vec::<(usize, Junction)>::new(); + for i in 0..exact_clonotypes.len() { + results.push((i, Junction::default())); + } + results.par_iter_mut().for_each(|res| { + let i = res.0; + let ex = &exact_clonotypes[i]; + for r in 0..ex.share.len() { + if ex.share[r].left && ex.share.len() == 2 && !ex.share[1 - r].left { + let seq = ex.share[r].seq_del.as_ref(); + let mut vref = refdata.refs[ex.share[r].v_ref_id].to_ascii_vec(); + if ex.share[r].v_ref_id_donor.is_some() { + vref = dref[ex.share[r].v_ref_id_donor.unwrap()] + .nt_sequence + .clone(); + } + let mut vstart = ex.share[r].cdr3_start - 2; + + // Compensate for indel. Code here and next work imperfectly and + // there would be value in investigating the error cases. + + if !ex.share[r].ins.is_empty() { + vstart -= ex.share[r].ins[0].1.len(); + } else if ex.share[r].seq.len() < ex.share[r].seq_del.len() { + vstart += ex.share[r].seq_del.len() - ex.share[r].seq.len(); + } + + // Prevent crash (working around bug). + + if vstart > vref.len() { + vstart = vref.len(); + } + + // Keep going. 
+ + let vref = &vref[vstart..vref.len()]; + let mut concat = vref.to_vec(); + let mut drefx = Vec::<u8>::new(); + let mut d2ref = Vec::<u8>::new(); + let mut drefname = String::new(); + let mut scores = Vec::<f64>::new(); + let mut ds = Vec::<Vec<usize>>::new(); + opt_d( + ex.share[r].v_ref_id, + ex.share[r].j_ref_id, + &ex.share[r].seq_del, + &ex.share[r].annv, + &ex.share[r].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + ex.share[r].v_ref_id_donor, + ); + let mut opt = Vec::new(); + if !ds.is_empty() { + opt = ds[0].clone(); + } + for (j, d) in opt.into_iter().enumerate() { + if j == 0 { + drefx = refdata.refs[d].to_ascii_vec(); + } else { + d2ref = refdata.refs[d].to_ascii_vec(); + } + if j > 0 { + drefname += ":"; + } + drefname += &mut refdata.name[d].clone(); + } + concat.extend(&drefx); + concat.extend(&d2ref); + let jref = refdata.refs[ex.share[r].j_ref_id].to_ascii_vec(); + let jend = jflank(seq, &jref); + let mut seq_start = vstart as isize; + // probably not exactly right + if ex.share[r].annv.len() > 1 { + let q1 = ex.share[r].annv[0].0 + ex.share[r].annv[0].1; + let q2 = ex.share[r].annv[1].0; + seq_start += q2 as isize - q1 as isize; + } + let mut seq_end = seq.len() - (jref.len() - jend); + // very flaky bug workaround + // asserted on BCR=180030 CDR3=CARERDLIWFGPW JALIGN1 + if seq_start as usize > seq_end { + seq_start = vstart as isize; + } + if seq_end <= seq_start as usize { + seq_end = seq.len(); // bug fix for problem found by customer, + // couldn't reproduce internally + } + let seq = &seq[seq_start as usize..seq_end]; + let jref = &jref[..jend]; + concat.extend(jref); + let (ops, _score) = align_to_vdj_ref( + seq, + vref, + &drefx, + &d2ref, + jref, + &drefname, + true, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + 
ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + ); + let mut tigpos = 0; + let mut hcomp = 0; + let mut jun_ins = 0; + let mut indels = Vec::<(usize, isize)>::new(); + let mut ins_start = 0; + let mut del_len = 0; + let mut matches = 0; + let mut mismatches = 0; + for i in 0..ops.len() { + if ops[i] == Subst { + if tigpos >= 2 { + mismatches += 1; + } + hcomp += 1; + tigpos += 1; + } else if ops[i] == Match { + if tigpos >= 2 { + matches += 1; + } + tigpos += 1; + } else if ops[i] == Ins { + hcomp += 1; + jun_ins += 1; + if i == 0 || ops[i - 1] != Ins { + ins_start = tigpos; + } + tigpos += 1; + if i == ops.len() - 1 || ops[i + 1] != Ins { + let ins_len = tigpos - ins_start; + indels.push((ins_start, ins_len as isize)); + } + } else if ops[i] == Del { + if i == 0 || ops[i - 1] != Del { + hcomp += 1; + } + del_len += 1; + if i == ops.len() - 1 || ops[i + 1] != Del { + indels.push((tigpos, -(del_len as isize))); + del_len = 0; + } + } + } + res.1 = Junction { + hcomp, + matches, + mismatches, + jun_ins, + d: ds.into_iter().next().unwrap(), + vstart, + indels, + }; + } + } + }); + results.into_iter().map(|ri| ri.1).collect() +} diff --git a/enclone_core/src/join_one.rs b/enclone_core/src/join_one.rs new file mode 100644 index 000000000..3a788f9e0 --- /dev/null +++ b/enclone_core/src/join_one.rs @@ -0,0 +1,882 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +use crate::defs::{CloneInfo, EncloneControl, ExactClonotype, PotentialJoin}; +use crate::opt_d::jflank; +use debruijn::{dna_string::ndiffs, Mer}; +use enclone_proto::types::DonorReferenceItem; +use qd::{dd, Double}; +use stats_utils::abs_diff; +use std::cmp::min; +use std::collections::HashMap; +use string_utils::TextUtils; +use vdj_ann::refx::RefData; +// use stirling_numbers::p_at_most_m_distinct_in_sample_of_x_from_n; +use vector_utils::{meet, unique_sort}; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// This is a copy of p_at_most_m_distinct_in_sample_of_x_from_n from the stirling_numbers crate, +// that has been modified to use higher precision internal math. This should go into that crate +// (along with the stirling numbers ratio table code) when and if the qr crate is published. + +pub fn p_at_most_m_distinct_in_sample_of_x_from_n_double( + m: usize, + x: usize, + n: usize, + sr: &[Vec<Double>], +) -> f64 { + let mut p = dd![1.0]; + for u in m + 1..=x { + let mut z = dd![sr[x][u]]; + for _ in 0..x { + z *= dd![u as f64] / dd![n as f64]; + } + for v in 1..=u { + z *= dd![(n - v + 1) as f64] / dd![(u - v + 1) as f64]; + } + p -= z; + } + if p < dd![0.0] { + p = dd![0.0]; + } + f64::from(p) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// partial_bernoulli_sum( n, k ): return sum( choose(n,i), i = 0..=k ). +// +// Beware of overflow. 
+ +pub fn partial_bernoulli_sum(n: usize, k: usize) -> f64 { + assert!(n >= 1); + assert!(k <= n); + let mut sum = 0.0; + let mut choose = 1.0; + for i in 0..=k { + sum += choose; + choose *= (n - i) as f64; + choose /= (i + 1) as f64; + } + sum +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn join_one<'a>( + is_bcr: bool, + k1: usize, + k2: usize, + ctl: &EncloneControl, + exact_clonotypes: &[ExactClonotype], + info: &[CloneInfo], + to_bc: &'a HashMap<(usize, usize), Vec<String>>, + sr: &[Vec<Double>], + pot: &mut Vec<PotentialJoin<'a>>, + refdata: &RefData, + dref: &[DonorReferenceItem], +) -> bool { + // Do not merge onesies or foursies with anything. Deferred until later. + // Note that perhaps some foursies should be declared doublets and deleted. + // Note onesies merging above is turned off so this appears to be moot. + + let (info1, info2) = (&info[k1], &info[k2]); + let (clono1, clono2) = (info1.clonotype_id, info2.clonotype_id); + let chains1 = exact_clonotypes[clono1].share.len(); + let chains2 = exact_clonotypes[clono2].share.len(); + if !(2..=3).contains(&chains1) || !(2..=3).contains(&chains2) { + return false; + } + // NEED FOR THIS SEEMS LIKE A BUG: + if info1.vs.len() == 1 || info2.vs.len() == 4 { + return false; + } + if info1.vs.len() > 2 { + return false; + } + + // Require that CDR3s have the same length. Ugly. + // First part should be a tautology. + + let (x1, x2) = (&info[k1].cdr3s, &info[k2].cdr3s); + if x1.len() != x2.len() { + return false; + } + for i in 0..x1.len() { + if x1[i].len() != x2[i].len() { + return false; + } + } + + // Test for JOIN_BASIC and JOIN_BASIC_H. 
+ + if ctl.join_alg_opt.basic.is_some() || ctl.join_alg_opt.basic_h.is_some() { + let chains = if ctl.join_alg_opt.basic.is_some() { + 2 + } else { + 1 + }; + let (x1, x2) = (&info1.cdr3s, &info2.cdr3s); + for (((z1, z2), (vs1, vs2)), (js1, js2)) in x1 + .iter() + .zip(x2.iter()) + .zip(info1.vs.iter().zip(info2.vs.iter())) + .zip(info1.js.iter().zip(info2.js.iter())) + .take(chains) + { + if z1.len() != z2.len() || vs1 != vs2 || js1 != js2 { + return false; + } + let mut cd = 0; + for (z1m, z2m) in z1.as_bytes().iter().zip(z2.as_bytes().iter()) { + if z1m != z2m { + cd += 1; + } + } + let limit = if let Some(basic) = ctl.join_alg_opt.basic { + (100.0 - basic) / 100.0 + } else { + (100.0 - ctl.join_alg_opt.basic_h.unwrap()) / 100.0 + }; + if cd as f64 / (z1.len() as f64) > limit { + return false; + } + } + pot.push(PotentialJoin { + k1, + k2, + ..Default::default() + }); + return true; + } + + // Test for BASICX. + + if ctl.join_alg_opt.basicx { + let (x1, x2) = (&info[k1].cdr3s, &info[k2].cdr3s); + let mut cd = 0; + let mut total = 0; + for z in 0..2 { + if x1[z].len() != x2[z].len() { + return false; + } + if info[k1].vs[z] != info[k2].vs[z] || info[k1].js[z] != info[k2].js[z] { + return false; + } + for m in 0..x1[z].len() { + total += 1; + if x1[z].as_bytes()[m] != x2[z].as_bytes()[m] { + cd += 1; + } + } + } + if cd as f64 / total as f64 > 0.1 { + return false; + } + pot.push(PotentialJoin { + k1, + k2, + ..Default::default() + }); + return true; + } + + // Test for JOIN_FULL_DIFF. 
+ + if ctl.join_alg_opt.join_full_diff { + let (x1, x2) = (&info[k1].cdr3s, &info[k2].cdr3s); + let (mut diffs, mut total) = (0, 0); + for z in 0..2 { + if x1[z].len() != x2[z].len() { + return false; + } + if info[k1].vs[z] != info[k2].vs[z] || info[k1].js[z] != info[k2].js[z] { + return false; + } + for p in 0..info[k1].tigs_amino[z].len() { + total += 1; + if info[k1].tigs_amino[z][p] != info[k2].tigs_amino[z][p] { + diffs += 1; + } + } + } + if diffs as f64 / total as f64 > 0.1 { + return false; + } + pot.push(PotentialJoin { + k1, + k2, + ..Default::default() + }); + return true; + } + + // Put identity filter on CDR3s for BCR. + + if is_bcr { + let (x1, x2) = (&info[k1].cdr3s, &info[k2].cdr3s); + let mut cd = 0; + let mut total = 0; + for z in 0..2 { + if x1[z].len() != x2[z].len() { + return false; + } + for m in 0..x1[z].len() { + if x1[z].as_bytes()[m] != x2[z].as_bytes()[m] { + cd += 1; + } + } + total += x1[z].len(); + } + if cd as f64 / total as f64 > 1.0 - ctl.join_alg_opt.join_cdr3_ident / 100.0 { + return false; + } + } + + // Compute number of differences. The default behavior is that this is applied only to TCR. + + let (x1, x2) = (&info[k1].cdr3s, &info[k2].cdr3s); + if !is_bcr || ctl.heur.max_diffs < 1_000_000 { + let mut diffs = 0_usize; + for x in 0..info[k1].lens.len() { + if !info[k1].has_del[x] && !info[k2].has_del[x] { + // A great deal of time is spent in the call to ndiffs. Notes on this: + // 1. It is slower than if the computation is done outside + // the ndiffs function. This is mysterious but must have something to + // do with the storage of the 256-byte lookup table. + // 2. Adding #[inline(always)] in front of the ndiffs function definition + // doesn't help. + // 3. Adding a bounds test for diffs > ctl.heur.max_diffs inside the ndiffs + // function doesn't help, whether placed in the inner loop or the other + // loop. 
+ diffs += ndiffs(&info[k1].tigsp[x], &info[k2].tigsp[x]); + } else { + for j in 0..info[k1].tigs[x].len() { + if info[k1].tigs[x][j] != info[k2].tigs[x][j] { + diffs += 1; + } + } + } + } + if diffs > ctl.heur.max_diffs { + return false; + } + if !is_bcr && diffs > 5 { + return false; + } + } + + // Compute junction diffs. + + let mut cd = 0_isize; + let mut hcd = 0_isize; + for l in 0..x1.len() { + for m in 0..x1[l].len() { + if x1[l].as_bytes()[m] != x2[l].as_bytes()[m] { + if l == 0 { + hcd += 1; + } + cd += 1; + } + } + } + + // Cap CDR3 diffs for TCR or as requested. + + if (ctl.join_alg_opt.max_cdr3_diffs < 1000 || !is_bcr) + && (cd > ctl.join_alg_opt.max_cdr3_diffs as isize || (!is_bcr && cd > 0)) + { + return false; + } + + // Unless MIX_DONORS specified, do not join across donors. + // And test for error. + // + // WARNING! There are actually two cases: where an individual exact subclonotype + // itself crosses donors, and where we cross donors in making a join. Note that + // the former case is most improbable, unless there is cross-sample contamination. + // And if that did happen, the output would be confusing and might have a greatly + // exaggerated number of fails. 
+ + let (mut donors1, mut donors2) = (Vec::<usize>::new(), Vec::<usize>::new()); + let ex1 = &exact_clonotypes[info[k1].clonotype_index]; + let ex2 = &exact_clonotypes[info[k2].clonotype_index]; + for j in 0..ex1.clones.len() { + if ex1.clones[j][0].donor_index.is_some() { + donors1.push(ex1.clones[j][0].donor_index.unwrap()); + } + } + for j in 0..ex2.clones.len() { + if ex2.clones[j][0].donor_index.is_some() { + donors2.push(ex2.clones[j][0].donor_index.unwrap()); + } + } + unique_sort(&mut donors1); + unique_sort(&mut donors2); + if !ctl.clono_filt_opt_def.donor + && !donors1.is_empty() + && !donors2.is_empty() + && donors1 != donors2 + { + return false; + } + let err = donors1 != donors2 || donors1.len() != 1 || donors2.len() != 1; + + // Analyze the two clonotypes versus the reference. First traverse the reference + // sequences. Either we use the references for k1 or the references for k2, but + // these are nearly always the same. + + let mut nrefs = 1; + for m in 0..2 { + if info[k1].vs[m] != info[k2].vs[m] || info[k1].js[m] != info[k2].js[m] { + nrefs = 2; + } + } + let mut shares = vec![0; nrefs]; // shared mutations from reference + let mut shares1 = vec![0; nrefs]; + let mut shares2 = vec![0; nrefs]; + let mut indeps = vec![0; nrefs]; // independent mutations from reference + let mut total = vec![vec![0; 2]; nrefs]; // total differences from reference + let mut shares_details = vec![vec![0; 4]; nrefs]; + let mut share_pos_v = vec![Vec::<usize>::new(); 2]; + let mut share_pos_j = vec![Vec::<usize>::new(); 2]; + for u in 0..nrefs { + let k = if u == 0 { k1 } else { k2 }; + + // Traverse the chains in the clonotype. + + let nchains = info[k1].lens.len(); + for m in 0..nchains { + let (tig1, tig2) = (&info[k1].tigs[m], &info[k2].tigs[m]); + + // Traverse the two segments (V and J). 
+ + for si in 0..2 { + let seg = if si == 0 { + &info[k].vs[m] + } else { + &info[k].js[m] + }; + let ref_trim = if si == 1 { + ctl.heur.ref_j_trim + } else { + ctl.heur.ref_v_trim + }; + for p in 0..seg.len() - ref_trim { + let (t1, t2); + let r; + if si == 0 { + // Ugly bailout arising very rarely if the two reference + // sequences have different lengths. + if p >= tig1.len() || p >= tig2.len() { + return false; + } + t1 = tig1[p]; + t2 = tig2[p]; + // r = seg.get(p); + let rx = seg.get(p); + if rx == 0 { + r = b'A'; + } else if rx == 1 { + r = b'C'; + } else if rx == 2 { + r = b'G'; + } else { + r = b'T'; + } + } else { + t1 = tig1[tig1.len() - p - 1]; + t2 = tig2[tig2.len() - p - 1]; + // r = seg.get( seg.len() - p - 1 ); + let rx = seg.get(seg.len() - p - 1); + if rx == 0 { + r = b'A'; + } else if rx == 1 { + r = b'C'; + } else if rx == 2 { + r = b'G'; + } else { + r = b'T'; + } + } + if t1 == t2 && t1 != r { + shares[u] += 1; + if m == 1 { + shares1[u] += 1; + } else { + shares2[u] += 1; + } + shares_details[u][2 * m + si] += 1; + if si == 0 { + share_pos_v[m].push(p); + } else { + share_pos_j[m].push(p); + } + } else if (t1 == r && t2 != r) || (t2 == r && t1 != r) { + indeps[u] += 1; + } else if t1 != r && t2 != r { + indeps[u] += 2; + } + if t1 != r { + total[u][0] += 1; + } + if t2 != r { + total[u][1] += 1; + } + } + } + } + } + + // Don't allow different references if one is strongly favored. + // (not documented) + + if nrefs == 2 { + for m in 0..2 { + if abs_diff(total[0][m], total[1][m]) > ctl.heur.max_degradation { + return false; + } + } + } + + // Another test for acceptable join. (not fully documented) + + let min_shares = shares.iter().min().unwrap(); + let _min_shares1 = shares1.iter().min().unwrap(); + let _min_shares2 = shares2.iter().min().unwrap(); + let min_indeps = indeps.iter().min().unwrap(); + + // Reject if barcode overlap. 
(not documented) + + let (mut bcs1, mut bcs2) = (Vec::<&'a str>::new(), Vec::<&'a str>::new()); + for origin in info[k1].origin.iter() { + bcs1.extend( + to_bc[&(*origin, info[k1].clonotype_id)] + .iter() + .map(String::as_str), + ); + } + for origin in info[k2].origin.iter() { + bcs2.extend( + to_bc[&(*origin, info[k2].clonotype_id)] + .iter() + .map(String::as_str), + ); + } + unique_sort(&mut bcs1); + unique_sort(&mut bcs2); + if meet(&bcs1, &bcs2) { + return false; + } + + // Test for concentration of SHM in the junction regions. + + if cd as f64 >= ctl.join_alg_opt.cdr3_mult * std::cmp::max(1, *min_indeps) as f64 { + return false; + } + + // Do not merge cells if they were assigned different light chain constant regions. + // Unless cd = 0. + + if !ctl.join_alg_opt.old_light { + for i in 0..info[k1].cdr3s.len() { + let (j1, j2) = (info[k1].exact_cols[i], info[k2].exact_cols[i]); + if !ex1.share[j1].left + && ex1.share[j1].c_ref_id.is_some() + && ex2.share[j2].c_ref_id.is_some() + && ex1.share[j1].c_ref_id.unwrap() != ex2.share[j2].c_ref_id.unwrap() + && cd > 0 + { + return false; + } + } + } + + // Estimate the probability p1 that drawing k = min_indeps + 2 * min_shares + // objects from n = 3 * (sum of VJ contig lengths) yields d = min_shares or + // more duplicates. + + let n = 3 * (info[k1].tigs[0].len() + info[k1].tigs[1].len()); + let k = *min_indeps + 2 * *min_shares; + let d = *min_shares; + let p1 = p_at_most_m_distinct_in_sample_of_x_from_n_double((k - d) as usize, k as usize, n, sr); + assert!(!p1.is_infinite()); // TODO: IS THIS SAFE? + + // Multiply by 80^cd, or if using old version, the number of DNA sequences that differ from + // the given CDR3 sequences on <= cd bases. This is sum( choose(3cn, m), m = 0..=cd ). + // Changed to take into account CDR3 length. 
+ + let mut mult; + if ctl.join_alg_opt.old_mult { + let cn: usize = x1.iter().map(String::len).sum(); + mult = partial_bernoulli_sum(3 * cn, cd as usize); + assert!(!mult.is_infinite()); // TODO: IS THIS SAFE? + } else { + // mult = ctl.join_alg_opt.mult_pow.powi(cd as i32); + + let mut cd1 = 0; + let n1 = x1[0].len(); + for m in 0..x1[0].len() { + if x1[0].as_bytes()[m] != x2[0].as_bytes()[m] { + cd1 += 1; + } + } + let mut cd2 = 0; + let n2 = x1[1].len(); + for m in 0..x1[1].len() { + if x1[1].as_bytes()[m] != x2[1].as_bytes()[m] { + cd2 += 1; + } + } + let cdx = ctl.join_alg_opt.cdr3_normal_len; + mult = ctl + .join_alg_opt + .mult_pow + .powf(cdx as f64 * cd1 as f64 / n1 as f64); + mult *= ctl + .join_alg_opt + .mult_pow + .powf(cdx as f64 * cd2 as f64 / n2 as f64); + } + + // Compute score. + + let score = p1 * mult; + + // Apply JUN_SHARE. + + let mut accept = false; + if ctl.join_alg_opt.comp_filt < 1_000_000 + && score > ctl.join_alg_opt.max_score + && *min_shares < ctl.join_alg_opt.auto_share as isize + && (ctl.join_alg_opt.comp_filt_bound == 0 + || *min_indeps as usize <= ctl.join_alg_opt.comp_filt_bound) + && ex1.share.len() == 2 + && ex2.share.len() == 2 + && ex1.share[0].left != ex1.share[1].left + { + let h1 = info[k1].exact_cols[0]; + let h2 = info[k2].exact_cols[0]; + let comp = min(ex1.share[h1].jun.hcomp, ex2.share[h2].jun.hcomp); + if comp as isize - cd >= ctl.join_alg_opt.comp_filt as isize { + /* + println!("\nwould accept"); + println!("cdr3: {}", ex1.share[h1].cdr3_aa); + println!("cdr3: {}", ex2.share[h2].cdr3_aa); + */ + accept = true; + } else if ctl.join_alg_opt.super_comp_filt > 0 && score > ctl.join_alg_opt.max_score { + let vstart = ex1.share[h1].jun.vstart; + let indels = &ex1.share[h1].jun.indels; + let v_ref_id = ex1.share[h1].v_ref_id; + let j_ref_id = ex1.share[h1].j_ref_id; + if vstart == ex2.share[h2].jun.vstart && *indels == ex2.share[h2].jun.indels { + if accept { + // println!("passes first test"); + } + let d = 
&ex1.share[h1].jun.d; + if *d == ex2.share[h2].jun.d + && v_ref_id == ex2.share[h2].v_ref_id + && j_ref_id == ex2.share[h2].j_ref_id + { + // println!("passes second test"); + let mut seq1 = ex1.share[h1].seq_del.clone(); + let mut seq2 = ex2.share[h2].seq_del.clone(); + let mut vref1 = refdata.refs[v_ref_id].to_ascii_vec(); + if ex1.share[h1].v_ref_id_donor.is_some() { + vref1 = dref[ex1.share[h1].v_ref_id_donor.unwrap()] + .nt_sequence + .clone(); + } + let mut vref2 = refdata.refs[v_ref_id].to_ascii_vec(); + if ex2.share[h2].v_ref_id_donor.is_some() { + vref2 = dref[ex2.share[h2].v_ref_id_donor.unwrap()] + .nt_sequence + .clone(); + } + let donor1 = ex1.clones[0][0].donor_index; + let donor2 = ex2.clones[0][0].donor_index; + let mut ok = vref1 == vref2; + if ctl.gen_opt.mix_only && donor1 == donor2 { + ok = false; + } + if ok { + let mut concat = vref1[vstart..vref1.len()].to_vec(); + for &di in d { + concat.append(&mut refdata.refs[di].to_ascii_vec()); + } + let jref = refdata.refs[j_ref_id].to_ascii_vec(); + let jend = jflank(&seq1, &jref); // note using seq1 + let jref = &jref[0..jend]; + concat.extend(jref); + let mut seq_start = vstart as isize; + if ex1.share[h1].annv.len() > 1 { + let q1 = ex1.share[h1].annv[0].0 + ex1.share[h1].annv[0].1; + let q2 = ex1.share[h1].annv[1].0; + seq_start += q2 as isize - q1 as isize; + } + let mut seq_end = seq1.len() - (jref.len() - jend); + if seq_start as usize > seq_end { + seq_start = vstart as isize; + } + if seq_end <= seq_start as usize { + seq_end = seq1.len(); + } + seq1 = seq1[seq_start as usize..seq_end].to_vec(); + seq2 = seq2[seq_start as usize..seq_end].to_vec(); + let mut share = 0; + for indel in indels { + if indel.1 < 0 { + share += 1; + } + } + let mut ref_pos = 0; + let mut i = 0; + let n = min(seq1.len(), seq2.len()); + 'seq: while i < n { + for indel in indels { + if indel.0 == i { + if indel.1 > 0 { + for k in 0..indel.1 as usize { + if seq1[i + k] == seq2[i + k] { + share += 1; + } + } + i += 
indel.1 as usize; + continue 'seq; + } else { + ref_pos += -indel.1 as usize; + } + } + } + if i >= n || ref_pos >= concat.len() { + break; + } + if seq1[i] == seq2[i] && seq1[i] != concat[ref_pos] { + share += 1; + } + i += 1; + ref_pos += 1; + } + if share >= ctl.join_alg_opt.super_comp_filt { + let mut log = Vec::<u8>::new(); + use io_utils::fwriteln; + use std::io::Write; + fwriteln!(log, "\nEXAMPLE"); + fwriteln!(log, "cdr3: {}", ex1.share[h1].cdr3_aa); + fwriteln!(log, "cdr3: {}", ex2.share[h2].cdr3_aa); + + let (j1, j2) = (info[k1].exact_cols[0], info[k2].exact_cols[0]); + let (x1, x2) = (&ex1.share[j1], &ex2.share[j2]); + let (v1, v2) = (x1.v_ref_id, x2.v_ref_id); + let (n1, n2) = (refdata.name[v1].clone(), refdata.name[v2].clone()); + fwriteln!(log, "heavy V genes = {}/{}", n1, n2); + + use itertools::Itertools; + fwriteln!( + log, + "indels1 = {:?}", + ex1.share[h1].jun.indels.iter().format(",") + ); + fwriteln!( + log, + "indels2 = {:?}", + ex2.share[h2].jun.indels.iter().format(",") + ); + use string_utils::strme; + /* + fwriteln!(log, "vstart1 = {}", vstart); + fwriteln!(log, "vstart2 = {}", ex2.share[h2].jun.vstart); + fwriteln!(log, "seq1 = {}", strme(&seq1)); + fwriteln!(log, "seq2 = {}", strme(&seq2)); + fwriteln!(log, "concat = {}", strme(&concat)); + */ + fwriteln!(log, "heavy junction share = {}", share); + fwriteln!(log, "non junction share = {}", *min_shares); + fwriteln!(log, "indep mutations outside = {}", *min_indeps); + fwriteln!(log, "cd = {}", cd); + fwriteln!(log, "hcd = {}", hcd); + print!("{}", strme(&log)); + accept = true; + } + } + } + } + } + } + + // Threshold on score. 
+ + if !accept + && score > ctl.join_alg_opt.max_score + && *min_shares < ctl.join_alg_opt.auto_share as isize + { + return false; + } + + // If V gene names are different (after removing trailing *...), and either + // • V gene reference sequences are different, after truncation on right to the same length + // • or 5' UTR reference sequences are different, after truncation on left to the same length, + // then the join is rejected. + + for i in 0..info[k1].cdr3s.len() { + let (j1, j2) = (info[k1].exact_cols[i], info[k2].exact_cols[i]); + let (x1, x2) = (&ex1.share[j1], &ex2.share[j2]); + let (v1, v2) = (x1.v_ref_id, x2.v_ref_id); + let (mut n1, mut n2) = (refdata.name[v1].clone(), refdata.name[v2].clone()); + if n1.contains('*') { + n1 = n1.before("*").to_string(); + } + if n2.contains('*') { + n2 = n2.before("*").to_string(); + } + if n1 != n2 { + let (y1, y2) = (&refdata.refs[v1], &refdata.refs[v2]); + if y1.len() == y2.len() { + if y1 != y2 { + return false; + } + } else { + let n = min(y1.len(), y2.len()); + for m in 0..n { + if y1.get(m) != y2.get(m) { + return false; + } + } + } + let (u1, u2) = (x1.u_ref_id, x2.u_ref_id); + if let (Some(u1), Some(u2)) = (u1, u2) { + let (x1, x2) = (&refdata.refs[u1], &refdata.refs[u2]); + let n = min(x1.len(), x2.len()); + for m in 0..n { + if x1.get(x1.len() - 1 - m) != x2.get(x2.len() - 1 - m) { + return false; + } + } + } + } + } + + // Require + // percent heavy chain nuke identity on FWR1 + // minus + // percent heavy chain nuke identity on CDR12 + // is less than 20. 
+ + let nchains = info[k1].lens.len(); + let (mut fwr1_len, mut cdr1_len, mut cdr2_len) = (0, 0, 0); + let (mut fwr1_diffs, mut cdr1_diffs, mut cdr2_diffs) = (0, 0, 0); + for m in 0..nchains { + let (j1, j2) = (info[k1].exact_cols[m], info[k2].exact_cols[m]); + let (x1, x2) = (&ex1.share[j1], &ex2.share[j2]); + if x1.left { + if x1.cdr1_start.is_some() && x2.cdr1_start.is_some() { + let fr1_start1 = x1.fr1_start; + let fr1_stop1 = x1.cdr1_start.unwrap(); + let fr1_start2 = x2.fr1_start; + let fr1_stop2 = x2.cdr1_start.unwrap(); + let len = fr1_stop1 - fr1_start1; + if fr1_stop2 - fr1_start2 == len { + let mut diffs = 0; + for p in 0..len { + if x1.seq_del_amino[p + fr1_start1] != x2.seq_del_amino[p + fr1_start2] { + diffs += 1; + } + } + fwr1_len = len; + fwr1_diffs = diffs; + } + } + if x1.cdr1_start.is_some() + && x1.fr2_start.is_some() + && x2.cdr1_start.is_some() + && x2.fr2_start.is_some() + { + let cdr1_start1 = x1.cdr1_start.unwrap(); + let cdr1_stop1 = x1.fr2_start.unwrap(); + let cdr1_start2 = x2.cdr1_start.unwrap(); + let cdr1_stop2 = x2.fr2_start.unwrap(); + let len = cdr1_stop1 - cdr1_start1; + if cdr1_stop2 - cdr1_start2 == len { + let mut diffs = 0; + for p in 0..len { + if x1.seq_del_amino[p + cdr1_start1] != x2.seq_del_amino[p + cdr1_start2] { + diffs += 1; + } + } + cdr1_len = len; + cdr1_diffs = diffs; + } + } + if x1.cdr2_start.is_some() + && x1.fr3_start.is_some() + && x2.cdr2_start.is_some() + && x2.fr3_start.is_some() + { + let cdr2_start1 = x1.cdr2_start.unwrap(); + let cdr2_stop1 = x1.fr3_start.unwrap(); + let cdr2_start2 = x2.cdr2_start.unwrap(); + let cdr2_stop2 = x2.fr3_start.unwrap(); + // this was violated once when using IMGT reference + if cdr2_start1 <= cdr2_stop1 { + let len = cdr2_stop1 - cdr2_start1; + if cdr2_stop2 - cdr2_start2 == len { + let mut diffs = 0; + for p in 0..len { + if x1.seq_del_amino[p + cdr2_start1] + != x2.seq_del_amino[p + cdr2_start2] + { + diffs += 1; + } + } + cdr2_len = len; + cdr2_diffs = diffs; + } + } + 
} + } + } + if fwr1_len > 0 && cdr1_len > 0 && cdr2_len > 0 { + let len = fwr1_len; + let diffs = fwr1_diffs; + let fwr1_identity = 100.0 * (len - diffs) as f64 / len as f64; + let len = cdr1_len + cdr2_len; + let diffs = cdr1_diffs + cdr2_diffs; + let cdr12_identity = 100.0 * (len - diffs) as f64 / len as f64; + if fwr1_identity - cdr12_identity >= ctl.join_alg_opt.fwr1_cdr12_delta { + return false; + } + } + + // Save potential joins. Note that this jacks up memory usage significantly, + // so it would likely be more efficient to duplicate some of the computations + // during the analysis phase. + + if !ctl.join_print_opt.show_bc { + bcs1.clear(); + bcs2.clear(); + } + let diffs = 0; // no longer computed + pot.push(PotentialJoin { + k1, + k2, + nrefs, + cd, + diffs, + bcs1, + bcs2, + shares, + indeps, + shares_details, + share_pos_v, + share_pos_j, + score, + err, + p1, + mult, + k, + d, + n, + }); + true +} diff --git a/enclone_core/src/lib.rs b/enclone_core/src/lib.rs index 45f49a912..4a55b599a 100644 --- a/enclone_core/src/lib.rs +++ b/enclone_core/src/lib.rs @@ -1,13 +1,204 @@ -// Copyright (c) 2020 10x Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. +pub mod align_to_vdj_ref; +pub mod allowed_vars; +pub mod barcode_fate; +pub mod cell_color; +pub mod combine_group_pics; pub mod defs; +pub mod enclone_structs; +pub mod hcomp; +pub mod join_one; +pub mod linear_condition; +pub mod logging; +pub mod main_testlist; +pub mod mammalian_fixed_len; +pub mod median; +pub mod opt_d; +pub mod packing; pub mod print_tools; -pub mod testlist; +pub mod set_speakers; +pub mod slurp; +pub mod stringulate; +pub mod test_def; +pub mod var_reg; -const VERSION_STRING: &'static str = env!("VERSION_STRING"); +use lazy_static::lazy_static; +use std::cmp::max; +use std::fmt::Write; +use std::io::BufRead; +use std::sync::Mutex; +use std::time::Duration; -// Return the code version string. 
+#[cfg(not(target_os = "windows"))] +use string_utils::stringme; -pub fn version_string() -> String { - VERSION_STRING.to_string() +#[cfg(not(target_os = "windows"))] +use tilde_expand::tilde_expand; + +// tilde_expand_me: replace s by the result of applying tilde_expand to it. +// Note that this is NOT implemented for Windows, where s is left unchanged. + +pub fn tilde_expand_me(_s: &mut String) { + #[cfg(not(target_os = "windows"))] + { + *_s = stringme(&tilde_expand(_s.as_bytes())); + } +} + +// hcat: horizontally concatenate two columns of lines. Each line of col1 is padded with blanks to +// the width of its widest line plus sep (widths are counted in chars), and then the line of col2 +// in the same row, if there is one, is appended. The result has max(col1.len(), col2.len()) rows. +pub fn hcat(col1: &[String], col2: &[String], sep: usize) -> Vec<String> { + let mut cat = Vec::<String>::new(); + let height = max(col1.len(), col2.len()); + let mut width1 = 0; + for x in col1 { + width1 = max(width1, x.chars().count() + sep); + } + for i in 0..height { + let mut s = if i < col1.len() { + col1[i].clone() + } else { + String::new() + }; + while s.chars().count() < width1 { + s += " "; + } + if i < col2.len() { + s += &col2[i]; + } + cat.push(s); + } + cat +} + +// expand_integer_ranges: split x into tokens at each , : or ; and replace every token of the form +// n1-n2, where n1 and n2 parse as usize and n1 <= n2, by the comma-separated list n1,...,n2. +// The separators and all other tokens are copied unchanged. +pub fn expand_integer_ranges(x: &str) -> String { + let mut tokens = Vec::<String>::new(); + let mut token = String::new(); + for c in x.chars() { + if c == ',' || c == ':' || c == ';' { + if !token.is_empty() { + tokens.push(token.clone()); + token.clear(); + } + tokens.push(c.to_string()); + } else { + token.push(c); + } + } + if !token.is_empty() { + tokens.push(token); + } + let mut tokens2 = String::new(); + for token in tokens { + if let Some((n1s, n2s)) = token.split_once('-') { + if let Ok(n1) = n1s.parse::<usize>() { + if let Ok(n2) = n2s.parse::<usize>() { + if n1 <= n2 { + for n in n1..=n2 { + if n > n1 { + tokens2.push(','); + } + write!(tokens2, "{n}").unwrap(); + } + continue; + } + } + } + } + tokens2 += token.as_str(); + } + tokens2 +} + +lazy_static! { + pub static ref BUG_REPORT_ADDRESS: Mutex<Vec<String>> = Mutex::new(Vec::<String>::new()); + pub static ref REMOTE_HOST: Mutex<Vec<String>> = Mutex::new(Vec::<String>::new()); +} + +// Parse a line, breaking at blanks, but not if they're in quotes. And strip the quotes.
+// Ridiculously similar to parse_csv, probably should refactor. +// Quotes are stripped only from an argument that both begins and ends with a double quote, and +// consecutive blanks outside quotes yield empty arguments. +pub fn parse_bsv(x: &str) -> Vec<&str> { + let mut args = Vec::<&str>::new(); + let w = x.as_bytes(); + // quotes counts the double quotes seen so far in the whole line; a blank ends an argument + // only when this count is even. + let (mut quotes, mut i) = (0, 0); + while i < w.len() { + let mut j = i; + while j < w.len() { + if quotes % 2 == 0 && w[j] == b' ' { + break; + } + if w[j] == b'"' { + quotes += 1; + } + j += 1; + } + // These will always be char boundaries because it's either the end of + // the string or it's a space character, which is a single byte in + // UTF-8. That remains true if i/j are '"' bytes. + let (mut start, mut stop) = (i, j); + if stop - start >= 2 && w[start] == b'"' && w[stop - 1] == b'"' { + start += 1; + stop -= 1; + } + args.push(&x[start..stop]); + i = j + 1; + } + args +} + +// fetch_url: get the content at the given URL, using a read timeout of TIMEOUT seconds. Returns +// the response text, or else an error message if the request could not be sent or the response +// status is not a success. Any failure to send is reported as a timeout. Panics if the +// response text cannot be read. +pub fn fetch_url(url: &str) -> Result<String, String> { + const TIMEOUT: u64 = 120; // timeout in seconds + let req = attohttpc::get(url).read_timeout(Duration::new(TIMEOUT, 0)); + let response = req.send(); + if response.is_err() { + return Err(format!( + "\nFailed to access URL {url},\ntimeout after two minutes. There are a few ways that \ + you might have arrived at this state:\n• The server for that URL is down.\n\ + • The server for that URL is overloaded and responding very slowly.\n\ + • Same thing as last, and your process is slamming the server. Please inspect \ + your command!\n\ + • There is a bug in this program. This is relatively unlikely but possible.\n" + )); + } + let response = response.unwrap(); + if !response.is_success() { + let msg = response.text().unwrap(); + if msg.contains("Not found") { + return Err(format!( + "\nAttempt to access the URL\n{url}\nfailed with \"Not found\". Could there \ + be something wrong with the id?\n" + )); + } + return Err(format!("Failed to access URL {url}: {msg}.")); + } + Ok(response.text().unwrap()) +} + +// Test to see if a line can be read from the given file f.
If not, return an error message +// that references arg, which is supposed to be the name of a command line argument from which +// f originated. Note that an empty file passes, because only a failure to open the file or to +// read its first line is reported. + +pub fn require_readable_file(f: &str, arg: &str) -> Result<(), String> { + let x = std::fs::File::open(f); + if x.is_err() { + return Err(format!( + "\nThe file {} could not be opened because {}.\nThis came from \ + the command line argument {}.\n", + f, + x.err().unwrap(), + arg, + )); + } + let y = std::io::BufReader::new(x.unwrap()); + if let Some(line) = y.lines().next() { + if line.is_err() { + let mut err = line.err().unwrap().to_string(); + if err.starts_with("Is a directory") { + err = "it is a directory".to_string(); + } + return Err(format!( + "\nThe file {f} could not be read because {err}.\nThis came from \ + the command line argument {arg}.\n", + )); + } + } + Ok(()) } diff --git a/enclone_core/src/linear_condition.rs b/enclone_core/src/linear_condition.rs new file mode 100644 index 000000000..051fb147d --- /dev/null +++ b/enclone_core/src/linear_condition.rs @@ -0,0 +1,159 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved.
+ +use crate::defs::EncloneControl; +use string_utils::{stringme, TextUtils}; + +#[derive(Clone, PartialEq)] +pub struct LinearCondition { + pub coeff: Vec<f64>, // left hand side (lhs) coefficients + pub var: Vec<String>, // left hand side variables (parallel to coefficients) + pub rhs: f64, // right hand side, compared to the lhs sum as specified by sense + pub sense: &'static str, // how lhs is compared to rhs: le, ge, lt or gt +} + +impl LinearCondition { + // Summary of the methods below. + // n: the number of terms on the left hand side. + // new: parse a condition lhs OP rhs from a string, where OP is one of >=, ≥, ⩾, <=, ≤, ⩽, < or >, + // rhs is a constant, and lhs is a sum or difference of terms, each a variable or + // coefficient*variable. Blanks are ignored and parentheses are removed from each term. + // Returns an error message if there is no inequality symbol, or if the right hand side or + // a coefficient is not a number. + // satisfied: evaluate the condition, where val[i] is the value of var[i]; any sense other + // than lt, gt or le is treated as ge. + // require_valid_variables: return an error message if a variable name ends with _cell; the + // ctl argument is unused. + pub fn n(&self) -> usize { + self.coeff.len() + } + + pub fn new(x: &str) -> Result<LinearCondition, String> { + let y = x.replace(' ', ""); + let lhs: &str; + let rhs: &str; + let sense: &str; + if y.contains(">=") { + (lhs, rhs) = y.split_once(">=").unwrap(); + sense = "ge"; + } else if y.contains('≥') { + (lhs, rhs) = y.split_once('≥').unwrap(); + sense = "ge"; + } else if y.contains('⩾') { + (lhs, rhs) = y.split_once('⩾').unwrap(); + sense = "ge"; + } else if y.contains("<=") { + (lhs, rhs) = y.split_once("<=").unwrap(); + sense = "le"; + } else if y.contains('≤') { + (lhs, rhs) = y.split_once('≤').unwrap(); + sense = "le"; + } else if y.contains('⩽') { + (lhs, rhs) = y.split_once('⩽').unwrap(); + sense = "le"; + } else if y.contains('<') { + (lhs, rhs) = y.split_once('<').unwrap(); + sense = "lt"; + } else if y.contains('>') { + (lhs, rhs) = y.split_once('>').unwrap(); + sense = "gt"; + } else { + return Err(format!( + "\nImproperly formatted condition, no inequality symbol, \ + please type \"enclone help display\": {x}.\n" + )); + } + let mut rhs = rhs.replace('E', "e"); + if !rhs.contains('.') && !rhs.contains('e') { + rhs += ".0"; + } + if rhs.parse::<f64>().is_err() { + return Err(format!( + "\nImproperly formatted condition, right-hand side invalid: {x}.\n\ + The right-hand side needs to be a constant.
Please type \ + \"enclone help filter\"\n\ + for more information.\n" + )); + } + let rhs = rhs.force_f64(); + let mut parts = Vec::<String>::new(); + let mut last = 0; + let lhsx = lhs.as_bytes(); + let mut parens = 0_isize; + for i in 0..lhsx.len() { + if i > 0 && parens == 0 && (lhsx[i] == b'+' || lhsx[i] == b'-') { + if lhsx[last] != b'+' { + parts.push(stringme(&lhsx[last..i])); + } else { + parts.push(stringme(&lhsx[last + 1..i])); + } + last = i; + } + if lhsx[i] == b'(' { + parens += 1; + } else if lhsx[i] == b')' { + parens -= 1; + } + } + let mut coeff = Vec::<f64>::new(); + let mut var = Vec::<String>::new(); + if lhsx[last] != b'+' { + parts.push(stringme(&lhsx[last..])); + } else { + parts.push(stringme(&lhsx[last + 1..])); + } + for part in parts.iter_mut() { + *part = part.replace('(', ""); + *part = part.replace(')', ""); + if part.contains('*') { + let mut coeffi = part.before("*").to_string(); + let vari = part.after("*"); + if !coeffi.contains('.') && !coeffi.contains('e') { + coeffi += ".0"; + } + if coeffi.parse::<f64>().is_err() { + return Err(format!( + "\nImproperly formatted condition, coefficient {coeffi} is invalid: {x}.\n\ + Please type \"enclone help filter\" for more information.\n" + )); + } + coeff.push(coeffi.force_f64()); + var.push(vari.to_string()); + } else { + let mut coeffi = 1.0; + let mut start = 0; + if part.starts_with('-') { + coeffi = -1.0; + start = 1; + } + coeff.push(coeffi); + var.push(part[start..].to_string()); + } + } + Ok(LinearCondition { + coeff, + var, + rhs, + sense, + }) + } + + pub fn satisfied(&self, val: &[f64]) -> bool { + let lhs: f64 = self + .coeff + .iter() + .zip(val.iter()) + .map(|(&ci, &vi)| ci * vi) + .sum(); + if self.sense == "lt" { + lhs < self.rhs + } else if self.sense == "gt" { + lhs > self.rhs + } else if self.sense == "le" { + lhs <= self.rhs + } else { + lhs >= self.rhs + } + } + + pub fn require_valid_variables(&self, _ctl: &EncloneControl) -> Result<(), String> { + for i in
0..self.var.len() { + if self.var[i].ends_with("_cell") { + return Err(format!( + "\nThe variable {} should not be used in a linear condition.\n\ + Please type \"enclone help filter\" for more information.\n", + self.var[i] + )); + } + } + Ok(()) + } +} diff --git a/enclone_core/src/logging.rs b/enclone_core/src/logging.rs new file mode 100644 index 000000000..ae5399655 --- /dev/null +++ b/enclone_core/src/logging.rs @@ -0,0 +1,21 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. + +use lazy_static::lazy_static; +use std::fs::OpenOptions; +use std::io::Write; +use std::sync::Mutex; + +lazy_static! { + pub static ref SERVER_LOGFILE: Mutex<Vec<String>> = Mutex::new(Vec::<String>::new()); +} + +// logme: append the line s to the file named by the first entry of SERVER_LOGFILE, creating the +// file if needed. Does nothing if SERVER_LOGFILE is empty. Panics if the file cannot be +// opened or written. +pub fn logme(s: &str) { + if SERVER_LOGFILE.lock().unwrap().len() > 0 { + let mut file = OpenOptions::new() + .create(true) + .append(true) + .open(&SERVER_LOGFILE.lock().unwrap()[0]) + .unwrap(); + writeln!(file, "{s}").unwrap(); + } +} diff --git a/enclone_core/src/main_testlist.rs b/enclone_core/src/main_testlist.rs new file mode 100644 index 000000000..f3066c5aa --- /dev/null +++ b/enclone_core/src/main_testlist.rs @@ -0,0 +1,766 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +pub const TESTS: [&str; 305] = [ + // 1. tests variant base after CDR3, parseable output + r###"BCR=123089 CDR3=CVRDRQYYFDYW POUT=stdout + PCOLS=exact_subclonotype_id,n,v_name1,v_name2,nchains,var_indices_aa1,barcodes"###, + // 2. tests many donor ref differences, test comp, edit and var and donorn + r###"BCR=123089 CDR3=CARRYFGVVADAFDIW CVARSP=comp,edit,var AMINO=cdr3,var,share,donorn"###, + // 3. tests motif in CDR3, CHAINS, u_sum, ulen, flipped args in CVARS, on tiny dataset + r###"BCR=85333 CDR3="CAA.*" CHAINS=2 CVARS=const,u_sum,ulen"###, + // 4. tests gex and antibody, FULL_SEQC, ulen, udiff, on tiny dataset + r###"BCR=86237 GEX=85679 LVARSP=gex,CD19_ab_μ,CD25_ab_μ,IGLV3-1_g_μ,IGLV3-1_g_%,RPS27_g_μ + CELLS=3 FULL_SEQC SUM MEAN + CVARSP=ulen,udiff"###, + // 5.
tests TCR and correct grouping of onesies on AGBT Donor 2 dataset + r###"TCR=101287 MIN_CELLS=100"###, + // 6. tests AMINO= and vjlen and other things + r###"BCR=86237 CELLS=3 AMINO= CVARS=u,r,cdr3_dna,cdr3_len,vjlen"###, + // 7. tests SHM deletion + r###"BCR=123085 CVARSP=var,clen,cdiff CDR3=CAREPLYYDFWSAYFDYW LVARSP=near,far"###, + // 8. test KEEP_CELL_IF with >= and <= + r###"BCR=123085 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=rank + KEEP_CELL_IF="rank >= 2 && rank <= 3""###, + // 9. tests PER_CELL and unicode + r###"BCR=█≈ΠΠΠ≈█ CDR3=CAKGDRTGYSYGGGIFDYW PER_CELL"###, + // 10. tests multiple datasets and also LVARS=n,origins,donors,datasets, and share + // Note that we have deliberately "faked" two donors. In reality there is one. + r###"BCR="123085;123089" CDR3=CVKDRVTGTITELDYW LVARS=n,origins,donors,datasets AMINO=share + MIX_DONORS"###, + // 11. tests META, and CONST_IGH + META, which was broken at one point + r###"META=testx/inputs/test11_meta CDR3=CARSFFGDTAMVMFQAFDPW LVARSP=donors,gex + CONST_IGH=IGHD"###, + // 12. test colon lvar in KEEP_CLONO_IF_CELL_MEAN= and test for parsing error at + + r###"BCR=86237 GEX=85679 LVARSP=g37:IGHV3-7_g_μ KEEP_CLONO_IF_CELL_MEAN="n + g37 >= 5.5" + MIN_CHAINS=2 NH5"###, + // 13. check TSV file with BC + r###"BCR=123085 BC=testx/inputs/123077_cells.tsv PER_CELL LVARSP=T CDR3=CARGYEDFTMKYGMDVW"###, + // 14. test cdr3_aa_conp + r###"BCR=123085 CVARSP=cdr3_aa_conp CDR3=CAKTGDLELRYFDWDMDVW"###, + // 15. tests insertion and AMINO range; also this incorrectly reported an insertion before + // it was fixed + r###"BCR=86233 CDR3=CARGLVVVYAIFDYW CVARS=notes AMINO=cdr3,105-113"###, + // 16. tests number of cells broken out by dataset + r###"BCR=123085,123089 LVARS=n,n_123085,n_123089 CDR3=CTRDRDLRGATDAFDIW"###, + // 17. tests gex with PER_CELL and tests n_gex + // See also enclone_test_prebuild below, that tests nearly the same thing, + // and tests versus the same output file. 
+ r###"BCR=86237 GEX=85679 LVARSP=gex_max,gex,n_gex,CD19_ab_μ CELLS=3 PER_CELL NH5"###, + // 18. makes sure cross filtering isn't applied to two origins from same donor + r###"BCR=123085:123089 CDR3=CVRDEGGARPNKWNYEGAFDIW"###, + // 19. there was a bug that caused a twosie to be deleted, and there was foursie junk + // There were also some cells that were lost due to a bug in graph filtering. + r###"BCR=123085 CDR3=CARRYFGVVADAFDIW"###, + // 20. example affected by whitelist (gel bead oligo contamination) filtering, and test u_Σ + r###"BCR=52177 AMINO=cdr3 PER_CELL CDR3=CATWDDSLSGPNWVF CVARSP=u_Σ"###, + // 21. test MIN_CHAINS_EXACT + r###"BCR=123089 CDR3=CGTWHSNSKPNWVF MIN_CHAINS_EXACT=3"###, + // 22. DUPLICATE, TO REMOVE + r###"BCR=123089 CDR3=CGTWHSNSKPNWVF MIN_CHAINS_EXACT=3"###, + // 23. here we were generating a fake alternate allele + r###"BCR=83808 CDR3=CAREGRGMVTTNPFDYW MIN_CELLS_EXACT=30"###, + // 24. an example that uses IGHE, and test NGROUP + r###"BCR=52177 CDR3=CSTGWGLDFDFWSGYYTAGYHW NGROUP"###, + // 25. add mouse B6 example that had messed up constant regions + r###"TCR=74396 MOUSE CVARSP=cdiff CDR3=CASSDAGDTQYF"###, + // 26. tests multiple datasets and also LVARS=n,donors,datasets, and share + // Note that we have deliberately "faked" two donors. In reality there is one. + // Here we make sure that non-specification of MIX_DONORS works. + r###"BCR="123085;123089" CDR3=CVKDRVTGTITELDYW"###, + // 27. tests SUMMARY and NOPRINT + r###"BCR=123085 SUMMARY SUMMARY_CLEAN NOPRINT"###, + // 28. tests BARCODE option + r###"BCR=165807 BARCODE=CCCATACGTGATGATA-1,TCTATTGAGCTGAAAT-1"###, + // 29. tests KEEP_CLONO_IF_CELL_MAX and parenthesized variable in it, SUM and MEAN, use of ≥ + r###"BCR=123085 GEX=123217 LVARSP=IGHV3-7_g,IGHV3-7_g_μ + KEEP_CLONO_IF_CELL_MAX="(IGHV3-7_g_μ)≥10000.0" MIN_CHAINS=2 SUM MEAN H5"###, + // 30. tests d_univ and d_donor + r###"BCR=123085 CVARSP=d_univ,d_donor CDR3=CVKDRVTGTITELDYW"###, + // 31. 
tests Cell Ranger 3.1 output + r###"BCR=../3.1/123085 CDR3=CVKDRVTGTITELDYW ACCEPT_BROKEN"###, + // 32. tests Cell Ranger 2.0 output and RE + r###"BCR=../2.0/124550 CDR3=CAREPLYYDFWSAYFDYW RE ACCEPT_BROKEN"###, + // 33. tests SCAN + r###"BCR=123085 GEX=123217 LVARSP=IGHV1-69D_g_μ MIN_CELLS=10 NGEX + SCAN="(IGHV1-69D_g_μ)>=100,(IGHV1-69D_g_μ)<=1,t-10*c>=0.1" NOPRINT H5"###, + // 34. tests honeycomb plot + // (This yields a lot of output so will be annoying to debug if something changes.) + r###"BCR=123085:123089 MIN_CELLS=10 PLOT="stdout,s1->red,s2->blue" NOPRINT + LEGEND=red,"cell from 123085",blue,"cell from 123089""###, + // 35. tests barcode-by-barcode specification of colors, and tests LEGEND= + // Note that the specification of PRE overrides our usual specification. + // (This yields a lot of output so will be annoying to debug if something changes.) + r###"PRE=../enclone-data/big_inputs/version{TEST_FILES_VERSION},. + META=testx/inputs/test35_meta MIN_CELLS=10 MIN_CHAINS_EXACT=2 NOPRINT PLOT=stdout NO_PRE + LEGEND=red,IGHG1,green,IGHG3,blue,IGHA1,orange,IGHM,black,unassigned"###, + // 36. tests PCELL and u_Σ in PCOLS (both forms) + r###"BCR=85333 CDR3=CARDGMTTVTTTAYYGMDVW POUT=stdout PCELL CVARSP=u_Σ + PCOLS=barcode,const1,const2,u_Σ1,u_sum1"###, + // 37. tests parseable output of barcodes for a given dataset + r###"BCR=123085,123089 POUT=stdout PCOLS=123085_barcodes,123089_barcodes + CDR3=CAVTIFGVRTALPYYYALDVW"###, + // 38. tests parseable output of barcodes for a given dataset, using PCELL + r###"BCR=123085,123089 POUT=stdout PCOLS=123085_barcode,123089_barcode PCELL + CDR3=CAVTIFGVRTALPYYYALDVW"###, + // 39. tests u and r fields in parseable output, and tests stdouth + r###"BCR=85333 POUT=stdouth PCOLS=barcode,u1,u_cell1,r2,r_cell2 PCELL PER_CELL CVARSP=r + CDR3=CAADGGGDQYYYMDVW"###, + // 40. indel was wrong + // Note that F is deprecated, equals KEEP_CLONO_IF_CELL_MEAN. Also test ⩾. 
+ r###"BCR=86237 GEX=85679 LVARSP=IGHV3-7_g_μ F="(IGHV3-7_g_μ)⩾4.5" MIN_CHAINS=2 SUM MEAN + NH5"###, + // 41. test case for gex_cell + r###"BCR=86237 GEX=85679 CDR3=CAKAVAGKAVAGGWDYW POUT=stdouth PCOLS=gex_cell PCELL NH5"###, + // 42. test case that should fail because gex_cell doesn't make sense without gex data + r###"BCR=85333 CDR3=CQQRSNWPLYTF POUT=stdouth PCOLS=gex_cell PCELL PER_CELL EXPECT_FAIL"###, + // 43. test case that should fail because _cell variables can't be used in LVARS + r###"BCR=86237 GEX=85679 CDR3=CAKAVAGKAVAGGWDYW LVARS=gex_cell EXPECT_FAIL"###, + // 44. test _cell + r###"BCR=86237 GEX=85679 LVARSP=gex,RPS27_g_μ CELLS=3 POUT=stdouth + PCOLS=barcode,gex_cell,CD19_ab,CD19_ab_cell NH5 PCELL"###, + // 45. test ndiff... + r###"BCR=123085 CVARSP=ndiff1vj,ndiff2vj CDR3=CARDQNFDESSGYDAFDIW"###, + // 46. test u_μ, u_min, r_μ, r_min and r_max + r###"BCR=85333 CVARSP=u_μ,u_min,u_max,r,r_μ,r_min,r_max AMINO=cdr3 CDR3=CAADGGGDQYYYMDVW + POUT=stdouth PCOLS=u_μ1,u_min1,u_max1,r2,r_μ2,r_min2,r_max2"###, + // 47. this should fail + r###"BCR=85333 CDR3=CAREEYYYDSSGDAFDIW LVARSP=gex_mean EXPECT_FAIL"###, + // 48. test gex_mean and gex_Σ and NGEX + // Do not use NH5 because the bin file is too big for git. + r###"BCR=123085 GEX=123217 LVARSP=gex_mean,gex_Σ CDR3=CASRKSGNYIIYW NGEX H5"###, + // 49. test HTML + r###"BCR=85333 CDR3=CAAWDDSLNGWVF CHAINS=1 POUT=stdouth PCOLS=barcodes,n FASTA=stdout + FASTA_AA=stdout HTML=CAAWDDSLNGWVF"###, + // 50. make sure this doesn't fail + r###"NOPAGER EXPECT_OK"###, + // 51. make sure this fails gracefully + r###"BCR=123085 PLOT=/nonexistent/broken.svg NOPRINT MIN_CELLS=50 EXPECT_FAIL"###, + // 52. add test for some gene patterns + // Do not use NH5 because the bin file is too big for git. + r###"BCR=123085 GEX=123217 CDR3=CARPKSDYIIDAFDIW MIN_CELLS=10 H5 + LVARSP="(IGHV5-51|IGLV1-47)_g_%,IGH.*_g_%,IG(K|L).*_g_%""###, + // 53. add test for _% with PER_CELL + // Do not use NH5 because the bin file is too big for git. 
+ r###"BCR=123085 GEX=123217 LVARSP="gex,n_gex,JCHAIN_g_%,IG%:IG.*_g_%" CVARS=u_μ,const + MIN_CHAINS_EXACT=2 CDR3=CAREGGVGVVTATDWYFDLW PER_CELL H5"###, + // 54. make sure this fails gracefully + r###"BCR=86237 GEX=85679 LVARSP=GERBULXXX123_g_% EXPECT_FAIL"###, + // 55. test cred + r###"BCR=86237 GEX=85679 LVARSP=cred PCELL PER_CELL POUT=stdouth PCOLS=cred_cell + CDR3=CARSFFGDTAMVMFQAFDPW"###, + // 56. test SVG + r###"BCR=85333 CDR3=CARDPRGWGVELLYYMDVW SVG NGROUP"###, + // 57. test 1/8 for newline correctness + r###"BCR=85333 CDR3="CLLSYSGARVF|CQSADSSGTYKVF" AMINO= PLAIN SET_IN_STONE"###, + // 58. test 2/8 for newline correctness + r###"BCR=85333 CDR3="CLLSYSGARVF|CQSADSSGTYKVF" AMINO= PLAIN NGROUP SET_IN_STONE"###, + // 59. test 3/8 for newline correctness + r###"BCR=85333 CDR3="CLLSYSGARVF|CQSADSSGTYKVF" AMINO= PLAIN HTML SET_IN_STONE"###, + // 60. test 4/8 for newline correctness + r###"BCR=85333 CDR3="CLLSYSGARVF|CQSADSSGTYKVF" AMINO= PLAIN NGROUP HTML SET_IN_STONE"###, + // 61. test that enclone doesn't crash on CS multi 6.1 pipeline + r###"BCR_GEX=tiny_multi_CS_6.1 ALLOW_INCONSISTENT EXPECT_OK"###, + // 62. make sure color from BC can be used as lead variable was broken) + r###"BCR=123085 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=color + AMINO=cdr3 BARCODE=CATATGGTCAGTTGAC-1"###, + // 63. DUPLICATE, TO REPLACE + r###"BCR=85333 GROUP_VJ_REFNAME MIN_GROUP=2 AMINO= PLAIN HTML SET_IN_STONE"###, + // 64. DUPLICATE, TO REPLACE + r###"BCR=85333 GROUP_VJ_REFNAME MIN_GROUP=2 AMINO= PLAIN HTML NGROUP SET_IN_STONE"###, + // 65. test NCELL + r###"BCR=86237 NCELL CDR3=CAKTATTLGGYYSHGLDVW MIN_CELLS=2"###, + // 66. test BC in combination with PER_CELL and PCELL + // Do not use NH5 because the bin file is too big for git. + r###"BCR=123085 GEX=123217 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=gex,cred,T PCELL + POUT=stdouth PCOLS=barcode,T CDR3=CAKAGPTESGYYVWYFDLW MIN_CELLS=2 H5"###, + // 67. 
expect fail if garbage PRE + r###"PRE=garbage_gerbil_stuff BCR=86237 CELLS=3 EXPECT_FAIL NO_PRE"###, + // 68. a test of PRE + r###"PRE=mumbo_jumbo,../enclone-data/big_inputs/version{TEST_FILES_VERSION} BCR=86237 NO_PRE + CDR3=CARENHPVEYCSSTSCYKAYYYGMDVW"###, + // 69. another test of pre + r###"PRE=mumbo_jumbo BCR=../enclone-data/big_inputs/version{TEST_FILES_VERSION}/86237 NO_PRE + CDR3=CARENHPVEYCSSTSCYKAYYYGMDVW"###, + // 70. another test of META + r###"META=mumbo_jumbo EXPECT_FAIL"###, + // 71. another test of META + r###"PRE=../enclone-data/big_inputs/version{TEST_FILES_VERSION},testx/inputs META=test11_meta + CDR3=CARSFFGDTAMVMFQAFDPW LVARSP=donors,gex NO_PRE"###, + // 72. test SUMMARY_CSV + r###"BCR=86237 NOPRINT SUMMARY_CSV"###, + // 73. test cdr3_aa_conx + r###"BCR=123085 CVARSP=cdr3_aa_conx CDR3=CAKTGDLELRYFDWDMDVW"###, + // 74. this changed after a bug was fixed; the RE can probably be dropped later when we + // rerun all the datasets + r###"BCR=123085 RE CDR3=CARGYEDFTMKYGMDVW POUT=stdouth PCOLS=utr_id2"###, + // 75. this changed after a bug in RE was fixed, and this is in fact testing RE + r###"BCR=123085 CDR3=CQQSYSTPRTF RE"###, + // 76. test PLOT_BY_ISOTYPE + r###"BCR=123085 MIN_CELLS=10 PLOT_BY_ISOTYPE=stdout NOPRINT MIN_CHAINS_EXACT=2"###, + // 77. test PLOT_BY_ISOTYPE_COLOR + r###"BCR=123085 MIN_CELLS=10 PLOT_BY_ISOTYPE=stdout NOPRINT MIN_CHAINS_EXACT=2 + PLOT_BY_ISOTYPE_COLOR=red,green,blue,yellow,black,orange,turquoise,pink,gray,purple"###, + // 78. make sure that POUT with PCELL works on full dataset + r###"BCR=86237 POUT=stdout PCELL EXPECT_OK"###, + // 79. make sure that POUT works on full dataset with gex + r###"BCR=86237 GEX=85679 POUT=stdout NGEX NCELL EXPECT_OK"###, + // 80. make sure that POUT with PCELL works on full dataset with gex + r###"BCR=86237 GEX=85679 POUT=stdout PCELL NGEX NCELL EXPECT_OK"###, + // 81. 
IG:IG.*_g_%_cell and variants in parseable output + r###"BCR=86237 GEX=85679 CDR3=CARSFFGDTAMVMFQAFDPW POUT=stdouth PCELL + PCOLS="barcode,IG:IG.*_g_%_cell,IG.*_g_%_cell,IGN:IG.*_g_%,IG.*_g_%""###, + // 82. test entropy + // Do not use NH5 because the bin file is too big for git. + r###"BCR=123085 GEX=123217 LVARSP=entropy PER_CELL POUT=stdouth PCELL + PCOLS=barcode,entropy,entropy_cell CDR3=CARAQRHDFWGGYYHYGMDVW H5"###, + // 83. test COMPLETE and dref + r###"BCR=86237 CDR3=CARSFFGDTAMVMFQAFDPW COMPLETE LVARSP=dref"###, + // 84. test CLUSTAL_AA + r###"BCR=123085 CDR3=CAADRQLWSRSPGDYIYYGMQVW CLUSTAL_AA=stdout"###, + // 85. test NALL + r###"BCR=86237 NALL CDR3=CARAPEDTSRWPQYNYSGLDVW SEG=IGKV3-15"###, + // 86. test CLUSTAL_DNA + r###"BCR=86237 CDR3=CARSFFGDTAMVMFQAFDPW CLUSTAL_DNA=stdout"###, + // 87. test PHYLIP_AA and COLOR=codon + r###"BCR=123085 CDR3=CAADRQLWSRSPGDYIYYGMQVW PHYLIP_AA=stdout COLOR=codon"###, + // 88. test PHYLIP_DNA and COLOR=default + r###"BCR=123085 CDR3=CAADRQLWSRSPGDYIYYGMQVW PHYLIP_DNA=stdout COLOR=property"###, + // 89. test TREE and NEWICK + r###"BCR=123085 COMPLETE TREE NEWICK CDR3=CARDLGGRYYGSKDPW"###, + // 90. test KEEP_CELL_IF with non-null value + // Do not use NH5 because the bin file is too big for git. + r###"BCR=123085 GEX=123217 H5 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=gex,cred,T + CDR3=CARGYEDFTMKYGMDVW KEEP_CELL_IF="keeper == 'yes'""###, + // 91. test FCELL with null value + // Do not use NH5 because the bin file is too big for git. + r###"BCR=123085 GEX=123217 H5 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=gex,cred,T + CDR3=CARGYEDFTMKYGMDVW FCELL="keeper == ''""###, + // 92. test NALL_CELL + r###"BCR=123085 NALL_CELL CDR3=CQKYDSAPLTF MIN_CELLS=20"###, + // 93. test MIN_DATASET_RATIO + r###"BCR=123085,123089 MIN_DATASET_RATIO=6 LVARSP=nd2"###, + // 94. test use of SEG twice + r###"BCR=123085 SEG=IGHV5-51 SEG=IGKV1D-39"###, + // 95. 
test TREE=const + r###"BCR=123085 TREE=const CDR3=CARPKSDYIIDAFDIW MIN_CELLS=2"###, + // 96. test MAX_LOG_SCORE + r###"BCR=123085 CDR3=CARDQNFDESSGYDAFDIW MAX_LOG_SCORE=0.0"###, + // 97. Test MAX_CDR3_DIFFS. This is also an instance where an exact subclonotype has + // two chains with identical CDR3s, and this is the right answer, until and unless we change + // cellranger to somehow not emit two such chains. + r###"BCR=123085 CDR3=CARESVVGLLPIFDYW MAX_CDR3_DIFFS=1"###, + // 98. test reduced stringency D alignment + // (RE can be removed once cellranger rerun) + r###"TCR=101287 CDR3=CASSPAGTSGKVWGTDTQYF RE"###, + // 99. test mait (redundant with mait_example.html below, so could delete) + r###"TCR=101287 LVARSP=mait CDR3=CSAGQGDTEAFF"###, + // 100. test inkt and INKT + r###"TCR=101287 LVARSP=inkt INKT MIN_CELLS=2"###, + // 101. test MAIT + r###"TCR=101287 LVARSP=mait MAIT MIN_CELLS=50"###, + // 102. test BINARY with unwriteable path + r###"BCR=123085 BINARY=/gerbilspam/bumblebee EXPECT_FAIL"###, + // 103. test POUT without PCOLS (somewhat annoying, because easily triggered to change) + r###"BCR=85333 POUT=stdout CDR3=CQSADSSGTYKVF"###, + // 104. test EASY + r###"BCR=123085 CDR3="CARVIVGPKKLEGRLYSSSLHFDCW|CARVIVGPEKQEGRLYSSSLHFDYW" EASY + MAX_LOG_SCORE=100"###, + // 105. test MAX_DEGRADATION and MAX_DIFFS + r###"BCR=123085,123089 MAX_LOG_SCORE=100 MAX_DEGRADATION=150 MAX_DIFFS=200 + MAX_CDR3_DIFFS=100 CDR3=CVRILGRALTVRVYFYYGIDVW"###, + // 106. test for failed interaction between POUT and COMPLETE (crashed at one point) + r###"BCR=123085 CDR3=CAKANQLLYGGRQYYYGMDVW COMPLETE POUT=stdout + PCOLS=clonotype_id,exact_subclonotype_id,n,d_donor1,d_donor2"###, + // 107. part 1 of test for weak onesies filter + r###"TCR=101287 CDR3=CASSQVAGAGQPQHF"###, + // 108. part 2 of test for weak onesies filter + r###"TCR=101287 CDR3=CASSQVAGAGQPQHF NWEAK_ONESIES"###, + // 109.
test Levenshtein distance pattern + r###"BCR=123085 CDR3="CAKDKVPRRSSWSVFDYYGMDVW~9|CAVTIFGVRTALPYYYALDVW~9" NGROUP"###, + // 110. test dref_aa + r###"BCR=123085 LVARSP=dref,dref_aa CDR3=CAREKGIGSSGWDWGAFDIW"###, + // 111. test for fail if F used with unsupported variable (but now supported) + // Note that F is deprecated, equals KEEP_CLONO_IF_CELL_MEAN. + r###"BCR=123085 LVARSP=near F="near>=0" EXPECT_OK"###, + // 112. test 1 of 6 for cdr1/cdr2 in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,cdr1,cdr2,cdr3"###, + // 113. test 2 of 6 for cdr1/cdr2 in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,cdr1,cdr3"###, + // 114. test 3 of 6 for cdr1/cdr2 in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,cdr2,cdr3"###, + // 115. test 4 of 6 for cdr1/cdr2 in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,cdr1,cdr2"###, + // 116. test 5 of 6 for cdr1/cdr2 in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,cdr1"###, + // 117. test 6 of 6 for cdr1/cdr2 in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,cdr2"###, + // 118. test cdr1_aa and cdr2_aa + r###"BCR=85333 CDR3=CARDLRVEGFDYW CVARSP=cdr1_aa,cdr2_aa AMINO=cdr1"###, + // 119. test cdr3_aa + r###"BCR=85333 CDR3=CARDLRVEGFDYW CVARSP=cdr3_aa AMINO=cdr3"###, + // 120. test cdr1_dna and cdr2_dna + r###"BCR=85333 CDR3=CARDLRVEGFDYW CVARS=cdr1_dna,cdr2_dna AMINO="###, + // 121. test cdr1_len and cdr2_len + r###"BCR=85333 CDR3=CARDLRVEGFDYW CVARS=cdr1_len,cdr2_len AMINO="###, + // 122. test insertion in CDR1 and test cdr3_start when there is an insertion + r###"BCR=123089 CDR3=CARARPYSSGWSLDAFDIW AMINO=cdr1,cdr3 CVARSP=cdr1_aa + POUT=stdout PCOLS=cdr3_start1"###, + // 123. test fwr1_dna and fwr2_dna + r###"BCR=85333 CDR3=CARDLRVEGFDYW CVARSP=fwr1_dna,fwr2_dna AMINO=cdr3"###, + // 124. test fwr3_dna + r###"BCR=85333 CDR3=CARDLRVEGFDYW CVARSP=fwr3_dna AMINO=cdr3"###, + // 125. 
test fwr1_aa and fwr2_aa and fwr3_aa + r###"BCR=85333 CDR3=CARDLRVEGFDYW CVARSP=fwr1_aa,fwr2_aa,fwr3_aa AMINO=cdr3"###, + // 126. test fwr1_len and fwr2_len and fwr3_len + r###"BCR=85333 CDR3=CARDLRVEGFDYW CVARSP=fwr1_len,fwr2_len,fwr3_len AMINO=cdr3"###, + // 127. test 1/8 for fwr* in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,fwr1,cdr1"###, + // 128. test 2/8 for fwr* in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,cdr1,fwr2"###, + // 129. test 3/8 for fwr* in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,fwr2,cdr2"###, + // 130. test 4/8 for fwr* in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,cdr2,fwr3"###, + // 131. test 5/8 for fwr* in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,fwr3,cdr3"###, + // 132. test 6/8 for fwr* in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,fwr1,fwr2"###, + // 133. test 7/8 for fwr* in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,fwr2,fwr3"###, + // 134. test 8/8 for fwr* in AMINO + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,fwr2,cdr2,fwr3"###, + // 135. test CONST_IGH + r###"BCR=123085 CDR3=CARPKSDYIIDAFDIW SEG=IGLV1-47 CONST_IGH="IGHG.""###, + // 136. test CONST_IGKL + r###"BCR=123085 CDR3=CARPKSDYIIDAFDIW SEG=IGLV1-47 CONST_IGKL=IGLC3"###, + // 137. test 1/2 of fwr4 + r###"BCR=85333 CDR3=CARDLRVEGFDYW AMINO=var,share,donor,cdr3,fwr4 CVARS=fwr4_aa"###, + // 138. test 2/2 of fwr4 + r###"BCR=85333 CDR3=CARDLRVEGFDYW CVARS=fwr4_dna,fwr4_len"###, + // 139. test cvar vj_seq_nl + r###"BCR=85333 CHAINS=1 CDR3=CAAWDDSLNGWVF POUT=stdout PCOLS=vj_seq_nl1"###, + // 140. test cvar vj_aa_nl + r###"BCR=85333 CHAINS=1 CDR3=CAAWDDSLNGWVF POUT=stdout PCOLS=vj_aa_nl1"###, + // 141. test cvar aa% + r###"BCR=85333 CDR3=CAKGDRTGYSYGGGIFDYW CVARS=aa%,dna%"###, + // 142. test 1/3 of DIFF_STYLE + r###"BCR=123085 CDR3=CARVRDILTGDYGMDVW DIFF_STYLE=C1"###, + // 143. 
test 2/3 of DIFF_STYLE + r###"BCR=123085 CDR3=CARVRDILTGDYGMDVW DIFF_STYLE=C2"###, + // 144. test 3/3 of DIFF_STYLE + r###"BCR=123085 CDR3=CAREPLYYDFWSAYFDYW DIFF_STYLE=C1"###, + // 145. test the lead variable "filter" + r###"BCR=123085 NALL LVARSP=filter PER_CELL CHAINS=2 CDR3=CQQSYSTPPYTF SEG=IGKV1D-39 + SEG=IGLV3-21"###, + // 146. test BUILT_IN + r###"BCR=../2.0/124550 CDR3=CAREPLYYDFWSAYFDYW BUILT_IN"###, + // 147. test NALL_GEX + r###"BCR=86237 GEX=85679 NALL_GEX LVARSP=n_gex,filter PER_CELL BARCODE=CTTGGCTGTTAAGACA-1"###, + // 148. test that LVARSP=n_gex fails if only BCR provided + r###"BCR=1031851 LVARSP=n_gex EXPECT_FAIL"###, + // 149. test FCELL with complex expression + r###"BCR=123085 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=keeper,rank + FCELL="keeper == 'no' && rank > 10""###, + // 150. test FCELL with a more complex expression + r###"BCR=123085 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=keeper,rank + FCELL="(keeper == 'no' && rank > 10) || keeper == 'maybe'""###, + // 151. test PEER_GROUP + r###"BCR=85333 CDR3=CAKGRYSSPQYYFDYW PEER_GROUP=stdout"###, + // 152. test PEER_GROUP with PG_READABLE + r###"BCR=85333 CDR3=CAKGRYSSPQYYFDYW PEER_GROUP=stdout PG_READABLE"###, + // 153. test d_start and d_frame + r###"BCR=86237 CDR3=CARGHPNYDYVWGSYRYRAYYFDYW POUT=stdouth + PCOLS=d_start1,d_frame1,d_start2,d_frame2"###, + // 154. test POUT=stdout with NOPRINT + r###"BCR=85333 CDR3="CARTSNRGIVATIFRAFDIW|CARDPRGWGVELLYYMDVW" NOPRINT POUT=stdout + PCOLS=cdr3_aa1"###, + // 155. test count_<regex> and F for that + // Note that F is deprecated, equals KEEP_CLONO_IF_CELL_MEAN. + r###"BCR=123085 LVARSP="z:count_CAKTG" F="z > 0""###, + // 156. test ref variables + r###"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=fwr1_aa1,fwr1_aa_ref1 AMINO=fwr1"###, + // 157. test ref variables + r##"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=fwr1_dna1,fwr1_dna_ref1 AMINO=fwr1"##, + // 158. 
test ref variables + r###"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=fwr2_aa1,fwr2_aa_ref1 AMINO=fwr2"###, + // 159. test ref variables + r##"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=fwr2_dna1,fwr2_dna_ref1 AMINO=fwr2"##, + // 160. test ref variables + r###"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=fwr3_aa1,fwr3_aa_ref1 AMINO=fwr3"###, + // 161. test ref variables + r##"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=fwr3_dna1,fwr3_dna_ref1 AMINO=fwr3"##, + // 162. test ref variables + r###"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=fwr4_aa1,fwr4_aa_ref1 AMINO=fwr4"###, + // 163. test ref variables + r###"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=cdr1_aa2,cdr1_aa_ref2 AMINO=cdr1"###, + // 164. test ref variables + r##"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=cdr1_dna2,cdr1_dna_ref2 AMINO=cdr1"##, + // 165. test ref variables + r###"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=cdr2_aa2,cdr2_aa_ref2 AMINO=cdr2"###, + // 166. test ref variables + r##"BCR=123085 CDR3=CAREVEQWLERNTLDYW POUT=stdouth PCOLS=cdr2_dna2,cdr2_dna_ref2 AMINO=cdr2"##, + // 167. Test that for TCR, the number of two-chain clonotypes does not change. It is probably + // OK for it to change a little bit, but a big change would be indicative of a problem. At + // one point we had a release with such a problem and this test is here to prevent that from + // happening again. + r###"TCR=101287 NOPRINT REPROD REQUIRED_TWO_CHAIN_CLONOTYPES=849 EXPECT_OK"###, + // 168. Test POUT without PCELL, where a per-barcode variable is converted into a + // comma-separated list. + r###"BCR=123085 BC=testx/inputs/123077_cells.csv POUT=stdout PCOLS=rank + CDR3=CAKAGPTESGYYVWYFDLW MIN_CELLS=2"###, + // 169. this crashed at one point because the heavy chain CDR3 computed by cellranger was + // different than the current one, resulting in an inconsistency + r###"BCR=85333 CDR3=CQQYNSYSYTF CVARSP=fwr3_aa_ref"###, + // 170. 
doublet filter, before + r###"BCR=123085 CDR3=CAREGGVGVVTATDWYFDLW NDOUBLET"###, + // 171. doublet filter, after + r###"BCR=123085 CDR3=CAREGGVGVVTATDWYFDLW"###, + // 172. this crashed at one point + r###"META=testx/inputs/test11_meta LVARSP=CD56_ab NOPRINT EXPECT_OK"###, + // 173. test MIN_UMIS + r###"BCR=85333 MIN_UMIS=100"###, + // 174. test METAX, and also the origins printed by this was wrong at one point + r###"METAX="bcr,origin,donor;toast:86237,c,d;zip:123085,a,b" LVARSP=origins,donors + POUT=stdouth PCOLS=origins,donors CDR3=CARSFFGDTAMVMFQAFDPW"###, + // 175. test some variables + r###"BCR=123085 CDR3=CAKDKVPRRSSWSVFDYYGMDVW POUT=stdouth PCOLS=cdr1_aa1,cdr1_aa_1_2_ext1"###, + // 176. test some variables + r###"BCR=123085 CDR3=CAKDKVPRRSSWSVFDYYGMDVW POUT=stdouth PCOLS=cdr2_aa1,cdr2_aa_1_2_ext1"###, + // 177. test some variables + r###"BCR=123085 CDR3=CAKDKVPRRSSWSVFDYYGMDVW POUT=stdouth PCOLS=cdr3_aa1,cdr3_aa_1_2_ext1"###, + // 178. test an ndiff variable as a parseable variable + r###"BCR=123085 CDR3=CAKDKVPRRSSWSVFDYYGMDVW POUT=stdouth PCOLS=ndiff1vj1"###, + // 179. test cdr1_aa_north etc. + r###"BCR=123085 CDR3=CAKDKVPRRSSWSVFDYYGMDVW POUT=stdouth + PCOLS=cdr1_aa_north1,cdr1_aa_north2,cdr2_aa_north1,cdr2_aa_north2,cdr3_aa_north1,cdr3_aa_north2"###, + // 180. test some count vars + r###"BCR=85333 LVARS=all:count_C,c:count_cdr_C,c1:count_cdr1_C,c3:count_cdr3_C,f:count_fwr_C,f1:count_fwr1_C CDR3=CARDKEGLSGYAVERAFDYW"###, + // 181. test some count vars + r###"BCR=85333 LVARS=f2:count_fwr2_C CDR3=CVKDIRESSGPLLSHSFDLW"###, + // 182. test some count vars + r###"BCR=85333 LVARS=f3:count_fwr3_C CDR3=CARGGFSHAFDIW"###, + // 183. test some count vars + r###"BCR=123085 LVARS=f4:count_fwr4_V CDR3=CTRDRDLRGATDAFDIW"###, + // 184. test some count vars + r###"BCR=123085 LVARS=c2:count_cdr2_C CDR3=CARQQDVYTRSWYFDYW CELLS=1"###, + // 185. 
test SUPPRESS_ISOTYPE_LEGEND + r###"BCR=123085 MIN_CELLS=10 PLOT_BY_ISOTYPE=stdout NOPRINT MIN_CHAINS_EXACT=2 + SUPPRESS_ISOTYPE_LEGEND"###, + // 186. test LVAR= (with no value) + r###"BCR=123085 CDR3=CAREPLYYDFWSAYFDYW LVARS="###, + // 187. test FOLD_HEADERS + r###"BCR=123085 CDR3=CAREADYCSGGSCYFSDW FOLD_HEADERS AMINO=cdr3 CVARS=u"###, + // 188. test for correct handling of COMPLETE + r_cell1 (asserted at one point) + r###"BCR=85333 POUT=stdout PCOLS=r_cell1 COMPLETE PCELL CDR3=CARGQEGSGWYRPWDYW"###, + // 189. test CONP + r###"BCR=123085 CONP CDR3=CVKRASGSAFTAPYW"###, + // 190. test CONX + r###"BCR=123085 CONX CDR3=CVKRASGSAFTAPYW"###, + // 191. test CONP when there's a gap + r###"BCR=123085 CONP CDR3=CALGGYTWFDPW"###, + // 192. test INFO + r###"BCR=123085 CDR3=CAREGGVGVVTATDWYFDLW INFO=testx/inputs/123085_info.csv LVARSP=funny"###, + // 193. check that this fails gracefully + r###"NOPRINT EXPECT_FAIL"###, + // 194. this crashed at one point + r###"BCR=86237 GEX=85679 LVARSP=g37:IGHV3-7_g_μ NH5 POUT=stdout PCOLS=g37 EXPECT_OK"###, + // 195. failed at one point + r###"BCR=86237 GEX=85679 LVARSP=woof:IGHV3-7_g_μ NH5 POUT=stdout PCOLS=woof EXPECT_OK"###, + // 196. test TREE=n + r###"BCR=123085 COMPLETE TREE=n CDR3=CARDLGGRYYGSKDPW"###, + // 197. failed at one point + r###"BCR=123085 INFO=testx/inputs/123085_info.csv LVARSP=funny EXPECT_OK"###, + // 198. test TREE=n,cdr2_aa1 + r###"BCR=123085 AMINO=cdr3 CDR3=CAVTIFGVRTALPYYYALDVW TREE=n,cdr2_aa1"###, + // 199. test KEEP_CLONO_IF_CELL_MEAN with INFO + r###"BCR=123085 INFO=testx/inputs/123085_info.csv LVARSP=moo + KEEP_CLONO_IF_CELL_MEAN="moo>0""###, + // 200. test SCAN_EXACT + r###"BCR=123085 GEX=123217 LVARSP=IGHV1-69D_g_μ,IGHV3-64D_g_μ MIN_CELLS=10 + SCAN="(IGHV1-69D_g_μ)>=1800,(IGHV3-64D_g_μ)>=100,t-10*c>=5.0" NOPRINT H5 SCAN_EXACT"###, + // 201. test SOURCE + r###"SOURCE=testx/inputs/123085_args AMINO=cdr2,cdr3"###, + // 202. 
DUPLICATE TO REPLACE + r###"SOURCE=testx/inputs/123085_args AMINO=cdr2,cdr3 EXPECT_OK"###, + // 203. test plotting with using the BC option to set color + r###"BCR=123085 BC=testx/inputs/123077_cells.csv PLOT=stdout NOPRINT"###, + // + // TESTS WITH PER_CELL AND PCELL + // + // 204. test INFO with PER_CELL and PCELL + r###"BCR=123085 CDR3=CAREGGVGVVTATDWYFDLW INFO=testx/inputs/123085_info.csv POUT=stdout + PCOLS=moo LVARS=moo PCELL PER_CELL"###, + // 205. test g<d> with PER_CELL and PCELL + r###"BCR=123085 GEX=123217 AMINO=cdr3 LVARS=g15 CDR3=CARVRDILTGDYGMDVW POUT=stdout PCOLS=g15 + PCELL PER_CELL H5"###, + // 206. test origins with PER_CELL and PCELL + r###"BCR=123085:123089 AMINO= CDR3=CTRAGFLSYQLLSYYYYGMDVW FOLD_HEADERS POUT=stdout PCELL + PER_CELL PCOLS=origins LVARSP=origins"###, + // 207. test datasets with PER_CELL and PCELL + r###"BCR=123085:123089 CELLS=5 AMINO= CDR3=CTRAGFLSYQLLSYYYYGMDVW FOLD_HEADERS POUT=stdout + PCELL PER_CELL PCOLS=datasets LVARSP=datasets"###, + // 208. test donors with PER_CELL and PCELL + r###"BCR="123085;123089" AMINO= CDR3=CTRAGFLSYQLLSYYYYGMDVW FOLD_HEADERS POUT=stdout PCELL + PER_CELL PCOLS=donors LVARS=donors MIX_DONORS CHAINS=2"###, + // 209. test n with PER_CELL and PCELL + r###"BCR=123085 AMINO=cdr3 CDR3=CAKDKVPRRSSWSVFDYYGMDVW POUT=stdout PCELL PER_CELL PCOLS=n"###, + // 210. test filter with PER_CELL and PCELL + r###"BCR=123085 AMINO=cdr3 FOLD_HEADERS POUT=stdout PCELL PER_CELL PCOLS=filter LVARSP=filter + NALL_CELL CDR3=CAKHQRGGGRQNYYYGMDVW"###, + // 211. test inkt with PER_CELL and PCELL + r###"TCR=101287 INKT MIN_CELLS=2 AMINO=cdr3 FOLD_HEADERS POUT=stdout PCELL PER_CELL + PCOLS=inkt LVARSP=inkt"###, + // 212. test mait with PER_CELL and PCELL + r###"TCR=101287 AMINO=cdr3 FOLD_HEADERS POUT=stdout PCELL PER_CELL PCOLS=mait LVARSP=mait + CDR3=CSAGQGDTEAFF"###, + // 213. 
test cred with PER_CELL and PCELL + r###"BCR=123085 GEX=123217 AMINO=cdr3 LVARS=cred CVARS=u POUT=stdout PCOLS=cred,cred_cell + PCELL PER_CELL H5 CDR3=CARDPEDIVLMVYAMGGNYGMDVW"###, + // 214. test n_<name> with PER_CELL and PCELL + r###"BCR=123085:123089 AMINO=cdr3 FOLD_HEADERS POUT=stdout PCELL PER_CELL PCOLS=n_s1 + LVARS=datasets,n_s1 CDR3=CARDLFVLVPAAITYYYGMDVW CVARS=u"###, + // 215. test n_gex with PER_CELL and PCELL + r###"BCR=123085 GEX=123217 AMINO=cdr3 LVARS=n_gex POUT=stdout PCOLS=n_gex,n_gex_cell PCELL + PER_CELL H5 CDR3=CAKDKVPRRSSWSVFDYYGMDVW"###, + // 216. test near with PER_CELL and PCELL + r###"BCR=123085 AMINO=cdr3 POUT=stdout PCELL PER_CELL LVARSP=near PCOLS=near CVARS=u + CDR3=CARHLQWELPYW"###, + // 217. test far with PER_CELL and PCELL + r###"BCR=123085 AMINO=cdr3 POUT=stdout PCELL PER_CELL LVARSP=far PCOLS=far CVARS=u + CDR3=CARHLQWELPYW"###, + // 218. test dref with PER_CELL and PCELL + r###"BCR=123085 AMINO=cdr3 POUT=stdout PCELL PER_CELL LVARSP=dref PCOLS=dref CVARS=u + CDR3=CSRVFGNSTYYSSRVGGYW"###, + // 219. test count_cdr_C with PER_CELL and PCELL + r###"BCR=85333 LVARSP=count_cdr_C CDR3=CARDKEGLSGYAVERAFDYW POUT=stdout PCELL PER_CELL + PCOLS=count_cdr_C CVARS=u"###, + // 220. test cdr3_aa_conp with PER_CELL and PCELL + r###"BCR=123085 AMINO= CDR3=CARHLQWELPYW FOLD_HEADERS POUT=stdout PCELL PER_CELL + PCOLS=cdr3_aa_conp2 CVARS=cdr3_aa_conp"###, + // 221. test RPS27_g with PER_CELL and PCELL + r###"BCR=123085 GEX=123217 AMINO=cdr3 POUT=stdout PCOLS=RPS27_g,RPS27_g_cell PCELL + PER_CELL H5 CDR3=CAREVEQWLERNTLDYW LVARSP=RPS27_g"###, + // + // OTHER TESTS + // + // 222. test for busted reference + r###"BCR=85333 REF=testx/inputs/busted_regions.fa EXPECT_FAIL"###, + // 223. test {v,d,j}_name and _id + r###"BCR=86237 CDR3=CARGHPNYDYVWGSYRYRAYYFDYW POUT=stdouth + PCOLS=v_name1,d_name1,j_name1,v_id1,d_id1,j_id1"###, + // 224. test const_id and utr_name + r###"BCR=86237 CDR3=CARSFFGDTAMVMFQAFDPW POUT=stdouth + PCOLS=const_id1,utr_name1"###, + // 225. 
test q<n>_ + r###"BCR=123085 CDR3=CANFGRGGDVAFDIW CVARS=q10_"###, + // + // MORE TESTS OF PER_CELL AND PCELL + // + // 226. test RPS27_g_mean with PER_CELL and PCELL + r###"BCR=123085 GEX=123217 AMINO=cdr3 POUT=stdout PCOLS=RPS27_g_mean PCELL PER_CELL H5 + CDR3=CAREVEQWLERNTLDYW LVARSP=RPS27_g_mean CVARS=u"###, + // 227. test datasets, donors, origins with PER_CELL and PCELL + r###"BCR=123085:123089 CELLS=5 AMINO= CDR3=CTRAGFLSYQLLSYYYYGMDVW FOLD_HEADERS POUT=stdout + PCELL PER_CELL PCOLS=datasets,datasets_cell,origins,origins_cell,donors,donors_cell + LVARSP=origins,donors"###, + // 228. test clonotype_ncells with PER_CELL and PCELL + r###"BCR=123085 AMINO= CDR3=CARHLQWELPYW FOLD_HEADERS POUT=stdout PCELL PER_CELL + PCOLS=clonotype_ncells LVARSP=clonotype_ncells"###, + // + // OTHER TESTS + // + // 229. test KEEP_CLONO_IF_CELL_MAX with comp + r###"BCR=123085 CVARSP=comp KEEP_CLONO_IF_CELL_MAX="comp1 >= 18" AMINO=cdr3"###, + // 230. not really clear what this is doing, but don't delete, as it used to represent + // strange behavior + r###"BCR=123085 CDR3=CTRDRDLRGATDAFDIW"###, + // 231. test ≤ + r###"BCR=86237 KEEP_CLONO_IF_CELL_MEAN="u2≤150" NOPRINT SUMMARY SUMMARY_CLEAN"###, + // 232. test nonsense variable in linear constraint + r###"BCR=86237 KEEP_CLONO_IF_CELL_MAX="gexzz > 8000" EXPECT_FAIL H5"###, + // 233. test use of two linear constraints + r###"BCR=123085 GEX=123217 + KEEP_CLONO_IF_CELL_MAX="gex > 8000" KEEP_CLONO_IF_CELL_MAX="gex < 8200" H5"###, + // 234. test tooltip comments; this is via a testing-only filename option gui_stdout + r###"BCR=123085 MIN_CELLS=10 PLOT_BY_ISOTYPE=gui_stdout NOPRINT MIN_CHAINS_EXACT=2"###, + // 235. test that v_name etc. do not appear in parseable output if chain is absent + r###"BCR=123085 CDR3=CVRGLRTW PCOLS=barcodes,v_name1,j_name1,v_id1,j_id1 POUT=stdouth"###, + // 236. test MAX_HEAVIES=1 + r###"BCR=123085 CDR3=CASPVPYYYDSSGYPYW MAX_HEAVIES=1 EXPECT_NULL"###, + // 237. 
test enclone --help + r###"--help NO_PRE EXPECT_OK"###, + // 238. test cigar + r###"BCR=123085 AMINO=cdr3 POUT=stdout PCOLS=cigar2 CDR3=CTRSSTTPRDPTMIVVAYYYYGMDVW"###, + // 239. test group post filtering + r###"BCR=123085 G=2,100-101 NGROUP"###, + // 240. test sym option for PLOTXY_EXACT + r###"BCR=123085 PLOTXY_EXACT=u1,u2,stdout,sym NOPRINT"###, + // 241. test KEEP_CELL_IF on gex var + r###"BCR=123085 GEX=123217 LVARSP=IGHM_g KEEP_CELL_IF="IGHM_g>=10" CDR3=CARRYFGVVADAFDIW H5"###, + // 242. test nchains_present + r###"BCR=86237 LVARSP=nchains_present CDR3=CARSFFGDTAMVMFQAFDPW"###, + // 243. this crashed at one point + r###"BCR=123085 GROUP="cdr3_aa_heavy>=85%,vj_refname" MIN_GROUP=2 PLOT=/dev/null + NOPRINT EXPECT_OK"###, + // 244. test for very long (120 amino acid) CDR3 + // Note that this long CDR3 is likely part of a nonproductive chain. The test is here because + // there may be long productive CDR3 sequences in data from other species, although we do not + // have such data. This is from 1020665. + r###"BCR=testx/inputs/flaky BUILT_IN REPROD CVARSP=cdr3_len CDR3=CARDGGGQPFDLW AMINO="###, + // 245. Test a tweak to the weak chains filter. This should have two chains. From 174957. + r###"BCR=testx/inputs/flaky2 CDR3=CARPRGYCSGGSCFPFASW BUILT_IN"###, + // 246. test that used to crash on a particular barcode; this also gave the wrong + // answer for an insertion until it was fixed + r###"BCR=testx/inputs/flaky3 NCELL CDR3=CARNWRYCTSVSCQHREYFYYMDVW AMINO=cdr3"###, + // 247. this crashed + r###"BCR=testx/inputs/flaky4"###, + // 248. this crashed + r###"BCR=testx/inputs/flaky5"###, + // 249. an example that triggered an internal inconsistency test, which we subsequently removed; + // there are three chains and the middle one was the problem + r###"TCR=testx/inputs/flaky6 BARCODE=CCAGCGAAGTGTTGAA-1 REPROD EXPECT_OK"###, + // 250. 
test MOUSE + IMGT; note that specifying by number forces BCR+TCR reference checks + // Added FORCE_EXTERNAL because couldn't reproduce the result. Don't understand. + r###"74396 MOUSE NOPRINT SUMMARY SUMMARY_CLEAN IMGT ACCEPT_BROKEN FORCE_EXTERNAL"###, + // 251. test mouse + IMGT; note that specifying by number forces BCR+TCR reference checks + r###"74396 MOUSE REQUIRE_UNBROKEN_OK IMGT ACCEPT_BROKEN EXPECT_NULL"###, + // 252. this exhibits what happens when signature filtering is ON, see next + // this was the only example we could find + // based on 83808-83809, derived using modified version of minimal_fail, and also shrink_json + r###"BCR=testx/inputs/flaky7 BUILT_IN REPROD REQUIRED_TWO_CHAIN_CLONOTYPES=1 + REQUIRED_THREE_CHAIN_CLONOTYPES=0 NOPRINT EXPECT_OK"###, + // 253. this exhibits what happens when signature filtering is OFF, see previous + // this was the only example we could find + // based on 83808-83809, derived using modified version of minimal_fail, and also shrink_json + r###"BCR=testx/inputs/flaky7 BUILT_IN REPROD NSIG REQUIRED_TWO_CHAIN_CLONOTYPES=0 + REQUIRED_THREE_CHAIN_CLONOTYPES=1 NOPRINT EXPECT_OK"###, + // 254. parseable value for fwr4_aa was wrong, from 1117070 + r###"BCR=testx/inputs/flaky AMINO=fwr4 CDR3=CAKDVNGYSSGWAFENW POUT=stdout PCOLS=fwr4_aa1 + BUILT_IN"###, + // 255. conp value was truncated, from 1117069 + r###"BCR=testx/inputs/flaky CONP CDR3=CVRDPPEELELFDYW BUILT_IN"###, + // 256. Make sure that FP join output includes join error details. + // If somehow we fix the FP join occurring here, another one should be substituted. + // This is from BCR="131036;140707". + r###"PRE=testx/inputs BCR="flaky8a;flaky8b" ANN SHOW_BC MIN_DONORS=2 + PRINT_FAILED_JOINS BUILT_IN NO_PRE"###, + // 257. clonotype that was two clonotypes before raising MAX_DIFFS to 60, from 1084461-1084462 + r###"BCR=testx/inputs/flaky CDR3=CAKEFGNGGFDTFDIW BUILT_IN AMINO=cdr3"###, + // 258. This used to appear as a four-chain clonotype, and is now split. 
From 123085,123090. + r###"BCR=testx/inputs/flaky9 BUILT_IN REQUIRED_FOUR_CHAIN_CLONOTYPES=0 EXPECT_OK"###, + // 259. this crashed at one point, from 83809 + r###"BCR=testx/inputs/flaky10 EXPECT_OK"###, + // 260. the result of this changed when sub_alts was changed, from 40086;132888 + r###"BCR=testx/inputs/flaky11 MAX_DIFFS=80 CDR3=CVKGDWGSAFDIW BUILT_IN"###, + // 261. previously this yielded a disconnected clonotype, from 140699,140705-140706 + r###"BCR=testx/inputs/flaky12 AMINO=cdr3 CDR3="CAKDRQAGGIGEVDDW|CARDRVPGGIGEVDYW" BUILT_IN"###, + // 262. test NSEG + r###"BCR=86237 SEG=IGHV4-59 NSEG="IGHJ3|IGHJ4|IGHJ6""###, + // 263. test NSEGN + r###"BCR=86237 SEG=IGHV4-34 NSEGN="51|54|55|57|321""###, + // 264. test MIN_ORIGINS + r###"BCR=123085:123089 MAX_CELLS=2 SEG=IGHV3-49 MIN_ORIGINS=2"###, + // 265. test DVARS + // The output is a bit flaky because we imported some but not all of the special files + // for 85679. + r###"BCR=86237 GEX=85679 DVARS=CD19_ab_cellular_u,CD19_ab_cellular_r + NOPRINT SUMMARY SUMMARY_CLEAN"###, + // 266. a test of VAR_DEF + r###"BCR=86237 GEX=85679 VAR_DEF="mu:CD19_ab + CD25_ab" LVARSP=gex,CD19_ab,CD25_ab,mu + CDR3=CARSFFGDTAMVMFQAFDPW FOLD_HEADERS PER_CELL AMINO="###, + // 267. a test of VAR_DEF + r###"BCR=86237 GEX=85679 VAR_DEF=x19:CD19_ab VAR_DEF=x25:CD25_ab VAR_DEF="mu:x19 + x25" + LVARSP=gex,CD19_ab,CD25_ab,mu CDR3=CARSFFGDTAMVMFQAFDPW FOLD_HEADERS PER_CELL AMINO="###, + // 268. a test of VAR_DEF + r###"BCR=86237 GEX=85679 VAR_DEF="pink:PINK1-AS_g" LVARSP=pink CDR3=CARSFFGDTAMVMFQAFDPW + FOLD_HEADERS PER_CELL AMINO="###, + // 269. test fb variables + r###"BCR=86237 GEX=85679 ALLOW_INCONSISTENT NGEX LVARSP=fb1,fb1_n PER_CELL AMINO=cdr3 CVARS= FOLD_HEADERS POUT=stdouth PCOLS=fb2,fb2_n,fb2_n_cell PCELL CDR3=CARSFFGDTAMVMFQAFDPW"###, + // 270. test NOSPACES + r###"BCR=123085 CDR3=CTRDRDLRGATDAFDIW AMINO=cdr3,fwr4 NOSPACES CONX"###, + // 271. test for weird path bug + r###"BCR_GEX=tiny_multi_PD_broken EXPECT_OK"###, + // 272. 
a test for validated UMI variables + r###"BCR=tiny_multi_PD CVARS=u,nval,nnval,nival BARCODE=AAAGCAAGTGGCTCCA-1 AMINO= PER_CELL + POUT=stdouth PCOLS=nval1,nval2,nval3,valumis3,valbcumis2"###, + // 273. a test for validated UMI variables + r###"BCR=tiny_multi_PD CVARS=u,nval,nnval,nival AMINO= PER_CELL POUT=stdouth + PCOLS=ivalumis1,ivalbcumis1,nvalbcumis2 BARCODE=TACCTTAAGAGCCCAA-1"###, + // 274. at one point this printed bell characters + r###"CVARS=u,nval,nnval,nival AMINO= PER_CELL POUT=stdouth PCOLS=nval1,nval2 BCR=123085 + BARCODE=ACAGCCGAGATAGGAG-1"###, + // 275. test _ext var with negative extensions + r###"BCR=123085 CDR3=CAKDKVPRRSSWSVFDYYGMDVW POUT=stdouth + PCOLS=cdr3_aa1,cdr3_aa_-1_-2_ext1"###, + // 276. this failed at one time + r###"BCR=40970_subset NCELL NOPRINT EXPECT_OK"###, + // 277. test nbc + r###"BCR=85333 CDR3=CARDGMTTVTTTAYYGMDVW LVARSP=nbc PER_CELL CVARS= FOLD_HEADERS"###, + // 278. test some count_fwr variables + r###"BCR=85333 LVARS=count_fwr1_C,count_fwr_C CDR3=CARGGFSHAFDIW AMINO=cdr3"###, + // 279. barcode having five contigs + r###"BCR=123085 NALL CDR3=CAKKHYRYYDSSGYNPLGYYYYGMDVW CVARS=u AMINO= FOLD_HEADERS"###, + // 280. this asserted at one point + r###"BCR=86237 CDR3=CARRGPRFRPRFLRGRKTGNWFDPW CVARS= AMINO= EXPECT_FAIL"###, + // 281. this yielded the wrong aa_nl_2 value + r###"BCR=123085 CDR3=CARHPAPNYGFWSGYYKTDNWFDPW POUT=stdout PCOLS=vj_aa_nl2 CVARS=u,notes + AMINO=fwr1"###, + // 282. fwr3_aa1 was wrong + r###"BCR=123085 CDR3=CALGGYTWFDPW POUT=stdout PCOLS=fwr3_aa1"###, + // 283. another test of VAR_DEF + r###"BCR=86237 GEX=85679 VAR_DEF="mu:CD19_ab + CD25_ab" LVARSP=gex,CD19_ab,CD25_ab,mu + CDR3=CARSFFGDTAMVMFQAFDPW FOLD_HEADERS PER_CELL AMINO= POUT=stdout PCOLS=mu"###, + // 284. another test of VAR_DEF + r###"BCR=86237 GEX=85679 VAR_DEF="mu:CD19_ab + CD25_ab" LVARSP=mu CDR3=CARSFFGDTAMVMFQAFDPW + PER_CELL POUT=stdout PCOLS=mu PCELL FOLD_HEADERS"###, + // 285. 
another test of VAR_DEF + r###"BCR=123085 VAR_DEF=x:u1 LVARSP=x CDR3=CAKDGYSSSWYVVDW SEG=IGHV3-30"###, + // 286. this asserted at one point + r###"BUILT_IN BCR=testx/inputs/flaky2/outs/,testx/inputs/flaky3/outs/ EXPECT_OK"###, + // 287. test gamma delta data (pos control) + r###"TCRGD=testx/inputs/gamma_delta1 GAMMA_DELTA MOUSE BUILT_IN"###, + // 288. test gamma delta data without GAMMA_DELTA tag in regular TCR pipe (neg control) + r###"TCR=testx/inputs/gamma_delta1 MOUSE BUILT_IN REQUIRED_CLONOTYPES=0 EXPECT_NULL"###, + // 289. test dref_max + r###"BCR=86237 LVARSP=dref_max CDR3=CARAPEDTSRWPQYNYSGLDVW AMINO=cdr3"###, + // 290. test v_name_orig + r###"BCR=123089 CVARS=v_name_orig PCELL POUT=stdout PCOLS=v_name_orig_cell2 PER_CELL + CDR3=CARDRIDDSSGYYYAYYYGMDVW"###, + // 291. test SPLIT_PLOT_BY_DATASET + r###"BCR=123085,123089 PLOT_BY_ISOTYPE=stdout SPLIT_PLOT_BY_DATASET NOPRINT"###, + // 292. this asserted + r###"BCR=85333 CDR3=”CAKGDRTGYSYGGGIFDYW~3” NOPRINT SUMMARY EXPECT_FAIL"###, + // 293. test BC var in color by variable + r###"BCR=123085 BC=testx/inputs/123077_cells.csv KEEP_CELL_IF="rank >= 1" + HONEY="out=stdout,color=var,rank" NOPRINT"###, + // 294. this asserted + r###"BCR=86237 HONEY=out=stdout,color=var,cdr3_aa1 NOPRINT EXPECT_FAIL"###, + // 295. test BC_JOINT + r###"BCR="123085;85333" BC_JOINT=testx/inputs/bc_joint.csv KEEP_CELL_IF="dummy >= 0""###, + // 296. test META with two args + r###"META=testx/inputs/meta1.csv,testx/inputs/meta2.csv CDR3="CQQANSFPLTF|CQHYGSSPYTF" + SEG=IGHV4-39"###, + // 297. test two META args + r###"META=testx/inputs/meta1.csv META=testx/inputs/meta2.csv CDR3="CQQANSFPLTF|CQHYGSSPYTF" + SEG=IGHV4-39"###, + // 298. test iReceptor and PREPOST + r###"PREPOST=iReceptor BUILT_IN BCR=CIS/1_1 CDR3=CARIPYGDYWLGPKHWYFDLW"###, + // 299. test PNOHEADER + r###"BCR=85333 CDR3=CARDGMTTVTTTAYYGMDVW POUT=stdout PCOLS=dref PNO_HEADER NOPRINT"###, + // 300. 
test PNOHEADER + r###"BCR=85333 CDR3=CARDGMTTVTTTAYYGMDVW POUT=stdouth PCOLS=dref PNO_HEADER NOPRINT"###, + // 301. test DATASET + r###"BCR="86237;123085,123089" CELLS=1 SEG=IGHV4-34 SEG=IGKV1D-39 NGROUP + DATASET="123085|123089""###, + // 302. test hcomp and jun_ins and jun_mat and jun_sub + r###"BCR=85333 JALIGN1 CHAINS_EXACT=2 CDR3=CAKGDRTGYSYGGGIFDYW + LVARSP=hcomp,jun_ins,jun_mat,jun_sub AMINO=cdr3 FOLD_HEADERS"###, + // 303. test GD_BC + r###"BCR=testx/inputs/flaky GD_BC=testx/inputs/flaky NO_PRE BUILT_IN AMINO=cdr3 + LVARSP=status PER_CELL CDR3=CVRDPPEELELFDYW"###, + // 304. test Ab-only data + r###"BCR=1031851 GEX=1031779 NGEX LVARSP=n_gex,CD19_ab + CDR3="CARDELDILTGYNIPTFGGCVYW|CAHHGSARYSSSWHAAPGPYYFDYW" BUILT_IN"###, + // 305. test that LVARSP=gex fails on Ab-only data + r###"BCR=1031851 GEX=1031779 NGEX LVARSP=gex EXPECT_FAIL"###, +]; diff --git a/enclone_core/src/mammalian_fixed_len.rs b/enclone_core/src/mammalian_fixed_len.rs new file mode 100644 index 000000000..15ba7037d --- /dev/null +++ b/enclone_core/src/mammalian_fixed_len.rs @@ -0,0 +1,151 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +use amino::aa_seq; +use std::collections::HashMap; +use string_utils::TextUtils; +use superslice::Ext; +use vdj_ann::refx::RefData; +use vdj_ann::vdj_features::{cdr1_start, cdr2_start, cdr3_start, fr1_start, fr2_start, fr3_start}; + +// {chain, feature, len, {{(count, amino_acid)}}} + +pub fn mammalian_fixed_len() -> Vec<(&'static str, &'static str, usize, Vec<Vec<(u32, u8)>>)> { + const X: &str = include_str!("mammalian_fixed_len.table"); + X.lines() + .map(|line| { + let mut y = line.splitn(5, ','); + ( + y.next().unwrap(), + y.next().unwrap(), + y.next().unwrap().force_usize(), + y.next() + .unwrap() + .split('+') + .map(|zi| { + zi.split('/') + .map(|wj| { + ( + wj.before(":").force_usize() as u32, + wj.after(":").as_bytes()[0], + ) + }) + .collect() + }) + .collect(), + ) + }) + .collect() +} + +// Calculate peer groups for each V segment reference sequence. + +pub fn mammalian_fixed_len_peer_groups(refdata: &RefData) -> Vec<Vec<(usize, u8, u32)>> { + let m = mammalian_fixed_len(); + let mut start = HashMap::<(&str, &str), usize>::new(); + let mut stop = HashMap::<(&str, &str), usize>::new(); + for chain in ["IGH", "IGK", "IGL", "TRA", "TRB"] { + for feature in ["fwr1", "cdr1", "fwr2", "cdr2", "fwr3"] { + let low = m.lower_bound_by_key(&(chain, feature), |(a, b, _c, _d)| (a, b)); + let high = m.upper_bound_by_key(&(chain, feature), |(a, b, _c, _d)| (a, b)); + start.insert((chain, feature), low); + stop.insert((chain, feature), high); + } + } + let mut pg = vec![Vec::<(usize, u8, u32)>::new(); refdata.refs.len()]; + for (i, pgi) in pg.iter_mut().enumerate() { + if refdata.is_v(i) { + let aa = aa_seq(&refdata.refs[i].to_ascii_vec(), 0); + let rtype = refdata.rtype[i]; + let chain_type = if rtype == 0 { + "IGH" + } else if rtype == 1 { + "IGK" + } else if rtype == 2 { + "IGL" + } else if rtype == 3 { + "TRA" + } else if rtype == 4 { + "TRB" + } else { + continue; + }; + let fs1 = fr1_start(&aa, chain_type); + let cs1 = cdr1_start(&aa, chain_type, false); + let 
fs2 = fr2_start(&aa, chain_type, false); + let cs2 = cdr2_start(&aa, chain_type, false); + let fs3 = fr3_start(&aa, chain_type, false); + let cs3 = cdr3_start(&aa, chain_type, false); + if let Some(cs1) = cs1 { + if fs1 < cs1 { + let x1 = start[&(chain_type, "fwr1")]; + let x2 = stop[&(chain_type, "fwr1")]; + let len = cs1 - fs1; + for mx in &m[x1..x2] { + if mx.2 == len { + for (j, mj) in mx.3.iter().take(len).enumerate() { + pgi.extend(mj.iter().map(|mjk| (fs1 + j, mjk.1, mjk.0))); + } + } + } + } + } + if let (Some(cs1), Some(fs2)) = (cs1, fs2) { + if cs1 < fs2 { + let x1 = start[&(chain_type, "cdr1")]; + let x2 = stop[&(chain_type, "cdr1")]; + let len = fs2 - cs1; + for mx in &m[x1..x2] { + if mx.2 == len { + for (j, mj) in mx.3.iter().take(len).enumerate() { + pgi.extend(mj.iter().map(|mjk| (cs1 + j, mjk.1, mjk.0))); + } + } + } + } + } + if let (Some(fs2), Some(cs2)) = (fs2, cs2) { + if fs2 < cs2 { + let x1 = start[&(chain_type, "fwr2")]; + let x2 = stop[&(chain_type, "fwr2")]; + let len = cs2 - fs2; + for mx in &m[x1..x2] { + if mx.2 == len { + for (j, mj) in mx.3.iter().take(len).enumerate() { + pgi.extend(mj.iter().map(|mjk| (fs2 + j, mjk.1, mjk.0))); + } + } + } + } + } + if let (Some(cs2), Some(fs3)) = (cs2, fs3) { + if cs2 < fs3 { + let x1 = start[&(chain_type, "cdr2")]; + let x2 = stop[&(chain_type, "cdr2")]; + let len = fs3 - cs2; + for mx in &m[x1..x2] { + if mx.2 == len { + for (j, mj) in mx.3.iter().take(len).enumerate() { + pgi.extend(mj.iter().map(|mjk| (cs2 + j, mjk.1, mjk.0))); + } + } + } + } + } + if let Some(fs3) = fs3 { + if fs3 < cs3 { + let x1 = start[&(chain_type, "fwr3")]; + let x2 = stop[&(chain_type, "fwr3")]; + let len = cs3 - fs3; + for mx in &m[x1..x2] { + if mx.2 == len { + for (j, mj) in mx.3.iter().take(len).enumerate() { + pgi.extend(mj.iter().map(|mjk| (fs3 + j, mjk.1, mjk.0))); + } + } + } + } + } + } + } + pg +} diff --git a/enclone_core/src/mammalian_fixed_len.table b/enclone_core/src/mammalian_fixed_len.table new file mode 
100644 index 000000000..330b9a4da --- /dev/null +++ b/enclone_core/src/mammalian_fixed_len.table @@ -0,0 +1,117 @@ +IGH,cdr1,4,1:F+1:S+1:D+1:H +IGH,cdr1,7,622:G/3:V/3:E/2:W/2:R/1:Y/1:K/1:D+417:F/165:Y/27:G/11:L/5:I/4:D/3:S/2:R/1:A+427:T/170:S/12:D/11:N/6:A/2:P/2:I/2:E/1:V/1:M/1:K+460:F/119:L/41:I/14:V/1:S+334:S/238:T/17:N/12:D/8:G/7:K/5:R/4:I/2:Y/2:V/2:P/2:M/2:A+348:S/131:D/64:N/31:G/28:T/11:E/10:R/4:Y/3:I/2:A/1:Q/1:K/1:C+491:Y/44:N/26:S/17:H/16:F/13:A/8:T/5:C/4:D/3:G/2:R/2:L/2:K/1:V/1:M +IGH,cdr1,8,35:G/1:S/1:R/1:D+16:Y/11:G/9:F/1:S/1:I+33:S/2:P/1:F/1:E/1:D+30:I/3:F/2:V/2:L/1:T+18:S/17:T/2:F/1:N+34:S/1:T/1:P/1:I/1:D+15:S/14:G/5:D/1:Y/1:R/1:I/1:H+22:Y/11:N/1:I/1:H/1:G/1:C/1:A +IGH,cdr1,9,86:G/1:V/1:E+50:F/24:G/4:D/3:Y/3:I/2:V/1:T/1:S+87:S/1:L+50:L/30:I/7:V/1:F+68:S/19:T/1:G+50:T/31:S/5:N/1:G/1:A+52:S/18:G/4:Y/4:R/4:N/3:F/3:A+51:G/12:S/10:Y/7:N/4:D/3:R/1:E+39:M/27:Y/7:V/4:A/3:I/2:S/2:H/2:D/1:T/1:F +IGH,cdr1,10,1:G+1:Y+1:S+1:F+1:T+1:S+1:Y+1:W+1:I+1:S +IGH,cdr1,11,3:V/2:L+5:S+5:G+5:F+5:S+3:V/2:I+5:T+5:T+5:S+5:G+3:H/2:Y +IGH,cdr1,12,1:S+1:S+1:L+1:L+1:C+1:R+1:F+1:T+1:F+1:S+1:K+1:Y +IGH,cdr2,3,1:T+1:G+1:S +IGH,cdr2,4,4:S/2:Y/2:W/2:G+6:G/2:S/2:N+4:S/4:G/2:N+4:T/4:S/2:D +IGH,cdr2,5,68:W/64:Y/49:S/27:D/15:N/7:T/7:R/7:C/7:A/6:G/5:F/4:Q/2:H/1:K+80:S/65:Y/46:W/22:G/20:T/13:H/8:I/4:R/4:A/3:N/1:L/1:K/1:F/1:D+81:G/74:S/61:D/23:N/7:A/5:Y/5:E/4:T/3:H/2:V/1:R/1:M/1:K/1:C+212:G/45:D/3:E/3:A/2:S/1:W/1:V/1:N/1:F+166:S/50:D/19:N/9:G/8:E/7:T/4:R/3:V/1:K/1:I/1:A +IGH,cdr2,6,121:S/110:N/68:Y/32:D/13:I/7:K/7:H/5:W/5:T/5:G/4:R/4:L/3:F/2:V/2:C/1:E+151:P/66:S/45:T/33:Y/25:G/18:N/14:W/13:A/4:K/3:V/3:R/3:L/3:D/2:Q/2:H/2:E/1:M/1:I+99:G/76:D/71:S/49:N/25:E/22:Y/9:I/8:A/6:T/5:L/4:R/4:F/3:V/3:H/2:Q/1:W/1:K/1:C+169:G/84:S/49:N/38:D/24:T/6:F/5:Y/4:I/3:E/2:V/2:A/1:L/1:K/1:C+228:G/128:S/14:D/3:R/3:N/3:K/2:Y/2:T/2:A/1:V/1:I/1:E/1:C+148:S/56:N/39:G/33:D/30:Y/29:T/21:E/8:A/7:K/6:V/5:R/5:I/1:Q/1:H +IGH,cdr2,7,6:Y+4:Y/2:A+4:R/2:G+6:S+4:K/2:S+4:W/2:G+4:Y/2:S 
+IGH,cdr2,8,55:R/13:K/6:A/3:G/3:D/2:T/2:S/2:E/1:N+36:N/35:S/3:T/3:L/3:D/2:V/2:A/1:R/1:P/1:I+77:K/6:G/3:S/1:R+47:A/28:S/3:T/3:P/3:D/1:Y/1:N/1:G+44:N/22:Y/12:D/5:S/1:T/1:R/1:K/1:H+37:N/26:G/12:S/9:D/3:I+60:Y/23:G/3:E/1:H+36:T/32:A/12:S/4:E/2:G/1:F +IGH,cdr2,12,2:C+2:S+2:E+2:G+2:N+2:T+2:C+2:Y+2:S+2:P+2:S+2:F +IGH,fwr1,23,6:Q+6:V+6:Q+6:L+5:Q/1:K+6:E+4:S/2:V+5:G/1:E+6:P+6:G+4:L/2:T+4:V/2:G+3:K/2:E/1:T+3:P/3:A+6:S+3:H/2:Q/1:G+5:T/1:L+6:L+5:S/1:L+5:L/1:T+3:N/2:I/1:S+3:Y/2:S/1:N+4:A/1:V/1:S +IGH,fwr1,24,25:Q+23:S/1:T/1:Q+18:V/7:L+16:K/8:E/1:G+25:E+24:S/1:F+15:E/6:G/4:R+25:G+21:G/3:R/1:D+25:L+14:F/6:I/5:V+21:K/2:T/1:R/1:E+24:P/1:A+15:T/6:G/3:A/1:D+14:D/4:G/3:E/1:T/1:S/1:H/1:A+21:T/3:S/1:P+25:L+24:T/1:K+25:L+23:T/1:S/1:C+25:C+24:T/1:K+17:V/7:A/1:L+25:S +IGH,fwr1,25,379:Q/332:E/16:D/2:V/2:R/2:G/2:A/1:S/1:L/1:K/1:H+645:V/32:I/30:E/9:L/8:M/5:A/3:F/2:R/2:Q/1:K/1:G/1:D+587:Q/77:K/50:T/5:N/4:M/4:H/2:S/2:R/2:P/2:E/1:Y/1:V/1:L/1:G+725:L/4:I/3:Q/3:M/2:V/1:W/1:F+337:V/195:Q/133:K/19:R/19:L/17:E/7:M/5:A/2:T/2:D/1:N/1:I/1:H+521:E/206:Q/6:K/4:V/2:S+678:S/25:T/19:P/8:W/2:Y/2:F/2:C/1:L/1:H/1:E+719:G/7:E/5:D/2:W/2:V/2:R/2:A+311:G/278:P/127:A/7:S/6:T/5:E/3:D/1:Q/1:H+428:G/178:E/42:D/27:A/22:S/16:V/8:T/8:R/5:N/2:K/2:H/1:P+622:L/80:V/19:I/8:M/5:Q/3:F/1:W/1:E+584:V/73:K/33:L/21:A/8:I/6:R/5:G/4:M/2:T/1:Q/1:E/1:D+350:K/291:Q/49:R/16:A/7:N/6:T/5:M/4:P/3:L/3:E/2:S/1:V/1:I/1:H+710:P/12:S/7:T/6:A/3:L/1:G+471:G/197:S/38:T/11:K/8:E/6:R/3:A/1:W/1:V/1:P/1:N/1:F+243:G/175:Q/123:A/77:E/44:R/41:S/13:T/7:K/5:N/5:D/2:V/1:P/1:M/1:L/1:H+513:S/209:T/9:P/4:I/2:L/2:A+534:L/180:V/16:M/6:R/2:I/1:T+277:K/203:S/196:R/47:T/6:G/6:F/2:Q/1:P/1:N+565:L/96:I/54:V/17:M/4:P/2:F/1:R+482:S/252:T/2:A/1:P/1:I/1:F+731:C/3:Y/2:F/1:S/1:R/1:L+241:A/188:T/181:K/76:V/35:S/10:E/3:I/2:Q/2:P/1:G+419:A/186:V/50:F/36:T/27:G/10:I/4:S/4:D/2:L/1:E+689:S/29:T/13:Y/3:A/2:P/2:N/1:F +IGH,fwr2,16,1:C+1:L+1:C+1:W+1:T+1:P+1:K+1:K+1:G+1:L+1:E+1:W+1:I+1:E+1:M+1:I +IGH,fwr2,18,1:T+1:H+1:G+1:A+1:P+1:M+1:P+1:I+1:R+1:G+1:S+1:L+1:E+1:W+1:L+1:A+1:A+1:I 
+IGH,fwr2,19,182:Y/153:G/115:W/113:A/40:D/36:S/22:N/20:C/19:R/15:E/14:T/10:H/8:V/6:F/5:P/3:L/3:I/2:K+414:M/149:V/97:W/84:I/13:L/4:T/3:F/1:R/1:K+270:S/192:H/93:N/67:G/30:Y/30:D/17:A/16:E/15:I/13:Q/8:C/4:R/4:F/3:T/2:V/1:W/1:M+757:W/6:C/1:R/1:G/1:F+542:V/182:I/15:F/12:L/6:A/3:T/3:M/2:Y/1:E+588:R/117:K/23:H/20:C/7:Q/3:P/2:W/2:S/2:M/1:Y/1:N+733:Q/15:K/11:H/3:R/2:E/1:P/1:L+358:A/174:P/70:R/64:S/46:T/17:F/11:M/7:V/5:L/4:K/4:H/2:Q/2:I/1:N/1:D+669:P/34:S/18:H/15:T/10:L/8:Q/6:A/4:N/2:R+655:G/63:E/11:K/10:T/10:R/5:D/4:V/4:S/2:A/1:N/1:I+576:K/119:Q/38:N/9:R/9:E/8:H/3:S/2:P/1:M/1:A+586:G/59:A/45:R/30:S/18:K/9:E/7:T/2:Q/2:P/2:I/2:D/1:V/1:N/1:H/1:C+725:L/14:P/8:I/4:Q/4:M/4:F/3:V/3:R/1:N+692:E/41:Q/18:K/7:D/6:V/1:H/1:G+698:W/36:Y/12:L/6:S/5:C/4:G/2:V/2:F/1:Q+256:I/251:V/131:M/126:L/1:T/1:K+442:G/214:A/87:S/9:T/5:E/3:W/3:V/2:R/1:C+112:Y/88:R/74:V/72:A/58:G/57:W/56:S/42:E/35:L/31:T/24:F/22:D/21:I/21:H/18:N/14:Q/10:M/9:C/2:K+711:I/16:V/14:M/9:T/7:S/6:F/1:W/1:N/1:K +IGH,fwr2,20,2:G+2:I+2:S+2:W+2:L+2:R+2:W+2:S+2:P+2:G+2:N+2:G+2:D+2:S+2:G+2:I+2:L+2:Q+2:L+2:S +IGH,fwr3,31,1:T/1:S+1:Y/1:A+2:Y+1:N/1:A+1:S/1:G+1:W/1:N+1:T/1:A+1:R/1:N+1:Y/1:S+1:R/1:A+1:S/1:Q+1:T/1:K+1:L/1:I+1:S/1:Q+1:S/1:G+1:R/1:N+1:V/1:T+1:T/1:P+1:R/1:M+2:T+1:T/1:R+1:F/1:D+1:T/1:L+1:S/1:C+1:T/1:K+1:S/1:R+2:T+1:V/1:A+2:Y+1:M/1:F+1:E/1:C +IGH,fwr3,32,2:N+2:S+2:H+2:T+2:S+2:L+2:S+2:R+2:D+2:K+2:S+2:G+2:D+2:Q+2:W+2:F+2:L+2:K+2:L+2:S+2:S+2:L+2:P+2:A+2:E+2:N+2:M+2:A+2:V+2:Y+2:Y+2:C +IGH,fwr3,35,1:K+1:S+1:Y+1:A+1:D+1:A+1:V+1:K+1:G+1:Q+1:F+1:T+1:I+1:S+1:R+1:D+1:N+1:A+1:K+1:N+1:T+1:L+1:Y+1:L+1:H+1:M+1:N+1:S+1:L+1:I+1:A+1:L+1:Y+1:Y+1:C +IGH,fwr3,37,6:A/1:T+7:Y+7:Y+5:A/2:K+7:S+7:W+6:V/1:A+4:N/3:K+7:G+7:R+7:F+7:T+7:I+7:S+7:K+7:T+7:S+5:S/2:T+7:T+6:V/1:M+7:D+7:L+7:K+6:M/1:I+7:T+7:S+6:L/1:P+6:T/1:R+6:A/1:T+3:S/3:A/1:E+7:D+5:T/1:M/1:K+7:A+7:T+7:Y+7:F+7:C +IGH,fwr3,38,3:T+3:Y+3:Y+3:A+3:S+3:W+3:A+3:K+3:G+3:R+3:F+3:T+3:I+3:S+3:K+3:T+3:S+3:S+3:T+3:T+3:V+2:T/1:D+3:L+2:Q/1:K+3:M+3:T+3:S+3:L+3:T+2:A/1:T+2:A/1:E+3:D+3:T+3:A+3:T+3:Y+3:F+3:C 
+IGH,fwr3,39,547:T/68:K/43:I/40:A/25:P/12:N/5:M/4:S/2:V/1:Q/1:E+283:Y/126:N/58:D/50:S/41:E/35:G/30:K/25:T/25:H/24:R/14:F/11:A/7:W/7:I/5:V/4:L/2:C/1:Q+721:Y/5:N/5:L/5:H/4:S/4:F/3:C/1:D+290:A/278:N/69:S/41:P/27:T/13:R/11:V/11:G/2:M/2:L/2:D/1:K/1:H+214:D/173:P/90:Q/87:S/86:E/49:A/28:T/8:G/4:V/4:N/3:K/1:L/1:H+389:S/148:K/111:A/29:W/24:T/15:D/10:G/5:N/5:E/3:P/2:I/1:Y/1:V/1:R/1:M/1:L/1:H/1:F+279:V/218:L/203:F/24:A/11:I/8:M/4:R/1:Y+617:K/74:Q/13:N/11:T/11:I/6:R/6:M/6:E/2:S/1:G/1:D+420:G/248:S/31:D/14:T/14:N/6:E/4:R/3:K/2:Y/2:Q/2:P/2:A+606:R/110:K/17:Q/3:P/3:L/3:G/2:H/1:W/1:T/1:S/1:E+322:F/140:L/121:A/104:V/28:S/26:I/7:T+573:T/121:S/18:I/16:A/12:V/3:F/2:K/1:Y/1:R/1:N+563:I/104:L/32:M/24:F/22:V/2:T/1:S+506:S/224:T/6:N/5:F/2:P/2:C/1:Y/1:H/1:D+411:R/114:K/97:V/70:A/16:S/16:L/8:T/4:P/4:M/4:I/2:E/1:H/1:G+669:D/44:E/28:N/3:G/2:S/1:V/1:Q+310:T/268:N/78:K/68:D/6:M/4:R/3:I/3:E/2:V/2:A/1:S/1:Q/1:P/1:G+547:S/146:A/22:N/11:T/6:P/5:D/3:K/3:F/2:V/1:Y/1:L/1:G+454:K/105:S/47:Q/33:T/25:E/21:A/19:N/16:I/12:R/7:L/5:M/3:V/1:F+416:N/316:S/6:T/5:D/2:Y/2:K/1:G+395:T/226:Q/57:S/25:I/22:M/5:N/4:R/4:L/4:K/3:H/1:V/1:P/1:A+226:L/215:V/200:A/88:F/5:I/3:T/3:R/2:Y/2:S/2:Q/2:G+457:Y/113:F/98:S/29:V/25:T/6:L/6:C/4:H/4:D/3:N/1:P/1:M/1:I+585:L/144:M/11:F/4:I/3:V/1:T+427:Q/163:K/85:E/40:T/15:S/7:D/3:R/3:L/2:N/2:H/1:V+414:M/262:L/40:I/15:V/7:W/4:K/3:F/1:T/1:Q/1:P+319:N/298:S/78:T/22:D/13:R/5:G/3:K/3:A/2:Y/2:I/2:H/1:L+599:S/91:N/25:R/15:T/5:A/4:K/4:G/1:Q/1:I/1:H/1:D/1:C+569:L/138:V/35:M/2:R/2:I/1:Q/1:P+262:R/257:T/100:K/64:Q/50:D/4:I/3:N/2:G/2:E/2:A/1:S/1:M+242:A/222:S/168:T/60:P/17:N/9:G/9:D/7:I/7:E/4:V/1:Y/1:L/1:K+539:E/98:A/52:D/35:V/8:S/5:K/5:G/2:Q/2:N/1:P/1:L+734:D/8:G/2:Y/2:A/1:N/1:E+606:T/102:S/26:M/5:I/4:A/2:P/1:L/1:F/1:C+703:A/30:G/5:T/3:V/2:S/2:E/2:D/1:P+356:V/190:T/76:M/75:I/29:L/19:R/2:A/1:Y+735:Y/5:H/2:I/1:N/1:L/1:K/1:F/1:D/1:C+632:Y/101:F/4:S/4:H/3:L/2:N/2:C+736:C/4:S/2:Y/2:V/1:W/1:M/1:G/1:F 
+IGH,fwr3,40,2:T/2:K/2:G/1:I+2:Y/2:I/2:G/1:D+4:L/3:Y+2:Q/2:E/1:S/1:N/1:A+2:W/2:L/1:S/1:P/1:D+3:S/1:V/1:T/1:M/1:A+2:H/1:S/1:M/1:I/1:G/1:F+4:R/3:K+2:S/2:P/2:I/1:G+3:R/1:S/1:Q/1:H/1:D+2:K/1:T/1:P/1:N/1:L/1:F+3:S/2:P/1:T/1:G+3:I/2:S/1:G/1:D+3:S/2:H/1:G/1:C+2:R/2:H/1:Y/1:S/1:K+3:D/2:T/2:P+2:T/2:N/2:C/1:W+3:S/2:Y/1:P/1:A+3:S/3:K/1:A+3:S/2:N/1:P/1:D+2:S/2:Q/2:A/1:T+2:V/2:S/2:F/1:L+2:L/1:Y/1:S/1:Q/1:K/1:F+3:L/2:P/2:G+2:Q/2:I/1:R/1:K/1:H+2:R/2:L/1:V/1:I/1:F+2:T/2:S/2:D/1:N+3:S/2:P/2:I+2:S/2:Q/2:L/1:V+2:S/2:R/1:Q/1:P/1:A+2:T/2:L/1:S/1:E/1:D+2:P/2:E/1:N/1:K/1:D+3:D/2:T/1:S/1:A+3:T/3:I/1:K+3:A/2:H/1:S/1:N+2:T/2:Q/2:M/1:F+2:V/2:L/1:S/1:C/1:A+6:Y/1:S+3:Y/3:L/1:F+2:Q/2:I/2:C/1:A +IGK,cdr1,10,15:S/2:R+17:A+16:S/1:I+17:S+17:S+16:V/1:I+15:S/2:N+17:Y+17:M+9:H/7:Y/1:N +IGK,cdr1,11,97:R/33:K/23:Q/4:W/4:H/3:S/2:T/2:L/2:I/1:G+154:A/6:S/5:T/3:M/2:V/1:P+155:S/8:N/4:R/2:T/2:H+139:Q/19:E/3:T/3:I/2:K/2:G/1:L/1:D/1:A+71:S/45:G/29:D/16:N/6:E/2:T/2:H+120:I/48:V/2:T/1:A+104:S/28:G/12:N/8:Y/7:D/6:R/3:H/2:K/1:V+65:S/51:N/21:T/12:D/9:K/6:G/2:I/2:H/1:Y/1:V/1:R+82:Y/22:W/19:N/16:S/13:D/9:A/4:E/2:K/1:R/1:I/1:H/1:G+144:L/15:V/7:M/5:I+87:A/34:N/22:S/12:H/7:D/5:G/1:Y/1:R/1:M/1:I +IGK,cdr1,12,10:R/7:S/2:N/1:T/1:G+18:A/2:G/1:V+20:S/1:R+11:S/8:Q/2:P+19:S/2:G+17:V/4:I+21:S+19:S/2:T+16:S/5:N+17:Y/2:N/2:K+21:L+10:H/4:S/3:Y/2:T/2:A +IGK,cdr1,13,3:Q/1:R+2:A/1:S/1:P+4:S+3:Q/1:R+2:S/1:N/1:A+3:V/1:S+3:Y/1:A+2:D/1:S/1:I+3:N/1:T+3:N/1:S+2:Y/1:W/1:A+3:L/1:G+3:S/1:I +IGK,cdr1,14,1:K+1:S+1:T+1:Q+1:S+1:L+1:K+1:Y+1:S+1:D+1:G+1:K+1:T+1:Y +IGK,cdr1,15,12:R/2:K/1:Q+9:A/6:S+14:S/1:T+9:Q/5:E/1:K+15:S+9:V/6:L+6:E/5:D/3:S/1:K+4:Y/3:S/2:T/2:I/2:E/1:N/1:F+9:Y/3:S/1:N/1:D/1:A+13:G/2:S+3:Y/3:S/3:N/2:T/2:I/1:K/1:D+9:S/5:N/1:T+6:F/5:Y/4:L+9:M/6:L+6:H/5:S/2:N/1:R/1:Q 
+IGK,cdr1,16,48:R/35:K/1:S+60:S/23:A/1:F+75:S/7:T/2:N+72:Q/10:K/1:H/1:E+84:S+80:L/3:I/1:V+50:L/16:V/8:K/3:R/3:E/2:Q/1:F/1:A+50:H/16:Y/5:N/5:D/4:G/2:R/1:S/1:P+76:S/2:T/2:R/2:K/2:G+43:D/32:N/6:Y/2:S/1:K+83:G/1:D+40:N/27:K/8:Y/4:I/4:D/1:F+71:T/8:N/4:S/1:K+78:Y/3:F/2:L/1:S+83:L/1:V+31:N/22:Y/9:H/8:S/8:D/3:E/1:R/1:Q/1:F +IGK,cdr1,17,15:K/2:R/1:T+17:S/1:A+16:S/2:N+17:Q/1:E+16:S/2:N+14:L/4:V+17:L/1:Y+5:Y/4:W/4:N/2:S/2:D/1:A+18:S+6:S/5:G/2:V/2:D/1:R/1:K/1:F+14:N/2:D/1:T/1:H+13:Q/2:G/1:R/1:N/1:K+11:K/4:N/2:R/1:V+11:N/2:Y/2:T/2:S/1:H+16:Y/2:C+18:L+10:A/2:V/2:S/2:D/1:T/1:N +IGK,cdr2,4,1:V+1:T+1:E+1:S +IGK,cdr2,7,46:A/43:Y/41:G/35:D/28:S/26:L/24:R/20:K/18:E/16:W/16:Q/5:N/3:F/2:T/2:M/2:H+193:A/64:V/44:T/8:M/8:G/7:I/2:L/1:K+270:S/34:T/6:D/5:N/5:K/3:C/2:P/2:F+144:N/59:S/58:T/17:K/13:R/9:Q/6:D/5:Y/5:I/5:A/4:E/1:G/1:F+164:L/138:R/15:S/4:W/2:Q/2:H/1:V/1:K+113:A/43:Q/41:E/25:Y/25:D/24:F/20:H/10:I/7:V/4:N/4:G/3:R/3:P/3:L/1:W/1:S+218:S/58:T/17:P/17:D/4:N/4:E/3:F/3:A/1:L/1:I/1:G +IGK,cdr2,9,1:I+1:Y+1:Y+1:V+1:S+1:N+1:R+1:Y+1:T +IGK,cdr2,11,1:Y+1:A+1:S+1:Q+1:S+1:I+1:S+1:W+1:I+1:P+1:S +IGK,fwr1,8,1:G+1:E+1:R+1:V+1:T+1:I+1:S+1:C +IGK,fwr1,21,1:D+1:L+1:Q+1:M+1:T+1:Q+1:S+1:P+1:S+1:S+1:P+1:F+1:A+1:S+1:L+1:G+1:D+1:A+1:I+1:T+1:W +IGK,fwr1,22,1:N/1:H+1:I/1:A+1:Q/1:G+1:V/1:L+1:T/1:I+1:R/1:Q+1:S/1:C+1:R/1:P+1:L/1:F+1:P/1:L+2:S+2:A+2:S+1:V/1:L+1:R/1:G+1:H/1:E+1:S/1:R+2:V+1:T/1:S+1:V/1:I+1:T/1:S+2:C 
+IGK,fwr1,23,195:D/53:E/40:A/21:Q/6:S/5:N/3:T/2:V/1:W/1:G+249:I/45:V/10:T/7:A/5:N/4:Q/2:M/2:G/1:P/1:L/1:H+199:V/94:Q/7:L/6:T/6:M/3:W/3:I/3:E/2:R/2:K/1:D/1:A+190:M/118:L/13:V/3:I/2:T/1:Q+316:T/3:Y/3:S/3:I/2:N+327:Q+212:S/101:T/9:A/2:D/1:N/1:F/1:C+298:P/7:T/7:A/4:E/3:S/3:H/2:Q/2:L/1:I+95:S/91:A/76:L/12:K/10:G/10:D/9:P/8:E/5:F/4:V/4:T/2:R/1:Y+226:S/30:T/25:I/23:F/7:L/6:Y/4:P/2:H/2:A/1:N/1:C+244:L/43:M/16:V/6:N/6:I/5:Q/2:S/2:K/2:F/1:T+220:S/57:A/35:P/6:T/3:Y/3:L/1:V/1:F/1:E+152:V/142:A/15:L/5:T/4:M/3:I/3:G/1:R/1:K/1:F+233:S/57:T/14:I/11:A/5:V/5:P/1:N/1:C+122:P/98:L/60:V/17:A/10:I/7:Q/5:T/3:S/2:R/2:H/1:F+306:G/6:E/5:K/4:T/3:R/2:D/1:S+161:E/102:D/39:Q/12:G/5:S/4:T/2:N/1:K/1:A+121:R/55:K/53:T/43:P/27:S/14:Q/4:I/3:L/3:F/2:E/1:N/1:M+207:V/105:A/14:I/1:G+176:T/135:S/7:E/3:I/2:D/2:A/1:N/1:F+249:I/41:M/31:L/3:V/1:T/1:N/1:F+150:S/141:T/16:N/9:K/6:I/3:R/1:F/1:C+326:C/1:F +IGK,fwr2,15,326:W/3:R/2:L+263:Y/41:F/13:L/8:H/4:I/1:V/1:R+250:Q/59:L/12:R/4:K/3:V/2:M/1:N+306:Q/14:H/3:W/3:L/2:K/1:Y/1:R/1:E+299:K/19:R/9:Q/2:N/1:E/1:A+289:P/15:S/8:Q/7:A/5:L/4:T/1:V/1:H/1:G+282:G/17:D/7:N/7:E/5:R/4:H/3:S/2:K/1:W/1:V/1:L/1:A+178:Q/81:K/17:E/16:S/13:G/8:T/5:A/4:P/4:N/2:R/2:L/1:F+160:S/96:A/37:P/19:T/11:V/2:R/2:G/1:I/1:H/1:F/1:D+306:P/9:V/4:I/3:L/2:S/2:N/2:F/1:T/1:Q/1:A+188:K/66:Q/57:R/5:T/5:M/5:E/2:S/2:I/1:A+236:L/38:R/18:P/10:G/6:F/5:S/4:I/4:H/3:T/2:Q/2:C/2:A/1:M+289:L/23:W/6:I/5:M/3:V/3:F/1:Q/1:P+313:I/7:V/5:L/3:F/1:T/1:S/1:M+289:Y/16:K/8:S/4:H/4:F/2:T/2:L/2:C/1:R/1:Q/1:N/1:I +IGK,fwr3,28,1:Q+1:V+1:S+1:R+1:S+1:G+1:S+1:G+1:T+1:D+1:F+1:T+1:L+1:T+1:I+1:T+1:S+1:L+1:E+1:A+1:E+1:D+1:A+1:A+1:T+1:S+1:Y+1:C 
+IGK,fwr3,32,310:G/8:W/4:D/2:S/2:E/1:V/1:R/1:L+284:V/34:I/5:T/3:F/1:M/1:E/1:A+302:P/25:S/2:L+133:D/125:S/47:A/5:E/4:C/3:V/3:K/2:T/2:H/2:G/1:P/1:N/1:L+317:R/4:G/2:S/2:Q/1:W/1:P/1:K/1:C+327:F/2:L+264:S/42:T/10:K/7:I/4:R/1:V/1:N+317:G/7:S/4:A/1:D+314:S/6:T/3:G/2:R/2:I/1:N/1:L+300:G/18:R/7:Q/3:V/1:K+307:S/7:Y/6:A/3:F/2:P/2:L/1:T/1:H+297:G/21:E/2:V/2:T/2:R/2:D/2:A/1:W+288:T/18:S/7:A/4:R/4:K/2:Q/2:N/2:I/1:V/1:M+250:D/33:S/19:E/16:Q/3:H/2:V/1:Y/1:N/1:K/1:G/1:F/1:A+253:F/74:Y/1:T/1:H+245:T/69:S/6:I/5:V/2:A/1:N/1:L+300:L/23:F/2:V/1:R/1:M/1:I/1:C+211:T/69:K/22:R/11:S/6:N/4:I/3:E/1:Q/1:P/1:A+327:I/2:V+274:S/27:N/10:H/5:I/3:E/3:D/2:T/2:R/2:G/1:C+159:S/87:R/26:G/24:N/9:P/7:T/7:K/5:C/3:Q/1:I/1:D+148:V/143:L/28:M/3:I/2:P/2:G/2:F/1:T+181:E/125:Q/13:K/3:H/2:L/2:G/2:A/1:V+181:A/84:P/30:S/7:V/7:E/7:C/5:T/3:Q/2:Y/2:D/1:G+277:E/44:D/3:Q/3:N/2:K+325:D/2:N/1:V/1:G+94:A/82:V/68:F/43:L/18:I/14:T/4:M/2:G/2:E/1:P/1:D+208:A/113:G/3:V/2:T/2:R/1:E+141:V/121:T/22:D/12:I/11:M/10:S/4:A/3:Y/2:L/1:N/1:H/1:E+325:Y/2:F/1:L/1:C+283:Y/30:F/8:H/3:S/3:C/1:T/1:L+323:C/2:S/2:G/1:V/1:L +IGK,fwr3,34,1:Y+1:A+1:T+1:N+1:L+1:E+1:D+1:G+1:I+1:T+1:S+1:W+1:F+1:S+1:S+1:S+1:G+1:S+1:G+1:A+1:D+1:Y+1:S+1:L+1:T+1:I+1:S+1:S+1:L+1:E+1:S+1:E+1:D+1:C +IGL,cdr1,8,2:T+2:G+2:V+2:G+2:S+2:Y+2:V+1:Y/1:G +IGL,cdr1,11,30:S/23:G/14:Q/6:E/1:V/1:R+74:G/1:R+44:D/18:N/7:S/3:E/2:G/1:A+29:N/16:S/11:L/5:V/4:D/4:A/2:Y/1:M/1:K/1:I/1:E+35:I/34:L/4:F/2:M+42:G/11:D/8:E/4:P/4:A/3:S/2:R/1:K+37:S/13:K/12:E/4:R/2:N/2:F/2:D/1:Y/1:L/1:G+32:K/13:S/10:Y/6:Q/5:N/4:R/2:T/2:L/1:A+39:Y/14:S/6:G/4:D/4:A/3:F/1:V/1:R/1:N/1:M/1:H+31:V/31:A/9:T/3:I/1:E+22:H/16:Y/13:Q/7:G/5:S/4:N/2:R/2:D/2:A/1:E/1:C +IGL,cdr1,12,6:T/2:S/1:G/1:A+6:L/3:G/1:S+8:S/1:T/1:R+9:S/1:T+3:N/3:E/2:A/1:S/1:G+6:H/3:I/1:D+4:S/3:G/2:K/1:I+4:T/3:S/1:R/1:I/1:G+7:Y/1:L/1:G/1:F+4:Y/2:I/2:G/1:T/1:S+6:I/3:V/1:A+3:S/2:Y/2:E/1:G/1:D/1:A 
+IGL,cdr1,13,49:S/38:T/5:A/4:N/4:K/1:E+83:G/11:R/3:L/2:A/1:S/1:C+93:S/4:D/3:N/1:G+83:S/14:T/2:R/2:A+58:S/25:N/14:G/2:C/1:F/1:E+84:N/7:S/7:D/1:R/1:I/1:H+71:I/17:V/7:T/5:M/1:F+82:G/5:R/5:E/4:D/2:S/2:A/1:L+24:R/17:G/15:S/14:I/11:N/7:L/4:D/3:M/2:K/2:A/1:F/1:E+26:Y/24:G/21:N/11:L/4:V/3:S/3:I/3:A/2:K/1:Q/1:F/1:E/1:D+45:Y/37:G/10:F/3:S/2:N/2:D/1:P/1:A+88:V/6:A/3:G/2:T/2:I+30:S/30:G/14:N/9:H/4:Y/3:Q/3:I/3:A/2:C/1:K/1:F/1:D +IGL,cdr1,14,78:T/30:S/18:G/18:A/5:R/2:D/1:N/1:H+83:G/51:L/11:S/7:F/1:R+121:S/26:T/3:N/2:R/1:C+124:S/17:T/5:R/4:N/1:M/1:G/1:A+75:S/62:G/6:D/5:T/2:N/2:E/1:C+52:N/26:D/24:S/17:Y/10:A/8:F/6:I/3:V/3:Q/2:L/1:T/1:H+77:V/39:I/18:S/16:N/1:R/1:G/1:A+81:G/32:V/23:T/14:S/2:I/1:A+59:G/32:T/27:Y/20:S/6:A/3:R/2:L/2:I/2:D+62:G/42:S/27:Y/6:N/4:T/4:D/2:R/2:F/2:E/1:K/1:A+94:N/37:Y/6:S/5:L/4:K/3:H/2:D/1:I/1:F+84:Y/12:S/10:G/7:D/6:H/5:R/5:N/5:E/5:A/4:L/3:P/2:V/2:T/1:W/1:K/1:C+79:V/32:I/25:P/9:A/4:L/3:M/1:S+52:G/43:S/18:Y/11:N/5:Q/5:H/4:L/4:F/3:V/3:T/1:R/1:I/1:E/1:D/1:C +IGL,cdr1,15,1:T+1:L+1:S+1:R+1:D+1:I+1:N+1:V+1:G+1:S+1:Y+1:R+1:H+1:I+1:L +IGL,cdr1,19,2:T+2:G+2:I+2:P+2:S+2:N+1:T/1:I+1:N/1:D+1:L/1:F+1:E/1:D+1:G/1:E+1:L/1:I+1:G/1:E+1:I/1:F+1:D/1:A+2:T+1:S/1:K+2:V+1:S/1:N +IGL,cdr2,4,2:Y+2:N+2:S+2:D +IGL,cdr2,6,2:T/1:G+2:T/1:S+3:S+2:G/1:R+2:A/1:P+2:L/1:S +IGL,cdr2,7,92:G/35:N/31:E/28:D/25:S/20:K/19:Y/16:R/8:Q/4:L/4:A/3:W/2:C+95:D/55:T/38:N/34:S/22:V/20:A/5:Y/5:L/5:I/5:G/2:K/1:P+149:S/70:N/34:T/17:D/11:G/2:I/2:E/1:R/1:P+86:N/66:S/61:K/23:E/16:D/14:T/10:Q/4:Y/2:R/2:I/1:L/1:G/1:A+262:R/10:Q/4:L/4:K/2:C/1:W/1:S/1:I/1:H/1:G+219:P/46:A/3:S/3:R/3:L/2:H/2:G/2:F/2:E/1:Y/1:T/1:N/1:I/1:D+255:S/12:P/11:T/5:D/3:L/1:A +IGL,cdr2,8,1:L+1:I+1:D+1:R+1:N+1:N+1:P+1:H +IGL,cdr2,9,1:Y+1:Y+1:S+1:S+1:T+1:E+1:L+1:G+1:P +IGL,cdr2,11,24:Y/8:F/4:L/2:H/1:V+13:Y/13:K/9:H/2:N/1:V/1:E+28:S/8:T/1:K/1:G/1:F+34:D/3:E/1:S/1:A+27:S/5:G/4:E/3:D+18:D/9:S/4:N/3:I/2:F/1:Y/1:M/1:E+30:K/3:Y/2:N/2:H/1:T/1:E+24:H/6:Q/2:T/2:S/2:N/2:E/1:D+29:Q/5:L/4:K/1:R+39:G+23:S/12:P/2:T/2:D 
+IGL,cdr2,12,1:K+1:V+1:N+1:S+1:D+1:G+1:S+1:H+1:S+1:R+1:G+1:D +IGL,fwr1,20,1:Q+1:I+1:V+1:V+1:T+1:Q+1:E+1:P+1:S+1:L+1:S+1:P+1:G+1:G+1:T+1:V+1:L+1:L+1:T+1:C +IGL,fwr1,21,1:Q+1:A+1:V+1:L+1:T+1:Q+1:R+1:P+1:P+1:C+1:P+1:G+1:P+1:W+1:A+1:E+1:V+1:S+1:I+1:T+1:C +IGL,fwr1,22,252:Q/74:S/4:R/4:A/2:N/2:L/1:P/1:H/1:E+106:S/100:A/49:Y/41:P/14:T/10:F/9:Q/4:V/2:L/2:I/1:N/1:H/1:E/1:D+231:V/37:E/28:A/15:G/7:T/5:Q/5:M/5:L/2:R/2:K/1:S/1:I/1:D/1:C+285:L/49:V/4:P/2:F/1:Q+319:T/10:N/9:I/2:S/1:A+337:Q/3:P/1:K+272:P/31:E/19:L/8:S/3:K/2:T/2:D/2:A/1:V/1:Q+162:P/80:S/57:A/17:T/5:Q/4:N/4:H/4:F/2:K/2:D/1:V/1:R/1:L/1:I+307:S/24:A/3:G/2:T/2:L/2:F/1:E+248:V/61:L/12:M/12:A/3:F/2:K/2:E/1:I+312:S/13:T/6:F/5:L/3:N/2:A+154:G/90:V/49:A/20:R/13:T/7:K/4:E/1:W/1:M/1:L/1:F+231:S/49:A/28:N/18:T/4:V/3:G/3:F/1:Y/1:P/1:L/1:D/1:C+229:L/104:P/3:V/2:S/2:E/1:T+323:G/8:R/3:E/3:D/2:K/1:N/1:A+233:Q/34:G/23:A/14:T/10:S/9:K/8:E/3:R/3:N/1:V/1:P/1:L/1:I+156:T/119:R/45:S/13:M/4:K/2:P/1:N/1:A+221:V/102:A/13:I/3:E/1:T/1:L+162:T/76:S/66:R/24:K/8:I/3:H/2:F+243:I/82:L/10:V/2:T/2:F/1:P/1:N+197:T/128:S/7:P/3:I/3:A/2:C/1:F+338:C/1:T/1:S/1:R +IGL,fwr1,23,1:Q+1:A+1:V+1:L+1:T+1:Q+1:P+1:S+1:S+1:V+1:L+1:P+1:G+1:S+1:L+1:G+1:Q+1:R+1:V+1:S+1:I+1:T+1:C +IGL,fwr2,15,340:W/1:R/1:Q/1:G/1:C+262:Y/41:F/12:C/10:H/7:V/7:L/2:S/2:I/1:Q+341:Q/1:R/1:L/1:K+308:Q/12:L/10:E/5:H/4:K/3:P/1:R/1:D+116:L/104:K/32:V/25:I/24:T/17:H/7:R/6:Q/4:N/4:F/2:Y/2:P/1:S+289:P/14:Q/13:L/11:S/9:A/6:E/2:T+322:G/7:D/6:S/3:E/2:Y/1:P/1:N/1:L/1:A+117:S/81:Q/50:T/40:K/28:R/8:H/6:M/4:N/4:L/3:E/1:I/1:G/1:A+199:A/48:G/43:P/22:S/8:L/7:V/7:T/5:R/4:D/1:K+302:P/27:L/7:F/5:S/1:I/1:H/1:A+139:R/99:K/51:V/20:T/7:E/6:M/4:W/4:S/4:L/3:Q/3:I/1:P/1:H/1:G/1:A+151:T/115:L/39:Y/9:M/8:G/5:N/4:R/4:Q/2:V/2:K/1:S/1:I/1:F/1:D/1:A+185:L/76:V/57:I/11:M/6:F/5:P/3:R/1:Y+275:I/40:L/15:V/6:M/4:T/2:S/2:C+282:Y/11:R/10:G/9:D/7:C/4:T/4:S/3:Q/3:N/3:K/2:M/2:L/2:H/1:F/1:A +IGL,fwr3,28,2:S+2:Q+2:K+2:L+2:Q+2:G+2:S+2:G+2:V+2:P+2:R+2:H+2:F+2:S+2:G+1:S/1:P+2:K+2:D+2:T+2:S+2:S+2:N+2:A+2:G+2:L+2:L+2:L+2:I 
+IGL,fwr3,31,2:G+2:V+2:P+2:D+2:R+2:V+2:S+2:G+2:S+2:K+2:S+2:G+2:N+2:S+2:A+2:S+2:L+2:T+2:S+2:S+2:V+2:H+2:A+2:E+2:D+2:D+2:T+2:D+2:Y+2:Y+2:C +IGL,fwr3,32,281:G/3:W/3:R/3:E+166:V/96:I/8:G/5:S/5:A/4:T/4:D/1:L/1:F+261:P/17:S/8:T/4:L+171:D/46:E/30:A/18:S/18:N/3:V/3:G/1:H+263:R/12:W/7:Q/3:H/2:L/2:C/1:G+277:F/8:A/2:P/1:Y/1:S/1:L+282:S/5:T/2:P/1:C+277:G/6:A/3:S/3:C/1:V+248:S/15:T/11:A/10:V/3:F/2:Y/1:D+112:K/54:R/36:N/34:S/24:I/10:P/10:L/5:T/1:V/1:Q/1:M/1:F/1:C+265:S/10:D/7:L/5:I/1:V/1:T/1:P+270:G/10:R/5:D/3:E/2:S+185:N/47:S/17:T/12:D/10:F/9:K/6:A/3:G/1:Q+194:T/48:S/33:K/6:D/5:M/2:V/1:L/1:A+231:A/37:G/6:V/6:R/5:T/2:H/2:D/1:S+202:T/51:S/26:A/6:Y/3:I/1:V/1:N+271:L/9:M/8:R/2:K+260:T/15:S/8:A/5:I/2:G+272:I/10:G/3:V/3:L/1:T/1:R+177:S/80:T/16:A/11:N/3:R/1:P/1:L/1:H+186:G/73:S/11:R/10:T/4:N/3:E/1:W/1:V/1:L+169:L/92:A/17:V/5:I/4:T/3:F+237:Q/20:R/10:T/10:L/6:E/2:W/2:P/2:K/1:H+216:A/30:P/13:T/11:S/10:L/4:V/3:D/1:I/1:G/1:F+256:E/10:G/9:T/5:K/4:D/2:N/1:Q/1:M/1:I/1:A+268:D/10:I/9:E/2:V/1:N+273:E/10:S/4:D/2:K/1:Q+271:A/12:S/3:T/2:V/2:D+248:D/10:L/10:G/9:E/3:N/3:M/3:I/2:Y/1:T/1:A+277:Y/10:Q/2:D/1:H+236:Y/33:F/10:A/7:H/3:I/1:S+272:C/10:E/3:Y/2:S/1:W/1:I/1:G +IGL,fwr3,34,47:G/1:R/1:E+49:V+44:P/3:H/2:S+28:S/14:D/4:N/2:T/1:K+44:R/2:C/1:W/1:S/1:H+45:F/4:V+49:S+48:G/1:D+48:S/1:D+35:K/13:I/1:S+46:D/2:E/1:N+21:A/13:S/12:T/2:D/1:R+43:S/3:T/2:A/1:L+20:A/16:S/8:T/5:E+47:N/1:Y/1:H+28:A/17:S/3:T/1:V+30:G/16:A/3:F+23:L/9:S/7:V/6:F/4:I+49:L+27:L/13:T/7:S/1:R/1:A+49:I+38:S/5:N/4:T/2:A+39:G/6:N/4:E+36:L/9:V/3:P/1:I+42:Q/3:L/3:K/1:E+31:P/6:S/5:T/4:A/2:I/1:M+48:E/1:D+49:D+48:E/1:K+48:A/1:T+43:D/3:I/1:V/1:N/1:E+48:Y/1:C+39:Y/6:F/3:H/1:D+49:C +IGL,fwr3,35,1:G+1:F+1:P+1:E+1:R+1:I+1:R+1:E+1:S+1:P+1:G+1:S+1:K+1:S+1:G+1:N+1:T+1:G+1:S+1:L+1:T+1:I+1:T+1:G+1:L+1:Q+1:A+1:E+1:D+1:E+1:A+1:D+1:Y+1:Y+1:C +TRA,cdr1,3,2:Q+2:S+2:A +TRA,cdr1,4,3:S+3:F+3:R+3:G 
+TRA,cdr1,5,63:T/51:S/20:A/15:I/14:D/7:K/4:N/4:G/4:E/2:V/1:Y/1:R+74:T/42:S/18:A/15:F/7:L/7:I/7:G/5:Y/4:M/3:V/2:P/1:N/1:E+46:V/28:L/26:F/24:A/17:P/17:M/16:I/8:T/4:S+43:T/38:N/24:S/22:Y/20:D/17:Q/12:R/8:K/2:P+85:S/36:N/26:T/8:A/7:R/7:Q/5:G/5:F/4:V/1:Y/1:L/1:D +TRA,cdr1,6,105:D/61:S/54:N/52:T/48:V/37:Y/24:A/8:I/6:K/3:F/1:Q/1:G/1:E+254:S/65:T/30:G/16:R/9:I/6:P/5:A/4:K/3:Y/3:N/2:V/2:F/2:D+100:A/88:G/46:N/33:T/26:Y/21:S/19:V/12:M/11:Q/11:P/8:W/7:R/6:L/6:D/4:I/3:F+148:S/103:F/45:Y/33:T/24:L/14:V/9:P/6:I/6:A/5:D/4:N/3:Q/1:M+124:P/74:D/37:N/36:Y/33:Q/15:S/14:V/14:T/12:R/12:A/8:G/7:I/7:E/6:L/2:F+198:Y/39:S/35:T/34:F/28:A/26:N/18:G/8:V/5:D/4:H/2:R/2:E/1:L/1:I +TRA,cdr1,7,139:T/14:A/3:N/3:I/1:V/1:S/1:K/1:D+59:S/49:I/16:T/16:A/10:R/7:N/5:Q/1:G+38:D/27:S/22:Y/20:G/20:E/11:A/8:Q/4:R/4:I/3:V/3:N/1:W/1:H/1:C+54:S/32:G/19:T/14:Y/12:L/8:V/6:P/5:I/4:Q/3:A/2:R/2:N/2:F+36:N/25:S/24:T/24:D/10:L/9:A/8:R/7:F/5:G/4:Y/3:V/3:E/2:P/1:K/1:I/1:C+81:Y/23:D/20:E/19:P/10:V/4:S/2:I/2:A/1:L/1:C+95:Y/18:S/14:F/8:T/8:H/6:G/6:A/3:V/3:R/2:C +TRA,cdr1,8,3:T/3:A/1:I+4:T/2:R/1:P+4:S/3:D+4:I/2:T/1:K+3:G/3:A/1:N+4:Y/1:G/1:F/1:D+4:P/2:Y/1:E+4:N/2:Y/1:S +TRA,cdr1,10,1:S+1:N+1:Y+1:T+1:V+1:R+1:P+1:F+1:N+1:N +TRA,cdr1,19,1:V+1:H+1:L+1:T+1:T+1:S+1:H+1:G+1:T+1:V+1:N+1:T+1:L+1:V+1:K+1:A+1:L+1:V+1:F +TRA,cdr2,2,1:G+1:T +TRA,cdr2,3,1:Q/1:F/1:E+2:T/1:R+2:M/1:D +TRA,cdr2,4,10:G/8:H/2:E/1:Y/1:S+7:T/7:S/5:L/3:I+8:S/5:K/4:R/2:M/1:T/1:G/1:E+9:P/6:R/2:G/2:E/1:S/1:M/1:K +TRA,cdr2,5,78:I/39:G/18:L/10:N/8:V/2:M/1:S/1:Q+51:A/48:P/27:L/9:Y/8:S/6:V/4:F/2:T/1:N/1:H+101:S/20:R/9:K/7:T/7:L/5:Q/4:P/3:N/1:G+122:G/7:N/7:E/6:T/6:D/4:S/2:Q/2:A/1:M+100:T/33:M/15:N/4:K/2:V/2:G/1:I +TRA,cdr2,6,23:I/12:V/12:S/7:N/3:G/2:T/2:Q/2:L/2:A/1:M+20:F/17:Y/8:V/8:S/6:P/2:K/2:A/1:W/1:L/1:I+31:S/17:L/8:A/3:R/3:E/2:G/1:T/1:K+27:N/23:D/5:W/5:S/4:G/1:Q/1:K+37:G/6:Q/6:K/5:N/5:A/4:T/2:E/1:R+21:E/15:D/9:N/9:L/6:K/5:V/1:I 
+TRA,cdr2,7,135:I/69:L/48:A/28:S/26:G/24:V/19:M/7:Q/6:T+83:R/55:S/52:L/45:T/20:A/15:Y/15:Q/15:M/14:F/13:V/10:I/8:H/7:D/5:G/4:P/1:W+156:S/67:K/50:T/19:R/13:L/9:P/9:M/9:A/6:V/6:E/5:D/4:Y/3:F/2:Q/2:H/1:N/1:I+101:N/63:A/49:V/43:D/39:S/23:G/10:E/9:T/9:I/8:P/4:R/2:F/1:Q/1:K+79:G/64:N/46:K/31:V/31:E/27:D/25:M/23:S/13:Q/8:L/7:T/6:A/1:Y/1:R+95:E/92:D/41:K/30:N/26:Q/19:R/18:A/13:S/9:G/8:I/4:V/2:P/2:H/1:Y/1:T/1:L+162:K/89:E/35:R/33:V/12:N/6:T/6:Q/6:L/4:M/3:I/3:A/2:S/1:G +TRA,cdr2,8,72:Q/65:Y/3:D/2:R/1:S/1:M+26:D/21:Y/21:I/19:E/13:T/11:L/9:M/7:F/6:N/4:V/4:G/3:P+100:S/22:A/9:T/6:G/4:P/2:L/1:V+63:G/46:Y/14:S/12:F/3:A/2:K/1:T/1:N/1:H/1:E+55:D/27:N/15:K/15:E/8:S/8:A/5:G/4:T/3:P/2:R/1:W/1:V+35:T/33:Q/19:P/15:K/11:E/9:N/8:S/5:A/3:R/3:I/3:G+50:Q/45:L/18:V/9:E/7:A/6:R/6:P/1:M/1:K/1:D+68:V/66:N/6:K/3:I/1:F +TRA,cdr2,9,2:Q/1:R/1:L+2:D/1:S/1:Q+2:S/1:R/1:E+2:Y/1:R/1:A+3:N/1:G+3:E/1:K+2:L/1:Q/1:I+2:N/1:K/1:E+1:T/1:R/1:N/1:K +TRA,cdr2,10,2:Y+1:L/1:F+2:S+2:G+2:P+1:S/1:P+1:N/1:D+1:V/1:A+2:L+2:V +TRA,fwr1,17,1:Q/1:E+1:V/1:E+1:T/1:S+1:V/1:L+1:Q/1:L+2:E+1:G/1:A+1:D/1:A+2:T+1:V/1:L+2:T+1:V/1:L+1:N/1:D+2:C+2:T+2:Y+1:T/1:S +TRA,fwr1,18,13:E/1:G+12:F/1:V/1:S+13:L/1:D+13:L/1:V+13:L/1:E+14:Q+13:E/1:S+13:G/1:P+13:E/1:P+12:N/1:K/1:A+13:F/1:L+13:T/1:S+12:T/1:M/1:L+13:Y/1:Q+13:C/1:E+13:N/1:G+9:S/4:F/1:A+14:S +TRA,fwr1,22,2:G/1:K+2:Q/1:N+2:P/1:Q+2:L/1:V+2:V/1:E+2:E/1:Q+2:K/1:S+2:K/1:P+2:V/1:P+2:K/1:S+2:E/1:L+2:G/1:V+2:E/1:V+2:S/1:L+2:F/1:E+2:T/1:G+2:L/1:E+3:N+3:C+2:S/1:T+2:Y/1:F+2:T/1:Q 
+TRA,fwr1,25,33:D/17:G/11:K/8:Q/8:L/6:N/3:A/2:S/2:E+38:A/17:Q/9:E/8:N/5:T/4:L/3:D/2:V/2:S/2:K+52:K/12:Q/8:S/4:G/4:E/3:P/3:N/2:I/2:D+41:T/35:V/8:I/2:L/2:K/1:Q/1:N+42:T/15:E/9:F/7:K/5:S/3:Q/3:D/2:I/2:H/1:L/1:A+84:Q/3:S/2:F/1:G+63:P/15:S/6:T/4:L/2:V+28:S/17:P/11:N/9:T/6:D/5:Q/5:I/3:L/2:G/2:E/1:V/1:A+43:S/14:T/7:E/5:Q/5:F/4:A/3:P/3:I/2:N/1:L/1:K/1:G/1:C+40:M/21:V/14:L/9:I/2:T/2:S/2:F+33:D/13:T/11:E/7:V/7:S/5:L/4:I/3:M/3:G/3:A/1:N+25:S/24:C/11:V/11:A/6:L/5:I/3:Y/3:G/1:T/1:H+28:A/10:L/8:Y/8:T/8:Q/6:S/5:I/4:V/4:R/4:P/2:N/2:M/1:K+83:E/3:R/3:Q/1:I+73:G/13:E/2:W/2:R+37:E/20:A/10:Q/5:R/4:T/4:N/4:G/4:D/1:V/1:K+23:D/12:S/9:V/9:P/9:N/9:E/8:I/3:Q/3:A/2:T/2:F/1:M+57:V/20:A/6:S/4:T/2:F/1:L+30:N/14:T/11:E/8:H/7:Q/6:V/4:G/3:R/3:I/2:F/1:S/1:A+41:L/35:I/7:V/4:M/2:T/1:F+38:P/24:N/13:S/7:T/2:Y/2:H/2:F/1:R/1:K+89:C/1:F+52:N/18:T/12:S/6:D/1:Q/1:I+59:H/26:Y/5:S+57:S/15:T/7:Q/5:N/3:E/2:A/1:P +TRA,fwr1,26,263:G/170:A/99:Q/31:K/25:S/24:E/14:T/8:D/6:L/5:R/3:N/2:V/2:I/1:M+291:Q/134:D/52:E/50:N/38:V/31:L/29:M/8:K/5:I/5:A/3:T/3:S/2:H/1:Y/1:P+176:S/154:K/140:Q/53:T/51:D/20:E/15:N/11:L/10:R/6:P/5:M/5:G/4:W/1:I/1:H/1:A+598:V/20:L/17:I/11:G/3:A/1:Q/1:M/1:K/1:E+224:T/211:E/55:K/36:N/33:Q/24:R/23:S/13:I/9:V/7:M/7:G/6:A/4:D/1:C+634:Q/15:E/2:L/1:W/1:H+269:S/104:T/75:P/51:N/32:M/23:D/19:L/14:R/14:H/13:V/12:I/12:A/7:K/5:Y/2:G/1:F+343:P/117:E/78:Q/74:D/15:S/12:L/7:W/1:Y/1:V/1:N/1:K/1:H/1:G/1:A+140:G/130:P/114:S/105:Q/59:A/34:E/18:T/16:D/10:R/9:L/5:V/4:W/4:K/2:I/2:H/1:F+147:S/95:A/69:Q/61:P/56:T/47:H/36:V/34:F/31:E/24:L/16:I/13:R/11:Y/6:D/4:G/2:W/1:N+360:L/173:V/48:I/29:M/16:E/7:Q/6:T/5:G/3:A/2:P/2:F/1:S/1:R+256:S/192:T/49:I/28:N/23:V/22:P/22:L/16:R/15:A/8:H/8:F/6:W/4:K/1:Y/1:Q/1:G/1:C+353:V/188:L/48:I/19:A/18:R/7:P/6:T/5:S/4:G/3:M/1:Q/1:F+301:Q/133:S/55:P/30:R/26:W/24:L/22:T/17:H/14:F/13:A/5:M/5:K/4:Y/4:V+623:E/13:K/7:Q/5:W/2:T/1:P/1:I/1:D+523:G/47:K/28:E/18:R/15:A/11:T/5:S/3:N/1:M/1:I/1:D+206:A/176:E/96:D/40:G/37:T/19:S/18:K/17:N/16:L/14:R/8:V/4:Q/1:P/1:M+184:S/108:N/76:T/57:P/41:A/39:I/27:L/26:F/24:D/17:M/1
5:R/12:E/10:G/8:V/4:K/4:H/1:C+170:V/153:L/116:S/97:A/32:C/19:T/18:F/17:Y/9:M/8:H/6:P/6:I/2:R+320:T/70:I/66:S/59:E/57:V/18:Q/17:M/13:A/12:N/8:F/7:L/4:K/2:G+428:L/118:I/38:V/38:M/19:F/8:T/3:A/1:S+282:N/98:R/61:D/50:K/34:Q/33:T/19:Y/19:S/16:W/13:H/12:G/11:I/2:M/2:L/1:E+652:C/1:V+245:T/205:N/137:S/17:K/13:D/13:A/11:V/4:I/3:H/2:R/2:M/1:Y+490:Y/119:F/41:S/1:R/1:I/1:C+328:S/99:E/87:T/58:Q/40:D/22:K/12:P/4:L/2:R/1:G +TRA,fwr1,27,2:G+2:Q+2:Q+2:V+2:Q+2:Q+2:S+2:P+2:P+2:A+2:S+2:L+2:V+2:L+2:Q+2:E+2:G+2:E+2:N+2:A+2:E+2:L+2:Q+2:C+2:T+2:Y+2:S +TRA,fwr2,16,10:L+6:L/3:H/1:F+10:W+9:Y/1:F+9:K/1:Q+10:Q+8:E/1:Q/1:P+5:K/4:E/1:P+8:K/1:S/1:N+6:A/3:V/1:R+9:P/1:E+8:T/1:M/1:I+9:F/1:L+10:L+9:F/1:I+4:M/2:V/1:T/1:R/1:I/1:A +TRA,fwr2,17,394:L/173:F/97:V/55:I/10:M/2:T/2:P/2:H/1:W/1:S+289:F/162:Q/69:P/54:H/35:Y/34:L/17:R/15:W/14:S/13:N/9:A/6:T/6:I/5:V/5:M/2:D/1:K/1:G+734:W/2:Y/1:C+630:Y/92:F/6:L/6:C/2:H/1:V+187:R/179:V/178:K/83:Q/51:L/16:Y/13:I/9:W/9:M/5:T/4:H/2:F/1:A+667:Q/27:H/17:K/9:W/8:R/4:N/3:E/2:L+193:Y/111:N/79:H/66:E/65:D/50:P/48:F/34:R/22:K/19:L/17:S/10:I/9:T/5:G/4:V/3:C/1:Q/1:A+493:P/81:S/70:L/17:A/15:H/14:T/12:Q/9:R/7:F/5:N/5:D/3:E/2:Y/1:V/1:K/1:I/1:C+383:G/118:N/110:S/55:R/24:A/12:E/10:D/8:H/4:K/3:W/3:T/3:L/1:Y/1:V/1:P/1:C+207:G/179:K/146:E/129:Q/36:R/14:A/7:F/6:S/4:H/2:T/2:L/2:C/1:V/1:N/1:M+353:G/144:S/70:E/66:A/49:R/25:H/11:Q/6:V/6:N/2:P/2:I/2:C/1:D+363:P/271:L/71:M/22:I/7:F/1:V/1:T/1:R+235:Q/125:V/118:I/68:E/58:A/49:K/46:T/22:R/10:H/2:M/1:W/1:S/1:N/1:L+316:L/179:F/76:S/41:Y/38:H/18:N/17:R/14:A/13:Q/7:M/6:V/5:I/4:T/1:P/1:G/1:C+643:L/47:I/45:V/1:H/1:F+267:I/237:L/125:F/61:M/20:V/11:T/11:S/3:Q/1:H/1:G+186:K/98:Y/74:R/62:S/56:A/54:H/34:L/31:I/24:Q/23:F/20:T/20:N/19:V/19:D/6:M/4:G/3:P/3:E/1:C 
+TRA,fwr2,18,11:V/5:L/1:F+12:Q/1:W/1:N/1:L/1:I/1:H+16:W/1:A+10:F/5:Y/1:L/1:I+11:R/2:L/2:K/1:Q/1:G+16:Q/1:S+11:N/2:R/2:D/1:V/1:K+12:P/2:S/1:Q/1:M/1:L+11:G/2:R/1:W/1:N/1:K/1:E+11:G/2:K/2:D/1:S/1:A+11:G/2:S/1:R/1:L/1:K/1:A+6:S/3:R/2:E/2:A/1:L/1:I/1:H/1:G+10:L/2:V/2:S/1:T/1:I/1:D+9:T/2:F/1:V/1:S/1:P/1:L/1:I/1:G+10:R/3:F/2:L/1:Y/1:V+14:L/2:M/1:I+9:F/3:I/2:L/1:T/1:P/1:H+9:Y/3:L/2:S/2:M/1:I +TRA,fwr3,32,12:S/12:K/3:N/2:Q/1:Y/1:L/1:E+19:Q/5:E/4:S/1:Y/1:T/1:K/1:G+11:K/10:Q/3:S/2:T/2:E/1:V/1:N/1:I/1:H+12:G/11:E/4:A/2:N/1:Y/1:L/1:K+22:R/3:S/2:F/1:V/1:T/1:Q/1:G/1:D+16:L/10:Y/2:G/1:Q/1:P/1:F/1:A+11:N/6:R/6:K/3:G/2:T/2:F/1:Q/1:E+13:A/10:M/3:I/2:F/1:V/1:R/1:Q/1:G+22:T/3:S/3:N/1:Y/1:G/1:E/1:A+12:L/9:Y/3:S/2:I/1:R/1:N/1:K/1:F/1:E/1:A+10:E/9:L/3:T/3:K/3:D/2:S/1:V/1:A+14:R/13:K/2:N/2:D/1:L+16:K/11:F/1:T/1:S/1:N/1:L/1:E+15:S/9:G/3:E/1:T/1:Q/1:N/1:L/1:D+29:S/2:F/1:K+19:S/4:T/3:F/2:P/2:H/1:L/1:A+29:L/2:I/1:A+11:H/10:L/3:V/2:T/2:K/1:Y/1:S/1:Q/1:F+23:I/4:K/3:L/1:T/1:F+9:L/6:E/5:P/4:T/3:A/2:I/1:Y/1:S/1:K+12:Q/12:A/4:S/1:W/1:R/1:H/1:G+17:V/11:P/1:T/1:L/1:G/1:A+13:Q/11:K/2:T/2:R/2:H/1:S/1:I+13:P/7:T/3:V/3:L/2:W/2:E/1:I/1:A+14:E/9:A/4:R/3:S/1:Q/1:L+31:D/1:L+17:S/12:A/2:T/1:K+24:A/7:G/1:Y+15:V/13:T/2:I/1:L/1:K+31:Y/1:E+16:Y/11:L/3:F/1:T/1:D+32:C +TRA,fwr3,33,3:S/3:N/2:T/2:E/1:V/1:R/1:Q/1:P+3:S/3:G/2:T/2:N/2:K/1:E/1:D+5:S/3:I/2:E/2:D/1:H/1:G+6:K/3:Q/2:M/1:T/1:R/1:E+8:G/2:Y/2:N/1:Q/1:L+9:F/3:S/1:T/1:N+5:L/3:K/2:G/1:T/1:S/1:Q/1:H+6:A/4:N/2:V/1:S/1:H+4:T/3:I/3:E/2:F/1:R/1:G+3:F/3:A/2:V/2:Q/2:N/2:L+4:I/3:S/3:K/1:T/1:M/1:L/1:H+4:K/3:D/2:S/2:N/2:A/1:T+4:E/3:R/3:D/2:T/1:S/1:G+5:K/4:S/3:T/2:R+7:S/3:T/2:N/1:R/1:H+8:S/4:F/2:I+4:S/4:H/3:T/2:P/1:F+11:L/2:I/1:H+4:V/4:T/3:E/1:Q/1:L/1:H+6:K/3:L/3:I/1:V/1:Q+7:S/4:P/1:Y/1:W/1:K+5:S/3:Q/3:L/1:G/1:D/1:A+6:V/5:S/3:A+7:Q/3:T/3:H/1:A+4:L/3:W/3:T/2:V/1:Q/1:M+6:R/4:S/2:T/1:K/1:E+13:D/1:N+11:S/3:T+11:A/2:G/1:T+6:V/3:K/2:T/2:L/1:M+14:Y+7:F/5:Y/1:N/1:C+14:C 
+TRA,fwr3,34,330:K/65:A/60:G/53:V/52:Q/42:E/28:T/22:N/17:M/15:P/14:R/13:S/1:I+170:E/124:T/101:Q/72:K/71:S/68:G/42:R/26:N/11:H/10:A/6:I/4:D/2:V/2:P/2:L/1:W+156:N/120:S/109:D/101:E/46:H/43:I/32:K/27:Q/22:G/20:V/15:T/14:R/4:L/1:Y/1:P/1:F+330:G/91:Q/82:K/61:E/51:R/37:N/24:S/21:D/12:Y/1:W/1:H/1:A+417:R/209:G/31:M/23:K/13:H/7:S/6:V/3:Q/2:E/1:D+310:F/227:L/81:Y/39:A/24:I/17:V/7:M/4:W/1:T/1:Q/1:C+223:T/165:S/137:E/42:N/41:K/30:R/23:Q/22:H/13:I/4:F/3:Y/3:V/3:A/2:M/1:W+320:A/163:V/74:L/70:S/33:I/27:G/10:F/9:C/4:T/1:W/1:M+277:T/72:E/65:N/65:F/58:L/43:S/30:Q/28:H/20:R/15:V/9:I/9:A/7:D/4:Y/4:W/4:K/2:M+253:L/218:F/55:I/49:T/48:Y/28:V/21:H/14:M/11:S/8:A/5:P/1:N/1:C+253:N/104:D/81:Q/74:V/38:G/34:S/34:A/24:K/21:R/21:I/19:H/5:P/3:T/1:E+368:K/86:S/61:T/46:E/42:R/40:P/22:A/12:N/9:I/7:Q/5:G/4:L/4:D/3:V/2:F/1:M+202:S/150:K/86:T/82:A/51:D/46:E/37:R/18:N/17:G/8:Q/8:P/2:V/2:H/1:M/1:L/1:F+193:E/100:A/94:S/79:R/79:D/43:T/39:K/35:N/16:Q/13:L/7:I/4:V/4:G/3:F/2:M/1:P+287:K/122:R/95:S/77:T/48:Q/34:L/23:G/8:A/6:N/2:M/2:I/2:H/2:F/2:E/1:Y/1:C+300:S/119:H/84:R/74:Y/40:Q/20:F/18:K/14:N/13:D/8:A/7:L/6:T/4:I/3:G/2:C+270:S/249:F/87:L/53:I/31:V/11:G/5:T/4:A/2:N+335:S/183:H/107:T/16:Y/15:N/11:L/10:F/8:Q/8:A/6:G/5:I/4:P/2:R/2:K+692:L/8:F/7:M/2:S/2:P/1:V+290:H/69:R/66:Q/52:V/49:K/40:Y/30:E/26:T/23:N/21:L/20:S/16:I/4:F/2:M/2:A/1:G/1:C+418:I/192:K/83:L/18:V/1:M+191:S/162:T/125:A/73:R/67:P/24:K/14:G/12:Q/12:M/9:I/6:L/5:V/5:H/4:D/2:F/1:W+251:A/217:S/118:D/28:T/23:Q/17:P/14:R/14:E/12:G/7:H/6:V/3:F/1:Y/1:L+309:S/159:V/119:A/73:T/38:L/6:P/5:I/2:M/1:G+521:Q/64:H/30:E/29:T/19:R/19:L/7:S/7:K/5:V/4:N/3:Y/2:I/2:A+211:L/199:P/73:T/49:E/47:A/35:W/23:I/19:G/18:M/16:V/9:H/5:C/4:Q/2:D/1:S/1:R+259:S/143:G/127:E/66:T/42:R/28:A/22:K/12:D/6:V/3:I/2:N/1:Y/1:Q+694:D/7:L/6:H/3:Y/1:N/1:A+529:S/116:A/52:T/7:V/4:L/2:F/1:W/1:P+557:A/138:G/16:T/1:V+278:T/243:V/52:L/35:K/30:I/28:M/18:S/17:E/3:R/3:F/2:Q/2:A/1:N+703:Y/5:F/2:C/1:L/1:D+383:F/153:L/148:Y/18:I/6:H/3:V/1:N+709:C/1:Y/1:W/1:V 
+TRA,fwr3,35,2:K/1:T/1:S/1:Q/1:L+2:E/1:T/1:S/1:I/1:G+2:N/1:K/1:H/1:G/1:D+2:G/1:R/1:Q/1:K/1:D+2:R/1:Y/1:Q/1:G/1:F+2:Y/2:L/1:W/1:T+2:S/2:K/1:T/1:F+2:S/1:R/1:Q/1:K/1:F+2:E/2:A/1:S/1:L+2:L/2:F/1:N/1:K+2:D/1:P/1:L/1:K/1:A+2:S/2:G/1:K/1:E+3:K/1:S/1:Q/1:D+4:E/1:N/1:H+2:R/1:K/1:H/1:E/1:C+3:F/2:R/1:Y+3:Y/2:S/1:H+3:S/1:V/1:Q/1:L+2:T/1:V/1:P/1:N/1:H+2:L/1:W/1:I/1:H/1:F+3:H/1:Q/1:N/1:E+2:L/2:I/1:K/1:D+3:R/1:S/1:P/1:K+2:L/2:D/1:Q/1:K+2:A/1:T/1:S/1:R/1:L+2:Q/2:H/1:F/1:A+2:L/1:Q/1:P/1:I/1:D+2:G/2:E/1:S/1:N+3:D/1:T/1:L/1:F+4:S/2:T+2:G/1:V/1:S/1:I/1:H+4:T/2:V+4:Y/1:Q/1:F+3:F/2:L/1:Q+4:C/1:K/1:G +TRB,cdr1,5,144:S/69:M/62:L/33:K/9:P/9:E/6:W/5:N/4:F/3:D/1:V/1:T/1:I/1:C+217:G/73:N/20:S/14:D/7:Q/6:E/5:R/5:K/1:V+310:H/21:Y/6:D/4:V/4:F/1:P/1:L/1:A+84:N/82:D/34:S/26:T/24:L/23:R/23:E/19:A/14:V/4:Y/4:F/3:I/2:M/2:K/2:G/1:Q/1:H+91:Y/65:S/59:A/37:T/31:R/19:N/9:V/8:G/7:F/6:L/5:H/3:M/2:Q/2:K/2:C/1:E/1:D +TRB,cdr1,6,15:D/14:G/3:N+9:F/6:S/5:T/5:L/2:V/2:A/1:K/1:I/1:E+22:Q/10:S+14:A/7:Y/5:S/5:N/1:T+17:P/11:T/2:I/1:V/1:L+13:T/6:W/6:N/3:S/2:Y/1:R/1:I +TRB,cdr2,4,1:L/1:G+1:R/1:A+2:S+1:T/1:P +TRB,cdr2,5,9:S/5:Y+3:R/3:I/2:V/2:L/2:F/1:S/1:A+4:Y/4:G/2:S/1:T/1:N/1:D/1:A+4:V/4:K/3:I/2:A/1:G+5:G/3:E/2:D/1:V/1:T/1:K/1:H +TRB,cdr2,6,114:S/99:F/88:Y/20:L/4:C/3:H/1:T/1:I+100:Y/69:Q/36:N/27:A/25:V/18:R/12:F/11:S/7:T/7:L/3:I/3:G/3:E/3:D/2:H/1:W/1:P/1:M/1:K+93:N/73:G/46:D/22:E/17:S/16:Y/13:R/9:V/9:A/7:K/6:T/6:I/5:L/5:F/2:H/1:Q+73:E/59:K/40:V/32:Q/23:G/23:A/16:T/16:P/14:N/10:R/7:D/6:I/4:Y/3:S/1:M/1:L/1:H/1:F+61:E/49:G/38:D/35:N/33:K/29:A/23:S/15:P/13:Q/8:R/8:M/6:V/4:L/4:I/3:T/1:H+41:I/37:Q/33:T/33:D/31:V/30:L/30:E/29:P/24:S/17:A/9:R/4:N/4:F/3:M/2:K/2:G/1:Y +TRB,cdr2,7,12:S/6:A/3:F/1:V/1:T/1:L/1:I+18:N/3:Q/1:Y/1:T/1:G/1:E+10:Q/6:E/4:N/3:V/1:R/1:A+16:G/2:E/2:D/1:T/1:R/1:Q/1:N/1:A+15:S/2:D/2:A/1:V/1:T/1:N/1:L/1:F/1:E+9:E/6:K/2:N/2:G/2:D/1:T/1:S/1:P/1:A+16:A/5:V/2:I/1:P/1:M +TRB,cdr2,8,3:S/1:Y/1:Q+4:N/1:Y+3:M/1:E/1:D+3:G/1:K/1:D+3:S/1:I/1:E+3:N/1:L/1:I+3:A/1:T/1:D+3:T/1:R/1:K 
+TRB,cdr2,9,2:L/1:I+2:R/1:Y+3:N+1:K/1:G/1:E+1:G/1:E/1:D+1:P/1:I/1:D+1:V/1:S/1:I+2:E/1:M+2:K/1:E +TRB,fwr1,25,2:D/1:Y/1:P/1:N+4:A/1:T+3:E/1:V/1:K+4:I/1:L+2:Y/1:T/1:L/1:C+4:Q/1:S+3:R/1:V/1:S+2:R/2:P/1:A+2:F/1:R/1:G/1:A+2:L/1:Y/1:R/1:F+3:L/1:P/1:H+2:A/1:V/1:L/1:I+2:T/1:L/1:G/1:E+2:A/1:L/1:K/1:G+2:G/1:R/1:E/1:A+2:R/1:Q/1:G/1:A+2:D/1:Q/1:N/1:K+2:V/1:Q/1:K/1:E+2:T/2:S/1:N+5:L+4:E/1:R+5:C+2:K/2:E/1:N+4:Q/1:A+2:N/1:S/1:M/1:H +TRB,fwr1,26,148:D/68:E/52:G/45:N/19:A/16:H/12:S/5:V/3:P/3:K/2:T/2:I/1:Q/1:L+228:A/56:T/34:S/16:G/14:P/10:Q/9:V/2:L/2:E/2:C/1:M/1:K/1:H/1:D+221:G/40:E/19:L/19:A/14:V/14:T/13:K/11:D/8:Q/7:M/6:R/2:S/1:N/1:I/1:F+317:V/51:I/7:L/1:D/1:A+218:T/75:S/29:I/11:H/10:A/7:E/5:V/5:N/5:L/4:Y/4:F/2:M/1:R/1:K+374:Q/1:R/1:M/1:L+151:T/130:S/20:D/17:N/12:K/10:R/8:W/8:H/7:I/5:F/4:A/2:Y/2:M/1:Q+374:P/2:S/1:L+206:R/84:K/38:S/20:T/13:G/6:A/2:P/1:V/1:Q/1:M/1:L/1:I/1:H/1:E/1:C+164:H/97:Y/56:F/20:R/17:N/8:W/8:T/3:S/2:A/1:V/1:L+147:L/64:K/59:Q/41:R/14:V/11:A/9:E/8:D/4:M/4:H/3:Y/3:I/3:C/2:W/2:T/2:G/1:S+199:V/153:I/18:L/4:F/2:T/1:D+109:T/81:K/46:L/23:V/23:Q/20:C/20:A/19:R/17:I/12:M/5:S/1:G/1:E+80:K/78:G/61:E/33:T/29:R/25:Q/22:A/16:S/11:P/8:V/4:L/3:N/2:M/2:I/2:H/1:D+112:T/111:R/55:K/34:M/16:S/14:V/14:E/7:I/6:A/3:P/2:L/1:Q/1:N/1:F+310:G/22:K/13:R/9:A/8:T/7:E/3:S/3:D/2:N+220:Q/46:K/20:E/19:T/17:R/16:S/13:N/9:G/8:A/5:D/2:H/1:W/1:L+72:S/59:K/55:Q/35:N/34:R/27:E/23:D/22:T/20:P/20:A/4:G/2:Y/2:M/1:L/1:H+195:V/45:A/40:L/26:M/18:I/17:K/10:T/9:E/8:G/5:R/2:S/2:F+218:T/47:S/24:K/23:A/12:I/9:M/9:L/9:E/7:V/6:N/5:R/2:Y/2:F/2:D/1:Q/1:H+294:L/29:M/26:I/14:V/14:F+139:R/50:E/37:Q/36:K/35:T/20:S/13:D/12:W/10:N/6:Y/6:G/4:H/3:L/2:M/2:I/2:A+375:C/1:Y/1:W+87:S/58:D/51:E/45:T/35:A/27:V/16:H/14:R/13:I/12:Q/9:N/3:K/3:F/2:Y/1:M/1:G+178:P/151:Q/17:V/11:S/10:L/4:T/3:A/1:I/1:H/1:C+96:I/87:D/35:T/28:E/27:V/23:N/22:H/19:K/13:R/8:A/7:M/6:L/3:Q/2:S/1:Y 
+TRB,fwr2,17,173:M/111:V/85:L/9:F/3:I/1:S+253:Y/61:F/44:S/7:L/7:C/3:V/2:H/2:A/1:R/1:N/1:D+379:W/1:S/1:R/1:L+378:Y/2:N/1:H/1:F+218:R/120:Q/29:K/5:L/4:H/3:W/1:Y/1:P/1:C+358:Q/12:K/8:R/4:H+133:D/67:A/35:S/33:T/31:K/20:V/15:I/14:L/10:N/10:F/3:R/3:Q/3:M/2:G/1:P/1:H/1:E+149:L/140:P/24:A/17:Q/16:S/9:M/8:T/7:V/5:R/2:G/1:W/1:K/1:I/1:F/1:E+261:G/28:K/28:E/18:Q/10:S/8:M/7:N/6:A/5:R/4:D/3:V/2:P/1:T/1:I+178:Q/46:K/39:L/32:H/26:M/22:E/15:G/13:R/5:N/2:S/1:T/1:P/1:F/1:A+258:G/30:E/23:P/20:S/20:A/8:Q/7:L/5:K/4:D/3:R/2:T/2:F+228:L/115:P/15:F/10:M/6:V/4:I/1:W/1:S/1:K/1:D+103:Q/82:E/80:K/78:R/9:T/7:M/5:I/5:H/4:L/3:V/2:A/1:N/1:G/1:F/1:D+210:L/137:F/12:I/9:Q/5:V/3:S/2:R/1:Y/1:W/1:M/1:A+178:L/137:I/57:M/4:V/2:Y/2:F/1:P/1:K+83:F/58:Y/55:H/53:V/52:I/29:A/21:T/12:M/11:L/5:S/1:R/1:Q/1:K+195:Y/42:Q/32:S/26:T/20:E/13:A/12:F/11:L/9:I/6:D/5:V/5:H/2:W/2:R/1:N/1:C +TRB,fwr3,35,2:I/1:V/1:R/1:E/1:A+1:Y/1:Q/1:K/1:H/1:E/1:A+1:Q/1:N/1:K/1:G/1:F/1:D+2:F/1:T/1:R/1:P/1:M+3:E/2:G/1:S+2:D/1:Q/1:N/1:M/1:E+1:R/1:Q/1:N/1:M/1:G/1:C+3:F/1:Y/1:S/1:L+3:S/1:R/1:Q/1:D+3:A/1:S/1:R/1:L+1:S/1:Q/1:K/1:I/1:F/1:E+2:C/1:S/1:R/1:M/1:H+4:P/1:Y/1:G+2:N/2:K/1:S/1:Q+2:N/1:T/1:Q/1:G/1:A+4:S/1:T/1:A+2:P/1:Y/1:L/1:G/1:E+3:C/1:S/1:L/1:D+3:S/1:T/1:I/1:E+5:L/1:T+2:K/2:D/1:N/1:E+3:I/1:P/1:M/1:L+1:T/1:R/1:N/1:H/1:G/1:E+4:S/1:P/1:E+2:T/2:A/1:V/1:L+2:Q/2:E/1:P/1:G+3:L/1:P/1:D/1:A+5:G/1:M+5:D/1:Y+5:S/1:L+5:A/1:V+2:L/1:V/1:S/1:R/1:M+4:Y/1:R/1:F+4:L/2:F+5:C/1:N +TRB,fwr3,36,2:K/1:L/1:I/1:E+3:E/1:G/1:A+1:V/1:P/1:H/1:E/1:A+2:R/1:T/1:K/1:G+2:S/1:L/1:F/1:E+2:L/1:K/1:I/1:F+3:Q/2:P+2:G/1:Y/1:R/1:N+2:F/2:A/1:S+2:D/1:S/1:E/1:A+2:Y/2:A/1:G+1:S/1:Q/1:L/1:K/1:A+2:A/1:S/1:R/1:C+3:T/2:P+2:R/1:S/1:Q/1:K+2:V/1:N/1:E/1:D+2:S/1:T/1:P/1:G+2:D/1:Q/1:P/1:G+2:T/1:F/1:D/1:C+2:E/1:V/1:T/1:S+4:L/1:P+2:R/2:E/1:S+2:L/1:S/1:I/1:G+1:Y/1:Q/1:N/1:L/1:K+2:V/2:S/1:K+2:A/1:V/1:T/1:L+2:Q/2:N/1:S+1:V/1:R/1:M/1:L/1:A+2:S/1:T/1:N/1:G+2:Q/2:N/1:D+2:S/1:T/1:R/1:G+2:R/1:G/1:D/1:A+2:T/1:W/1:R/1:F+2:Y/2:L/1:S+2:L/2:F/1:Y+5:C 
+TRB,fwr3,37,76:T/35:R/18:S/18:I/16:V/15:N/13:Y/10:Q/6:H/6:F/4:P/4:L/4:A/2:M/2:K/2:D/1:E+74:E/30:D/19:Q/18:N/18:G/16:A/15:S/11:I/8:V/6:T/6:R/5:L/4:K/1:Y/1:H+131:K/29:N/26:E/22:R/5:Q/5:D/4:I/3:S/3:P/1:V/1:T/1:L/1:A+167:G/21:E/12:A/6:N/5:T/5:S/4:P/3:V/3:K/3:D/2:Q/1:R+74:D/58:E/53:N/20:S/11:T/6:Q/3:F/2:Y/2:H/2:A/1:V+113:V/38:I/26:F/20:L/10:T/8:M/6:A/3:R/3:N/1:P/1:K/1:G/1:E/1:D+151:P/57:S/9:A/5:L/4:T/2:K/2:F/1:Q/1:H+89:D/37:E/35:S/24:N/20:G/7:Q/6:A/3:Y/3:P/3:K/2:R/1:T/1:L/1:H+108:G/92:R/16:N/5:E/4:K/3:Q/3:H/1:D+109:F/106:Y/7:S/5:L/1:V/1:R/1:I/1:H/1:A+132:S/46:N/13:T/8:R/7:Q/7:K/4:V/4:L/3:H/2:D/2:A/1:W/1:I/1:F/1:E+99:V/59:A/35:G/27:P/5:S/4:I/2:T/1:L+125:S/30:E/20:H/19:Q/14:K/12:R/4:T/3:L/3:D/1:F/1:A+129:R/68:Q/18:C/8:S/4:H/2:T/1:V/1:M/1:E+67:F/61:P/37:S/16:E/14:L/14:K/6:T/5:I/4:Q/3:A/2:D/1:N/1:G/1:C+68:S/37:N/28:K/25:D/22:E/18:P/11:Q/8:R/6:T/4:A/3:H/1:L/1:G+57:K/54:D/50:T/29:N/14:S/10:Q/5:R/5:L/3:I/1:V/1:P/1:M/1:G/1:E+73:E/45:S/24:Y/21:A/14:F/9:R/8:G/8:D/6:T/6:P/5:K/4:Q/4:L/2:N/2:C/1:M+45:S/36:D/35:H/25:N/18:R/16:E/14:L/11:F/9:Q/7:K/4:P/3:Y/3:T/3:C/2:G/1:A+121:F/69:S/29:L/11:C/1:N/1:I+73:P/59:E/31:S/16:L/10:I/9:N/7:Y/7:Q/7:F/6:H/4:T/1:R/1:D/1:A+211:L/21:M+69:T/58:N/25:H/17:R/16:S/11:I/6:D/5:M/4:V/4:K/4:G/3:Y/3:L/3:E/1:Q/1:P/1:F/1:A+131:L/64:V/19:I/8:S/8:M/1:T/1:G+71:E/40:S/24:T/19:K/16:N/15:R/9:L/9:D/8:A/5:Q/5:P/4:H/3:G/2:V/1:M/1:I+142:S/46:A/12:L/9:K/7:T/6:P/6:F/2:N/1:Y/1:R+104:A/96:L/14:T/8:V/7:P/3:S+69:E/31:S/21:A/20:Q/16:V/16:L/14:T/11:N/9:K/7:H/7:G/5:D/4:I/2:R+81:P/81:L/18:T/15:R/9:S/7:K/6:A/4:V/4:H/3:I/2:M/1:F/1:E+90:S/43:G/31:N/21:E/19:T/13:Q/4:A/3:M/3:K/3:D/1:R/1:F+111:D/105:Q/5:H/4:P/3:N/1:Y/1:R/1:E/1:A+113:T/111:S/5:A/1:R/1:M/1:L+120:A/98:S/12:G/1:P/1:C+130:V/68:L/12:F/9:M/2:T/2:S/2:R/2:Q/2:A/1:W/1:P/1:I+226:Y/3:F/1:V/1:N/1:C+128:L/99:F/2:S/1:V/1:I/1:H+229:C/1:L/1:H/1:A 
+TRB,fwr3,38,21:L/21:I/18:T/17:A/16:V/8:S/8:Q/8:P/4:M/3:R/3:K/3:F/3:E/2:N/2:D/1:Y/1:G+77:D/26:E/19:Y/4:S/4:Q/3:V/3:I/1:N/1:H/1:G+49:E/40:K/33:D/4:V/4:Q/2:S/2:N/1:Y/1:T/1:R/1:L/1:A+85:S/17:T/12:Q/6:G/4:N/4:A/3:I/2:V/2:L/2:K/1:H/1:E+84:G/24:E/10:Q/5:N/5:D/3:S/3:R/2:M/2:L/1:K+45:M/43:L/18:F/13:V/9:I/3:T/3:P/2:R/1:Y/1:H/1:A+80:P/12:F/9:T/8:I/5:V/5:S/4:A/3:R/3:L/3:K/2:H/2:E/1:Y/1:N/1:D+54:K/20:S/20:N/14:G/7:E/5:Q/4:R/4:D/3:I/2:T/2:H/2:A/1:Y/1:M+67:D/22:E/18:A/10:K/7:G/4:Q/3:S/3:N/2:T/2:R/1:P+92:R/19:K/11:H/5:Q/3:D/2:V/2:G/2:F/1:W/1:N/1:E+127:F/5:Y/3:S/1:V/1:L/1:G/1:A+95:S/17:P/7:L/6:F/5:Q/3:T/3:G/1:R/1:K/1:D+96:A/20:I/10:V/4:P/2:T/2:S/2:R/1:Y/1:K/1:G+64:E/17:Q/14:K/11:N/10:S/9:T/6:V/4:G/1:R/1:I/1:F/1:A+76:R/23:C/16:M/12:H/8:F/1:W/1:V/1:Q/1:I+97:P/11:T/11:L/8:S/4:V/3:R/2:A/1:Y/1:H/1:E+30:E/27:N/20:K/20:D/14:G/8:T/8:S/7:Q/3:P/2:R+61:G/21:N/19:L/8:S/8:A/7:E/4:D/3:T/2:Y/2:V/2:K/1:R/1:Q+79:S/27:T/10:V/5:L/5:G/4:A/3:M/2:H/1:P/1:N/1:I/1:D+29:F/20:P/19:V/19:S/11:Y/8:L/7:R/7:D/5:H/4:N/4:I/4:E/1:K/1:C+98:S/20:C/9:L/7:F/3:A/1:T/1:P+84:T/25:S/10:I/6:Y/4:E/2:V/2:R/2:H/2:A/1:G/1:D+126:L/7:V/4:M/1:W/1:G+64:K/24:T/21:E/7:R/7:H/5:Q/3:N/3:M/2:G/1:Y/1:S/1:A+102:I/32:V/2:L/1:N/1:M/1:F+87:Q/11:T/10:S/6:K/5:H/4:R/4:L/4:D/4:A/2:E/1:V/1:P+47:P/30:S/28:R/16:N/8:A/2:Q/2:H/2:C/1:V/1:T/1:L/1:D+54:T/44:A/17:V/11:S/8:L/2:R/1:M/1:I/1:C+67:E/24:Q/15:K/8:S/7:D/5:H/4:T/2:G/2:A/1:Y/1:V/1:R/1:P/1:N+58:P/39:L/28:Q/5:A/4:S/2:R/1:V/1:K/1:E+61:G/38:E/15:R/8:Q/4:A/3:K/3:D/2:T/2:N/1:V/1:S/1:I+132:D/5:R/1:Q/1:N+124:S/8:T/2:L/2:I/1:R/1:N/1:A+109:A/16:S/9:G/4:L/1:R+64:V/29:L/15:M/9:T/9:F/4:A/3:S/3:I/1:Y/1:E/1:D+129:Y/5:C/2:F/1:S/1:L/1:I+75:L/48:F/5:I/4:T/3:V/3:R/1:Y+135:C/2:Y/2:R diff --git a/enclone_core/src/median.rs b/enclone_core/src/median.rs new file mode 100644 index 000000000..c8915f6a1 --- /dev/null +++ b/enclone_core/src/median.rs @@ -0,0 +1,33 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +pub fn median(x: &[usize]) -> f64 { + let h = x.len() / 2; + if x.len() % 2 == 1 { + x[h] as f64 + } else { + (x[h - 1] + x[h]) as f64 / 2.0 + } +} + +pub fn rounded_median(x: &[usize]) -> usize { + let h = x.len() / 2; + if x.len() % 2 == 1 { + x[h] + } else { + let s = x[h - 1] + x[h]; + if s % 2 == 0 { + s / 2 + } else { + s / 2 + 1 + } + } +} + +pub fn median_f64(x: &[f64]) -> f64 { + let h = x.len() / 2; + if x.len() % 2 == 1 { + x[h] + } else { + (x[h - 1] + x[h]) / 2.0 + } +} diff --git a/enclone_core/src/opt_d.rs b/enclone_core/src/opt_d.rs new file mode 100644 index 000000000..cd258ee1a --- /dev/null +++ b/enclone_core/src/opt_d.rs @@ -0,0 +1,240 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Find the optimal D segment, the runner up, and the delta between the scores. This uses +// the donor V and J segments that are assigned to the clonotype. Note that the optimal D +// segment may be null. This is obvious from looking at data. + +use crate::align_to_vdj_ref::{align_to_vdj_ref, match_bit_score, zero_one}; +use enclone_proto::types::DonorReferenceItem; +use std::cmp::min; +use vdj_ann::refx::RefData; + +pub fn vflank(_seq: &[u8], vref: &[u8]) -> usize { + let mut flank = 13; + if flank > vref.len() { + flank = vref.len(); + } + flank +} + +pub fn jflank(seq: &[u8], jref: &[u8]) -> usize { + let flank = 13; + if flank > jref.len() { + return jref.len(); + } + + // Let start be the first position on the J gene where there is a perfect match of length at + // least five to the contig. + + const MATCHLEN: usize = 5; + let mut start = 0; + for i in 0..=jref.len() - MATCHLEN { + let mut matchlen = 0; + for j in 0..MATCHLEN { + if seq[seq.len() - jref.len() + i + j] != jref[i + j] { + break; + } + matchlen += 1; + } + if matchlen == MATCHLEN { + start = i; + break; + } + } + + // Add start to the flank, so long as that's possible. 
+ + min(flank + start, jref.len()) +} + +pub fn evaluate_d( + tig: &[u8], + vref: &[u8], + seq_start: usize, + ds: &[usize], + jref: &[u8], + refdata: &RefData, + jscore_match: i32, + jscore_mismatch: i32, + jscore_gap_open: i32, + jscore_gap_extend: i32, + jscore_bits_multiplier: f64, +) -> (Vec<bio_edit::alignment::AlignmentOperation>, f64) { + // Start to build reference concatenation. First append the V segment. + + let mut concat = Vec::<u8>::new(); + let vstart = vref.len() - vflank(tig, vref); + let vref = &vref[vstart..vref.len()]; + concat.extend(vref); + + // Append the D segment or segments. + + let mut dref = Vec::<u8>::new(); + let mut d2ref = Vec::<u8>::new(); + let mut drefname = String::new(); + for (j, &d) in ds.iter().enumerate() { + if j == 0 { + dref = refdata.refs[d].to_ascii_vec(); + } else if j == 1 { + d2ref = refdata.refs[d].to_ascii_vec(); + } + if j > 0 { + drefname += ":"; + } + drefname += refdata.name[d].as_str(); + } + concat.extend(&dref); + concat.extend(&d2ref); + + // Append the J segment. + + let jend = jflank(tig, jref); + + // Align the V..J sequence on the contig to the reference concatenation. 
+ + let mut seq_end = tig.len() - (jref.len() - jend); + if seq_end <= seq_start { + seq_end = tig.len(); // bug fix for problem found by customer, couldn't reproduce internally + } + let seq = &tig[seq_start..seq_end]; + let jref = &jref[0..jend]; + concat.extend(jref); + let (ops, count) = align_to_vdj_ref( + seq, + vref, + &dref, + &d2ref, + jref, + &drefname, + true, + jscore_match, + jscore_mismatch, + jscore_gap_open, + jscore_gap_extend, + jscore_bits_multiplier, + ); + (ops, count) +} + +pub fn opt_d( + v_ref_id: usize, // ex.share[mid].v_ref_id + j_ref_id: usize, // ex.share[mid].j_ref_id + tig: &[u8], // ex.share[mid].seq_del + annv: &[(i32, i32, i32, i32, i32)], // ex.share[mid].annv + cdr3_aa: &str, // ex.share[mid].cdr3_aa + refdata: &RefData, + dref: &[DonorReferenceItem], + scores: &mut Vec<f64>, + dsx: &mut Vec<Vec<usize>>, + jscore_match: i32, + jscore_mismatch: i32, + jscore_gap_open: i32, + jscore_gap_extend: i32, + jscore_bits_multiplier: f64, + v_alt: Option<usize>, +) { + let mut comp = 1000000.0; + + // Go through every D segment, or possibly every concatenation of D segments. 
+ + let mut todo = vec![vec![]]; + for i in refdata.ds.iter() { + todo.push(vec![*i]); + } + let mut ds = Vec::<Vec<usize>>::new(); + let mut counts = Vec::<f64>::new(); + let mut good_d = Vec::<usize>::new(); + let mut vref = refdata.refs[v_ref_id].to_ascii_vec(); + if let Some(v_alt) = v_alt { + vref = dref[v_alt].nt_sequence.clone(); + } + let vstart = vref.len() - vflank(tig, &vref); + let mut seq_start = vstart as isize; + // probably not exactly right + if annv.len() > 1 { + let q1 = annv[0].0 + annv[0].1; + let q2 = annv[1].0; + + seq_start += q1 as isize - q2 as isize; + } + let jref = refdata.refs[j_ref_id].to_ascii_vec(); + const MIN_BITS_FOR_D2: f64 = 14.0; + for di in &todo { + let (ops, count) = evaluate_d( + tig, + &vref, + seq_start as usize, + di, + &jref, + refdata, + jscore_match, + jscore_mismatch, + jscore_gap_open, + jscore_gap_extend, + jscore_bits_multiplier, + ); + counts.push(count); + if !di.is_empty() { + let drefx = refdata.refs[di[0]].to_ascii_vec(); + let vstart = vref.len() - vflank(tig, &vref); + let vref = vref[vstart..vref.len()].to_vec(); + let zos = zero_one(&ops, vref.len(), vref.len() + drefx.len()); + let bits = match_bit_score(&zos); + if bits >= MIN_BITS_FOR_D2 { + good_d.push(di[0]); + } + } + ds.push(di.clone()); + if count > comp { + comp = count; + } + } + if cdr3_aa.len() >= 20 { + todo.clear(); + for &i1 in &good_d { + for &i2 in &good_d { + todo.push(vec![i1, i2]); + } + } + for di in &todo { + let (_ops, count) = evaluate_d( + tig, + &vref, + seq_start as usize, + di, + &jref, + refdata, + jscore_match, + jscore_mismatch, + jscore_gap_open, + jscore_gap_extend, + jscore_bits_multiplier, + ); + counts.push(count); + ds.push(di.clone()); + if count > comp { + comp = count; + } + } + } + + // Reverse sort sync (counts, ds). 
+ + let mut counts_ds = counts + .iter() + .zip(ds.iter()) + .map(|(&c, d)| (c, d.clone())) + .collect::<Vec<_>>(); + counts_ds.sort_by(|a, b| b.partial_cmp(a).unwrap()); // reverse sort + counts.clear(); + ds.clear(); + for count in counts_ds { + counts.push(count.0); + ds.push(count.1); + } + + // Done. + + *scores = counts; + *dsx = ds; +} diff --git a/enclone_core/src/packing.rs b/enclone_core/src/packing.rs new file mode 100644 index 000000000..44310472d --- /dev/null +++ b/enclone_core/src/packing.rs @@ -0,0 +1,344 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Unoptimized functions for packing and unpacking some data structures. + +use zstd::bulk::{compress, decompress}; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Compression and decompression. We use zstd rather than gzip because when tested it yielded +// slightly smaller compression size and much lower compression time. Note that using zstd +// appears to add about 4 MB to the executable size. If this is really true, it's not obvious +// that it's a good tradeoff. 
+ +pub fn compress_bytes(x: &[u8]) -> Vec<u8> { + compress(x, 0).unwrap() +} + +pub fn uncompress_bytes(x: &[u8], uncompressed_size: usize) -> Vec<u8> { + decompress(x, uncompressed_size).unwrap() +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn u32_bytes(x: usize) -> [u8; 4] { + (x as u32).to_le_bytes() +} + +pub fn u32_from_bytes(x: &[u8]) -> u32 { + u32::from_le_bytes([x[0], x[1], x[2], x[3]]) +} + +pub fn f32_bytes(x: usize) -> [u8; 4] { + (x as f32).to_le_bytes() +} + +pub fn f32_from_bytes(x: &[u8]) -> f32 { + f32::from_le_bytes([x[0], x[1], x[2], x[3]]) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_string(x: &String) -> Vec<u8> { + let mut bytes = Vec::<u8>::new(); + bytes.extend(u32_bytes(x.len())); + bytes.extend(x.as_bytes()); + bytes +} + +pub fn restore_string(x: &[u8], pos: &mut usize) -> Result<String, ()> { + if *pos + 4 > x.len() { + return Err(()); + } + let k = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + if *pos + k > x.len() { + return Err(()); + } + let s = String::from_utf8(x[*pos..*pos + k].to_vec()); + if s.is_err() { + return Err(()); + } + *pos += k; + Ok(s.unwrap()) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_vec_string(x: &[String]) -> Vec<u8> { + let mut bytes = Vec::<u8>::with_capacity(4 + 5 * x.len()); + bytes.extend(u32_bytes(x.len())); + for xi in x { + bytes.extend(u32_bytes(xi.len())); + bytes.extend(xi.as_bytes()); + } + bytes +} + +pub fn restore_vec_string(x: &[u8], pos: &mut usize) -> Result<Vec<String>, ()> { + if *pos + 4 > x.len() { + return Err(()); + } + let n = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + let mut y = vec![String::new(); n]; + for yj in &mut y { + if *pos + 4 > x.len() { + return Err(()); + } + let k = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + 
if *pos + k > x.len() { + return Err(()); + } + let s = String::from_utf8(x[*pos..*pos + k].to_vec()); + match s { + Err(_) => return Err(()), + Ok(s) => { + *pos += k; + *yj = s; + } + } + } + Ok(y) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_vec_string_comp(x: &[String]) -> Vec<u8> { + let z = save_vec_string(x); + let mut y = compress_bytes(&z); + let mut bytes = Vec::<u8>::with_capacity(8 + y.len()); + bytes.extend(u32_bytes(y.len())); + bytes.extend(u32_bytes(z.len())); + bytes.append(&mut y); + bytes +} + +pub fn restore_vec_string_comp(x: &[u8], pos: &mut usize) -> Result<Vec<String>, ()> { + if *pos + 8 > x.len() { + return Err(()); + } + let n = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + let uncompressed_size = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + if *pos + n > x.len() { + return Err(()); + } + let uncomp = uncompress_bytes(&x[*pos..*pos + n], uncompressed_size); + *pos += n; + let mut posx = 0; + restore_vec_string(&uncomp, &mut posx) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_vec_vec_string(x: &[Vec<String>]) -> Vec<u8> { + let mut bytes = Vec::<u8>::with_capacity(4 + 5 * x.len()); + bytes.extend(u32_bytes(x.len())); + for xi in x { + bytes.append(&mut save_vec_string(xi)); + } + bytes +} + +pub fn restore_vec_vec_string(x: &[u8], pos: &mut usize) -> Result<Vec<Vec<String>>, ()> { + if *pos + 4 > x.len() { + return Err(()); + } + let n = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + let mut y = vec![Vec::<String>::new(); n]; + for yj in &mut y { + *yj = restore_vec_string(x, pos)?; + } + Ok(y) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_vec_vec_u8(x: &Vec<Vec<u8>>) -> Vec<u8> { + let mut bytes = Vec::<u8>::with_capacity(4 + 5 * x.len()); + bytes.extend(u32_bytes(x.len())); 
+ for vi in x { + bytes.extend(u32_bytes(vi.len())); + bytes.extend(vi); + } + bytes +} + +pub fn restore_vec_vec_u8(x: &[u8], pos: &mut usize) -> Result<Vec<Vec<u8>>, ()> { + if *pos + 4 > x.len() { + return Err(()); + } + let n = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + let mut y = vec![Vec::<u8>::new(); n]; + for yj in &mut y { + if *pos + 4 > x.len() { + return Err(()); + } + let k = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + if *pos + k > x.len() { + return Err(()); + } + *yj = x[*pos..*pos + k].to_vec(); + *pos += k; + } + Ok(y) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_vec_vec_u32(x: &[Vec<u32>]) -> Vec<u8> { + let mut bytes = Vec::<u8>::with_capacity(4 + 8 * x.len()); + bytes.extend(u32_bytes(x.len())); + for vi in x { + bytes.extend(u32_bytes(vi.len())); + for xj in vi { + bytes.extend(xj.to_le_bytes()); + } + } + bytes +} + +pub fn restore_vec_vec_u32(x: &[u8], pos: &mut usize) -> Result<Vec<Vec<u32>>, ()> { + if *pos + 4 > x.len() { + return Err(()); + } + let n = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + let mut y = vec![Vec::<u32>::new(); n]; + for yj in &mut y { + if *pos + 4 > x.len() { + return Err(()); + } + let k = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + if *pos + 4 * k > x.len() { + return Err(()); + } + yj.reserve(4 * k); + for _ in 0..k { + yj.push(u32_from_bytes(&x[*pos..*pos + 4])); + *pos += 4; + } + } + Ok(y) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_vec_bool(x: &Vec<bool>) -> Vec<u8> { + let mut bytes = Vec::<u8>::with_capacity(4 + x.len()); + bytes.extend(u32_bytes(x.len())); + for &xi in x { + bytes.push(u8::from(xi)); + } + bytes +} + +pub fn restore_vec_bool(x: &[u8], pos: &mut usize) -> Result<Vec<bool>, ()> { + if *pos + 4 > x.len() { + return Err(()); + } + let n = u32_from_bytes(&x[*pos..*pos + 4]) as 
usize; + *pos += 4; + if *pos + n > x.len() { + return Err(()); + } + let mut y = vec![false; n]; + for (yj, &xj) in y[..n].iter_mut().zip(x[*pos..].iter()) { + *yj = xj == 1; + *pos += 1; + } + Ok(y) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_vec_u32(x: &[u32]) -> Vec<u8> { + let mut bytes = Vec::<u8>::with_capacity(4 * (x.len() + 1)); + bytes.extend(u32_bytes(x.len())); + for &xi in x { + bytes.extend(xi.to_le_bytes()); + } + bytes +} + +pub fn restore_vec_u32(x: &[u8], pos: &mut usize) -> Result<Vec<u32>, ()> { + if *pos + 4 > x.len() { + return Err(()); + } + let n = u32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + if *pos + 4 * n > x.len() { + return Err(()); + } + let mut y = vec![0; n]; + for yj in &mut y { + *yj = u32_from_bytes(&x[*pos..*pos + 4]); + *pos += 4; + } + Ok(y) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_u32(x: u32) -> [u8; 4] { + x.to_le_bytes() +} + +pub fn restore_u32(x: &[u8], pos: &mut usize) -> Result<u32, ()> { + if *pos + 4 > x.len() { + return Err(()); + } + let y = u32_from_bytes(&x[*pos..*pos + 4]); + *pos += 4; + Ok(y) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_bool(x: bool) -> [u8; 1] { + [x as u8] +} + +pub fn restore_bool(x: &[u8], pos: &mut usize) -> Result<bool, ()> { + if *pos + 1 > x.len() { + return Err(()); + } + let y = x[*pos] != 0; + *pos += 1; + Ok(y) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn save_vec_f32(x: &[f32]) -> Vec<u8> { + let mut bytes = Vec::<u8>::with_capacity(4 * (x.len() + 1)); + bytes.extend(f32_bytes(x.len())); + for &xi in x { + bytes.extend(xi.to_le_bytes()); + } + bytes +} + +pub fn restore_vec_f32(x: &[u8], pos: &mut usize) -> Result<Vec<f32>, ()> { + if *pos + 4 > x.len() { + return 
Err(()); + } + let n = f32_from_bytes(&x[*pos..*pos + 4]) as usize; + *pos += 4; + if *pos + 4 * n > x.len() { + return Err(()); + } + let mut y = vec![0.0; n]; + for yj in &mut y[..n] { + *yj = f32_from_bytes(&x[*pos..*pos + 4]); + *pos += 4; + } + Ok(y) +} diff --git a/enclone_core/src/print_tools.rs b/enclone_core/src/print_tools.rs index 511e9869c..c0da88fa9 100644 --- a/enclone_core/src/print_tools.rs +++ b/enclone_core/src/print_tools.rs @@ -1,34 +1,9 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. -use ansi_escape::*; -use io_utils::*; +use ansi_escape::{emit_end_escape, print_color}; +use io_utils::fwrite; use std::io::Write; -use string_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Extract the @font-face content from the current css file. - -pub fn font_face_in_css() -> String { - let f = include_str!["../../pages/enclone_css_v2.css"]; - let mut x = String::new(); - let mut in_font_face = false; - let mut count = 0; - for line in f.lines() { - if line.starts_with("@font-face") { - in_font_face = true; - count += 1; - } - if in_font_face { - x += &format!("{}\n", line); - } - if line == "}" { - in_font_face = false; - } - } - assert_eq!(count, 2); // because there are two fonts: regular and bold - x -} +use string_utils::strme; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ @@ -50,7 +25,7 @@ pub fn emit_codon_color_escape(c: &[u8], log: &mut Vec<u8>) { } else if c[i] == b'T' { s += 3; } else { - panic!("Illegal codon: \"{}\".", strme(&c)); + panic!("Illegal codon: \"{}\".", strme(c)); } } } @@ -60,36 +35,30 @@ pub fn emit_codon_color_escape(c: &[u8], log: &mut Vec<u8>) { // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -pub fn color_by_property(c: &[u8], mut log: &mut Vec<u8>) { - for i in 0..c.len() { 
+pub fn color_by_property(c: &[u8], log: &mut Vec<u8>) { + for &ci in c { let mut color = 7; - if c[i] == b'A' - || c[i] == b'G' - || c[i] == b'I' - || c[i] == b'L' - || c[i] == b'P' - || c[i] == b'V' - { + if ci == b'A' || ci == b'G' || ci == b'I' || ci == b'L' || ci == b'P' || ci == b'V' { color = 0; - } else if c[i] == b'F' || c[i] == b'W' || c[i] == b'Y' { + } else if ci == b'F' || ci == b'W' || ci == b'Y' { color = 1; - } else if c[i] == b'D' || c[i] == b'E' { + } else if ci == b'D' || ci == b'E' { color = 2; - } else if c[i] == b'R' || c[i] == b'H' || c[i] == b'K' { + } else if ci == b'R' || ci == b'H' || ci == b'K' { color = 3; - } else if c[i] == b'S' || c[i] == b'T' { + } else if ci == b'S' || ci == b'T' { color = 4; - } else if c[i] == b'C' || c[i] == b'M' { + } else if ci == b'C' || ci == b'M' { color = 5; - } else if c[i] == b'N' || c[i] == b'Q' { + } else if ci == b'N' || ci == b'Q' { color = 6; } if color < 7 { print_color(color, log); } - fwrite!(log, "{}", c[i] as char); + fwrite!(log, "{}", ci as char); if color < 7 { - emit_end_escape(&mut log); + emit_end_escape(log); } } } diff --git a/enclone_core/src/set_speakers.rs b/enclone_core/src/set_speakers.rs new file mode 100644 index 000000000..6ffa43cd5 --- /dev/null +++ b/enclone_core/src/set_speakers.rs @@ -0,0 +1,129 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +use crate::allowed_vars::{CVARS_ALLOWED, CVARS_ALLOWED_PCELL, LVARS_ALLOWED}; +use crate::defs::EncloneControl; +use string_utils::TextUtils; +use vector_utils::bin_member; + +// Define the set "parseable_fields" of fields that could occur in parseable output. +// +// The overlap with code in proc_args_check.rs is not nice. + +pub fn set_speakers(ctl: &EncloneControl, parseable_fields: &mut Vec<String>, max_chains: usize) { + // Make some abbreviations. + + let lvars = &ctl.clono_print_opt.lvars; + + // Define parseable output columns. 
The entire machinery for parseable output is controlled + // by macros that begin with "speak". + + let pcols_sort = &ctl.parseable_opt.pcols_sort; + macro_rules! speaker { + ($var:expr) => { + if ctl.parseable_opt.pcols.is_empty() || bin_member(&pcols_sort, &$var.to_string()) { + parseable_fields.push($var.to_string()); + } + }; + } + let mut have_gex = false; + for i in 0..ctl.origin_info.gex_path.len() { + if !ctl.origin_info.gex_path[i].is_empty() { + have_gex = true; + } + } + let mut all_lvars = lvars.clone(); + for x in LVARS_ALLOWED { + if !have_gex + && (x == "gex" + || x.starts_with("gex_") + || x.ends_with("_g") + || x.ends_with("_g_μ") + || x == "n_gex_cell" + || x == "n_gex" + || x == "n_b" + || x == "clust" + || x == "type" + || x == "entropy" + || x == "cred" + || x == "cred_cell") + { + continue; + } + if !lvars.contains(&x.to_string()) { + all_lvars.push(x.to_string()); + } + } + for x in all_lvars.iter() { + if (*x == "sec" || *x == "mem") && !ctl.gen_opt.using_secmem { + continue; + } + speaker!(x); + } + + // Define chain variables for parseable output. + + macro_rules! 
speakerc { + ($col:expr, $var:expr) => { + let varc = format!("{}{}", $var, $col + 1); + if ctl.parseable_opt.pcols.is_empty() || bin_member(&pcols_sort, &varc) { + parseable_fields.push(format!("{}{}", $var, $col + 1)); + } + }; + } + let pchains = if ctl.parseable_opt.pchains == "max" { + max_chains + } else { + ctl.parseable_opt.pchains.force_usize() + }; + for col in 0..pchains { + for x in CVARS_ALLOWED { + speakerc!(col, x); + } + if ctl.parseable_opt.pbarcode { + for x in CVARS_ALLOWED_PCELL { + speakerc!(col, x); + } + } + for x in [ + "var_indices_dna", + "var_indices_aa", + "share_indices_dna", + "share_indices_aa", + "seq", + "vj_seq", + "vj_seq_nl", + "vj_aa", + "vj_aa_nl", + "var_aa", + ] { + speakerc!(col, x); + } + for pcol in pcols_sort { + if pcol.starts_with('q') && pcol.ends_with(&format!("_{}", col + 1)) { + let x = pcol.after("q").rev_before("_"); + if x.parse::<usize>().is_ok() { + parseable_fields.push(pcol.clone()); + } + } + } + } + + // Define more lead variables for parseable output. + + speaker!("group_id"); + speaker!("group_ncells"); + speaker!("clonotype_id"); + speaker!("exact_subclonotype_id"); + speaker!("barcodes"); + for x in ctl.origin_info.dataset_list.iter() { + if !x.is_empty() { + speaker!(&format!("{x}_barcodes")); + } + } + if ctl.parseable_opt.pbarcode { + speaker!("barcode"); + for x in ctl.origin_info.dataset_list.iter() { + speaker!(&format!("{x}_barcode")); + } + } +} diff --git a/enclone_core/src/slurp.rs b/enclone_core/src/slurp.rs new file mode 100644 index 000000000..2673bd257 --- /dev/null +++ b/enclone_core/src/slurp.rs @@ -0,0 +1,63 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. +// +// Slurp in needed data from an h5 file. 
+ +use hdf5::types::FixedAscii; +use itertools::Itertools; + +pub fn slurp_h5( + h5_path: &str, + take_matrix: bool, + barcodes: &mut Vec<String>, + features: &mut Vec<String>, + matrix: &mut Vec<Vec<(i32, i32)>>, +) -> Result<(), String> { + // Read barcodes from the h5 file. + + let h = hdf5::File::open(h5_path).unwrap(); + let barcode_loc = h.dataset("matrix/barcodes").unwrap(); + + let barcodes0: Result<Vec<FixedAscii<18>>, hdf5::Error> = barcode_loc.as_reader().read_raw(); + if barcodes0.is_err() { + return Err(format!( + "\nencountered error reading HDF5 file\n{h5_path}\nas follows\n{}\n", + barcodes0.as_ref().err().unwrap() + )); + } + let barcodes0 = barcodes0.unwrap(); + + barcodes.extend(barcodes0.into_iter().map_into()); + + // Read features from the h5 file. + + let feature_id_loc = h.dataset("matrix/features/id").unwrap(); + let feature_ids: Vec<FixedAscii<256>> = feature_id_loc.as_reader().read_raw().unwrap(); + let feature_name_loc = h.dataset("matrix/features/name").unwrap(); + let feature_names: Vec<FixedAscii<256>> = feature_name_loc.as_reader().read_raw().unwrap(); + let feature_type_loc = h.dataset("matrix/features/feature_type").unwrap(); + let feature_types: Vec<FixedAscii<256>> = feature_type_loc.as_reader().read_raw().unwrap(); + for i in 0..feature_ids.len() { + features.push(format!( + "{}\t{}\t{}", + feature_ids[i], feature_names[i], feature_types[i] + )); + } + + // If appropriate, construct the binary matrix file from the h5 file. 
+ + if take_matrix { + let data_loc = h.dataset("matrix/data").unwrap(); + let data: Vec<u32> = data_loc.as_reader().read_raw().unwrap(); + let ind_loc = h.dataset("matrix/indices").unwrap(); + let ind: Vec<u32> = ind_loc.as_reader().read_raw().unwrap(); + let ind_ptr_loc = h.dataset("matrix/indptr").unwrap(); + let ind_ptr: Vec<u32> = ind_ptr_loc.as_reader().read_raw().unwrap(); + matrix.resize(barcodes.len(), Vec::new()); + for i in 0..matrix.len() { + for j in ind_ptr[i]..ind_ptr[i + 1] { + matrix[i].push((ind[j as usize] as i32, data[j as usize] as i32)); + } + } + } + Ok(()) +} diff --git a/enclone_core/src/stringulate.rs b/enclone_core/src/stringulate.rs new file mode 100644 index 000000000..8da7eebe6 --- /dev/null +++ b/enclone_core/src/stringulate.rs @@ -0,0 +1,220 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Representation of Vec<String> objects as Strings. We do this under that assumption that the +// strings in the vector do not contain double escapes, which seems like a reasonable assumption +// in typical circumstances. Then the representation of v as a string is +// (double escape) v[0] (double escape) v[1] (double escape) ... (double escape). +// +// For this to be reversible, one has to know the length of v. In fact, the way we use this +// representation is as follows. Let have a data structure, say Widget, and we convert it to +// a Vec<String>, in such a way that the vector starts with +// "Widget", number of entries in the string, ... . +// +// Second, in practice what we want to represent are vectors of heterogeneous objects, from a +// list of types having functions to_string and from_string, using the above design. In that +// case we do this: +// 1. Convert each of the objects to strings, except for strings, which we leave intact. +// 2. Concatenate these. 
+ +use std::fmt::Display; + +use itertools::Itertools; +use string_utils::TextUtils; + +const DOUBLE: &str = ""; + +pub fn flatten_vec_string(v: &[&str]) -> String { + format!("{DOUBLE}{}{DOUBLE}", v.iter().format(DOUBLE)) +} + +pub fn unflatten_string(s: &str) -> Vec<&str> { + let mut chars = s.char_indices(); + // Skip first two characters and the last character. + let start = chars.nth(2).unwrap().0; + let end = chars.nth_back(1).unwrap().0; + let mid = &s[start..end]; + mid.split(&DOUBLE).collect() +} + +pub struct HetString { + pub name: String, + pub content: String, +} + +pub fn unpack_to_het_string(s: &str) -> Vec<HetString> { + let mut v = Vec::<HetString>::new(); + let fields: Vec<&str> = s.split(DOUBLE).collect(); + let mut i = 0; + while i < fields.len() { + if !fields[i].is_empty() { + v.push(HetString { + name: "String".to_string(), + content: fields[i].to_string(), + }); + } + if i + 2 < fields.len() { + let n = fields[i + 2].force_usize(); + v.push(HetString { + name: fields[i + 1].to_string(), + content: flatten_vec_string(&fields[i + 1..i + 1 + n]), + }); + i += n + 1; + } else { + break; + } + } + v +} + +// Specific implementations, should split off if significantly more are added. 
+ +pub struct DescriptionTable { + pub display_text: String, + pub spreadsheet_text: String, +} + +impl Display for DescriptionTable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}DescriptionTable{}4{}{}{}{}{}", + DOUBLE, DOUBLE, DOUBLE, self.display_text, DOUBLE, self.spreadsheet_text, DOUBLE, + ) + } +} + +impl DescriptionTable { + pub fn from_string(x: &str) -> Self { + let v = unflatten_string(x); + DescriptionTable { + display_text: v[2].to_string(), + spreadsheet_text: v[3].to_string(), + } + } +} + +#[derive(Default, Clone)] +pub struct FeatureBarcodeAlluvialTable { + pub id: String, + pub display_text: String, + pub spreadsheet_text: String, +} + +#[derive(Default, Clone)] +pub struct FeatureBarcodeAlluvialTableSet { + pub s: Vec<FeatureBarcodeAlluvialTable>, +} + +impl Display for FeatureBarcodeAlluvialTableSet { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}FeatureBarcodeAlluvialTableSet{}{}{}", + DOUBLE, + DOUBLE, + 3 * self.s.len() + 2, + DOUBLE + )?; + for s in &self.s { + write!( + f, + "{}{DOUBLE}{}{DOUBLE}{}{DOUBLE}", + s.id, s.display_text, s.spreadsheet_text + )?; + } + Ok(()) + } +} + +impl FeatureBarcodeAlluvialTableSet { + pub fn from_string(x: &str) -> Self { + let v = unflatten_string(x); + let n = v[1].force_usize() / 3; + let mut s = Vec::with_capacity(n); + for i in 0..n { + s.push(FeatureBarcodeAlluvialTable { + id: v[2 + 3 * i].to_string(), + display_text: v[2 + 3 * i + 1].to_string(), + spreadsheet_text: v[2 + 3 * i + 2].to_string(), + }); + } + FeatureBarcodeAlluvialTableSet { s } + } +} + +#[derive(Default, Clone)] +pub struct FeatureBarcodeAlluvialReadsTable { + pub id: String, + pub display_text: String, + pub spreadsheet_text: String, +} + +#[derive(Default, Clone)] +pub struct FeatureBarcodeAlluvialReadsTableSet { + pub s: Vec<FeatureBarcodeAlluvialReadsTable>, +} + +impl Display for FeatureBarcodeAlluvialReadsTableSet { + fn fmt(&self, f: 
&mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}FeatureBarcodeAlluvialReadsTableSet{}{}{}", + DOUBLE, + DOUBLE, + 3 * self.s.len() + 2, + DOUBLE + )?; + for s in &self.s { + write!( + f, + "{}{DOUBLE}{}{DOUBLE}{}{DOUBLE}", + s.id, s.display_text, s.spreadsheet_text + )?; + } + Ok(()) + } +} + +impl FeatureBarcodeAlluvialReadsTableSet { + pub fn from_string(x: &str) -> Self { + let v = unflatten_string(x); + let n = v[1].force_usize() / 3; + let mut s = Vec::with_capacity(n); + for i in 0..n { + s.push(FeatureBarcodeAlluvialReadsTable { + id: v[2 + 3 * i].to_string(), + display_text: v[2 + 3 * i + 1].to_string(), + spreadsheet_text: v[2 + 3 * i + 2].to_string(), + }); + } + FeatureBarcodeAlluvialReadsTableSet { s } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_unflatten_string() { + // All ascii. + let simple_case = "0123456789"; + let simple = unflatten_string(simple_case); + assert_eq!(simple.len(), 1); + assert_eq!(simple[0], &simple_case[2..simple_case.len() - 2]); + // multi-byte characters. + let complex_case = format!("ƒƒƒ2{DOUBLE}345{DOUBLE}67ƒƒƒ"); + let result = unflatten_string(complex_case.as_str()); + assert_eq!(result.len(), 3); + assert_eq!(result[0], "ƒ2"); + assert_eq!(result[1], "345"); + assert_eq!(result[2], "67ƒ"); + } + + #[test] + fn test_flatten_vec_string() { + let expected = format!("{DOUBLE}ƒ123ƒ{DOUBLE}ƒƒ{DOUBLE}"); + let split = unflatten_string(expected.as_str()); + assert_eq!(expected, flatten_vec_string(&split)); + } +} diff --git a/enclone_core/src/test_def.rs b/enclone_core/src/test_def.rs new file mode 100644 index 000000000..6fe68f45f --- /dev/null +++ b/enclone_core/src/test_def.rs @@ -0,0 +1,63 @@ +// Copyright (c) 2022 10X Genomics, Inc. All rights reserved. + +// Define enclone paper test sets. 
+ +use crate::expand_integer_ranges; +use string_utils::TextUtils; + +const TEST1: &str = "1279053,1279061,1287192-1287195,1287200-1287203:\ + 1279050,1279058,1287196-1287197,1287204-1287205:\ + 1279051,1279059,1287198-1287199,1287206-1287207:1279052,1279060"; + +const TEST2: &str = "1279049,1279057,1287176-1287179,1287184-1287187:\ + 1279054,1279062,1287180-1287181,1287188-1287189:\ + 1279055,1279063,1287182-1287183,1287190-1287191"; + +const TEST3: &str = "1279065,1279073,1287144-1287147,1287152-1287155:\ + 1279066,1279074,1287156-1287157,1287148-1287149:\ + 1279067,1279075,1287150-1287151,1287158-1287159:1279068,1279076"; + +const TEST4: &str = "1279069,1279077,1287160-1287163,1287168-1287171:\ + 1279070,1279078,1287164-1287165,1287172-1287173:\ + 1279071,1279079,1287166-1287167,1287174-1287175:1279072,1279080"; + +pub fn replace_at_test(x: &mut String) { + *x = x.replace("@test1", TEST1); + *x = x.replace("@test2", TEST2); + *x = x.replace("@test3", TEST3); + *x = x.replace("@test4", TEST4); + *x = x.replace("@test", &format!("{TEST1};{TEST2};{TEST3};{TEST4}")); + *x = x.replace("@training", "1-3,5-9,11-12,14-16,18-43"); +} + +pub fn test_donor_id(x: usize) -> usize { + let test1 = expand_integer_ranges(&TEST1.replace(':', ",")); + let test1 = test1.split(',').collect::<Vec<&str>>(); + for t in test1.iter() { + if t.force_usize() == x { + return 1; + } + } + let test2 = expand_integer_ranges(&TEST2.replace(':', ",")); + let test2 = test2.split(',').collect::<Vec<&str>>(); + for t in test2.iter() { + if t.force_usize() == x { + return 2; + } + } + let test3 = expand_integer_ranges(&TEST3.replace(':', ",")); + let test3 = test3.split(',').collect::<Vec<&str>>(); + for t in test3.iter() { + if t.force_usize() == x { + return 3; + } + } + let test4 = expand_integer_ranges(&TEST4.replace(':', ",")); + let test4 = test4.split(',').collect::<Vec<&str>>(); + for t in test4.iter() { + if t.force_usize() == x { + return 4; + } + } + panic!("unknown test donor id"); +} 
diff --git a/enclone_core/src/testdata.public.bcr.human b/enclone_core/src/testdata.public.bcr.human index 9fb2bf7d9..7011cf97c 100644 --- a/enclone_core/src/testdata.public.bcr.human +++ b/enclone_core/src/testdata.public.bcr.human @@ -2,25 +2,31 @@ # # There is probably still some combining to be done, either within this list or with the other one. -40935,40936,40937,40938,40943,40944,40945,40946,40951,40952,40953,40954,40959,40960,40961,40962,40963,40965,40966,40967,40968,40969,40970,40971,40972,47203,47204,47215,47216,40947,40948,40955,40956,40964,46032 +40935,40936,40937,40938,40943,40944,40945,40946,40951,40952,40953,40954,40959,40960,40961,40962,47203,47204,47215,47216,46032 # above includes preexisting -# 40947,40948,40955,40956,40964,46032 +# 46032 # above matches preexisting # 40939,40940,42817 -44933,44934,44935,44936,44979,44980,44981,44982,44987,44988,43892,43893,43894,43895,43896,43897,43898,43899 +44979,44980,44981,44982,44987,44988,43892,43893,43894,43895,43896,43897,43898,43899 -124477,124478,124479,124480,124483,124484,124487,124488,124493,124494,128045,128048,124481,124482,124485,124486,124489,124490 +# PBMCs (and these have matching gex = 127798,127801, respectively, for first two) +# also TCR 128024 matches BCR 128040 +# not checked for overlap with any of the lists + +128037,128040,124483,124484,124487,124488,128045,128048,124481,124482,124485,124486,124489,124490 # above matches preexisting # 91295-91302,91312,91314,91316,91318,91320,91322,91324,92751,92758,92763,95455,106060,106062,116087-116112,117703,117704,117707,118175-118192,123137,123138,123141,123142,123176,123178,123179,123182,123183,123186,123187,123190,123191,140331-140364,140366-140369 -124547,124550 +# 124547 is essentially identical to 123086 + +123085,123089,124547 # above matches preexisting @@ -63,21 +69,16 @@ 86231 -# PBMCs (and these have matching gex = 127798,127801, respectively) -# also TCR 128024 matches BCR 128040 -# not checked for overlap with any of the lists - 
-128037,128040 - # All of the remaining datasets still need to be sorted out. Some may duplicate barcodes # with datasets above, and some may be from the same donors. -123085 -123089 165807 -165808 -83808 + +# MALT + +83808,86233 + 85333 -86233 86237 -1021341 + +1017974,1017975,1017976,1017977,1018095,1018096,1018097,1018098,1029474,1029475,1029476,1029477,165808,1018288,1018289,1018291,1018293,1018296,1018297,1018298,1018301,1021358,1021360,1021361,1021362,1031844,1031845,1031846,1031847,1031848,1031850,1031851,1031853,1031854,1031856,1031857,1031858 diff --git a/enclone_core/src/testdata.public.gex.human b/enclone_core/src/testdata.public.gex.human index c4c4ec1ae..cd129545b 100644 --- a/enclone_core/src/testdata.public.gex.human +++ b/enclone_core/src/testdata.public.gex.human @@ -3,4 +3,4 @@ 127798 127801 85679 -123749 +123217 diff --git a/enclone_core/src/testlist.rs b/enclone_core/src/testlist.rs deleted file mode 100644 index 5d202cdeb..000000000 --- a/enclone_core/src/testlist.rs +++ /dev/null @@ -1,377 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Information about enclone tests. - -pub fn enclone_testdata() -> String { - include_str!["enclone.testdata"].to_string() -} - -pub fn enclone_testdata_public_bcr_human() -> String { - include_str!["testdata.public.bcr.human"].to_string() -} - -pub fn enclone_testdata_public_tcr_human() -> String { - include_str!["testdata.public.tcr.human"].to_string() -} - -pub fn enclone_testdata_public_tcr_mouse() -> String { - include_str!["testdata.public.tcr.mouse"].to_string() -} - -pub fn enclone_testdata_public_gex_human() -> String { - include_str!["testdata.public.gex.human"].to_string() -} - -pub const TEST_FILES_VERSION: u8 = 14; - -pub const TESTS: [&str; 105] = [ - // 1. tests variant base after CDR3, parseable output - r###"BCR=123089 CDR3=CVRDRQYYFDYW POUT=stdout - PCOLS=exact_subclonotype_id,n,v_name1,v_name2,nchains,var_indices_aa1,barcodes"###, - // 2. 
tests many donor ref differences, test comp, edit and var and donorn - r###"BCR=123089 CDR3=CARRYFGVVADAFDIW CVARSP=comp,edit,var AMINO=cdr3,var,share,donorn"###, - // 3. tests motif in CDR3, CHAINS, u_sum, ulen, flipped args in CVARS, on tiny dataset - r###"BCR=85333 CDR3="CAA.*" CHAINS=2 CVARS=const,u_sum,ulen"###, - // 4. tests gex and antibody, FULL_SEQC, ulen, udiff, on tiny dataset - r###"BCR=86237 GEX=85679 LVARSP=gex,CD19_ab_μ,CD25_ab_μ,IGLV3-1_g_μ,IGLV3-1_g_%,RPS27_g_μ - CELLS=3 FULL_SEQC SUM MEAN - CVARSP=ulen,udiff"###, - // 5. tests TCR and correct grouping of onesies on AGBT Donor 2 dataset - r###"TCR=101287 MIN_CELLS=100"###, - // 6. tests AMINO= and vjlen and other things - r###"BCR=86237 CELLS=3 AMINO= CVARS=u,r,cdr3_dna,cdr3_len,vjlen"###, - // 7. tests SHM deletion - r###"BCR=123085 CVARSP=var,clen,cdiff CDR3=CAREPLYYDFWSAYFDYW LVARSP=near,far"###, - // 8. this clonotype included a junk chain before we made a change, and test "/outs" - r###"TCR=163911/outs CDR3=CAPSAGDKIIF AMINO=donor"###, - // 9. tests PER_CELL and unicode - r###"BCR=█≈ΠΠΠ≈█ CDR3=CAKGDRTGYSYGGGIFDYW PER_CELL"###, - // 10. tests multiple datasets and also LVARS=n,origins,donors,datasets, and share - // Note that we have deliberately "faked" two donors. In reality there is one. - r###"BCR="123085;123089" CDR3=CVKDRVTGTITELDYW LVARS=n,origins,donors,datasets AMINO=share - MIX_DONORS"###, - // 11. tests META - r###"META=testx/inputs/test11_meta CDR3=CARSFFGDTAMVMFQAFDPW LVARSP=donors,gex"###, - // 12. this added because it got better when a noise filter was added, also tests u_max - r###"TCR=163914 CDR3=CASSLVQPSTDTQYF CVARSP=u_max"###, - // 13. this added because it got better when a noise filter was added; also test FASTA - r###"TCR=163914 CDR3=CAFRGGSYIPTF FASTA=stdout"###, - // 14. this added because it got better when a bug in bads detection was fixed - r###"TCR=163914 CDR3=CASRLGGEETQYF"###, - // 15. 
tests insertion and AMINO range - r###"BCR=86233 CDR3=CARGLVVVYAIFDYW CVARS=notes AMINO=cdr3,105-113"###, - // 16. tests number of cells broken out by dataset - r###"BCR=123085,123089 LVARS=n,n_123085,n_123089 CDR3=CTRDRDLRGATDAFDIW"###, - // 17. tests gex with PER_CELL and tests n_gex - // See also enclone_test_prebuild below, that tests nearly the same thing, - // and tests versus the same output file. - r###"BCR=86237 GEX=85679 LVARSP=gex_max,gex,n_gex,CD19_ab_μ CELLS=3 PER_CELL NH5"###, - // 18. makes sure cross filtering isn't applied to two origins from same donor - r###"BCR=123085:123089 CDR3=CVRDEGGARPNKWNYEGAFDIW"###, - // 19. there was a bug that caused a twosie to be deleted, and there was foursie junk - // There were also some cells that were lost due to a bug in graph filtering. - r###"BCR=123085 CDR3=CARRYFGVVADAFDIW"###, - // 20. example affected by whitelist (gel bead oligo contamination) filtering, and test u_Σ - r###"BCR=52177 AMINO=cdr3 PER_CELL CDR3=CATWDDSLSGPNWVF CVARSP=u_Σ"###, - // 21. test MIN_CHAINS_EXACT - r###"BCR=123089 CDR3=CGTWHSNSKPNWVF MIN_CHAINS_EXACT=3"###, - // 22. there was a false positive clonotype - r###"BCR="165807;165808" FAIL_ONLY=true EXPECT_NULL"###, - // 23. here we were generating a fake alternate allele - r###"BCR=83808 CDR3=CAREGRGMVTTNPFDYW MIN_CELLS_EXACT=30"###, - // 24. an example that uses IGHE, and test NGROUP - r###"BCR=52177 CDR3=CSTGWGLDFDFWSGYYTAGYHW NGROUP"###, - // 25. add mouse B6 example that had messed up constant regions - r###"TCR=74396 MOUSE CVARSP=cdiff CDR3=CASSDAGDTQYF"###, - // 26. tests multiple datasets and also LVARS=n,donors,datasets, and share - // Note that we have deliberately "faked" two donors. In reality there is one. - // Here we make sure that non-specification of MIX_DONORS works. - r###"BCR="123085;123089" CDR3=CVKDRVTGTITELDYW"###, - // 27. tests SUMMARY and NOPRINT - r###"BCR=123085 SUMMARY SUMMARY_CLEAN NOPRINT"###, - // 28. 
tests BARCODE option - r###"BCR=165807 BARCODE=CCCATACGTGATGATA-1,TCTATTGAGCTGAAAT-1"###, - // 29. tests parenthesized variable in F, SUM and MEAN - r###"BCR=86237 GEX=85679 LVARSP=IGHV3-7_g_μ F="(IGHV3-7_g_μ)>=4.5" MIN_CHAINS=2 SUM MEAN - NH5"###, - // 30. tests d_univ and d_donor - r###"BCR=123085 CVARSP=d_univ,d_donor CDR3=CVKDRVTGTITELDYW"###, - // 31. tests Cell Ranger 3.1 output - r###"BCR=../3.1/123085 CDR3=CVKDRVTGTITELDYW"###, - // 32. tests Cell Ranger 2.0 output and RE - r###"BCR=../2.0/124550 CDR3=CAREPLYYDFWSAYFDYW RE"###, - // 33. tests SCAN - r###"BCR=123085 GEX=123749 LVARSP=IGHV1-69D_g_μ MIN_CELLS=10 NGEX - SCAN="(IGHV1-69D_g_μ)>=100,(IGHV1-69D_g_μ)<=1,t-10*c>=0.1" NOPRINT H5"###, - // 34. tests honeycomb plot - // (This yields a lot of output so will be annoying to debug if something changes.) - r###"BCR=123085:123089 MIN_CELLS=10 PLOT="stdout,s1->red,s2->blue" NOPRINT - LEGEND=red,"cell from 123085",blue,"cell from 123089""###, - // 35. tests barcode-by-barcode specification of colors, and tests LEGEND= - // Note that the specification of PRE overrides our usual specification. - // (This yields a lot of output so will be annoying to debug if something changes.) - r###"PRE=../enclone-data/big_inputs/version{TEST_FILES_VERSION},. META=testx/inputs/test35_meta MIN_CELLS=10 MIN_CHAINS_EXACT=2 NOPRINT PLOT=stdout NO_PRE - LEGEND=red,IGHG1,green,IGHG3,blue,IGHA1,orange,IGHM,black,unassigned"###, - // 36. tests PCELL and u_Σ in PCOLS (both forms) - r###"BCR=85333 CDR3=CARDGMTTVTTTAYYGMDVW POUT=stdout PCELL CVARSP=u_Σ - PCOLS=barcode,const1,const2,u_Σ1,u_sum1"###, - // 37. tests parseable output of barcodes for a given dataset - r###"BCR=123085,123089 POUT=stdout PCOLS=123085_barcodes,123089_barcodes - CDR3=CAVTIFGVRTALPYYYALDVW"###, - // 38. tests parseable output of barcodes for a given dataset, using PCELL - r###"BCR=123085,123089 POUT=stdout PCOLS=123085_barcode,123089_barcode PCELL - CDR3=CAVTIFGVRTALPYYYALDVW"###, - // 39. 
tests u and r fields in parseable output, and tests stdouth - r###"BCR=85333 POUT=stdouth PCOLS=barcode,u1,u_cell1,r2,r_cell2 PCELL PER_CELL CVARSP=r - CDR3=CAADGGGDQYYYMDVW"###, - // 40. test case where digit rows are just barely present - r###"TCR=163911 CDR3=CASSLVQPSTDTQYF AMINO=donor"###, - // 41. test case for gex_cell - r###"BCR=86237 GEX=85679 CDR3=CAKAVAGKAVAGGWDYW POUT=stdouth PCOLS=gex_cell PCELL NH5"###, - // 42. test case that should fail because gex_cell doesn't make sense without gex data - r###"BCR=85333 CDR3=CQQRSNWPLYTF POUT=stdouth PCOLS=gex_cell PCELL PER_CELL EXPECT_FAIL"###, - // 43. test case that should fail because _cell variables can't be used in LVARS - r###"BCR=86237 GEX=85679 CDR3=CAKAVAGKAVAGGWDYW LVARS=gex_cell EXPECT_FAIL"###, - // 44. test _cell - r###"BCR=86237 GEX=85679 LVARSP=gex,RPS27_g_μ CELLS=3 POUT=stdouth - PCOLS=barcode,gex_cell,CD19_ab,CD19_ab_cell NH5 PCELL"###, - // 45. test ndiff... - r###"BCR=123085 CVARSP=ndiff1vj,ndiff2vj CDR3=CARDQNFDESSGYDAFDIW"###, - // 46. test u_μ, u_min, r_μ, r_min and r_max - r###"BCR=85333 CVARSP=u_μ,u_min,u_max,r,r_μ,r_min,r_max AMINO=cdr3 CDR3=CAADGGGDQYYYMDVW - POUT=stdouth PCOLS=u_μ1,u_min1,u_max1,r2,r_μ2,r_min2,r_max2"###, - // 47. this should fail - r###"BCR=85333 CDR3=CAREEYYYDSSGDAFDIW LVARSP=gex_mean EXPECT_FAIL"###, - // 48. test gex_mean and gex_Σ and NGEX - // Do not use NH5 because the bin file is too big for git. - r###"BCR=123085 GEX=123749 LVARSP=gex_mean,gex_Σ CDR3=CASRKSGNYIIYW NGEX H5"###, - // 49. test HTML - r###"BCR=85333 CDR3=CAAWDDSLNGWVF CHAINS=1 POUT=stdouth PCOLS=barcodes,n FASTA=stdout - FASTA_AA=stdout HTML=CAAWDDSLNGWVF"###, - // 50. make sure this doesn't fail - r###"NOPAGER EXPECT_OK"###, - // 51. make sure this fails gracefully - r###"BCR=123085 PLOT=/nonexistent/broken.svg NOPRINT MIN_CELLS=50 EXPECT_FAIL"###, - // 52. add test for some gene patterns - // Do not use NH5 because the bin file is too big for git. 
- r###"BCR=123085 GEX=123749 CDR3=CARPKSDYIIDAFDIW MIN_CELLS=10 - LVARSP="(IGHV5-51|IGLV1-47)_g_%,IGH.*_g_%,IG(K|L).*_g_%""###, - // 53. add test for _% with PER_CELL - // Do not use NH5 because the bin file is too big for git. - r###"BCR=123085 GEX=123749 LVARSP="gex,n_gex,JCHAIN_g_%,IG%:IG.*_g_%" CVARS=u_μ,const - MIN_CHAINS_EXACT=2 CDR3=CAREGGVGVVTATDWYFDLW PER_CELL"###, - // 54. make sure this fails gracefully - r###"BCR=86237 GEX=85679 LVARSP=GERBULXXX123_g_% EXPECT_FAIL"###, - // 55. test cred - r###"BCR=86237 GEX=85679 LVARSP=cred PCELL PER_CELL POUT=stdouth PCOLS=cred_cell - CDR3=CARSFFGDTAMVMFQAFDPW"###, - // 56. test SVG - r###"BCR=85333 CDR3=CARDPRGWGVELLYYMDVW SVG NGROUP"###, - // 57. test 1/8 for newline correctness - r###"BCR=85333 CDR3="CLLSYSGARVF|CQSADSSGTYKVF" AMINO= PLAIN SET_IN_STONE"###, - // 58. test 2/8 for newline correctness - r###"BCR=85333 CDR3="CLLSYSGARVF|CQSADSSGTYKVF" AMINO= PLAIN NGROUP SET_IN_STONE"###, - // 59. test 3/8 for newline correctness - r###"BCR=85333 CDR3="CLLSYSGARVF|CQSADSSGTYKVF" AMINO= PLAIN HTML SET_IN_STONE"###, - // 60. test 4/8 for newline correctness - r###"BCR=85333 CDR3="CLLSYSGARVF|CQSADSSGTYKVF" AMINO= PLAIN NGROUP HTML SET_IN_STONE"###, - // 61. test 5/8 for newline correctness - r###"BCR=85333 GROUP_VJ_REFNAME MIN_GROUP=2 AMINO= PLAIN SET_IN_STONE"###, - // 62. test 6/8 for newline correctness - r###"BCR=85333 GROUP_VJ_REFNAME MIN_GROUP=2 AMINO= PLAIN NGROUP SET_IN_STONE"###, - // 63. test 7/8 for newline correctness - r###"BCR=85333 GROUP_VJ_REFNAME MIN_GROUP=2 AMINO= PLAIN HTML SET_IN_STONE"###, - // 64. test 8/8 for newline correctness - r###"BCR=85333 GROUP_VJ_REFNAME MIN_GROUP=2 AMINO= PLAIN HTML NGROUP SET_IN_STONE"###, - // 65. test NCELL - r###"BCR=86237 NCELL CDR3=CAKTATTLGGYYSHGLDVW MIN_CELLS=2"###, - // 66. test BC in combination with PER_CELL and PCELL - // Do not use NH5 because the bin file is too big for git. 
- r###"BCR=123085 GEX=123749 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=gex,cred,T PCELL - POUT=stdouth PCOLS=barcode,T CDR3=CAKAGPTESGYYVWYFDLW MIN_CELLS=2"###, - // 67. expect fail if garbage PRE - r###"PRE=garbage_gerbil_stuff BCR=86237 CELLS=3 EXPECT_FAIL NO_PRE"###, - // 68. a test of PRE - r###"PRE=mumbo_jumbo,../enclone-data/big_inputs/version{TEST_FILES_VERSION} BCR=86237 NO_PRE - CDR3=CARENHPVEYCSSTSCYKAYYYGMDVW"###, - // 69. another test of pre - r###"PRE=mumbo_jumbo BCR=../enclone-data/big_inputs/version{TEST_FILES_VERSION}/86237 NO_PRE - CDR3=CARENHPVEYCSSTSCYKAYYYGMDVW"###, - // 70. another test of META - r###"META=mumbo_jumbo EXPECT_FAIL"###, - // 71. another test of META - r###"PRE=../enclone-data/big_inputs/version{TEST_FILES_VERSION},testx/inputs META=test11_meta - CDR3=CARSFFGDTAMVMFQAFDPW LVARSP=donors,gex NO_PRE"###, - // 72. test SUMMARY_CSV - r###"BCR=86237 NOPRINT SUMMARY_CSV"###, - // 73. this crashed before a bug was fixed - r###"BCR=1021341 NCELL CDR3=CQQANSYPLTF SEG=IGHV1-69D"###, - // 74. this changed after a bug was fixed; the RE can probably be dropped later when we - // rerun all the datasets - r###"BCR=123085 RE CDR3=CARGYEDFTMKYGMDVW POUT=stdouth PCOLS=utr_id2"###, - // 75. this changed after a bug in RE was fixed, and this is in fact testing RE - r###"BCR=123085 CDR3=CQQSYSTPRTF RE"###, - // 76. test PLOT_BY_ISOTYPE - r###"BCR=123085 MIN_CELLS=10 PLOT_BY_ISOTYPE=stdout NOPRINT MIN_CHAINS_EXACT=2"###, - // 77. make sure that POUT works on full dataset - r###"BCR=86237 POUT=stdout EXPECT_OK"###, - // 78. make sure that POUT with PCELL works on full dataset - r###"BCR=86237 POUT=stdout PCELL EXPECT_OK"###, - // 79. make sure that POUT works on full dataset with gex - r###"BCR=86237 GEX=85679 POUT=stdout NGEX NCELL EXPECT_OK"###, - // 80. make sure that POUT with PCELL works on full dataset with gex - r###"BCR=86237 GEX=85679 POUT=stdout PCELL NGEX NCELL EXPECT_OK"###, - // 81. 
IG:IG.*_g_%_cell and variants in parseable output - r###"BCR=86237 GEX=85679 CDR3=CARSFFGDTAMVMFQAFDPW POUT=stdouth PCELL - PCOLS="barcode,IG:IG.*_g_%_cell,IG.*_g_%_cell,IGN:IG.*_g_%,IG.*_g_%""###, - // 82. test entropy - // Do not use NH5 because the bin file is too big for git. - r###"BCR=123085 GEX=123749 LVARSP=entropy PER_CELL POUT=stdouth PCELL - PCOLS=barcode,entropy,entropy_cell CDR3=CARAQRHDFWGGYYHYGMDVW"###, - // 83. test COMPLETE and dref - r###"BCR=86237 CDR3=CARSFFGDTAMVMFQAFDPW COMPLETE LVARSP=dref"###, - // 84. test CLUSTAL_AA - r###"BCR=123085 CDR3=CAADRQLWSRSPGDYIYYGMQVW CLUSTAL_AA=stdout"###, - // 85. test NALL - r###"BCR=86237 NALL CDR3=CARAPEDTSRWPQYNYSGLDVW SEG=IGKV3-15"###, - // 86. test CLUSTAL_DNA - r###"BCR=86237 CDR3=CARSFFGDTAMVMFQAFDPW CLUSTAL_DNA=stdout"###, - // 87. test PHYLIP_AA and COLOR=codon - r###"BCR=123085 CDR3=CAADRQLWSRSPGDYIYYGMQVW PHYLIP_AA=stdout COLOR=codon"###, - // 88. test PHYLIP_DNA and COLOR=default - r###"BCR=123085 CDR3=CAADRQLWSRSPGDYIYYGMQVW PHYLIP_DNA=stdout COLOR=property"###, - // 89. test TREE and NEWICK - r###"BCR=123085 COMPLETE TREE NEWICK CDR3=CARDLGGRYYGSKDPW"###, - // 90. test FCELL with non-null value - // Do not use NH5 because the bin file is too big for git. - r###"BCR=123085 GEX=123749 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=gex,cred,T - CDR3=CARGYEDFTMKYGMDVW FCELL=keeper=yes"###, - // 91. test FCELL with null value - // Do not use NH5 because the bin file is too big for git. - r###"BCR=123085 GEX=123749 BC=testx/inputs/123077_cells.csv PER_CELL LVARSP=gex,cred,T - CDR3=CARGYEDFTMKYGMDVW FCELL=keeper="###, - // 92. test NALL_CELL - r###"BCR=123085 NALL_CELL CDR3=CQKYDSAPLTF MIN_CELLS=20"###, - // 93. test MIN_DATASET_RATIO - r###"BCR=123085,123089 MIN_DATASET_RATIO=10 LVARSP=nd2"###, - // 94. test use of SEG twice - r###"BCR=123085 SEG=IGHV5-51 SEG=IGKV1D-39"###, - // 95. test TREE=const - r###"BCR=123085 TREE=const CDR3=CARPKSDYIIDAFDIW MIN_CELLS=2"###, - // 96. 
test MAX_LOG_SCORE - r###"BCR=123085 CDR3=CARDQNFDESSGYDAFDIW MAX_LOG_SCORE=0.0"###, - // 97. test MAX_CDR3_DIFFS - r###"BCR=123085 CDR3=CARESVVGLLPIFDYW MAX_CDR3_DIFFS=1"###, - // 98. test reduced stringency D alignment - // (RE can be removed once cellranger rerun) - r###"TCR=101287 CDR3=CASSPAGTSGKVWGTDTQYF RE"###, - // 99. test mait (redundant with mait_example.html below, so could delete) - r###"TCR=101287 LVARSP=mait CDR3=CSAGQGDTEAFF"###, - // 100. test inkt and INKT - r###"TCR=101287 LVARSP=inkt INKT MIN_CELLS=2"###, - // 101. test MAIT - r###"TCR=101287 LVARSP=mait MAIT MIN_CELLS=50"###, - // 102. test BINARY with unwriteable path - r###"BCR=123085 BINARY=/gerbilspam/bumblebee EXPECT_FAIL"###, - // 103. test POUT without PCOLS (somewhat annoying, because easily triggered to change) - r###"BCR=85333 POUT=stdout CDR3=CQSADSSGTYKVF"###, - // 104. test EASY - r###"BCR=123085 CDR3="CARVIVGPKKLEGRLYSSSLHFDCW|CARVIVGPEKQEGRLYSSSLHFDYW" EASY - MAX_LOG_SCORE=100"###, - // 105. test MAX_DEGRADATION and MAX_DIFFS - r###"BCR=123085,123089 MAX_LOG_SCORE=100 MAX_DEGRADATION=150 MAX_DIFFS=200 - MAX_CDR3_DIFFS=100 CDR3=CVRILGRALTVRVYFYYGIDVW"###, -]; - -// Test using the extended public dataset collection. Or tests that require samtools. - -pub const EXTENDED_TESTS: [&str; 3] = [ - // 1. test that used to crash on a particular barcode - r###"BCR=40955 NCELL BARCODE=GCGCAGTCAAAGTGCG-1 AMINO=cdr3 NO_PRE NFORCE"###, - // 2. tests nd2 - r###"BCR=47199,47200,47212 AMINO=cdr3 NCROSS LVARS=nd2 CDR3=CVKGKSGSFWYYFENW - NO_PRE NFORCE"###, - // 3. test sec and mem [requires samtools] - r###"BCR=123085 GEX=123749 LVARSP=sec,mem CDR3=CVKDRVTGTITELDYW"###, -]; - -// Tests of internal features. - -pub const INTERNAL_TESTS: [&str; 1] = [ - // 1. gave wrong result - r###"123085 CDR3=CARDRIAGRFGYGMDVW NFORCE"###, -]; - -// List of examples in documentation. - -pub const EXAMPLES: [&str; 2] = [ - // 1. - r###"BCR=123089 CDR3=CARRYFGVVADAFDIW"###, - // 2. 
- // Do not use NH5 because the bin file is too big for git. - r###"BCR=123085 GEX=123749 LVARSP=gex,IGHV2-5_g_μ,CD4_ab_μ CDR3=CALMGTYCSGDNCYSWFDPW"###, -]; - -// List of examples on site. - -pub const SITE_EXAMPLES: [(&str, &str); 10] = [ - // 1. - // Do not use NH5 because the bin file is too big for git. - ( - "pages/auto/clonotype_with_gex.html", - "BCR=123085 CDR3=CQQRSNWPPSITF GEX=123749 LVARSP=gex,IGHV3-49_g,CD19_ab NUMI \ - HTML=\"enclone example with gex\"", - ), - // 2. - ( - "pages/auto/illusory1.html", - "BCR=128037,128040 NCROSS CDR3=CARGGTTTYFISW NGROUP NUMI NUMI_RATIO \ - HTML=\"illusory clonotype expansion 1\"", - ), - // 3. - ( - "pages/auto/illusory2.html", - "BCR=128037,128040 CDR3=CARGGTTTYFISW NGROUP NUMI NUMI_RATIO \ - HTML=\"illusory clonotype expansion 2\"", - ), - // 4. - ( - "pages/auto/illusory3.html", - "BCR=128040 GEX=127801 CDR3=CARGGTTTYFISW NGROUP NUMI NUMI_RATIO \ - HTML=\"illusory clonotype expansion 3\"", - ), - // 5. - ( - "pages/auto/illusory4.html", - "BCR=128040 GEX=127801 CDR3=CARGGTTTYFISW PER_CELL LVARSP=gex,cred MIN_CHAINS_EXACT=2 NUMI \ - NUMI_RATIO NGROUP HTML=\"illusory clonotype expansion 4\"", - ), - // 6. - ( - "pages/auto/illusory5.html", - "BCR=128040 GEX=127801 BC=testx/inputs/128024_cells.csv \ - CDR3=CARGGTTTYFISW PER_CELL NUMI NUMI_RATIO \ - LVARSP=gex,cred,T CHAINS_EXACT=2 NGROUP HTML=\"illusory clonotype expansion 5\"", - ), - // 7. - ( - "img/samples.svg", - "BCR=123085:123089 MIN_CELLS=10 PLOT=\"stdout,s1->blue,s2->red\" NOPRINT \ - LEGEND=blue,123085,red,123089", - ), - // 8. - ( - "img/iso.svg", - "BCR=123085,123089 MIN_CELLS=5 MIN_CHAINS_EXACT=2 NOPRINT PLOT_BY_ISOTYPE=stdout", - ), - // 9. - ( - "pages/auto/tree_example.html", - "BCR=123085 TREE COMPLETE CDR3=CARDQNFDESSGYDAFDIW LVARSP=dref HTML", - ), - // 10. 
- ( - "pages/auto/mait_example.html", - "TCR=101287 LVARSP=mait CDR3=CSAGQGDTEAFF HTML", - ), -]; diff --git a/enclone_core/src/var_reg.rs b/enclone_core/src/var_reg.rs new file mode 100644 index 000000000..a95e15c0d --- /dev/null +++ b/enclone_core/src/var_reg.rs @@ -0,0 +1,53 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// This is the start of a variable registry for enclone. +// +// The intention is that all variables would be registered here. Or perhaps we could instead +// have a distributed system. +// +// String fields are to be replaced by structures. + +pub struct Variable { + pub name: String, + pub scope: String, + pub prereqs: Vec<String>, + pub value_type: String, + pub function: String, + pub doc: String, +} + +#[rustfmt::skip] +pub fn variable_registry() -> Vec<Variable> { + vec![ + // <FeatureName>_cellular_u + Variable { + name: "<FeatureName>_cellular_u".to_string(), + scope: "dataset".to_string(), + prereqs: vec!["per_feature_metrics.csv".to_string()], + value_type: "float[0,100].precision(1)".to_string(), + function: "in ***.rs".to_string(), + doc: "For a given feature, the percent of UMIs that are identified by the \ + cellranger pipeline as lying in a cell.".to_string(), + }, + // <FeatureName>_cellular_r + Variable { + name: "<FeatureName>_cellular_r".to_string(), + scope: "dataset".to_string(), + prereqs: vec!["per_feature_metrics.csv".to_string()], + value_type: "float[0,100].precision(1)".to_string(), + function: "in ***.rs".to_string(), + doc: "For a given feature, the percent of reads that are identified by the \ + cellranger pipeline as lying in a cell.".to_string(), + }, + // nchains_present + Variable { + name: "nchains_present".to_string(), + scope: "exact".to_string(), + prereqs: vec![], + value_type: "positive_integer".to_string(), + function: "in ***.rs".to_string(), + doc: "The number of chains that are present in a given exact subclonotype." 
+ .to_string(), + }, + ] +} diff --git a/enclone_help/src/example1 b/enclone_help/src/example1 deleted file mode 100644 index c72ae1e75..000000000 --- a/enclone_help/src/example1 +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 12 CELLS - -[1.1] CLONOTYPE = 12 CELLS -┌───────────┬───────────────────────────────────────────────┬──────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 181.1.1|IGHV4-30-2 ◆ 53|IGHJ3 │ 254|IGKV1D-39 ◆ 218|IGKJ5 │ -│ ├───────────────────────────────────────────────┼──────────────────────────────┤ -│ │ 1 1111111111111111 │ 1 111111111111 │ -│ │ 2224556788990 1111122222222223 │ 0 011111111112 │ -│ │ 0571380317346 5678901234567890 │ 6 901234567890 │ -│ │ ══════CDR3══════ │ ════CDR3════ │ -│reference │ LSSASRPHPVRST ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ T CQQ◦◦◦◦◦◦◦◦◦ │ -│donor ref │ VSPTYRHYPVTST ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ T CQQ◦◦◦◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────────────────┼──────────────────────────────┤ -│# n │ ........x.... ..............x. u const │ x ......x..... 
u const│ -│1 10 │ VSPTYRHYPVTST CARRYFGVVADAFDIW 4285 IGHM │ T CQQSYSTPPITF 11793 IGKC │ -│2 2 │ VSPTYRHYSVTST CARRYFGVVADAFDIW 4383 IGHM │ A CQQSYSPPPITF 13922 IGKC │ -└───────────┴───────────────────────────────────────────────┴──────────────────────────────┘ - diff --git a/enclone_help/src/example2 b/enclone_help/src/example2 deleted file mode 100644 index a83c800fa..000000000 --- a/enclone_help/src/example2 +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 5 CELLS - -[1.1] CLONOTYPE = 5 CELLS -┌────────────────────────────────────┬───────────────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 98|IGHV2-5 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2 │ -│ ├───────────────────────────────────────┼─────────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 8 11111222222222233333 │ 6 00000111111 │ -│ │ 5 56789012345678901234 │ 2 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ S ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ V CQAWD◦◦◦◦◦◦ │ -│donor ref │ S ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ V CQAWD◦◦◦◦◦◦ │ -├────────────────────────────────────┼───────────────────────────────────────┼─────────────────────────────┤ -│# n gex IGHV2-5_g_μ CD4_ab_μ │ x .................... u const │ . ........... u const│ -│1 3 8852 1850 79 │ S CALMGTYCSGDNCYSWFDPW 592 IGHM │ V CQAWDSSVVVF 2995 IGLC2│ -│2 1 29657 6515 36 │ S CALMGTYCSGDNCYSWFDPW 6112 IGHG1 │ V CQAWDSSVVVF 15203 IGLC2│ -│3 1 14886 3326 42 │ T CALMGTYCSGDNCYSWFDPW 4045 IGHG1 │ V CQAWDSSVVVF 7025 IGLC2│ -└────────────────────────────────────┴───────────────────────────────────────┴─────────────────────────────┘ - diff --git a/enclone_help/src/help1.rs b/enclone_help/src/help1.rs deleted file mode 100644 index 941efb11a..000000000 --- a/enclone_help/src/help1.rs +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Test for help request. 
- -use crate::help_utils::*; - -pub fn help1(args: &Vec<String>, h: &mut HelpDesk) { - // Provide main help. - - if args.len() == 1 || (args.len() == 3 && args[1] == "help" && args[2] == "main") || h.help_all - { - if h.help_all { - h.print("\n"); - } - h.begin_doc(""); - h.print("\nThe mission of "); - h.print_enclone(); - h.print(" is to:\n\n"); - h.print("\\bold{ Find and display the clonotypes within single cell VDJ datasets:}\n"); - h.print("\\bold{ groups of cells having the same fully rearranged common ancestor.}\n\n"); - h.print( - "\\boldblue{enclone is part of the 10x Genomics immune profiling tools, including \ - Cell Ranger and Loupe. enclone uses output from Cell Ranger version ≥ 3.1.}\n\n\ - The complete enclone documentation is at \\green{bit.ly/enclone}. This page \ - catalogs the subset of those pages that are directly accessible from the enclone \ - command line. These pages can be viewed in a 100 wide x 56 high window, except for \ - those labeled \"long\" or \"wide\".\n\n", - ); - h.docpr("\\bold{command}", "\\bold{what it provides}"); - h.ldoc("enclone help", "help to test for correct setup"); - h.doc("enclone", "what you see here: guide to all the doc"); - h.ldoc("enclone help quick", "quick guide to getting started"); - h.doc("enclone help how", "how enclone works (long)"); - h.doc( - "enclone help command", - "info about enclone command line argument processing", - ); - h.ldoc( - "enclone help glossary", - "glossary of terms used by enclone, and conventions", - ); - h.ldoc("enclone help example1", "explanation of an example"); - h.doc( - "enclone help example2", - "example showing gene expression \ - and feature barcodes (wide)", - ); - h.ldoc( - "enclone help input", - "how to provide input to enclone (long)", - ); - h.doc( - "enclone help input_tech", - "how to provide input to enclone (technical notes)", - ); - h.doc("enclone help parseable", "parseable output (long)"); - h.ldoc( - "enclone help filter", - "clonotype filtering options, 
scanning for feature enrichment (long)", - ); - h.doc("enclone help special", "special filtering options (long)"); - h.ldoc("enclone help lvars", "lead column options (long)"); - h.doc("enclone help cvars", "per chain column options (long)"); - h.doc( - "enclone help amino", - "per chain column options for amino acids", - ); - h.doc("enclone help display", "other clonotype display options"); - h.ldoc("enclone help indels", "insertion and deletion handling"); - h.ldoc( - "enclone help color", - "how enclone uses color, and related things", - ); - h.doc("enclone help faq", "frequently asked questions (long)"); - h.doc("enclone help developer", "a few things for developers"); - h.ldoc_greenish( - "enclone help all", - "concatenation of all the help pages (long, wide)", - ); - h.doc_greenish("", "███ USE THIS TO SEARCH ALL THE HELP PAGES! ███"); - h.print_tab2(); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Setup test. - - if (args.len() == 2 && args[1] == "help") - || (args.len() == 2 && args[1] == "--help") - || h.help_all - { - h.begin_doc("setup"); - h.print( - "\n\nWelcome to enclone!\n\n\ - The purpose of this first page is to help you make sure that you're set up properly\n\ - to run enclone. PLEASE READ!\n\n\ - (for the main help page, please type instead: enclone)\n\n\ - Here we go through several setup tests. If you have any problem that you can't\n\ - resolve, please email us at enclone@10xgenomics.com.\n\n\n\ - 1. Are you using a fixed width font?\n\ - Look at this:\n\ - A FAT BROWN CAT JUMPED OVER THE WALL\n\ - ||||||||||||||||||||||||||||||||||||\n\ - Do those two lines end at the same position? If not, you need to switch your \ - font.\n\n\ - 2. Is your terminal window wide enough to see the help pages?\n\ - Your terminal needs to be at least 100 columns wide. 
Look at this:\n\ - 01234567890123456789012345678901234567890123456789\ - 01234567890123456789012345678901234567890123456789\n\ - Does it appear as a single line? If not, please widen your window.\n\n\ - 3. Can your terminal display box characters?\n\ - Look at this:\n\ - ┌────────┬─────────┐\n\ - │banana │ peel │\n\ - ├────────┼─────────┤\n\ - │oops │ slipped│\n\ - └────────┴─────────┘\n\ - Do you see a neat rectangle composed of four rectangles with words inside them? \ - Are the vertical lines contiguous? \ - If not, something is wrong with your terminal! You may need to change the terminal \ - font. For example, Menlo works, but Courier does not.\n\n\ - 4. Can your terminal correctly display ANSI escape sequences?\n\ - The following word should be \\bold{bold}. \ - The following word should be \\blue{blue}.\n\ - If that doesn't make sense, or is messed up, something is wrong, and you have \ - two options:\n\ - (a) seek help to fix your terminal window\n\ - (b) turn off escape sequences by adding PLAIN to every enclone command, or set\n\ - the environment variable ENCLONE_PLAIN.\n\ - But that should be only a last resort.\n\n\ - 5. Can your terminal correctly display unicode characters?\n\ - Do you see a centered dot here • ?\n\ - If not, your terminal has a problem!\n\n\ - 6. Does this entire help page appear at once in your terminal window?\n\ - If not, please increase the number of rows in your window to 56.\n\n\n\ - If you go through all those tests and everything worked, you should be \ - good to go!\n\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide quick help. 
- - if (args.len() == 3 && args[1] == "help" && args[2] == "quick") || h.help_all { - h.begin_doc("quick"); - h.print("\n"); - h.print("\\bold{quick guide to getting started}\n\n"); - h.print( - "Just type this:\n\n\ - \\bold{enclone BCR=p}\n\n\ - where \\bold{p} is the path to your Cell Ranger VDJ directory.\n\n\ - Substitute \\bold{TCR} if that's what you've got.\n\n\ - This will show you all the clonotypes, in descending order by number of cells.\n\n\ - You'll need to make your window wide enough so that lines are not folded. This \ - depends on the dataset.\n\n\ - Only one page of output is shown at a time. To navigate within the full output, \ - use the space bar to go forward and the b key to go backward.\n\n\ - See \\bold{enclone help example1} for a detailed guide to how to read the enclone \ - output. A few key things you should know:\n\n\ - 1. You'll see numbers near the top. These are amino acid position numbers, and\n \ - they read downwards. Numbering starts at the start codon, numbered zero.\n\n\ - 2. Each numbered line represents an exact subclonotype: cells having identical \ - V(D)J transcripts.\n\n\ - 3. By default, you'll see data in amino acid space. Only \"interesting\" amino acids \ - are shown.\n\n\ - Please read on to learn more!\n\n\ - \\bold{navigation in enclone}\n\n\ - enclone automatically sends its output through the program \"less\". 
This allows you \ - to navigate within the output, using the following keys \ - (and many more, not shown, and which you don't need to know):\n\ - • space: causes output to page forward\n\ - • b: causes output to page backward\n\ - • /string: finds instances of \"string\" in the output\n\ - • n: having done the previous, jump to the next instance\n\ - • q: quit, to return to the command line.\n\n\ - When enclone uses less, it passes the argument -R, which causes certain characters \ - to be hidden, namely escape codes that color or bold text.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide how help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "how") || h.help_all { - // Start. - - h.begin_doc("how"); - h.print("\n"); - h.print("\\bold{information about how enclone works}\n\n"); - h.print( - "The goal of enclone is to find and display the clonotypes within single cell \ - VDJ datasets: groups of cells having the same fully rearranged common ancestor.\n\n\ - \ - enclone provides the foundation for fully understanding each cell's antigen \ - affinity and the evolutionary relationship between cells within one or more datasets. \ - This starts with, for each cell, \ - \\bold{the full length sequence of all its VDJ receptor chains}. Such data may be \ - obtained using the 10x Genomics immune profiling platform.\n\n\ - See also the heuristics page at \\green{bit.ly/enclone}.\n\n\ - \ - For this, there are fundamental challenges:\n\n", - ); - - // Print challenges. - - h.print_with_box( - "1. It is extremely easy to get false positives: the incorrect \ - appearance that two cells have a common ancestor.\n\n\ - \ - 2. Because of somatic hypermutation in B cells, it can be difficult to know that \ - two B cells share a common ancestor.\n\n\ - \ - 3. There is always some background noise, e.g. from ambient mRNA. 
When building \ - large clonotypes, this noise tends to pile up, yielding ectopic chains, i.e. chains \ - within a clonotype that are artifacts and do not represent true biology.", - false, - ); - - // Print boxed algorithm. - - h.print( - "To address these challenges, the enclone algorithm has several steps, which we \ - outline:\n\n", - ); - h.print( - "\\boldred{1}. Input data. \ - enclone gets its information from the file all_contig_annotations.json that is \ - produced by Cell Ranger. Only productive contigs are used. Each has an annotated \ - V and J segment. The V segment alignment may have a single indel whose length is \ - divisible by three, and in that case, the V reference sequence is edited either to \ - delete or insert sequence. In the insertion case, the bases are taken from the \ - contig. These indels are noted in the enclone output.\n\n\ - \ - \\boldred{2}. Exact subclonotypes. \ - enclone groups cells into exact subclonotypes, provided that they have the same \ - number of chains, identical V..J sequences, identical C segment assignments, \ - and the same distance between the J stop and the C start (which is usually zero).\n\n\ - \ - \\boldred{3}. Finding the germline sequences. For datasets from a given donor, \ - enclone derives \"donor reference sequences\" for the V chains present in the donor's \ - genome. This is powerful, even though based on imperfect information. V segments \ - vary in their expression frequency and thus the more cells which are present, the \ - more complete the information will be. It is also not possible to accurately \ - determine the terminal bases in a V chain from transcript data alone because these \ - bases mutate during recombination and because of non-templated nucleotide addition.\n\n\ - \ - The idea for how this is done is roughly the following: for each V segment, we choose \ - one cell from each clonotype (although these have not actually been computed yet, so \ - it's an approximation). 
Next for each position on the V segment, excluding the last \ - 15 bases, we determine the distribution of bases that occur within these selected \ - cells. We only consider those positions where a non-reference base occurs at least \ - four times and is at least 25% of the total. Then each cell has a footprint relative \ - to these positions; we require that these footprints satisfy similar evidence \ - criteria. Each such non-reference footprint then defines an \"alternate allele\". \ - We do not restrict the number of alternate alleles because they may arise from \ - duplicated gene copies.\n\n\ - \ - A similar approach was attempted for J segments but at the time of testing did not \ - appear to enhance clonotyping specificity. This could be revisited later and might \ - be of interest even if it does not improve specificity.\n\n\ - \ - \\boldred{4}. What joins are tested. \ - Pairs of exact subclonotypes are considered for joining, as described below. This \ - process only considers exact subclonotypes have two or three chains. There is some \ - separate joining for the case of one chain. Exact subclonotypes having four chains \ - are not joined at present. These cases are clearly harder because these exact \ - subclonotypes are highly enriched for cell doublets, which we discard if we can \ - identify as such.\n\n\ - \ - \\boldred{5}. Initial grouping. \ - For each pair of exact subclonotypes, and for each pair of chains in each of the \ - two exact subclonotypes, for which V..J has the same length for the corresponding \ - chains, and the CDR3 segments have the same length for the corresponding chains, \ - enclone considers joining the exact subclonotypes into the same clonotype.\n\n\ - \ - \\boldred{6}. Error bounding. 
\ - To proceed, as a minimum requirement, there must be at most 50 total \ - mismatches between the two exact subclonotypes, within the given two V..J segments.\n\ - This can be changed by setting \\bold{MAX_DIFFS=n} on the command line.\n\n\ - \ - \\boldred{7}. Shared mutations. \ - enclone next finds shared mutations betweens exact subclonotypes, that is, for \ - two exact subclonotypes, common mutations from the reference sequence, using the \ - donor reference for the V segments and the universal reference for the J segments. \ - Shared mutations are supposed to be somatic hypermutations, that would be evidence \ - of common ancestry. By using the donor reference sequences, most shared germline \ - mutations are excluded, and this is critical for the algorithm's success.\n\n\ - \ - \\boldred{8}. Are there enough shared mutations? \ - We find the probability p that “the shared mutations occur by chance”. More \ - specifically, given d shared mutations, and k total mutations (across the two cells), \ - we compute the probability p that a sample with replacement of k items from a set \ - whose size is the total number of bases in the V..J segments, yields at most k – d \ - distinct elements. The probability is an approximation, for the method please see\n\ - \\green{https://docs.rs/stirling_numbers/0.1.0/stirling_numbers}.\n\n\ - \ - \\boldred{9}. Are there too many CDR3 mutations? \ - Next, let N be \"the number of DNA sequences that differ from the given CDR3 \ - sequences by at most the number of observed differences\". More specifically, if \ - cd is the number of differences between the given CDR3 nucleotide sequences, and n \ - is the total length in nucleotides of the CDR3 sequences (for the two chains), we \ - compute the total number N of strings of length n that are obtainable by perturbing \ - a given string of length n, which is\nsum( choose(n,m), m = 0..=cd) ). 
We also \ - require that cd is at most 10 (and this bound is adjustable via the command-line \ - argument \\bold{MAX_CDR3_DIFFS}).\n\n\ - \ - \\boldred{10}. Key join criteria. \ - Two cells sharing sufficiently many shared differences and sufficiently few \ - CDR3 differences are deemed to be in the same clonotype. That is, The lower p is, \ - and the lower N is, the more likely it is that the shared mutations represent bona \ - fide shared ancestry. Accordingly, the smaller p*N is, the more likely it is that \ - two cells lie in the same true clonotype. To join two cells into the same \ - clonotype, we require that the bound p*n ≤ C is satisfied, where C is the \ - constant 1,000,000 (and adjustable via the command-line argument\n\ - \\bold{MAX_LOG_SCORE}, the log10 of this, with default value 6). This constant was \ - arrived at by empirically balancing \ - sensitivity and specificity across a large collection of datasets. See discussion \ - of performance below.\n\n\ - \ - \\boldred{11}. Other join criteria. We do not join two clonotypes which were \ - assigned different reference sequences unless those reference sequences differ by \ - at most \\bold{3} positions. This value can be controlled using the \ - command-line argument \\bold{MAX_DEGRADATION}. There is an additional restriction \ - imposed when creating two-cell clonotypes: we require that that \ - cd ≤ d, where cd is the number of CDR3 differences and d is the number of shared \ - mutations, as above. This filter may be turned off \ - using the command-line argument \\bold{EASY}.\n\n\ - \ - \\boldred{12}. Junk. \ - Spurious chains are filtered out based on frequency and connections. See \ - \"enclone help special\" for a description of the filters.\n\n", - ); - - // Finish. Note that the numbers here also appear on the landing page. - - h.print( - "We are actively working to improve the algorithm. 
To test the performance of the \ - current version, we combined data from 443 BCR libraries from 30 donors, which yielded \ - \\boldred{9573} clonotypes having at least two cells each, of which \ - \\boldred{15 (0.16%)} contained data from multiple donors. These are errors.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide command line help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "command") || h.help_all { - h.begin_doc("command"); - h.print("\n"); - h.print("\\bold{information about enclone command-line argument processing}\n\n"); - h.print("\\bold{1. Order of processing}\n\n"); - h.print( - "• Before processing its command line, enclone first checks for environment\n\ - variables of the form \\bold{ENCLONE_<x>}. These are converted into command-line \ - arguments. You can set any command-line argument this way. The reason why you might \ - want to use this feature is if you find yourself using the same \ - command-line option over and over, and it is more convenient to set it once as \ - an environment variable.\n\ - • For example, setting the environment variable \\bold{ENCLONE_PRE} to \ - \\bold{/Users/me/enclone_data} \ - is equivalent to providing the command-line argument \ - \\bold{PRE=/Users/me/enclone_data}.\n\ - • After checking environment variables, arguments on the command line are read from \ - left to right; if an argument name is repeated, only the \ - rightmost value is used, except as noted specifically in the documentation.\n\n", - ); - h.print("\\bold{2. Color}\n\n"); - h.print_enclone(); - h.print( - " uses ANSI escape codes for color and bolding, frivolously, for emphasis, \ - and more\nimportantly for amino acids, to represent different codons. This is \ - done automatically but you can turn it off....\n\n\ - \\boldred{PLEASE READ THIS:}\n\n\ - You can turn off escape codes by adding \\bold{PLAIN} to any command. 
Use this if \ - you want to peruse output using a text editor which does not grok the escape \ - codes. However some things will not make sense without color.\n\n", - ); - h.print("\\bold{3. Paging}\n\n"); - h.print("• enclone automatically pipes its output to \\bold{less -R -F -X}.\n"); - h.print( - "• The effect of this will be that you'll see only the first screen of output. \ - You can then use the spacebar to go forward, b to go backward, and q to quit. \ - The \\bold{-R} option causes escape characters to be correctly displayed, the \ - \\bold{-F} option causes an automatic exit if output fits on a single screen, and \ - the \\bold{-X} option prevents output from being sent to the \"alternate screen\" \ - under certain platform/version combinations.\n", - ); - h.print("• Type \\bold{man less} if you need more information.\n"); - h.print( - "• If for whatever reason you need to turn off output paging, add the argument \ - \\bold{NOPAGER} to the enclone command.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide glossary help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "glossary") || h.help_all { - h.begin_doc("glossary"); - h.print("\n"); - - // intro - - h.print("\\bold{glossary of terms used by enclone}\n\n"); - - // doc V..J - - h.doc( - "V..J", - "the full sequence of a V(D)J transcript, from the beginning of the V", - ); - h.doc( - "", - "segment to the end of the J segment; this sequence begins with a stop codon", - ); - h.doc("", "and ends with a partial codon (its first base)"); - - // doc CDR3 - - h.doc( - "CDR3", - "The terms CDR3 and junction are commonly mistaken and often used", - ); - h.doc( - "", - "interchangeably. 
In enclone's nomenclature, \"CDR3\" actually refers to the", - ); - h.doc( - "", - "junction (the CDR3 loop plus the canonical C and W/F at the N and C termini", - ); - h.doc("", "respectively)."); - - // doc clonotype - - h.ldoc( - "clonotype", - "all the cells descended from a single fully rearranged T or B cell", - ); - h.doc("", "(approximated computationally)"); - - // doc exact subclonotype - - h.docpr( - "exact subclonotype", - "all cells having identical transcripts \\boldred{○}", - ); - h.doc("", "(every clonotype is a union of exact subclonotypes)"); - - // doc clone - - h.doc( - "clone", - "a cell in a clonotype, or in an exact subclonotype", - ); - - // doc onesie etc. - - h.ldoc( - "onesie", - "a clonotype or exact subclonotype having exactly one chain", - ); - h.doc( - "twosie", - "a clonotype or exact subclonotype having exactly two chains", - ); - h.doc( - "threesie", - "a clonotype or exact subclonotype having exactly three chains;", - ); - h.doc( - "", - "these frequently represent true biological events, arising from expression", - ); - h.doc("", "of both alleles"); - h.doc( - "foursie", - "a clonotype or exact subclonotype having exactly four chains;", - ); - h.doc("", "these very rarely represent true biological events"); - h.doc("moresie", "a clonotype having more than four chains;"); - h.doc( - "", - "these sad clonotypes do not represent true biological events", - ); - - // doc donor etc. - - h.ldoc( - "donor", - "an individual from whom datasets of an origin are obtained", - ); - h.doc( - "origin", - "a tube of cells from a donor, from a particular tissue at a", - ); - h.doc( - "", - "particular point in time, and possibly enriched for particular cells", - ); - h.doc( - "cell group", - "an aliquot from an origin, presumed to be a random draw", - ); - h.doc( - "dataset", - "all sequencing data obtained from a particular library type", - ); - h.doc( - "", - "(e.g. 
TCR or BCR or GEX or FB), from one cell group, processed by running", - ); - h.doc("", "through the Cell Ranger pipeline"); - - // print main table - - h.print_tab2(); - h.print("\n"); - - // print footnote - - h.print( - "\\boldred{○} The exact requirements for being in the same exact subclonotype are \ - that cells:\n\ - • have the same number of productive contigs identified\n\ - • that these have identical bases within V..J\n\ - • that they are assigned the same constant region reference sequences\n\ - • and that the difference between the V stop and the C start is the same\n \ - (noting that this difference is nearly always zero).\n\ - Note that we allow mutations within the 5'-UTR and constant regions.\n\n", - ); - - // conventions - - h.print("\\bold{conventions}\n\n"); - h.print( - "• When we refer to \"V segments\", we always include the leader segment.\n\ - • Zero or one? We number exact subclonotypes as 1, 2, ... and likewise with\n\ - chains within a clonotype, however DNA and amino-acid positions are numbered starting \ - at zero.\n\n", - ); - - // done - - h.end_doc(); - } -} diff --git a/enclone_help/src/help2.rs b/enclone_help/src/help2.rs deleted file mode 100644 index e209b622d..000000000 --- a/enclone_help/src/help2.rs +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Test for help request. - -use crate::help_utils::*; -use enclone_core::defs::*; -use enclone_core::testlist::*; -use string_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn help2(args: &Vec<String>, _ctl: &EncloneControl, h: &mut HelpDesk) { - // Set up. - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide example1 help. 
- - if (args.len() == 3 && args[1] == "help" && args[2] == "example1") || h.help_all { - h.begin_doc("example1"); - h.print("\nShown below is the output of the command:\n"); - h.print(&format!("\n\\bold{{enclone {}}}\n", EXAMPLES[0])); - if !h.plain { - h.print_plain(&format!("{}", include_str!("example1"))); - } else { - let s = include_str!("example1").as_bytes(); - let mut x = Vec::<u8>::new(); - let mut escaped = false; - for l in 0..s.len() { - if s[l] == b'' { - escaped = true; - } - if escaped { - if s[l] == b'm' { - escaped = false; - } - continue; - } - x.push(s[l]); - } - h.print_plain(&format!("{}", strme(&x))); - } - h.print( - "This shows an invocation of enclone that takes one dataset as input \ - and exhibits\nall clonotypes for which some chain has the given CDR3 sequence.\n\n\ - \ - What you see here is a compressed view of the entire information encoded in the\n\ - full length transcripts of the 13 cells comprising this clonotype: every base!\n\ - There is a lot to explain about the compression, so please read carefully.\n\n\ - \ - • Clonotypes are grouped. Here we see just one group having one clonotype in it.\n\ - • This clonotype has three exact subclonotypes in it, the first of which has 10 \ - cells.\n\ - • This clonotype has two chains. The reference segments for them are shown at \ - the top.\n\ - • The notation 181.1.1 says that this V reference sequence is an alternate allele\n \ - derived from the universal reference sequence (contig in the reference file)\n \ - numbered 181, that is from donor 1 (\"181.1\") and is alternate allele 1 for that \ - donor.\n\ - • Sometimes chains are missing from exact subclonotypes.\n\ - • Amino acids are assigned different colors depending on which codon they represent.\n\ - • Numbered columns show the state of particular amino acids, e.g. the first column \ - is for amino\n acid 20 in chain 1 (where 0 is the start codon). 
The numbers read \ - vertically, downward!\n\ - • Universal ref: state for the contig in the reference file.\n\ - • Donor ref: state for the inferred donor germline sequence.\n\ - • ◦s are \"holes\" in the recombined region where the reference doesn't make sense.\n\ - • The \"dot and x\" line has xs where there's a difference *within* the clonotype.\n\ - • Amino acids are shown if they differ from the universal reference or are in \ - the CDR3.\n\ - • u = median UMI count for a chain in the exact subclonotype.\n\ - • const = const region name for a chain in the exact subclonotype.\n\n", - ); - h.print( - "The view you see here is configurable: see the documentation at \ - \\bold{enclone help lvars} and \\bold{enclone help cvars}.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide example2 help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "example2") || h.help_all { - h.begin_doc("example2"); - h.print("\nShown below is the output of the command:\n"); - h.print(&format!("\n\\bold{{enclone {}}}\n", EXAMPLES[1])); - if !h.plain { - h.print_plain_unchecked(include_str!("example2")); - } else { - let s = include_str!("example2").as_bytes(); - let mut x = Vec::<u8>::new(); - let mut escaped = false; - for l in 0..s.len() { - if s[l] == b'' { - escaped = true; - } - if escaped { - if s[l] == b'm' { - escaped = false; - } - continue; - } - x.push(s[l]); - } - h.print_plain_unchecked(&format!("{}", strme(&x))); - } - h.print( - "This shows an invocation of enclone that takes VDJ, gene expression and feature \ - barcode data as input, and exhibits all clonotypes for which some chain has the \ - given CDR3 sequence. As well the command requests UMI (molecule) counts for one \ - hand-selected gene and one antibody. 
You can use any gene(s) you like and any \ - antibodies for which you have feature barcodes.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide input help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "input") || h.help_all { - h.begin_doc("input"); - h.print( - "\nenclone has \\boldred{two} mechanisms for specifying input datasets: either \ - directly on the command line or via a supplementary metadata file. Only one mechanism \ - may be used at a time.\n\n\ - In both cases, you will need to provide paths to directories where the outputs of \ - the Cell Ranger pipeline may be found. enclone uses only some of the pipeline \ - output files, so it is enough that those files are present in given directory, and \ - the particular files that are needed may be found by typing \ - \\bold{enclone help input_tech}.\n\n", - ); - h.print_with_box( - "If you use the argument \\bold{PRE=p} then \\bold{p/} will be prepended to all \ - pipeline paths. A comma-separated list is also allowed \\bold{PRE=p1,...,pn}, in \ - which case these directories are searched from left to right, until one works, and \ - if all fail, the path is used without prepending anything. Lastly, \ - (see \\bold{enclone help command}), you can avoid putting \ - \\bold{PRE} on the command line by setting the environment variable \ - \\bold{ENCLONE_PRE} to the desired value. The default value for \\bold{PRE} \ - is\n\\bold{~/enclone/datasets,~/enclone/datasets2}.", - true, - ); - h.print( - "Both input forms involve abbreviated names (discussed below), which should be as \ - short as possible, as longer abbreviations will increase the width of the clonotype \ - displays.\n\n", - ); - h.print_with_box( - "enclone can use gene expression and feature barcode data, as represented by a feature \ - matrix. 
Cell Ranger stores this matrix in an hdf5 file, which while generally very \ - efficient, is not optimized for interactive use. Therefore enclone provides an \ - alternate file structure, which speeds up enclone overall by up to \\boldred{50%}. \ - To use this, add the argument \\bold{NH5} to the enclone command line. This will \ - work so long as you have write permission on input directories. The first time you \ - run enclone (using given inputs), an alternate file feature_barcode_matrix.bin will \ - be written; then subsequent invocations will be faster. Once the file has been \ - created, it will always be used, regardless of whether \\bold{NH5} is used. \ - However, we may occasionally change the format of the alternate file. If do that, \ - then if you have previously generated the file, then it will be rewritten when \ - you invoke enclone for that dataset. \ - Like with other enclone command-line options, if you want \\bold{NH5} on all the \ - time, you can set the environment variable \\bold{ENCLONE_NH5}.", - true - ); - h.print( - "\\boldred{█ 1 █} To point directly at input files on the command line, use e.g.\n\ - \\bold{TCR=/home/jdoe/runs/dataset345}\n\ - or likewise for \\bold{BCR}. A more complicated syntax is allowed in which commas, \ - colons and semicolons act as delimiters. Commas go between datasets from the \ - same origin, colons between datasets from the same donor, and semicolons separate \ - donors. If semicolons are used, the value must be quoted.\n\n", - ); - h.print( - "enclone uses the distinction between datasets, origins and donors in the following \ - ways:\n\ - 1. If two datasets come from the same origin, then enclone can filter to remove \ - certain artifacts, unless you specify the option \\bold{NCROSS}.\n\ - See also illusory clonotype expansion page at \\green{bit.ly/enclone}.\n\ - 2. 
If two cells came from different donors, then enclone will not put them in the \ - same clonotype, unless you specify the option \\bold{MIX_DONORS}.\n\ - More information may be found at `enclone help special`. In addition, this is \ - enclone's way of keeping datasets organized and affects the output of fields like \ - origin, etc.\n\n", - ); - - h.print_with_box( - "\\bold{Naming.} Using this input system, each dataset is assigned an abbreviated \ - name, which is \ - everything after the final slash in the directory name (e.g. \\bold{dataset345} in the \ - above example), or the entire name if there is no slash; \ - origins and donors are assigned identifers s1,... and d1,..., respectively; \ - numbering of origins restarts with each new donor. \\bold{To specify origins}\n\ - \\bold{and donors, use the second input form, and see in particular} \ - \\green{abbr:path}\\bold{.}", - true, - ); - h.print( - "Examples:\n\ - \\bold{TCR=p1,p2} -- input data from two libraries from the same origin\n\ - \\bold{TCR=p1,p2:q} -- input data as above plus another from a different origin \ - from the same donor\n\ - \\bold{TCR=\"a;b\"} -- input one library from each of two donors.\n\n", - ); - h.print( - "Matching gene expression and/or feature barcode data may also be supplied using \ - an argument \\bold{GEX=...}, whose right side must have the exact same structure \ - as the \\bold{TCR} or \\bold{BCR} argument. Specification of both \ - \\bold{TCR} and \\bold{BCR} is not allowed.\n\n", - ); - h.print( - "In addition, barcode-level data may be specified using \\bold{BC=...}, whose right \ - side is a list of paths having the same structure as the \\bold{TCR} or \\bold{BCR} \ - argument. Each such path must be for a CSV file, which must include the field \ - \\bold{barcode}, may include special fields \\bold{origin}, \\bold{donor}, \ - \\bold{tag} and \\bold{color}, and may also include arbitrary other fields. 
The \ - \\bold{origin} and \\bold{donor} fields allow a particular origin and donor to be \ - associated to a given barcode. A use case for this is genetic demultiplexing. The \ - \\bold{tag} field is intended to be used with tag demultiplexing. The \\bold{color} \ - field is used by the \\bold{PLOT} option. All other fields are treated as lead \ - variables, but values are only displayed in \\bold{PER_CELL} mode, or for parseable \ - output using \\bold{PCELL}. These fields should not include existing lead variable \ - names. Use of \\bold{BC} automatically turns on the \\bold{MIX_DONORS} option.\n\n", - ); - h.print("\\boldred{█ 2 █} To specify a metadata file, use the command line argument\n"); - h.print("\\bold{META=filename}\n"); - h.print( - "This file should be a CSV (comma-separated values) file, with one line per cell \ - group. After the first line, lines starting with # are ignored. There must be a \ - field tcr or bcr, and some other fields are allowed:\n", - ); - h.doc3("\\bold{field}", "\\bold{default}", "\\bold{meaning}"); - h.ldoc3( - "tcr", - "(required!)", - "path to dataset, or abbr:path, where abbr is an abbreviated", - ); - h.doc3( - "or bcr", - "", - "name for the dataset; exactly one of tcr or bcr must be used", - ); - h.ldoc3( - "gex", - "null", - "path to GEX dataset, which may include or consist entirely", - ); - h.doc3("", "", "of FB data"); - h.ldoc3("origin", "s1", "abbreviated name of origin"); - - h.ldoc3("donor", "d1", "abbreviated name of donor"); - h.ldoc3pr( - "color", - "null", - "color to associate to this dataset (for \\bold{PLOT} option)", - ); - h.ldoc3pr("bc", "null", "name of CSV file as in the \\bold{BC} option"); - h.print_tab3(); - h.print("\n"); - h.end_doc(); - } -} diff --git a/enclone_help/src/help3.rs b/enclone_help/src/help3.rs deleted file mode 100644 index 511a3f359..000000000 --- a/enclone_help/src/help3.rs +++ /dev/null @@ -1,515 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. 
-// -// Test for help request, under development. - -use crate::help_utils::*; -use tables::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn help3(args: &Vec<String>, h: &mut HelpDesk) { - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide input_tech help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "input_tech") || h.help_all { - h.begin_doc("input_tech"); - h.print("\n\\bold{information about providing input to enclone (technical notes)}\n\n"); - h.print( - "enclone only uses certain files, which are all in the outs subdirectory of \ - a Cell Ranger pipeline directory:\n\n", - ); - h.docpr("\\bold{file}", "\\bold{pipeline}"); - h.ldoc("all_contig_annotations.json", "VDJ"); - h.ldoc("vdj_reference/fasta/regions.fa", "VDJ"); - h.ldoc("metrics_summary.csv", "GEX"); - h.ldoc("raw_feature_bc_matrix.h5", "GEX"); - h.ldoc("analysis/clustering/graphclust/clusters.csv", "GEX"); - h.ldoc("analysis/pca/10_components/projection.csv", "GEX"); - h.print_tab2(); - h.print( - "\nThe first file is required, and the second should be supplied if Cell Ranger \ - version 4.0 or greater was used. The others are required, in the indicated \ - structure, if GEX or META/gex arguments are provided. The exact files \ - that are used could be changed in the future.\n\n", - ); - h.print( - "Note that the VDJ outs directories must be from Cell Ranger version \ - \\boldred{≥ 3.1}. There \ - is a workaround for earlier versions (which you will be informed of if you try), but \ - it is much slower and the results may not be as good.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide parseable output help. 
- - if (args.len() == 3 && args[1] == "help" && args[2] == "parseable") || h.help_all { - h.begin_doc("parseable"); - h.print("\n"); - h.print("\\bold{parseable output}\n"); - h.print( - "\nThe standard output of enclone is designed to be read by humans, but is not \ - readily parseable by computers. We supplement this with parseable output that can \ - be easily read by computers.\n\n\ - \ - The default behavior for this is to generate a CSV file having \"every possible\" \ - field (over a hundred). We also provide an option to print only selected fields, \ - and some options which enable inspection, short of generating a separate CSV file.\n\n\ - \ - Parseable output is targeted primarily at R and Python users, because of the ease of \ - wrangling CSV files with these languages.\n\n", - ); - h.print_with_box( - "Parseable output is invoked by using the argument\n\ - \\bold{POUT=filename}\n\ - specifying the name of the file that is to be written to.\n\ - \ -   The filename \"stdout\" may be used for a preview; in that case \ - parseable output is generated\n\ -   separately for each clonotype and the two output types \ - are integrated. There is also\n\ -   \"stdouth\", which is similar, but uses spaces instead \ - of commas, and lines things up in columns.\n\ - \ - By default, we show four chains for each clonotype, regardless of how many chains it\n\ - has, filling in with null entries. One may instead specify n chains using the \ - argument\n\ - \\bold{PCHAINS=n}\n\ - The parseable output fields may be specified using\n\ - \\bold{PCOLS=x1,...,xn}\n\ - where each xi is one of the field names shown below.\n\\boldred{This option reduces} \ - \\boldred{run time and memory usage, and prevents voluminous output. 
Please use it!}", - true, - ); - h.print( - "Over time additional fields may be added and the order of fields may \ - change.\n\n", - ); - h.print( - "There is an alternate parseable output mode in which one line is emitted for each \ - cell, rather then each exact subclonotype. This mode is enabled by adding the \ - argument \\bold{PCELL} to the command line. Each exact subclonotype then yields a \ - sequence of output lines that are identical except as noted below.\n\n", - ); - h.print( - "If you want to completely suppress the generation of visual clonotypes, add \ - \\bold{NOPRINT} to the enclone command line.\n\n", - ); - h.print_with_box( - "\\bold{FASTA output.} This is a separate feature. \ - To generate nucleotide FASTA output for each chain in each exact subclonotype, \ - use the argument \\bold{FASTA=filename}. The special case \\bold{stdout} will \ - cause the FASTA records to be shown as part of standard output. The FASTA records \ - that are generated are of the form V(D)JC, where V is the full V segment (including \ - the leader) and C is the full constant region, copied verbatim from the reference. \ - If a particular chain in a particular exact subclonotype is not assigned a constant \ - region, then we use the constant region that was assigned to the clonotype. If no \ - constant region at all was assigned, then the FASTA record is omitted. \ - Similarly, \\bold{FASTA_AA=filename} may be used to generate a matching amino acid \ - FASTA file.", - true, - ); - h.print( - "\\boldred{───────────────────────}\n\ - \\boldred{parseable output fields}\n\ - \\boldred{───────────────────────}\n\n", - ); - h.rows.clear(); - h.print("\\bold{1. per clonotype group fields}\n\n"); - h.doc("group_id", "identifier of clonotype group - 0,1, ..."); - h.ldoc("group_ncells", "total number of cells in the group"); - h.print_tab2(); - h.print("\n"); - - h.rows.clear(); - h.print("\\bold{2. 
per clonotype fields}\n\n"); - h.doc( - "clonotype_id", - "identifier of clonotype within the clonotype group = 0, 1, ...", - ); - h.ldoc("clonotype_ncells", "total number of cells in the clonotype"); - h.ldoc("nchains", "total number of chains in the clonotype"); - h.print_tab2(); - h.print("\n"); - - h.rows.clear(); - h.print( - "\\bold{3. per chain fields, where <i> is 1,2,... (see above)\n\ - each of these has the same value for each exact clonotype}\n\n", - ); - h.doc("v_name<i>", "name of V segment"); - h.doc("d_name<i>", "name of D segment (or null)"); - h.doc("j_name<i>", "name of J segment"); - h.ldoc("v_id<i>", "id of V segment"); - h.doc("d_id<i>", "id of D segment (or null)"); - h.doc("j_id<i>", "id of J segment"); - h.ldoc( - "var_indices_dna<i>", - "DNA positions in chain that vary across the clonotype", - ); - h.doc( - "var_indices_aa<i>", - "amino acid positions in chain that vary across the clonotype", - ); - h.doc( - "share_indices_dna<i>", - "DNA positions in chain that are constant across the \ - clonotype,", - ); - h.doc("", "but differ from the donor ref"); - h.doc( - "share_indices_aa<i>", - "amino acid positions in chain that are constant across the \ - clonotype,", - ); - h.doc("", "all of these are comma-separated lists"); - h.doc("", "but differ from the donor ref"); - h.print_tab2(); - h.print("\n"); - - h.rows.clear(); - h.print("\\bold{4. 
per exact subclonotype fields}\n\n"); - h.doc( - "exact_subclonotype_id", - "identifer of exact subclonotype = 1, 2, ...", - ); - h.ldoc( - "barcodes", - "comma-separated list of barcodes for the exact subclonotype", - ); - h.doc( - "<dataset>_barcodes", - "like \"barcodes\", but restricted to the dataset with the given name", - ); - h.doc("barcode", "if PCELL is specified, barcode for one cell"); - h.doc( - "<dataset>_barcode", - "if PCELL is specified, barcode for one cell, or null, if the barcode is", - ); - h.doc("", "not from the given dataset"); - h.ldoc( - "In addition, every lead variable may be specified as a field. \ - See \"enclone help lvars\".", - "\\ext", - ); - h.print_tab2(); - h.print("\n"); - - h.rows.clear(); - h.print( - "\\bold{5. per chain, per exact subclonotype fields, where <i> is 1,2,... \ - (see above)}\n\n", - ); - h.print("[all apply to chain i of a particular exact clonotype]\n\n"); - h.doc("vj_seq<i>", "DNA sequence of V..J"); - h.doc("seq<i>", "full DNA sequence"); - h.doc( - "q<n>_<i>", - "special option to display a comma-separated list of the quality", - ); - h.doc( - "", - "scores for chain i, at zero-based position n, numbered starting at the", - ); - h.doc( - "", - "beginning of the V segment, for each cell in the exact subclonotype", - ); - h.ldoc("v_start<i>", "start of V segment on full DNA sequence"); - h.ldoc( - "const_id<i>", - "numerical identifier of constant region (or null, if not known)", - ); - h.ldoc( - "utr_id<i>", - "numerical identifier of 5'-UTR region (or null, if not known)", - ); - h.doc( - "utr_name<i>", - "name of 5'-UTR region (or null, if not known)", - ); - h.ldoc( - "cdr3_start<i>", - "base position start of CDR3 sequence on full contig", - ); - h.doc("cdr3_aa<i>", "amino acid sequence of CDR3"); - h.ldoc( - "var_aa<i>", - "amino acids that vary across the clonotype (synonymous changes included)", - ); - h.ldoc( - "In addition, every chain variable, after suffixing by <i>, may be used as a field.", - 
"\\ext", - ); - h.doc("See \"enclone help cvars\".", "\\ext"); - h.print_tab2(); - h.print("\n"); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide filter help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "filter") || h.help_all { - h.begin_doc("filter"); - - // intro - - h.print("\n\\bold{clonotype filtering options}\n\n"); - h.print( - "enclone provides filtering by cell, by exact subclonotype, and by clonotype. This \ - page describes filtering by clonotype. \ - These options cause only certain clonotypes to be printed. See also \ - \"enclone help special\", which describes other filtering options. This page \ - also described scanning for feature enrichment.\n\n", - ); - - // doc *CELLS - - h.doc( - "MIN_CELLS=n", - "only show clonotypes having at least n cells", - ); - h.doc("MAX_CELLS=n", "only show clonotypes having at most n cells"); - h.doc("CELLS=n", "only show clonotypes having exactly n cells"); - - // doc MIN_UMIS - - h.ldoc( - "MIN_UMIS=n", - "only show clonotypes having ≳ n UMIs on some chain on some cell", - ); - - // doc *CHAINS - - h.ldoc( - "MIN_CHAINS=n", - "only show clonotypes having at least n chains", - ); - h.doc( - "MAX_CHAINS=n", - "only show clonotypes having at most n chains", - ); - h.doc("CHAINS=n", "only show clonotypes having exactly n chains"); - - // doc CDR3 - - h.ldoc( - "CDR3=<pattern>", - "only show clonotypes having a CDR3 amino acid seq that matches", - ); - h.doc( - "", - "the given pattern (regular expression)*, from beginning to end", - ); - - // doc SEG and SEGN - - h.ldoc( - "SEG=\"s_1|...|s_n\"", - "only show clonotypes using one of the given reference segment names", - ); - h.doc( - "SEGN=\"s_1|...|s_n\"", - "only show clonotypes using one of the given reference segment numbers", - ); - h.doc( - "", - "both: looks for V, D, J and C segments; double quote only", - ); - h.doc("", "needed if n > 1"); - h.doc( - "", - "For 
both SEG and SEGN, multiple instances are allowed, and their", - ); - h.doc("", "effects are cumulative."); - - // doc MIN_EXACTS - - h.ldoc( - "MIN_EXACTS=n", - "only show clonotypes having at least n exact subclonotypes", - ); - - // doc VJ - - h.ldoc( - "VJ=seq", - "only show clonotypes using exactly the given V..J sequence", - ); - h.doc("", "(string in alphabet ACGT)"); - - // doc MIN_DATASETS and MAX_DATASETS and MIN_DATASET_RATIO - - h.ldoc( - "MIN_DATASETS=n", - "only show clonotypes containing cells from at least n datasets", - ); - h.doc( - "MAX_DATASETS=n", - "only show clonotypes containing cells from at most n datasets", - ); - h.doc( - "MIN_DATASET_RATIO=n", - "only show clonotypes having at least n cells and for which the ratio", - ); - h.doc( - "", - "of the number of cells in the must abundant dataset to the next most", - ); - h.doc("", "abundant one is at least n"); - - // doc CDIFF - - h.ldoc( - "CDIFF", - "only show clonotypes having a difference in constant region with the", - ); - h.doc("", "universal reference"); - - // doc DEL - - h.ldoc("DEL", "only show clonotypes exhibiting a deletion"); - - // doc BARCODE - - h.ldoc( - "BARCODE=bc1,...,bcn", - "only show clonotypes that use one of the given barcodes", - ); - - // doc INKT and MAIT - - h.ldoc( - "INKT", - "only show clonotypes for which some exact subclonotype is annotated as", - ); - h.docpr( - "", - "having some iNKT evidence, see \\green{bit.ly/enclone} for details", - ); - h.ldoc( - "MAIT", - "only show clonotypes for which some exact subclonotype is annotated as", - ); - h.docpr( - "", - "having some MAIT evidence, see \\green{bit.ly/enclone} for details", - ); - - // print main table - - h.print_tab2(); - h.print("\n"); - - // footnote for CDR3 - - h.print("* Examples of how to specify CDR3:\n\n"); - let mut rows = Vec::<Vec<String>>::new(); - rows.push(vec![ - "CDR3=CARPKSDYIIDAFDIW".to_string(), - "have exactly this sequence as a CDR3".to_string(), - ]); - rows.push(vec![ - 
"CDR3=\"CARPKSDYIIDAFDIW|CQVWDSSSDHPYVF\"".to_string(), - "have at least one of these sequences as a CDR3".to_string(), - ]); - rows.push(vec![ - "CDR3=\".*DYIID.*\"".to_string(), - "have a CDR3 that contains DYIID inside it".to_string(), - ]); - let mut log = String::new(); - print_tabular_vbox(&mut log, &rows, 2, &b"l|l".to_vec(), false, false); - h.print(&format!("{}\n", log)); - h.print( - "Note that double quotes should be used if the pattern \ - contains characters other than letters.\n\n", - ); - h.print( - "A gentle introduction to regular expressions may be found at\n\ - https://en.wikipedia.org/wiki/Regular_expression#Basic_concepts, and a precise\n\ - specification for the regular expression version used by enclone may be found at\n\ - https://docs.rs/regex.\n\n", - ); - - // linear conditions - - h.print( - "\\bold{linear conditions}\n\n\ - enclone understands linear conditions of the form\n\ - \\bold{c1*v1 ± ... ± cn*vn > d}\n\ - where each ci is a constant, \"ci*\" may be omitted, each vi is a variable, \ - and d is a constant. Blank spaces are ignored. The > sign may be replaced by \ - >= or ≥ or < or <= or ≤. \ - Each vi is a lead variable (see \"\\bold{enclone help lvars}\") that \ - represents a \ - origin/donor/tag count or gene/feature barcode UMI count. In evaluating the \ - condition, each vi is \ - replaced by the \\bold{mean} of its values across all cells in the clonotype. \ - Because the minus sign - doubles as a hyphen and is used in some feature names, we \ - allow parentheses around variable names to prevent erroneous parsing, like this \ - \\bold{(IGHV3-7_g) >= 1}.\n\n", - ); - - // bounds - - h.print( - "\\bold{filtering by linear conditions}\n\n\ - enclone has the capability to filter by bounding certain lead variables, using \ - the command-line argument:\n\ - \\bold{F=\"L\"}\n\ - where L is a linear condition (as defined above). 
Currently this is limited to \ - the case where the lead variables have been selected using \\bold{LVARS} or \ - \\bold{LVARSP}! Multiple bounds may be imposed by using\n\ - multiple instances of \\bold{F=...} .\n\n", - ); - - // feature scanning - - h.print( - "\\bold{feature scanning}\n\n\ - If gene expression and/or feature barcode data have been generated, \ - enclone can scan all features to find those that are enriched \ - in certain clonotypes relative to certain other clonotypes. This feature is turned \ - on using the command line argument\n\ - \\bold{SCAN=\"test,control,threshold\"}\n\ - where each of \\bold{test}, \\bold{control} and \\bold{threshold} are linear \ - conditions as defined above. Blank spaces are ignored. The \\bold{test} condition \ - defines the \"test clonotypes\" and the \\bold{control} condition defines the \ - \"control clonotypes\". Currently, the lead variables in \\bold{test} and \ - \\bold{control} must be specified by\n\ - \\bold{LVARS} or \\bold{LVARSP}! \ - The \\bold{threshold} condition is special: it may use \ - only the variables \"t\" and \"c\" that represent the raw UMI count for \ - a particular gene or feature, for the test (t) or control (c) clonotypes. \ - To get a meaningful result, you should specify \\bold{MIN_CELLS} appropriately \ - and manually examine the test and control clonotypes to make sure that they make \ - sense.\n\n\ - \ - \\bold{an example}\n\nSuppose that your data are comprised of two origins with datasets - named pre and post, representing time points relative to some event. 
Then\n\ - \\bold{SCAN=\"n_post - 10*n_pre >= 0, n_pre - 0.5*n_post >= 0, t - 2*c >= 0.1\"}\n\ - would define the test clonotypes to be those satisfying \ - n_post >= 10*n_pre (so having far more post cells then pre cells), \ - the control clonotypes to be those satisfying n_pre >= 0.5*n_post (so having lots of \ - pre cells), and thresholding on t >= 2*c * 0.1, so that the feature must \ - have a bit more than twice as many UMIs in the test than the control. The 0.1 \ - is there to exclude noise from features having very low UMI counts.\n\n\ - \ - Feature scanning is not a proper statistical test. It is a tool for generating a list \ - of feature candidates that may then be examined in more detail by rerunning \ - enclone using some of the detected features as lead variables (appropriately \ - suffixed). Ultimately the power of the scan is determined by having \"enough\" \ - cells in both the test and control sets, and in having those sets cleanly defined.\n\n\ - Currently feature scanning requires that each dataset have identical features.\n\n", - ); - - // done - - h.end_doc(); - } -} diff --git a/enclone_help/src/help4.rs b/enclone_help/src/help4.rs deleted file mode 100644 index 4ab6529e7..000000000 --- a/enclone_help/src/help4.rs +++ /dev/null @@ -1,987 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. -// -// Test for help request, under development. - -use crate::help_utils::*; -use tables::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn help4(args: &Vec<String>, mut h: &mut HelpDesk) { - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide special filtering help. 
- - if (args.len() == 3 && args[1] == "help" && args[2] == "special") || h.help_all { - h.begin_doc("special"); - h.print("\n\\bold{special filtering options}\n\n"); - h.print( - "This page documents some options, most of which allow noise \ - filters to be turned off, and which normally should not be invoked. The last \ - two options can be used to simplify the view of a clonotype.\n\n", - ); - h.doc( - "NALL", - "Turn off all the noise filters shown below. This may yield quite a mess.", - ); - h.ldoc( - "NCELL", - "Use contigs found by Cell Ranger even if they were not in a called cell, ", - ); - h.doc("", "or not called high confidence."); - h.doc( - "NALL_CELL", - "turn off all the noise filters except for the cell filter", - ); - h.ldoc( - "NGEX", - "If gene expression and/or feature barcode data are provided, if a barcode", - ); - h.doc( - "", - "is called a cell by the VDJ part of the Cell Ranger pipeline, but not", - ); - h.doc( - "", - "called a cell by the gene expression and/or feature barcode part, then the", - ); - h.doc( - "", - "default behavior of enclone is to remove such cells from clonotypes. This", - ); - h.doc("", "option disables that behavior."); - h.ldoc( - "NCROSS", - "If you specify that two or more libraries arose from the same origin (i.e.", - ); - h.doc( - "", - "cells from the same tube or tissue), then by default enclone will", - ); - h.doc( - "", - "\"cross filter\" so as to remove expanded exact subclonotypes that are", - ); - h.doc( - "", - "present in one library but not another, in a fashion that would be highly", - ); - h.doc( - "", - "improbable, assuming random draws of cells from the tube. These are", - ); - h.doc( - "", - "believed to arise when a plasma or plasmablast cell breaks up during during", - ); - h.doc( - "", - "or after pipetting from the tube, and the resulting fragments seed GEMs,", - ); - h.doc( - "", - "yielding expanded 'fake' clonotypes that are residues of real single plasma", - ); - h.doc( - "", - "cells. 
The NCROSS options turns off this filter, which could be useful so", - ); - h.doc( - "", - "long as you interpret the restored clonotypes as representing what are", - ); - h.doc( - "", - "probably single cells. There may also be other situations where the filter", - ); - h.doc( - "", - "should be turned off, and in particular the filter can do weird things if", - ); - h.doc( - "", - "inputs are somehow mis-specified to enclone. Note that for purposes of", - ); - h.doc("", "this option, enclone defines an origin by the pair"); - h.doc("", "(origin name, donor name)."); - h.ldoc( - "NUMI", - "Filter out B cells based on low BCR UMI counts. The heuristics for this", - ); - h.docpr( - "", - "are described on the enclone site at \\green{bit.ly/enclone}.", - ); - h.doc( - "NUMI_RATIO", - "Filter out B cells based on low BCR UMI counts relative to another cell", - ); - h.doc("", "in a given clonotype. The heuristics for this"); - h.docpr( - "", - "are described on the enclone site at \\green{bit.ly/enclone}.", - ); - h.ldoc( - "NGRAPH_FILTER", - "By default, enclone filters to remove exact subclonotypes that by virtue of", - ); - h.doc( - "", - "their relationship to other exact subclonotypes, appear to arise from", - ); - h.doc( - "", - "background mRNA or a phenotypically similar phenomenon. The", - ); - h.doc("", "NGRAPH_FILTER option turns off this filtering."); - h.ldoc( - "NQUAL", - "By default, enclone filters out exact subclonotypes having a base in V..J", - ); - h.doc( - "", - "that looks like it might be wrong. More specifically, enclone finds bases", - ); - h.doc( - "", - "which are not Q60 for a barcode, not Q40 for two barcodes, are not", - ); - h.doc( - "", - "supported by other exact subclonotypes, are variant within the clonotype,", - ); - h.doc( - "", - "and which disagree with the donor reference. 
NQUAL turns this off.", - ); - h.ldoc( - "NWEAK_CHAINS", - "By default, enclone filters chains from clonotypes that are", - ); - h.doc( - "", - "weak and appear to be artifacts, perhaps arising from a stray mRNA molecule", - ); - h.doc( - "", - "that floated into a GEM. The NWEAK_CHAINS option turns off this filter.", - ); - - h.ldoc( - "NWEAK_ONESIES", - "By default, enclone filters out onesie clonotypes having a single exact", - ); - h.doc( - "", - "subclonotype, and that are light chain or TRA, and whose number of cells is", - ); - h.doc( - "", - "greater than one but less than 0.1% of the total number of cells.", - ); - h.doc( - "", - "This filter reduces the likelihood of creating clonotypes containing cells", - ); - h.doc("", "that arose from different recombination events."); - h.doc("", "NWEAK_ONESIES turns this filter off."); - - h.ldoc( - "NFOURSIE_KILL", - "By default, if enclone finds a foursie exact subclonotype that", - ); - h.doc( - "", - "contains a twosie exact subclonotype having at least ten cells, it kills", - ); - h.doc( - "", - "the foursie exact subclonotype, no matter how many cells it has. The", - ); - h.doc( - "", - "foursies that are killed are believed to be rare oddball artifacts arising", - ); - h.doc( - "", - "from repeated cell doublets or GEMs that contain two cells and multiple gel", - ); - h.doc( - "", - "beads. The argument NFOURSIE_KILL turns off this filtering.", - ); - h.ldoc( - "NWHITEF", - "By default, enclone filters out rare artifacts arising from \ - contamination", - ); - h.doc( - "", - "of oligos on gel beads. The NWHITEF option turns off this filter.", - ); - h.ldoc( - "NBC_DUP", - "By default, enclone filters out duplicated barcodes within an exact", - ); - h.doc( - "", - "subclonotype. The NBC_DUP option turns off this filter.", - ); - h.ldoc( - "MIX_DONORS", - "By default, enclone will prevent cells from different donors from being", - ); - h.doc( - "", - "placed in the same clonotype. 
The MIX_DONORS option turns off this", - ); - h.doc( - "", - "behavior, thus allowing cells from different donors to be placed in the", - ); - h.doc( - "", - "same clonotype. The main use of this option is for specificity testing, in", - ); - - h.doc( - "", - "which data from different donors are deliberately combined in an attempt", - ); - h.doc( - "", - "to find errors. Use of the bc field for META input specification", - ); - h.doc("", "automatically turns on this option."); - h.ldoc( - "KEEP_IMPROPER", - "An exact subclonotype is improper if it does not have one chain", - ); - h.doc( - "", - "of each type. This option causes all improper exact subclonotypes to be", - ); - h.doc( - "", - "retained, although they may be removed by other filters.", - ); - h.ldoc( - "MIN_CHAINS_EXACT=n", - "Delete any exact subclonotype having less than n chains. You can use this", - ); - h.doc( - "", - "to \"purify\" a clonotype so as to display only exact subclonotypes having", - ); - h.doc("", "all their chains."); - h.doc( - "CHAINS_EXACT=n", - "Delete any exact subclonotype not having exactly n chains.", - ); - h.doc( - "MIN_CELLS_EXACT=n", - "Delete any exact subclonotype having less than n cells. 
You might want", - ); - h.doc( - "", - "to use this if you have a very large and complex expanded clonotype,", - ); - h.doc( - "COMPLETE", - "delete any exact subclonotype that has less chains than the clonotype", - ); - h.doc("", "for which you would like to see a simplified view."); - h.ldoc( - "FCELL=var=value", - "Supposing that var has been specified as a field using the BC option", - ); - h.doc( - "", - "(or equivalently, using bc, via META), see \"enclone help input\", this", - ); - h.doc( - "", - "option filters out all barcodes that do not satisfy the given constraint.", - ); - h.doc( - "", - "Note that for purposes of testing the constraint, if the value for a", - ); - h.doc( - "", - "particular barcode has not been specified via BC or bc, then its value is", - ); - h.doc( - "", - "taken to be null. Also multiple instances of FCELL may be used to impose", - ); - h.doc("", "multiple filters."); - h.print_tab2(); - h.print("\n"); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Function that provides an explanation used for both enclone help lvars and - // enclone help cvars. 
- - fn explain_alt_versions(h: &mut HelpDesk) { - h.print(&format!( - "{}", - gray_left_bar(&print_to( - "\\red{●} These variables have some alternate versions, \ - as shown in the table below:\n\n" - )) - )); - let mut rows = Vec::<Vec<String>>::new(); - let row = vec![ - "variable".to_string(), - "semantics".to_string(), - "visual".to_string(), - "visual".to_string(), - "parseable".to_string(), - "parseable".to_string(), - ]; - rows.push(row); - let row = vec![ - "".to_string(), - "".to_string(), - "".to_string(), - "(one cell)".to_string(), - "".to_string(), - "(one cell)".to_string(), - ]; - rows.push(row); - let row = vec!["\\hline".to_string(); 6]; - rows.push(row); - let row = vec![ - "x".to_string(), - "median over cells".to_string(), - "yes".to_string(), - "this cell".to_string(), - "yes".to_string(), - "yes".to_string(), - ]; - rows.push(row); - let row = vec![ - "x_mean".to_string(), - "mean over cells".to_string(), - "yes".to_string(), - "null".to_string(), - "yes".to_string(), - "yes".to_string(), - ]; - rows.push(row); - let row = vec![ - "x_μ".to_string(), - "(same as above)".to_string(), - "yes".to_string(), - "null".to_string(), - "yes".to_string(), - "yes".to_string(), - ]; - rows.push(row); - let row = vec![ - "x_sum".to_string(), - "sum over cells".to_string(), - "yes".to_string(), - "null".to_string(), - "yes".to_string(), - "yes".to_string(), - ]; - rows.push(row); - let row = vec![ - "x_Σ".to_string(), - "(same as above)".to_string(), - "yes".to_string(), - "null".to_string(), - "yes".to_string(), - "yes".to_string(), - ]; - rows.push(row); - let row = vec![ - "x_min".to_string(), - "min over cells".to_string(), - "yes".to_string(), - "null".to_string(), - "yes".to_string(), - "yes".to_string(), - ]; - rows.push(row); - let row = vec![ - "x_max".to_string(), - "max over cells".to_string(), - "yes".to_string(), - "null".to_string(), - "yes".to_string(), - "yes".to_string(), - ]; - rows.push(row); - let row = vec![ - "x_%".to_string(), - "% of 
total GEX (genes only)".to_string(), - "yes".to_string(), - "this cell".to_string(), - "yes".to_string(), - "yes".to_string(), - ]; - rows.push(row); - let row = vec![ - "x_cell".to_string(), - "this cell".to_string(), - "no".to_string(), - "no".to_string(), - "no".to_string(), - "this cell".to_string(), - ]; - rows.push(row); - let mut log = String::new(); - print_tabular_vbox(&mut log, &rows, 2, &b"l|l|l|l|l|l".to_vec(), false, false); - h.print_plain(&format!("{}", gray_left_bar(&log))); - h.print_plain(&format!( - "{}", - gray_left_bar(&print_to( - "Some explanation is required. If you use enclone without certain options, you \ - get the \"visual\" column.\n\ - • Add the option \\bold{PER_CELL} \ - (see \"enclone help display\") and then you get visual output with extra lines for \ - each cell within an exact subclonotype, and each of those extra lines is described by \ - the \"visual (one cell)\" column.\n\ - • If you generate parseable output (see \"enclone help parseable\"), then you get \ - the \"parseable\" column for that output, unless you specify \\bold{PCELL}, \ - and then you get the last column.\n\ - • For the forms with μ and Σ, the Greek letters are only used in column headings for \ - visual output (to save space), and optionally, in names of fields on the command \ - line.\n\ - \\green{▶} If you try out these features, you'll see exactly what happens! \ - \\green{◀}\n" - )) - )); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide lvars help. 
- - if (args.len() == 3 && args[1] == "help" && args[2] == "lvars") || h.help_all { - h.begin_doc("lvars"); - h.print("\n\\bold{lead column options}\n\n"); - h.print( - "These options define lead variables, which correspond to columns that \ - appear once in each clonotype, on the left side, and have one entry for each \ - exact subclonotype row.\n\n", - ); - h.print( - "Lead variables are specified using \\bold{LVARS=x1,...,xn} \ - where each xi is one of:\n\n", - ); - h.doc("datasets", "dataset identifiers"); - h.doc("origin", "origin identifiers"); - h.doc("donors", "donor identifiers"); - h.ldoc("n", "number of cells"); - h.doc( - "n_<name>", - "number of cells associated to the given name, which can be a dataset", - ); - h.doc( - "", - "or origin or donor or tag short name; may name only one such category", - ); - h.ldoc( - "nd<k>", - "For k a positive integer, this creates k+1 fields, that are specific to each", - ); - h.doc( - "", - "clonotype. The first field is n_<d1>, where d1 is the name of the dataset", - ); - h.doc( - "", - "having the most cells in the clonotype. If k ≥ 2, then you'll get a", - ); - h.doc( - "", - "\"runner-up\" field n_<d2>, etc. Finally you get a field n_other, however", - ); - h.doc("", "fields will be elided if they represent no cells."); - h.ldoc( - "near", - "Hamming distance of V..J DNA sequence to nearest neighbor", - ); - h.doc( - "far", - "Hamming distance of V..J DNA sequence to farthest neighbor", - ); - h.doc( - "", - "both compare to cells having chains in the same columns of the clonotype,", - ); - h.doc( - "", - "with - shown if there is no other exact subclonotype to compare to", - ); - h.doc( - "dref", - "Hamming distance of V..J DNA sequence to donor reference, excluding", - ); - h.doc("", "region of recombination"); - h.ldoc( - "inkt", - "A string showing the extent to which the T cells in an exact subclonotype", - ); - h.doc( - "", - "have evidence for being an iNKT cell. 
The most evidence is denoted 𝝰gj𝝱gj,", - ); - h.doc( - "", - "representing both gene name and junction sequence (CDR3) requirements for", - ); - h.docpr( - "", - "both chains. See \\green{bit.ly/enclone} for details on the requirements.", - ); - h.doc("mait", "Same as with inkt but for MAIT cells instead."); - h.ldoc( - "g<d>", - "Here d is a nonnegative integer. Then all the exact subclonotypes are", - ); - h.doc( - "", - "grouped according to the Hamming distance of their V..J sequences. Those", - ); - h.doc( - "", - "within distance d are defined to be in the same group, and this is", - ); - h.doc( - "", - "extended transitively. The group identifier 1, 2, ... is shown. The", - ); - h.doc( - "", - "ordering of these identifiers is arbitrary. This option is best applied", - ); - h.doc( - "", - "to cases where all exact subclonotypes have a complete set of chains.", - ); - h.ldocpr("gex", "\\red{●} median gene expression UMI count"); - h.docpr("n_gex", "\\blue{●} number of cells reported by GEX"); - // nonpublic for now as we don't know if this is useful - /* - h.doc( - "entropy", - "Shannon entropy of GEX UMI counts (median across cells)" - ); - */ - h.ldocpr( - "<gene>_g", - "\\red{●} all five feature types: look for a declared feature of the \ - given type", - ); - h.doc( - "<antibody>_ab", - "with the given id or name; report the median UMI count for it; we allow", - ); - h.doc( - "<antigen>_ag", - "the form e.g. <abbr>:<gene>_g where abbr is an abbreviation to be shown;", - ); - h.doc( - "<crispr>_cr", - "we also allow <regular expression>_g where g can be replaced by ab, ag, cr", - ); - h.doc( - "<custom>_cu", - "or cu; this represents a sum of UMI counts across the matching features. 
●", - ); - h.ldoc( - "sec", - "for human or mouse BCR, number of GEX UMIs that are characterized as secreted", - ); - h.doc( - "mem", - "for human or mouse BCR, number of GEX UMIs that are characterized as membrane", - ); - h.doc( - "", - "For both of these, the algorithm looks for reads that are aligned through the", - ); - h.doc( - "", - "right end of a constant region CH3 exon, and then read into a CH3-CHS or", - ); - h.doc( - "", - "CH4-CHS exon, in the secreted case, or a M, M1 or M2 exon, in the membrane case.", - ); - h.doc( - "", - "This choice is determined by sequence tables in the code, and we cannot be", - ); - h.doc("", "absolutely certain that these tables are complete."); - h.docpr( - "", - "\\bold{These fields require the presence of the files possorted_genome_bam.bam}", - ); - h.docpr("", "\\bold{and possorted_genome_bam.bam.bai.}"); - h.docpr( - "", - "\\bold{These fields also require that you have samtools in your path.}", - ); - h.doc("", "Note that these counts tend to be low."); - h.ldoc( - "cred", - "Short for credibility. It is a measure of the extent to which cells", - ); - h.doc( - "", - "having gene expression similar to a given putative B cell are themselves", - ); - h.doc( - "", - "B cells. (Or similarly for T cells.) For the actual definition, let n", - ); - h.doc( - "", - "be the number of VDJ cells that are also GEX cells. For a given cell,", - ); - h.doc( - "", - "find the n GEX cells that are closest to it in PCA space, and report the", - ); - h.doc( - "", - "percent of those that are also VDJ cells. For multiple datasets, it would", - ); - h.doc( - "", - "be better to \"aggr\" the data, however that is not currently supported", - ); - h.doc( - "", - "The computation is also inefficient, so let us know if it's causing", - ); - h.doc( - "", - "problems for you. 
And cred makes much better sense for datasets that", - ); - h.doc( - "", - "consist of mixed cell types, rather than consisting of pure B or T cells.", - ); - h.print_tab2(); - h.print( - "For gene expression and feature barcode stats, such data must be provided \ - as input to enclone.\n\n", - ); - h.print( - "● Example: IG.*_g matches all genes that begin with IG, and TR(A|B).*_g matches \ - all genes that begin with TRA or TRB. Double quotes as in \\bold{LVARS=\"...\"} \ - may be needed. The regular expression must \ - be in the alphabet A-Za-z0-9+_-.[]()|* and is only interpreted as a regular \ - expression if it contains a character in []()|*. \ - See \"enclone help filter\" \ - for more information about regular expressions.\n\n", - ); - explain_alt_versions(&mut h); - h.print( - "\n\\blue{●} Similar to the above but simpler: n_gex is just a count of cells, \ - visual (one cell) shows 0 or 1, n_gex_cell is defined for parseable (one cell), \ - and the x_mean etc. forms do not apply.\n\n", - ); - h.print( - "The default is \\bold{datasets,n}, except that datasets is suppressed if \ - there is only one dataset.\n\n", - ); - h.print("\\bold{LVARSP=x1,...,xn} is like \\bold{LVARS} but appends to the list.\n\n"); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide cvars help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "cvars") || h.help_all { - h.begin_doc("cvars"); - - // Header. - - h.print( - "\n\\bold{per-chain column options}: These options define per-chain variables, \ - which correspond to columns that appear once for each chain in each clonotype, and \ - have one entry for each exact subclonotype.\n\n", - ); - h.print( - "Per-column variables are specified using\n\ - \\bold{CVARS=x1,...,xn}\n\ - where each xi is one of:\n\n", - ); - - // Main table entries. 
- - h.doc( - "var", - "bases at positions in chain that vary across the clonotype", - ); - h.ldocpr( - "u", - "\\red{●} VDJ UMI count for each exact subclonotype, median across cells", - ); - h.docpr( - "r", - "\\red{●} VDJ read count for each exact subclonotype, median across cells", - ); - h.ldoc("const", "constant region name"); - h.ldoc( - "edit", - "a string that defines the edit of the reference V(D)J concatenation versus", - ); - h.doc( - "", - "the contig, from the beginning of the CDR3 to the end of the J segment;", - ); - h.doc( - "", - "this uses a coordinate system in which 0 is the first base of the J ref", - ); - h.doc( - "", - "segment (or the first base of the D ref segment for IGH and TRB); for", - ); - h.doc( - "", - "example D-4:4 denotes the deletion of the last 4 bases of the V segment, ", - ); - h.doc("", "I0:2 denotes an insertion of 2 bases after the V"); - h.doc( - "", - "and I0:2•S5 denotes that plus a substitution at position 5; in computing", - ); - h.doc( - "", - "\"edit\", for IGH and TRB, we always test every possible D segment,", - ); - h.doc( - "", - "regardless of whether one is annotated, and pick the best one; for this", - ); - h.doc("", "reason, \"edit\" may be slow"); - h.doc( - "comp", - "a measure of CDR3 complexity, which is the total number of S, D and I", - ); - h.doc("", "symbols in \"edit\" as defined above"); - h.ldoc("cdr3_dna", "the CDR3_DNA sequence"); - h.doc("cdr3_len", "number of amino acids in the CDR3 sequence"); - h.ldoc( - "vjlen", - "number of bases from the start of the V region to the end of the J region", - ); - h.doc( - "clen", - "length of observed constant region (usually truncated at primer start)", - ); - h.doc("ulen", "length of observed 5'-UTR sequence;"); - h.doc( - "", - "note however that what report is just the start of the V segment", - ); - h.doc( - "", - "on the contig, and thus the length may include junk before the UTR", - ); - h.doc( - "cdiff", - "differences with universal reference 
constant region, shown in the", - ); - h.doc( - "", - "abbreviated form e.g. 22T (ref changed to T at base 22) or 22T+10", - ); - h.doc( - "", - "(same but contig has 10 additional bases beyond end of ref C region", - ); - h.doc( - "", - "At most five differences are shown, and if there are more, ... is appended.", - ); - h.doc("udiff", "like cdiff, but for the 5'-UTR"); - h.ldoc( - "notes", - "optional note if there is an insertion or the end of J does not exactly abut", - ); - h.doc( - "", - "the beginning of C; elided if empty; also single base overlaps between", - ); - h.docpr( - "", - "J and C are not shown unless you use the special option \\bold{JC1}; we do this", - ); - h.doc( - "", - "because with some VDJ references, one nearly always has such an overlap", - ); - h.ldoc( - "ndiff<n>vj", - "number of base differences within V..J between this exact subclonotype and", - ); - h.doc("", "exact subclonotype n"); - h.doc( - "d_univ", - "distance from universal reference, more specifically,", - ); - h.doc( - "", - "number of base differences within V..J between this exact", - ); - h.doc( - "", - "clonotype and universal reference, exclusive of indels, the last 15", - ); - h.doc("", "bases of the V and the first 15 bases of the J"); - h.doc("d_donor", "distance from donor reference,"); - h.doc("", "as above but computed using donor reference"); - - // The rest. - - h.print_tab2(); - h.print("\n"); - explain_alt_versions(&mut h); - h.print( - "\nAt least one variable must be listed. The default is \\bold{u,const,notes}. \ - \\bold{CVARSP}: same as \\bold{CVARS} but appends.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide amino help. 
- - if (args.len() == 3 && args[1] == "help" && args[2] == "amino") || h.help_all { - h.begin_doc("amino"); - h.print( - "\nThere is a complex per-chain column to the left of other \ - per-chain columns, defined by\n\ - \\bold{AMINO=x1,...,xn}: display amino acid columns for the given categories, \ - in one combined ordered group, where each xi is one of:\n\n", - ); - h.doc("cdr3", "CDR3 sequence"); - h.ldoc("var", "positions in chain that vary across the clonotype"); - h.doc( - "share", - "positions in chain that differ consistently from the donor reference", - ); - h.ldoc( - "donor", - "positions in chain where the donor reference differs from the universal \ - reference", - ); - h.ldoc( - "donorn", - "positions in chain where the donor reference differs nonsynonymously", - ); - h.doc("", "from the universal reference"); - h.ldoc( - "a-b", - "amino acids numbered a through b (zero-based, inclusive)", - ); - h.print_tab2(); - h.print("\n"); - h.print( - "Note that we compute positions in base space, and then divide by three to get \ - positions in amino acid space. Thus it can happen that a position in amino acid \ - space is shown for both \\bold{var} and \\bold{share}.\n\n", - ); - h.print( - "The default value for \\bold{AMINO} is \\bold{cdr3,var,share,donor}. \ - Note that we only report amino acids that are strictly within V..J, \ - thus specifically excluding the codon bridging J and C.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide display help. 
- - if (args.len() == 3 && args[1] == "help" && args[2] == "display") || h.help_all { - h.begin_doc("display"); - h.print("\n\\bold{other options that control clonotype display}\n\n"); - h.doc( - "PER_CELL", - "expand out each exact clonotype line, showing one line per cell,", - ); - h.doc( - "", - "for each such line, displaying the barcode name, the number of UMIs assigned,", - ); - h.doc( - "", - "and the gene expression UMI count, if applicable, under gex_med", - ); - h.ldoc( - "BARCODES", - "print list of all barcodes of the cells in each clonotype, in a", - ); - h.doc( - "", - "single line near the top of the printout for a given clonotype", - ); - h.ldoc( - "SEQC", - "print V..J sequence for each chain in the first exact subclonotype, near", - ); - h.doc("", "the top of the printout for a given clonotype"); - h.ldoc( - "FULL_SEQC", - "print full sequence for each chain in the first exact subclonotype,", - ); - h.doc("", "near the top of the printout for a given clonotype"); - h.ldoc("SUM", "print sum row for each clonotype"); - h.doc("MEAN", "print mean row for each clonotype"); - h.print_tab2(); - h.print("\n"); - h.print( - "\\bold{options that control clonotype grouping}\n\n\ - By default, enclone organizes clonotypes into groups, and each group contains \ - just one clonotype! If you prefer not to see the grouping messages, you can \ - turn them off by adding the option \\bold{NGROUP} to the enclone command line. \ - We intend to add useful versions of grouping to a future version of enclone, that \ - are reflective of functional (antigen-binding) differences. 
For now there are the \ - following \"toy\" options:\n\n", - ); - h.rows.clear(); - - h.doc( - "GROUP_HEAVY_CDR3", - "group by perfect identity of CDR3 amino acid sequence \ - of IGH or TRB", - ); - h.doc( - "GROUP_VJ_REFNAME", - "group by sharing identical V and J reference gene names,", - ); - h.doc( - "GROUP_VJ_REFNAME_STRONG", - "same but also require identical length V..J sequences", - ); - h.doc( - "", - "(after correction for indels) and identical length CDR3 sequences,", - ); - h.doc("", "but ignores foursies and moresies"); - h.ldoc( - "MIN_GROUP", - "minimum number of clonotypes in group to print (default = 1)", - ); - h.print_tab2(); - h.print("\n"); - h.end_doc(); - } -} diff --git a/enclone_help/src/help5.rs b/enclone_help/src/help5.rs deleted file mode 100644 index 1d549c267..000000000 --- a/enclone_help/src/help5.rs +++ /dev/null @@ -1,495 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. -// -// Test for help request, under development. - -use crate::help_utils::*; -use ansi_escape::*; -use enclone_core::defs::*; -use enclone_core::print_tools::*; -use enclone_core::*; -use io_utils::*; -use std::io::Write; -use string_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn help5(args: &Vec<String>, ctl: &EncloneControl, h: &mut HelpDesk) { - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide indels help. 
- - if (args.len() == 3 && args[1] == "help" && args[2] == "indels") || h.help_all { - h.begin_doc("indels"); - h.print("\n\\bold{handling of insertions and deletions}\n\n"); - h.print( - "enclone can recognize and display a single insertion or deletion in a contig \ - relative to the reference, so long as its length is divisible by three, is relatively \ - short, and occurs within the V segment, not too close to its right end.\n\n\ - These indels could be germline, however most such events are already captured in a \ - reference sequence. Currently the donor reference code in enclone does not recognize \ - indels.\n\n\ - SHM deletions are rare, and SHM insertions are even more rare.\n\n\ - Deletions are displayed using hyphens (-). If you use the \\bold{var} option for \ - \\bold{cvars}, the hyphens will be displayed in base space, where they are initially \ - observed. For the \\bold{AMINO} option, the deletion is first shifted by up to two \ - bases, so that the deletion starts at a base position that is divisible by three. \ - Then the deleted amino acids are shown as hyphens.\n\n\ - Insertions are shown only in amino acid space, in a special per-chain column called \ - \\bold{notes} that \ - appears if there is an insertion. Colored amino acids are shown for the insertion, \ - and the position of the insertion is shown. The position is the position of the \ - amino acid after which the insertion appears, where the first amino acid (start \ - codon) is numbered 0.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide color help. - // - // Here, and in substitute_enclone_color in plot.rs, we swap the order of colors, so that the - // first three are as given, because they seem to make a better three-color palette. 
- - if (args.len() == 3 && args[1] == "help" && args[2] == "color") || h.help_all { - h.begin_doc("color"); - h.print("\nHere is the color palette that enclone uses for amino acids:\n\n"); - let mut pal = String::new(); - for i in 0..7 { - let s = best_color_order(i); - let mut log = Vec::<u8>::new(); - if !h.plain { - print_color(s, &mut log); - pal += &stringme(&log); - } - pal.push('█'); - let mut log = Vec::<u8>::new(); - if !h.plain { - emit_end_escape(&mut log); - pal += &stringme(&log); - } - if i < 7 { - pal.push(' '); - } - } - h.print_plain(&format!("{}\n", pal)); - h.print( - "\nWhen enclone shows amino acids, it uses one of two coloring schemes. The first \ - scheme (the default, or using the argument \\bold{COLOR=codon}), colors amino \ - acids by codon, according to the following scheme:\n\n", - ); - h.print_plain(&format!("{}\n\n", colored_codon_table(h.plain))); - h.print( - "Colored amino acids enable the compact display of all the information in a \ - clonotype.\n\n", - ); - h.print( - "The second scheme for coloring amino acids, \\bold{COLOR=property}, colors amino \ - acids by their properties, according to the following scheme:\n\n", - ); - { - let mut log = Vec::<u8>::new(); - if !h.plain { - fwrite!(log, "1. Aliphatic: "); - color_by_property(b"A G I L P V\n", &mut log); - fwrite!(log, "2. Aromatic: "); - color_by_property(b"F W Y\n", &mut log); - fwrite!(log, "3. Acidic: "); - color_by_property(b"D E\n", &mut log); - fwrite!(log, "4. Basic: "); - color_by_property(b"R H K\n", &mut log); - fwrite!(log, "5. Hydroxylic: "); - color_by_property(b"S T\n", &mut log); - fwrite!(log, "6. Sulfurous: "); - color_by_property(b"C M\n", &mut log); - fwrite!(log, "7. Amidic: "); - color_by_property(b"N Q\n", &mut log); - h.print_plain(&format!("{}\n", stringme(&log))); - } else { - h.print( - "1. Aliphatic: A G I L P V\n\ - 2. Aromatic: F W Y\n\ - 3. Acidic: D E\n\ - 4. Basic: R H K\n\ - 5. Hydroxylic: S T\n\ - 6. Sulfurous: C M\n\ - 7. 
Amidic: N Q\n\n", - ); - } - } - h.print( - "In both cases, \ - the coloring is done using special characters, called ANSI escape characters. \ - Color is used occasionally elsewhere by enclone, and there is also some \ - bolding, accomplished using the same mechanism.\n\n\ - Correct display of colors and bolding depends on having a terminal window \ - that is properly set up. As far as we know, this may always be the case, \ - but it is possible that there are exceptions. In addition, in general, text \ - editors do not correctly interpret escape characters.\n\n\ - For both of these reasons, you may wish to turn off the \"special effects\", \ - either some or all of the time. You can do this by adding the argument\n", - ); - h.print("\\bold{PLAIN}\n"); - h.print("to any enclone command.\n\n"); - h.print( - "We know of two methods to get enclone output into another document, along \ - with colors:\n\ - 1. Take a screenshot.\n\ - 2. Open a new terminal window, type the enclone command, and then convert the \ - terminal window into a pdf. See \\bold{enclone help faq} for related \ - instructions.\n\n", - ); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide faq help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "faq") || h.help_all { - h.begin_doc("faq"); - h.print("\n\\boldred{Frequently Asked Questions}\n\n"); - h.print( - "We're sorry you're having difficulty! Please see the answers below, check out \ - the other help guides, and if you're still stuck, write to us at \ - enclone@10xgenomics.com.\n\n", - ); - - h.print("\\boldblue{1. Why is my enclone output garbled?}\n\n"); - h.print( - "We can think of two possibilities:\n\n\ - A. The escape characters that enclone emits for color and bolding are not getting\n\ - translated. 
You have some options:\n\ - (a) Turn off escape character generation by adding PLAIN to your enclone commands.\n\ - This will work but you'll lose some information.\n\ - (b) If your terminal window is not translating escape characters, ask someone\n\ - with appropriate expertise to help you. We have not observed this phenomenon,\n\ - but it should be fixable.\n\ - (c) If you're trying to view enclone output, with escape characters, using an editor,\n\ - that's probably not going to work well.\n\n\ - B. Perhaps enclone is emitting very wide lines. Here are things you can do about \ - this:\n\ - (a) Make your terminal window wider or reduce the font size.\n\ - (b) Identify the field that is very wide and use the column controls to remove that\n\ - field. See the help for lvars and cvars. For example,\n", - ); - h.print( - "\\bold{AMINO=cdr3}\n\ - may help, or even\n\ - \\bold{AMINO=}\n\n", - ); - - h.print("\\boldblue{2. Can I convert the enclone visual output into other forms?}\n\n"); - h.print( - "Yes, there are choices:\n\ - \\bold{A}. On a Mac, you can screenshot from a terminal window.\n\ - \\bold{B}. Add the argument \\bold{HTML} to the enclone command line. Then the \ - output will be presented as html, with title \"enclone output\". If you want to \ - set the title, use \\bold{HTML=\"...\"}.\n\ - \\bold{C}. You can then convert the html to pdf. The best way on a Mac is to open \ - Safari, which is the best browser for this particular purpose, \ - select the file where you've saved the html, and then export as pdf. Do not convert \ - to pdf via printing, which produces a less readable file, and also distorts colors. \ - (We do not know why the colors are distorted.)\n\ - \\bold{D}. If you want to put enclone output in a Google Doc, you can do it via \ - approach \\bold{A}, although then you won't be able to select text \ - within the copied region. 
\ - Alternatively, if you open the html file in a browser, you can then select \ - text (including clonotype box text) and paste into a Google Doc. It will be pretty \ - ugly, but will capture color and correctly render the box structure, provided that \ - you use an appropriate fixed-width font for that part of the Doc. We found that \ - Courier New works, with line spacing set to 0.88. You may have to reduce the font \ - size.\n\n", - ); - - h.print("\\boldblue{3. Why is enclone slow for me?}\n\n"); - h.print( - "On a single VDJ dataset, it typically runs for us in a few seconds, on a Mac or Linux \ - server. Runs where we combine several hundred datasets execute in a couple minutes \ - (on a server). Your mileage could vary, and we are interested in cases where \ - it is underperforming. Let us know. We are aware of several things that could be \ - done to speed up enclone.\n\n", - ); - - h.print( - "\\boldblue{4. How does enclone fit into the 10x Genomics software ecosystem?}\n\n", - ); - h.print( - "There are several parts to the answer:\n\ - • enclone is a standalone executable that by default produces human-readable output.\n\ - • You can also run enclone to produce parseable output \ - (see \\bold{enclone help parseable}), \ - and that output can be digested using code that you write (for example, in R).\n\ - • When you run Cell Ranger to process 10x single cell immune profiling data, it in \ - effect calls enclone with a special option that yields only an output file for \ - the 10x visualization tool Loupe.\n\ - • Clonotypes may then be viewed using Loupe. The view of a clonotype provided by \ - Loupe is different than the view provided by enclone. Loupe shows a continuous \ - expanse of bases across each chain, which you can scroll across, rather than the \ - compressed view of \"important\" bases or amino acids that enclone shows.\n\n", - ); - - h.print("\\boldblue{5. What platforms does enclone run on?}\n\n"); - h.print( - "1. 
Linux/x86-64 (that's most servers)\n\ - 2. Mac.\n\n\ - However, we have not and cannot test every possible configuration of these \ - platforms. Please let us know if you encounter problems!\n\n", - ); - - h.print("\\boldblue{6. How can I print out all the donor reference sequences?}\n\n"); - h.print( - "Add the argument \\bold{DONOR_REF_FILE=filename} to your enclone command, \ - and fasta for the donor reference sequences will be dumped there.\n\n", - ); - - h.print("\\boldblue{7. How does enclone know what VDJ reference sequences I'm using?}\n\n"); - h.print( - "If you used Cell Ranger version 4.0 or greater, then the VDJ reference file was \ - included in the outs directory, and so enclone knows the reference sequence from \ - that.\n\n\ - For outs from older Cell Ranger versions, enclone has to guess which VDJ \ - reference sequences were used, and may or may not do so correctly. As part of this, \ - if you have mouse data from older Cell Ranger versions, you need to supply the \ - argument \\bold{MOUSE} on the command line.\n\n\ - It is also possible to set the reference sequence directly by adding \ - by adding \\bold{REF=f} to your command line, where \\bold{f} is the name of your \ - VDJ reference fasta file, but if that is different than the reference \ - supplied to Cell Ranger, then you will have to add the additional argument \ - \\bold{RE} to recompute annotations, and that will slow down enclone somewhat.\n\n", - ); - - h.print("\\boldblue{8. Can I provide data from more than one donor?}\n\n"); - h.print( - "Yes. Type \\bold{enclone help input} for details. The default behavior of \ - enclone is to prevent cells from different donors from being placed in the same \ - clonotype. The \\bold{MIX_DONORS} option may be used to turn off this behavior. If \ - you employ this option, then clonotypes containing cells from more than one donor \ - will be flagged as errors, unless you use the \\bold{NWARN} option to turn off those \ - warnings. 
The primary reason for allowing entry of data from multiple \ - donors is to allow estimation of enclone's error rate.\n\n", - ); - - h.print("\\boldblue{9. What are some command line argument values quoted?}\n\n"); - h.print( - "Command line argument values that contain any of these characters ;|* need to \ - be quoted like so\n\ - \\bold{TCR=\"a;b\"}\n\ - to prevent the shell from interpreting them for a purpose completely unrelated to \ - enclone. This is a trap, because forgetting to add the quotes can result in \ - nonsensical and confusing behavior!\n\n", - ); - - h.print("\\boldblue{10. If enclone fails, does it return nonzero exit status?}\n\n"); - h.print( - "Yes, unless output of enclone is going to a terminal. In that case, you'll always \ - get zero.\n\n", - ); - - h.print("\\boldblue{11. Could a cell be missing from an enclone clonotype?}\n\n"); - h.print( - "Yes, some cells are deliberately deleted. The cell might have been deleted by \ - one of the filters described in \\bold{enclone help special}, and which you can \ - turn off. We also delete cells for which more than four chains were found.\n\n", - ); - - h.print("\\boldblue{12. Can enclone print summary stats?}\n\n"); - h.print( - "Yes, if you add the option \\bold{SUMMARY}, then some summary stats will be \ - printed. If you only want to see the summary stats, then also add the option \ - \\bold{NOPRINT}.\n\n", - ); - - h.print("\\boldblue{13. What is the notes column?}\n\n"); - h.print( - "The notes column appears if one of two relatively rare events occurs:\n\n\ - 1. An insertion is detected in a chain sequence, relative to the reference.\n\n\ - 2. The end of the J segment on a chain sequence does not exactly coincide with\n \ - the beginning of the C segment.\n\ - The latter could correspond to one of several phenomena:\n\ - a. A transcript has an insertion between its J and C segments.\n \ - This can happen. See e.g. 
Behlke MA, Loh DY.\n \ - Alternative splicing of murine T-cell receptor beta-chain transcripts.\n \ - Nature 322(1986), 379-382.\n\ - b. There is an error in a reference sequence segment.\n \ - We have tried to eliminate all such errors from the built-in references for\n \ - human and mouse.\n\ - c. A cell produced a nonstandard transcript and also standard ones, and the\n \ - Cell Ranger pipeline just happened to pick a nonstandard one.\n\ - d. There was a technical artifact and the sequence does not actually represent\n \ - an mRNA molecule.\n\n\ - Overlaps of length exactly one between J and C segments are not shown unless you \ - specify the option \\bold{JC1}. The reason for this is that certain reference \ - sequences (notably those from IMGT and those supplied with Cell Ranger 3.1) often \ - have an extra base at the beginning of their C segments, resulting \ - in annoying overlap notes for a large fraction of clonotypes.\n\n", - ); - - h.print("\\boldblue{14. Can I cap the number of threads used by enclone?}\n\n"); - h.print( - "You can use the command-line argument \\bold{MAX_CORES=n} to cap the number of \ - cores used in parallel loops. The number of threads used is typically one \ - higher.\n\n", - ); - - h.print("\\boldblue{15. Can I use enclone if I have only gene expression data?}\n\n"); - h.print( - "Possibly. In some cases this works very well, but in other cases it does not. \ - Success depends on dataset characteristics that have not been carefully investigated. \ - To attempt this, you need to invoke Cell Ranger on the GEX dataset as if \ - it was a VDJ dataset, and you need to specify to Cell Ranger that the run is to be \ - treated as BCR or TCR. Two separate invocations can be used to get both. Note also \ - that Cell Ranger has been only minimally tested for this configuration and that this \ - is not an officially supported Cell Ranger configuration.\n\n", - ); - - h.print("\\boldblue{16. 
How can I cite enclone?}\n\n"); - h.print("This version of enclone has been provided under a non-disclosure agreement,\n"); - h.print( - "however once enclone has officially launched, you will be able to cite this \ - version as:\n", - ); - let mut log = Vec::<u8>::new(); - emit_green_escape(&mut log); - h.print(&format!("{}", strme(&log))); - if !ctl.gen_opt.stable_doc { - h.print(&format!( - "10x Genomics, https://github.com/10XGenomics/enclone,\nversion {}.\n", - version_string() - )); - } else { - h.print( - "10x Genomics, https://github.com/10XGenomics/enclone,\n\ - (your enclone version information will be printed here).\n", - ); - } - let mut log = Vec::<u8>::new(); - emit_end_escape(&mut log); - h.print(&format!("{}", strme(&log))); - h.print( - "At some point subsequent to that, there will be a white paper to which you can refer, \ - in addition to a DOI minted at Zenodo. In the spirit of reproducibility, you should \ - provide the arguments that you used when you ran enclone and indicate the version of \ - Cell Ranger that you used to generate the input data.\n\n", - ); - - h.print("\\boldblue{17. Can I print the enclone version?}\n\n"); - h.print("Yes, type \"enclone version\".\n\n"); - - h.print("\\boldblue{18. Can enclone ingest multiple datasets from the same library?}\n\n"); - h.print( - "If enclone detects significant (≥ 25%) barcode reuse between datasets, it will exit. \ - This behavior can be overridden using the argument \\bold{ACCEPT_REUSE}.\n" - ); - - h.print("\\boldblue{19. Can I turn off all the filters used in joining clonotypes?}\n\n"); - h.print( - "Pretty much. You can run with the following arguments:\n\ - MAX_CDR3_DIFFS=100\n\ - MAX_LOG_SCORE=100\n\ - EASY\n\ - MAX_DIFFS=200\n\ - MAX_DEGRADATION=150,\n\ - however this will in general be very slow and not produce useful results. Depending \ - on what your goal is, you may find it helpful to use some of these arguments, and \ - with lower values. 
You can see the meaning of the arguments and their default values \ - by typing \"enclone help how\".\n", - ); - - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Provide developer help. - - if (args.len() == 3 && args[1] == "help" && args[2] == "developer") || h.help_all { - h.begin_doc("developer"); - h.print("\n\\bold{a few options for developers}\n\n"); - h.print( - "For instructions on how to compile, please see\n\ - \\green{bit.ly/enclone}.\n\n", - ); - h.doc( - "COMP", - "report computational performance stats; use this with NOPRINT if you", - ); - h.doc( - "", - "only want to see the computational performance stats, and with NOPAGER if you", - ); - h.doc("", "want output to be unbuffered"); - h.doc( - "COMP2", - "like COMP, but adds more detailed lines that are prefixed with --", - ); - h.ldoc( - "LONG_HELP", - "allow long lines in help pages, which will otherwise trigger an assert", - ); - h.ldoc( - "CTRLC", - "upon CTRL-C, emit a traceback and then exit; can be used as a primitive", - ); - h.doc( - "", - "but easy profiling method, to know what the code is doing if it seems to be", - ); - h.doc("", "very slow"); - h.ldoc( - "HAPS=n", - "Interrupt code n times, at one second intervals, get a traceback, and then tally", - ); - h.doc( - "", - "the tracebacks. This only works if the n tracebacks can be obtained before", - ); - h.doc( - "", - "enclone terminates. Interrupts that occur in the allocator are ignored, and", - ); - h.doc( - "", - "in some cases, this accounts for most interrupts, resulting in confusing", - ); - h.doc( - "", - "output. In such cases, consider using CTRLC or a more sophisticated tool", - ); - h.doc( - "", - "like perf. 
Also HAPS only reports on the master thread, so to get useful", - ); - h.doc( - "", - "information, you probably need to change an instance in the code of", - ); - h.doc( - "", - "par_iter_mut to iter_mut, to turn off parallelization for a strategically", - ); - h.doc("", "selected section."); - h.print_tab2(); - h.print("\n"); - h.end_doc(); - } - - // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - // Finish enclone help all. - - if h.help_all { - h.dump(); - std::process::exit(0); - - // Catch unrecognized help requests. - } else if args.len() >= 2 { - let mut x = args[1].clone(); - x.make_ascii_lowercase(); - if x.contains("help") { - println!("\nYour help request doesn't match one known to enclone.\n"); - println!("Please type \"enclone\" to see the help options.\n"); - std::process::exit(1); - } - } -} diff --git a/enclone_help/src/help_utils.rs b/enclone_help/src/help_utils.rs deleted file mode 100644 index 341ec2684..000000000 --- a/enclone_help/src/help_utils.rs +++ /dev/null @@ -1,601 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. 
- -use ansi_escape::ansi_to_html::*; -use ansi_escape::*; -use enclone_core::print_tools::*; -use io_utils::*; -use std::io::Write; -use string_utils::*; -use tables::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -#[derive(Default)] -pub struct HelpDesk { - pub plain: bool, - pub help_all: bool, - pub long_help: bool, - pub html: bool, - pub rows: Vec<Vec<String>>, - pub log: Vec<u8>, - pub title: String, -} - -impl HelpDesk { - pub fn new(plain: bool, help_all: bool, long_help: bool, html: bool) -> HelpDesk { - HelpDesk { - plain: plain, - help_all: help_all, - long_help: long_help, - html: html, - rows: Vec::<Vec<String>>::new(), - log: Vec::<u8>::new(), - title: String::new(), - } - } - pub fn doc(&mut self, x1: &str, x2: &str) { - self.rows.push(vec![x1.to_string(), x2.to_string()]); - } - pub fn ldoc(&mut self, x1: &str, x2: &str) { - self.rows.push(vec!["\\hline".to_string(); 2]); - self.rows.push(vec![x1.to_string(), x2.to_string()]); - } - pub fn doc3(&mut self, x1: &str, x2: &str, x3: &str) { - self.rows - .push(vec![print_to(x1), print_to(x2), print_to(x3)]); - } - pub fn ldoc3(&mut self, x1: &str, x2: &str, x3: &str) { - self.rows.push(vec!["\\hline".to_string(); 3]); - self.rows - .push(vec![x1.to_string(), x2.to_string(), x3.to_string()]); - } - pub fn docpr(&mut self, x1: &str, x2: &str) { - self.rows.push(vec![print_to(x1), print_to(x2)]); - } - pub fn ldocpr(&mut self, x1: &str, x2: &str) { - self.rows.push(vec!["\\hline".to_string(); 2]); - self.rows.push(vec![print_to(x1), print_to(x2)]); - } - pub fn ldoc3pr(&mut self, x1: &str, x2: &str, x3: &str) { - self.rows.push(vec!["\\hline".to_string(); 3]); - self.rows - .push(vec![print_to(x1), print_to(x2), print_to(x3)]); - } - pub fn doc_red(&mut self, x1: &str, x2: &str) { - if !self.plain { - let r1 = format!("{}", x1); - let r2 = format!("{}", x2); - self.rows.push(vec![r1, r2]); - } else { - self.rows.push(vec![x1.to_string(), 
x2.to_string()]); - } - } - pub fn ldoc_red(&mut self, x1: &str, x2: &str) { - self.rows.push(vec!["\\hline".to_string(); 2]); - if !self.plain { - let r1 = format!("{}", x1); - let r2 = format!("{}", x2); - self.rows.push(vec![r1, r2]); - } else { - self.rows.push(vec![x1.to_string(), x2.to_string()]); - } - } - pub fn doc_greenish(&mut self, x1: &str, x2: &str) { - if !self.plain { - let r1 = format!("{}", x1); - let r2 = format!("{}", x2); - self.rows.push(vec![r1, r2]); - } else { - self.rows.push(vec![x1.to_string(), x2.to_string()]); - } - } - pub fn ldoc_greenish(&mut self, x1: &str, x2: &str) { - self.rows.push(vec!["\\hline".to_string(); 2]); - if !self.plain { - let r1 = format!("{}", x1); - let r2 = format!("{}", x2); - self.rows.push(vec![r1, r2]); - } else { - self.rows.push(vec![x1.to_string(), x2.to_string()]); - } - } - pub fn print_enclone(&mut self) { - if self.plain { - self.print("enclone"); - } else { - let mut log = Vec::<u8>::new(); - print_color(3, &mut log); - log.push(b'e'); - emit_end_escape(&mut log); - print_color(1, &mut log); - log.push(b'n'); - emit_end_escape(&mut log); - print_color(2, &mut log); - log.push(b'c'); - emit_end_escape(&mut log); - print_color(0, &mut log); - log.push(b'l'); - emit_end_escape(&mut log); - print_color(4, &mut log); - log.push(b'o'); - emit_end_escape(&mut log); - print_color(5, &mut log); - log.push(b'n'); - emit_end_escape(&mut log); - print_color(1, &mut log); - log.push(b'e'); - emit_end_escape(&mut log); - self.print(&format!("{}", strme(&log))); - } - } - pub fn print_tab2(&mut self) { - let mut log = String::new(); - print_tabular_vbox(&mut log, &self.rows, 2, &b"l|l".to_vec(), false, false); - self.print_plain(&format!("{}", log)); - } - pub fn print_tab3(&mut self) { - let mut log = String::new(); - print_tabular_vbox(&mut log, &self.rows, 2, &b"l|l|l".to_vec(), false, false); - self.print_plain(&format!("{}", log)); - } - pub fn begin_doc(&mut self, title: &str) { - self.title = 
format!("enclone help {}", title); - self.rows.clear(); - if self.help_all { - let mut log = Vec::<u8>::new(); - if !self.plain { - emit_blue_escape(&mut log); - } - self.print_plain(&format!("{}", strme(&log))); - for _ in 1..100 { - self.print_plain("▓"); - } - self.print_plain(&format!("{}", strme(&log))); - if title == "" { - self.print_plain(&format!( - "\nenclone main help page (what you get by typing \ - \"enclone\")\n" - )); - } else if title == "setup" { - self.print_plain(&format!( - "\nenclone setup page (for one time use, what you get by typing \ - \"enclone help\")\n" - )); - } else { - self.print_plain(&format!("\nenclone help {}\n", title)); - } - let mut log = Vec::<u8>::new(); - if !self.plain { - emit_blue_escape(&mut log); - } - self.print_plain(&format!("{}", strme(&log))); - for _ in 1..100 { - self.print_plain("▓"); - } - let mut log = Vec::<u8>::new(); - if !self.plain { - emit_end_escape(&mut log); - } - self.print_plain(&format!("{}\n", strme(&log))); - } - } - pub fn end_doc(&mut self) { - if !self.help_all { - self.dump(); - std::process::exit(0); - } - } - pub fn print_with_box(&mut self, x: &str, bold_box: bool) { - let y = print_to(x); - let mut rows = Vec::<Vec<String>>::new(); - let lines = y.split('\n').collect::<Vec<&str>>(); - for z in lines { - rows.push(vec![z.to_string()]); - } - let mut log = String::new(); - print_tabular_vbox(&mut log, &rows, 2, &b"l".to_vec(), false, bold_box); - self.print_plain(&format!("{}\n", log)); - } - pub fn print(&mut self, x: &str) { - self.print_plain(&format!("{}", print_to(x))); - } - pub fn print_plain_unchecked(&mut self, x: &str) { - fwrite!(self.log, "{}", &x); - } - pub fn print_plain(&mut self, x: &str) { - if !self.long_help { - let mut count = 0; - let mut escaped = false; - let mut line = String::new(); - for c in x.chars() { - line.push(c); - if c == '\n' { - count = 0; - line.clear(); - } else if escaped { - if c == 'm' { - escaped = false; - } - } else if c == '' { - escaped = true; 
- } else { - count += 1; - if count > 100 { - eprintln!("\nHelp line is too long:\n\n{}", line); - eprintln!("\nTry running with LONG_HELP to locate the problem.\n"); - std::process::exit(1); - } - } - } - } - fwrite!(self.log, "{}", &x); - } - pub fn dump(&self) { - if !self.html { - print!("{}", strme(&self.log)); - } else { - // Note that we do not link to the css file, because it is less fragile then including - // the font face information directly. In particular, the css file could be - // accidentally deleted or renamed, which would break previously generated user html - // files. This actually happened! - let s = convert_text_with_ansi_escapes_to_html( - strme(&self.log), - "", // source - &self.title, - &format!("<style type=\"text/css\">\n{}</style>", font_face_in_css()), - "DejaVuSansMono", - 14, - ); - print!("{}", s); - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn stringify(rows: Vec<Vec<&str>>) -> Vec<Vec<String>> { - let mut r = Vec::<Vec<String>>::new(); - for i in 0..rows.len() { - let mut x = Vec::<String>::new(); - for j in 0..rows[i].len() { - x.push(rows[i][j].to_string()); - } - r.push(x); - } - r -} - -// This encodes the color codes for each possible codon of a given amino acid -// that could be found in a BCR or TCR sequence. 
- -pub fn colored_codon_table(plainx: bool) -> String { - let plain = b"\ - Alanine A GCT GCC GCA GCG\n\ - Arginine R CGT CGC CGA CGG AGA AGG\n\ - Asparagine N AAT AAC\n\ - Aspartic Acid D GAT GAC\n\ - Cysteine C TGT TGC\n\ - Glutamine Q CAA CAG\n\ - Glutamic Acid E GAA GAG\n\ - Glycine G GGT GGC GGA GGG\n\ - Histidine H CAT CAC\n\ - Isoleucine I ATT ATC ATA\n\ - Leucine L TTA TTG CTT CTC CTA CTG\n\ - Lysine K AAA AAG\n\ - Methionine M ATG\n\ - Phenylalanine F TTT TTC\n\ - Proline P CCT CCC CCA CCG\n\ - Serine S TCT TCC TCA TCG AGT AGC\n\ - Threonine T ACT ACC ACA ACG\n\ - Tryptophan W TGG\n\ - Tyrosine Y TAT TAC\n\ - Valine V GTT GTC GTA GTG"; - let mut colored = Vec::<u8>::new(); - let mut p = 0; - while p < plain.len() { - if (plain[p] as char).is_uppercase() && (plain[p + 1] as char).is_uppercase() { - let mut log = Vec::<u8>::new(); - if !plainx { - emit_codon_color_escape(&plain[p..p + 3], &mut log); - } - for i in 0..3 { - log.push(plain[p + i]); - } - if !plainx { - emit_end_escape(&mut log); - } - colored.append(&mut log); - p += 3; - } else { - colored.push(plain[p]); - p += 1; - } - } - stringme(&colored) -} - -pub static mut PLAIN: bool = false; -pub static mut HELP_ALL: bool = false; - -// Print a string, making the following conversions, the first three of which are governed -// by the state of PLAIN: -// • Change \bold{x} into a bolded string by issuing appropriate escape characters. -// • Change \red{x} into a red string by issuing appropriate escape characters. -// • Change \boldred{x} into a bold red string by issuing appropriate escape characters. -// • Fold at 99 characters. 
- -pub fn print(x: &str) { - print!("{}", print_to(x)); -} - -pub fn print_to(x: &str) -> String { - let mut y = Vec::<char>::new(); - for c in x.chars() { - y.push(c); - } - let mut s = String::new(); - let mut i = 0; - while i < y.len() { - if y[i..].starts_with(&['\\', 'b', 'o', 'l', 'd', '{']) { - let mut j = i + 6; - while j < y.len() { - if y[j] == '}' { - break; - } - j += 1; - } - if j < y.len() { - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_bold_escape(&mut log); - } - } - s += &strme(&log); - for k in i + 6..j { - s.push(y[k]); - } - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_end_escape(&mut log); - } - } - s += &strme(&log); - i = j + 1; - } - } else if y[i..].starts_with(&['\\', 'r', 'e', 'd', '{']) { - let mut j = i + 5; - while j < y.len() { - if y[j] == '}' { - break; - } - j += 1; - } - if j < y.len() { - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_red_escape(&mut log); - } - } - s += &strme(&log); - for k in i + 5..j { - s.push(y[k]); - } - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_end_escape(&mut log); - } - } - s += &strme(&log); - i = j + 1; - } else { - i += 1; - } - } else if y[i..].starts_with(&['\\', 'b', 'l', 'u', 'e', '{']) { - let mut j = i + 6; - while j < y.len() { - if y[j] == '}' { - break; - } - j += 1; - } - if j < y.len() { - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_blue_escape(&mut log); - } - } - s += &strme(&log); - for k in i + 6..j { - s.push(y[k]); - } - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_end_escape(&mut log); - } - } - s += &strme(&log); - i = j + 1; - } else { - i += 1; - } - } else if y[i..].starts_with(&['\\', 'g', 'r', 'e', 'e', 'n', '{']) { - let mut j = i + 7; - while j < y.len() { - if y[j] == '}' { - break; - } - j += 1; - } - if j < y.len() { - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_green_escape(&mut log); - } - } - s += &strme(&log); - for k in i + 7..j { - 
s.push(y[k]); - } - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_end_escape(&mut log); - } - } - s += &strme(&log); - i = j + 1; - } else { - i += 1; - } - } else if y[i..].starts_with(&['\\', 'b', 'o', 'l', 'd', 'r', 'e', 'd', '{']) { - let mut j = i + 9; - while j < y.len() { - if y[j] == '}' { - break; - } - j += 1; - } - if j < y.len() { - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_bold_escape(&mut log); - emit_red_escape(&mut log); - } - } - s += &strme(&log); - for k in i + 9..j { - s.push(y[k]); - } - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_end_escape(&mut log); - } - } - s += &strme(&log); - i = j + 1; - } else { - i += 1; - } - } else if y[i..].starts_with(&['\\', 'b', 'o', 'l', 'd', 'b', 'l', 'u', 'e', '{']) { - let mut j = i + 10; - while j < y.len() { - if y[j] == '}' { - break; - } - j += 1; - } - if j < y.len() { - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_bold_escape(&mut log); - emit_blue_escape(&mut log); - } - } - s += &strme(&log); - for k in i + 10..j { - s.push(y[k]); - } - let mut log = Vec::<u8>::new(); - unsafe { - if !PLAIN { - emit_end_escape(&mut log); - } - } - s += &strme(&log); - i = j + 1; - } else { - i += 1; - } - } else { - s.push(y[i]); - i += 1; - } - } - let mut x = Vec::<char>::new(); - for c in s.chars() { - x.push(c); - } - let mut printed = 0; - let mut escaped = false; - let mut y = Vec::<char>::new(); - let mut i = 0; - while i < x.len() { - if x[i] == '' { - escaped = true; - } - if escaped { - if x[i] == 'm' { - escaped = false; - } - y.push(x[i]); - i += 1; - continue; - } - if x[i] == ' ' { - let mut j = i + 1; - while j < x.len() { - if x[j] == ' ' || x[j] == '\n' || x[j] == '' { - break; - } - j += 1; - } - if printed + j - i >= 100 - 1 { - y.push('\n'); - printed = 0; - i += 1; - continue; - } - } - y.push(x[i]); - printed += 1; - if x[i] == '\n' { - printed = 0; - } - i += 1; - } - let mut ans = String::new(); - for i in 
0..y.len() { - ans.push(y[i]); - } - ans -} - -pub fn print_tab2(rows: &Vec<Vec<String>>) { - let mut log = String::new(); - print_tabular_vbox(&mut log, &rows, 2, &b"l|l".to_vec(), false, false); - print!("{}", log); -} - -// Given a string, preface every line in in by a gray left bar. - -pub fn gray_left_bar(s: &str) -> String { - let mut gray = "  ".to_string(); - unsafe { - if PLAIN { - gray = "┃ ".to_string(); - } - } - let mut x = Vec::<char>::new(); - for c in s.chars() { - x.push(c); - } - let mut t = gray.to_string(); - for i in 0..x.len() - 1 { - t.push(x[i]); - if x[i] == '\n' { - t += &gray; - } - } - t.push(x[x.len() - 1]); - t -} diff --git a/enclone_help/src/lib.rs b/enclone_help/src/lib.rs deleted file mode 100644 index a10633f4d..000000000 --- a/enclone_help/src/lib.rs +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) 2020 10x Genomics, Inc. All rights reserved. - -extern crate enclone_core; - -pub mod help1; -pub mod help2; -pub mod help3; -pub mod help4; -pub mod help5; -pub mod help_utils; diff --git a/enclone_main/Cargo.toml b/enclone_main/Cargo.toml deleted file mode 100644 index 5aae56be1..000000000 --- a/enclone_main/Cargo.toml +++ /dev/null @@ -1,57 +0,0 @@ -[package] -name = "enclone_main" -version = "0.4.49" -authors = ["""David Jaffe <david.jaffe@10xgenomics.com>, - Keri Dockter <keri.dockter@10xgenomics.com>, - Shaun Jackman <shaun.jackman@10xgenomics.com>, - Sreenath Krishnan <sreenath.krishnan@10xgenomics.com>, - Meryl Lewis <meryl.lewis@10xgenomics.com>, - Patrick Marks <patrick.marks@10xgenomics.com>, - Wyatt McDonnell <wyatt.mcdonnell@10xgenomics.com>"""] -edition = "2018" -publish = false - -# Please do not edit crate versions within this file. Instead edit the file master.toml -# in the root of the enclone repo. 
- -[dependencies] -debruijn = "0.3.2" -enclone_core = { path = "../enclone_core" } -enclone_help = { path = "../enclone_help" } -enclone_print = { path = "../enclone_print" } -enclone_proto = { path = "../enclone_proto" } -enclone_tail = { path = "../enclone_tail" } -enclone = { path = "../enclone" } -equiv = "0.1.1" -io_utils = "0.2" -itertools = "0.9.0" -perf_stats = "0.1.2" -pretty_trace = "0.3.2" -rayon = "1.0.2" -regex = "1.3.1" -serde = "1.0.90" -serde_derive = "1.0.102" -serde_json = "*" -stats_utils = "0.1.1" -string_utils = "0.1.1" -vdj_ann = { git = "https://github.com/10XGenomics/rust-toolbox.git", rev="183e2d657e6436494072a32cf8da4f7b753d1e69" } -vector_utils = "0.1.3" - -[dependencies.hdf5] -features = ["conda"] -git = "https://github.com/pmarks/hdf5-rs.git" -rev = "0c98e57b2af1f4247708c198b324ba3a8bc18dba" - -[dev-dependencies] -ansi_escape = "0.1.0" -attohttpc = { version = "0.12", default-features = false, features = ["compress", "tls-rustls"] } -failure = "0.1.5" -file-lock = "1.1.20" -flate2 = "1.0.16" -perf_stats = "0.1.2" -rayon = "1.0.2" -sha2 = "0.9.1" - -[features] -basic = [] -cpu = [] diff --git a/enclone_main/src/bin/enclone.rs b/enclone_main/src/bin/enclone.rs deleted file mode 100644 index 21e811891..000000000 --- a/enclone_main/src/bin/enclone.rs +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -use std::env; - -use enclone_main::main_enclone::*; - -fn main() { - let args: Vec<String> = env::args().collect(); - main_enclone(&args); -} diff --git a/enclone_main/src/lib.rs b/enclone_main/src/lib.rs deleted file mode 100644 index 96fdde9a8..000000000 --- a/enclone_main/src/lib.rs +++ /dev/null @@ -1,3 +0,0 @@ -// Copyright (c) 2020 10x Genomics, Inc. All rights reserved. 
- -pub mod main_enclone; diff --git a/enclone_main/src/main_enclone.rs b/enclone_main/src/main_enclone.rs deleted file mode 100644 index ae586a491..000000000 --- a/enclone_main/src/main_enclone.rs +++ /dev/null @@ -1,1647 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. -// -// See README for documentation. - -use vdj_ann::*; - -use self::refx::*; -use debruijn::dna_string::DnaString; -use enclone::allele::*; -use enclone::explore::*; -use enclone::graph_filter::*; -use enclone::info::*; -use enclone::innate::*; -use enclone::join::*; -use enclone::load_gex::*; -use enclone::misc1::*; -use enclone::misc2::*; -use enclone::misc3::*; -use enclone::proc_args::*; -use enclone::proc_args2::*; -use enclone::proc_args_check::*; -use enclone::read_json::*; -use enclone::secret::*; -use enclone_core::defs::*; -use enclone_core::*; -use enclone_help::help1::*; -use enclone_help::help2::*; -use enclone_help::help3::*; -use enclone_help::help4::*; -use enclone_help::help5::*; -use enclone_help::help_utils::*; -use enclone_print::loupe::*; -use enclone_print::print_clonotypes::*; -use enclone_tail::tail::tail_code; -use equiv::EquivRel; -use io_utils::*; -use itertools::Itertools; -use perf_stats::*; -use pretty_trace::*; -use rayon::prelude::*; -use regex::Regex; -use serde_json::Value; -use stats_utils::*; -use std::{ - cmp::max, - collections::HashMap, - fs::File, - io::{BufRead, BufReader, BufWriter, Write}, - process::Command, - time::Instant, -}; -use string_utils::*; -use vector_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -fn binomial_sum(n: usize, k: usize, p: f64) -> f64 { - assert!(n >= 1); - assert!(k <= n); - let mut sum = 0.0; - let mut choose = 1.0; - for _ in 0..n { - choose *= 1.0 - p; - } - let q = p / (1.0 - p); - for i in 0..=k { - sum += choose; - choose *= (n - i) as f64; - choose /= (i + 1) as f64; - choose *= q; - } - sum -} - -// 
▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn setup(mut ctl: &mut EncloneControl, args: &Vec<String>) { - let t = Instant::now(); - // Provide help if requested. - - { - if args.len() == 2 && (args[1] == "version" || args[1] == "--version") { - println!("{} : {}", env!("CARGO_PKG_VERSION"), version_string()); - std::process::exit(0); - } - let mut args = args.clone(); - let mut to_delete = vec![false; args.len()]; - let mut nopager = false; - let mut plain = false; - let mut long_help = false; - for i in 1..args.len() { - if args[i] == "NOPAGER" { - nopager = true; - to_delete[i] = true; - } else if args[i] == "HTML" { - ctl.gen_opt.html = true; - ctl.gen_opt.html_title = "enclone output".to_string(); - to_delete[i] = true; - } else if args[i].starts_with("HTML=") { - ctl.gen_opt.html = true; - let mut title = args[i].after("HTML=").to_string(); - if title.starts_with("\"") && title.ends_with("\"") { - title = title.between("\"", "\"").to_string(); - } - ctl.gen_opt.html_title = title; - to_delete[i] = true; - } else if args[i] == "SVG" { - ctl.gen_opt.svg = true; - to_delete[i] = true; - } else if args[i] == "STABLE_DOC" { - ctl.gen_opt.stable_doc = true; - to_delete[i] = true; - } else if args[i] == "FORCE_EXTERNAL" { - to_delete[i] = true; - } else if args[i] == "LONG_HELP" { - long_help = true; - to_delete[i] = true; - } else if args[i].starts_with("MAX_CORES=") { - to_delete[i] = true; - } else if args[i].starts_with("PRE=") { - to_delete[i] = true; - } else if args[i] == "PLAIN" { - to_delete[i] = true; - plain = true; - unsafe { - PLAIN = true; - } - } - } - if ctl.gen_opt.html && ctl.gen_opt.svg { - eprintln!("\nBoth HTML and SVG cannot be used at the same time.\n"); - std::process::exit(1); - } - erase_if(&mut args, &to_delete); - if args.len() == 1 || args.contains(&"help".to_string()) { - PrettyTrace::new().on(); - setup_pager(!nopager); - } - let mut help_all = false; - if args.len() >= 3 && 
args[1] == "help" && args[2] == "all" { - unsafe { - HELP_ALL = true; - } - help_all = true; - } - let mut h = HelpDesk::new(plain, help_all, long_help, ctl.gen_opt.html); - help1(&args, &mut h); - help2(&args, &ctl, &mut h); - help3(&args, &mut h); - help4(&args, &mut h); - help5(&args, &ctl, &mut h); - } - - // Pretest for some options. - - ctl.pretty = true; - let mut nopretty = false; - ctl.gen_opt.h5 = true; - for i in 1..args.len() { - if is_simple_arg(&args[i], "PLAIN") { - ctl.pretty = false; - } - if is_simple_arg(&args[i], "NOPRETTY") { - nopretty = true; - } - if is_simple_arg(&args[i], "COMP") { - ctl.comp = true; - } - if is_simple_arg(&args[i], "COMP2") { - ctl.comp = true; - ctl.comp2 = true; - } - if is_simple_arg(&args[i], "CELLRANGER") { - ctl.gen_opt.cellranger = true; - } - if is_simple_arg(&args[i], "NH5") { - ctl.gen_opt.h5 = false; - } - } - - // Test for happening mode and turn on pretty trace. - - if !nopretty { - let mut happening = 0; - let mut ctrlc = false; - for i in 1..args.len() { - if args[i].starts_with("HAPS=") { - // should actually test for usize - happening = args[i].after("HAPS=").force_usize(); - } - if is_simple_arg(&args[i], "CTRLC") { - ctrlc = true; - } - } - let thread_message = new_thread_message(); - if happening > 0 { - PrettyTrace::new() - .message(&thread_message) - .profile(happening) - .whitelist(&vec![ - "amino", - "ansi_escape", - "binary_vec_io", - "enclone", - "equiv", - "graph_simple", - "io_utils", - "marsoc", - "mirror_sparse_matrix", - "perf_stats", - "stats_utils", - "stirling_numbers", - "string_utils", - "tables", - "vector_utils", - ]) - .ctrlc() - .on(); - } else if ctrlc { - PrettyTrace::new().message(&thread_message).ctrlc().on(); - } else { - let exit_message: String; - if !ctl.gen_opt.cellranger { - exit_message = format!( - "Something has gone badly wrong. 
You have probably encountered an internal \ - error in enclone.\n\n\ - Please email us at enclone@10xgenomics.com, including the traceback shown\n\ - above and also the following version information:\n\ - {} : {}.\n\n\ - Thank you and have a nice day!", - env!("CARGO_PKG_VERSION"), - version_string() - ); - } else { - exit_message = format!( - "Something has gone badly wrong. You have probably \ - encountered an internal error\nin cellranger. \ - Please email us at support@10xgenomics.com, including the traceback\nshown \ - above." - ); - } - PrettyTrace::new().exit_message(&exit_message).on(); - let mut nopager = false; - for i in 1..args.len() { - if args[i] == "NOPAGER" { - nopager = true; - } - } - setup_pager(!nopager); - } - } - ctl.perf_stats(&t, "in first part of setup"); - - // Process args (and set defaults for them). - - proc_args(&mut ctl, &args); - - // Dump lenas. - - for i in 1..args.len() { - if is_simple_arg(&args[i], "DUMP_INTERNAL_IDS") { - let mut x = Vec::<usize>::new(); - for y in ctl.origin_info.dataset_id.iter() { - x.push(y.force_usize()); - } - x.sort(); - println!("\n{}\n", x.iter().format(",")); - std::process::exit(0); - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn main_enclone(args: &Vec<String>) { - // Set up stuff, read args, etc. 
- - let tall = Instant::now(); - let (mut print_cpu, mut print_cpu_info) = (false, false); - let (mut comp, mut comp2) = (false, false); - for i in 1..args.len() { - if args[i] == "PRINT_CPU" { - print_cpu = true; - } - if args[i] == "PRINT_CPU_INFO" { - print_cpu_info = true; - } - if args[i] == "COMP" { - comp = true; - } - if args[i] == "COMP2" { - comp2 = true; - } - } - if comp && !comp2 { - println!(""); - } - let (mut cpu_all_start, mut cpu_this_start) = (0, 0); - if print_cpu || print_cpu_info { - let f = open_for_read!["/proc/stat"]; - for line in f.lines() { - let s = line.unwrap(); - let mut t = s.after("cpu"); - while t.starts_with(' ') { - t = t.after(" "); - } - cpu_all_start = t.before(" ").force_usize(); - break; - } - let f = open_for_read![&format!("/proc/{}/stat", std::process::id())]; - for line in f.lines() { - let s = line.unwrap(); - let fields = s.split(' ').collect::<Vec<&str>>(); - cpu_this_start = fields[13].force_usize(); - } - } - let mut ctl = EncloneControl::default(); - ctl.perf_stats(&tall, "before setup"); - setup(&mut ctl, &args); - - // Read external data. - - if ctl.gen_opt.ext.len() > 0 { - let f = open_for_read![&ctl.gen_opt.ext]; - let mut exts = Vec::<String>::new(); - for line in f.lines() { - let s = line.unwrap(); - let fields = s.split(' ').collect::<Vec<&str>>(); - ctl.gen_opt.extc.insert( - (fields[0].to_string(), fields[1].to_string()), - fields[2].to_string(), - ); - exts.push(fields[2].to_string()); - } - ctl.clono_print_opt.lvars.push("ext".to_string()); - exts.sort(); - let mut i = 0; - while i < exts.len() { - let j = next_diff(&exts, i); - ctl.gen_opt.extn.insert(exts[i].clone(), j - i); - i = j; - } - } - - // Get gene expression and feature barcode counts. Sanity check variables in cases where that - // has to occur after loading GEX data. This could also occur after loading only - // the feature list, which would be better. 
- - let gex_info = get_gex_info(&mut ctl); - let twoof = Instant::now(); - check_lvars(&ctl, &gex_info); - check_pcols(&ctl, &gex_info); - - // Find matching features for <regular expression>_g etc. - - ctl.clono_print_opt.regex_match = - vec![HashMap::<String, Vec<usize>>::new(); ctl.origin_info.n()]; - let ends0 = [ - "_g", "_ab", "_ag", "_cr", "_cu", "_g_μ", "_ab_μ", "_ag_μ", "_cr_μ", "_cu_μ", "_g_%", - ]; - let ends1 = [ - "_g", "_ab", "_ag", "_cr", "_cu", "_g", "_ab", "_ag", "_cr", "_cu", "_g", - ]; - let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; - let mut ends = Vec::<String>::new(); - let mut endsz = Vec::<String>::new(); - for (ix, x) in ends0.iter().enumerate() { - for y in suffixes.iter() { - ends.push(format!("{}{}", x, y)); - endsz.push(ends1[ix].to_string()); - } - } - let mut vars = ctl.clono_print_opt.lvars.clone(); - vars.append(&mut ctl.parseable_opt.pcols.clone()); - unique_sort(&mut vars); - for x in vars.iter() { - for (iy, y) in ends.iter().enumerate() { - let mut xc = x.clone(); - if x.ends_with("_cell") { - xc = xc.rev_before("_cell").to_string(); - } - if xc.ends_with(y) { - let mut p = xc.rev_before(y); - if p.contains(':') { - p = p.after(":"); - } - let pp = format!("{}{}", p, endsz[iy]); - if !p.is_empty() && Regex::new(&p).is_ok() { - let mut ok = true; - let mut px = false; - let b = p.as_bytes(); - for i in 0..p.len() { - if !((b[i] >= b'A' && b[i] <= b'Z') - || (b[i] >= b'a' && b[i] <= b'z') - || (b[i] >= b'0' && b[i] <= b'9') - || b".-_[]()|*".contains(&b[i])) - { - ok = false; - break; - } - if b"[]()|*".contains(&b[i]) { - px = true; - } - } - if ok && px { - let reg = Regex::new(&format!("^{}$", p)); - for li in 0..ctl.origin_info.n() { - let mut js = Vec::<usize>::new(); - for j in 0..gex_info.gex_features[li].len() { - let f = &gex_info.gex_features[li][j]; - let ff = f.split('\t').collect::<Vec<&str>>(); - let mut ok = false; - if ff[2].starts_with("Antibody") { - if y.contains("_ab") { - ok = true; - } - } else if 
ff[2].starts_with("Antigen") { - if y.contains("_ag") { - ok = true; - } - } else if ff[2].starts_with("CRISPR") { - if y.contains("_cr") { - ok = true; - } - } else if ff[2].starts_with("Custom") { - if y.contains("_cu") { - ok = true; - } - } else if y.contains("_g") { - ok = true; - } - if ok - && (reg.as_ref().unwrap().is_match(&ff[0]) - || reg.as_ref().unwrap().is_match(&ff[1])) - { - js.push(j); - } - } - if js.len() > 0 { - ctl.clono_print_opt.regex_match[li].insert(pp.clone(), js); - } - } - let mut matches = false; - for li in 0..ctl.origin_info.n() { - if ctl.clono_print_opt.regex_match[li].contains_key(&pp) { - matches = true; - } - } - if !matches { - eprintln!( - "\nLead variable {} contains a pattern that matches \ - no features.\n", - x - ); - std::process::exit(1); - } - break; - } - } - } - } - } - ctl.perf_stats(&twoof, "doing miscellaneous stuff"); - - // Determine the Cell Ranger version that was used. To do this, we take the first dataset, - // and if it has a version line in its annotations json file, we set the version from that. - // In addition, for internal runs, or if either CURRENT_REF or CELLRANGER was specified on - // the command line, we set the version to 4.0. - - let tr = Instant::now(); - let ann; - if !ctl.gen_opt.cellranger { - ann = "all_contig_annotations.json"; - } else { - ann = "contig_annotations.json"; - } - let json = format!("{}/{}", ctl.origin_info.dataset_path[0], ann); - let json_lz4 = format!("{}/{}.lz4", ctl.origin_info.dataset_path[0], ann); - if !path_exists(&json) && !path_exists(&json_lz4) { - eprintln!("\ncan't find {} or {}\n", json, json_lz4); - std::process::exit(1); - } - let mut jsonx = json.clone(); - if !path_exists(&json) { - jsonx = format!("{}.lz4", json); - } - if jsonx.contains('/') { - let p = jsonx.rev_before("/"); - if !path_exists(&p) { - eprintln!( - "\nThere should be a directory\n\ - \"{}\"\n\ - but it does not exist. 
Please check how you have specified the\n\ - input files to enclone, including the PRE argument.\n", - p - ); - std::process::exit(1); - } - } - if !path_exists(&jsonx) { - eprintln!( - "\nThe path\n\ - \"{}\"\n\ - does not exist. Please check how you have specified the\n\ - input files to enclone, including the PRE argument.\n", - jsonx - ); - std::process::exit(1); - } - let mut f = BufReader::new(open_maybe_compressed(&jsonx)); - match read_vector_entry_from_json(&mut f) { - None => { - eprintln!("\nFailure reading {}.\n", jsonx); - } - Some(x) => { - let v: Value = serde_json::from_str(strme(&x)).unwrap(); - if v.get("version").is_some() { - ctl.gen_opt.cr_version = v["version"].to_string().between("\"", "\"").to_string(); - } - } - } - if ctl.gen_opt.current_ref || ctl.gen_opt.cellranger { - ctl.gen_opt.cr_version = "4.0".to_string(); - } - - // Test for presence of a reference file in the VDJ directories. - - let mut refx = String::new(); - if ctl.gen_opt.refname.len() == 0 { - let rpaths = [ - "outs/vdj_reference/fasta/regions.fa", - "vdj_reference/fasta/regions.fa", - "regions.fa", - ]; - let mut refs = Vec::<String>::new(); - for li in 0..ctl.origin_info.n() { - for r in rpaths.iter() { - let fasta = format!("{}/{}", ctl.origin_info.dataset_path[li], r); - if path_exists(&fasta) { - refs.push(std::fs::read_to_string(&fasta).unwrap()); - break; - } - } - } - if refs.len() > 0 { - unique_sort(&mut refs); - if refs.len() > 1 { - eprintln!( - "The VDJ reference sequences that were supplied to Cell Ranger are not \ - identical with each other.\nAs a consequence, the VDJ output files are not \ - compatible with each other, so enclone can't run.\nYou have some options as \ - to how to proceed:\n\ - 1. You can rerun Cell Ranger using the same reference.\n\ - 2. 
You can select one of the references, and supply that to enclone using the \ - REF option.\n You will also need to supply the argument RE to get enclone to \ - recompute annotations,\n and that will make it somewhat slower.\n\n" - ); - std::process::exit(1); - } - if ctl.gen_opt.mouse { - eprintln!( - "\nSince the reference sequence is already in the VDJ input directories that\n\ - you supplied to enclone, it is not necessary to supply the MOUSE argument.\n\ - Please remove that argument. Exiting now because of possible unintended\n\ - consequences.\n" - ); - std::process::exit(1); - } - refx = refs[0].clone(); - } - } - - // Find the VDJ reference. - - let mut refdata = RefData::new(); - if ctl.gen_opt.refname.len() > 0 { - if std::path::Path::new(&ctl.gen_opt.refname).is_dir() { - eprintln!( - "\nProblem with REF: \"{}\"\nis a directory, not a file.\n", - ctl.gen_opt.refname - ); - std::process::exit(1); - } - if ctl.gen_opt.descrip { - println!("using reference = {}", ctl.gen_opt.refname); - } - let fx = File::open(&ctl.gen_opt.refname); - if fx.is_err() { - eprintln!( - "\nProblem with REF: unable to read from the file\n\ - \"{}\".\nPlease check that that path makes sense and that you have read \ - permission along that path.\n", - ctl.gen_opt.refname - ); - std::process::exit(1); - } - let f = BufReader::new(fx.unwrap()); - let mut nheader = 0; - let mut bases = 0; - let mut na = 0; - let mut nc = 0; - let mut ng = 0; - let mut nt = 0; - for line in f.lines() { - let s = line.unwrap(); - refx += &s; - refx += &"\n"; - if s.starts_with('>') { - nheader += 1; - } else { - for c in s.chars() { - bases += 1; - if c == 'A' || c == 'a' { - na += 1; - } else if c == 'C' || c == 'c' { - nc += 1; - } else if c == 'G' || c == 'g' { - ng += 1; - } else if c == 'T' || c == 't' { - nt += 1; - } - } - } - } - if nheader == 0 || bases == 0 || (na + nc + ng + nt) as f64 / (bases as f64) < 0.95 { - eprintln!("\nProblem with REF: it is not a FASTA file.\n"); - 
std::process::exit(1); - } - } else if ctl.gen_opt.mouse && refx.len() == 0 { - if ctl.gen_opt.cr_version == "".to_string() && !ctl.gen_opt.reannotate { - if ctl.gen_opt.descrip { - println!("using old mouse reference"); - } - refx = mouse_ref_old(); - } else { - if ctl.gen_opt.descrip { - println!("using new mouse reference"); - } - refx = mouse_ref(); - } - } else if refx.len() == 0 { - if ctl.gen_opt.imgt && ctl.gen_opt.internal_run { - let imgt = - "/mnt/opt/refdata_cellranger/vdj/vdj_IMGT_human_20200415-0.0.0/fasta/regions.fa"; - if ctl.gen_opt.descrip { - println!("using imgt human reference"); - } - let f = open_for_read![imgt]; - for line in f.lines() { - let mut s = line.unwrap(); - if ctl.gen_opt.imgt_fix { - // Fix IGHJ6. - if s == "ATTACTACTACTACTACGGTATGGACGTCTGGGGCCAAGGGACCACGGTCACCGTCTCCTCA" - .to_string() - || s == "ATTACTACTACTACTACTACATGGACGTCTGGGGCAAAGGGACCACGGTCACCGTCTCCTCA" - .to_string() - { - s += "G"; - } - } - refx += &s; - refx += &"\n"; - } - ctl.gen_opt.reannotate = true; - } else if ctl.gen_opt.cr_version == "".to_string() && !ctl.gen_opt.reannotate { - if ctl.gen_opt.descrip { - println!("using old human reference"); - } - refx = human_ref_old(); - } else { - if ctl.gen_opt.descrip { - println!("using new human reference"); - } - refx = human_ref(); - } - } - let ext_refx = String::new(); - let (mut is_tcr, mut is_bcr) = (true, true); - if ctl.gen_opt.tcr { - is_bcr = false; - } - if ctl.gen_opt.bcr { - is_tcr = false; - } - - /* - - // Remove V sequences not beginning with a start codon and do some tidying. - // Commented out until proven useful. 
- - let lines = refx.split('\n').collect::<Vec<&str>>(); - let mut refx2 = String::new(); - let mut i = 0; - while i < lines.len() { - let mut j = i + 1; - while j < lines.len() { - if lines[j].starts_with(">") { - break; - } - j += 1; - } - let mut seq = String::new(); - for k in i + 1..j { - seq += &lines[k]; - } - seq = seq.replace('a', "A"); - seq = seq.replace('c', "C"); - seq = seq.replace('g', "G"); - seq = seq.replace('t', "T"); - let mut ok = true; - if lines[i].contains("V-REGION") { - if !seq.starts_with("ATG") { - ok = false; - } - } - if ok { - refx2 += &format!("{}\n{}\n", lines[i], seq); - } - i = j; - } - - */ - let refx2 = &refx; - - // Build reference data. - - make_vdj_ref_data_core(&mut refdata, &refx2, &ext_refx, is_tcr, is_bcr, None); - let mut to_ref_index = HashMap::<usize, usize>::new(); - for i in 0..refdata.refs.len() { - to_ref_index.insert(refdata.id[i] as usize, i); - } - - // Determine if the species is human or mouse or unknown. - - ctl.gen_opt.species = species(&refdata); - - // Test for okness of sec/mem args. 
- - let mut vars = ctl.parseable_opt.pcols.clone(); - vars.append(&mut ctl.clono_print_opt.lvars.clone()); - unique_sort(&mut vars); - ctl.gen_opt.using_secmem = - bin_member(&vars, &"sec".to_string()) || bin_member(&vars, &"mem".to_string()); - if !ctl.gen_opt.using_secmem - && ctl.parseable_opt.pout.len() > 0 - && ctl.parseable_opt.pcols.len() == 0 - { - if ctl.gen_opt.species == "human" || ctl.gen_opt.species == "mouse" { - if is_bcr { - let mut have_bam = true; - for g in ctl.origin_info.gex_path.iter() { - if g.len() == 0 { - have_bam = false; - break; - } - let bam = format!("{}/possorted_genome_bam.bam", g); - if !path_exists(&bam) { - have_bam = false; - break; - } - } - if have_bam { - let o = Command::new("samtools") - .arg("--help") - .output() - .expect("failed to execute samtools"); - let status = o.status.code().unwrap(); - if status == 0 { - ctl.gen_opt.using_secmem = true; - } - } - } - } - } - if bin_member(&vars, &"sec".to_string()) || bin_member(&vars, &"mem".to_string()) { - if ctl.gen_opt.species != "human" && ctl.gen_opt.species != "mouse" { - eprintln!("\nThe lvars sec and mem can only be used for data from human and mouse.\n"); - std::process::exit(1); - } - if !is_bcr { - eprintln!("\nThe lvars sec and mem do not make sense for TCR data.\n"); - std::process::exit(1); - } - for g in ctl.origin_info.gex_path.iter() { - if g.len() == 0 { - eprintln!("\nThe lvars sec and mem can only be used if GEX data are provided.\n"); - std::process::exit(1); - } - let bam = format!("{}/possorted_genome_bam.bam", g); - if !path_exists(&bam) { - eprintln!( - "\nThe lvars sec and mem can only be used if the file\n\ - pos_sorted_genome_bam.bam is provided. 
We did not see it at this path\n\ - {}.", - g - ); - std::process::exit(1); - } - } - let o = Command::new("samtools") - .arg("--help") - .output() - .expect("failed to execute samtools"); - let status = o.status.code().unwrap(); - if status != 0 { - eprintln!( - "\nThe lvars sec and mem can only be used if the samtools\n\ - executable is in your path.\n" - ); - std::process::exit(1); - } - } - ctl.perf_stats(&tr, "building reference and other things"); - - // If sec (secreted) or mem (membrane) lvars have been specified, gather those data. - - if ctl.gen_opt.using_secmem { - fetch_secmem(&mut ctl); - } - - // Parse the json annotations file. - - let mut tig_bc = Vec::<Vec<TigData>>::new(); - let mut vdj_cells = Vec::<Vec<String>>::new(); - let mut gex_cells = Vec::<Vec<String>>::new(); - let mut gex_cells_specified = Vec::<bool>::new(); - let tparse = Instant::now(); - parse_json_annotations_files( - &mut ctl, - &mut tig_bc, - &refdata, - &to_ref_index, - &mut vdj_cells, - &mut gex_cells, - &mut gex_cells_specified, - ); - ctl.perf_stats(&tparse, "loading from json"); - - // Search for SHM indels. - - let tproto = Instant::now(); - search_for_shm_indels(&ctl, &tig_bc); - - // Filter using light --> heavy graph. - - if !ctl.gen_opt.ngraph_filter { - graph_filter(&mut tig_bc, ctl.gen_opt.graph); - } - - // Sort tig_bc. - - sort_tig_bc(&ctl, &mut tig_bc, &refdata); - - // Cross filter. - - cross_filter(&ctl, &mut tig_bc); - - // Look for barcode reuse. - - check_for_barcode_reuse(&ctl, &tig_bc); - ctl.perf_stats(&tproto, "in proto stuff"); - - // Find exact subclonotypes. - - let texact = Instant::now(); - let mut exact_clonotypes = find_exact_subclonotypes(&ctl, &tig_bc, &refdata); - ctl.perf_stats(&texact, "finding exact subclonotypes"); - - // Test for consistency between VDJ cells and GEX cells. This is designed to work even if - // NCELL is used. 
We take up to 100 VDJ cells having both heavy and light (or TRB and TRA) - // chains, and having the highest VDJ UMI count total (but using only one cell per exact - // subclonotype), and find those that are GEX cells. - // - // If n cells were taken, and k of those are GEX cells, we require that - // binomial_sum(n, k, 0.7) >= 0.00002. For n = 100, this is the same as requiring that - // k >= 50. Using a binomial sum threshold allows the stringency of the requirement to be - // appropriately lower when n is small. When we tested on 260 libraries, the lowest value - // observed for k/n was 0.65, and the vast majority of values were 0.9 or higher. - // - // This code is inefficient because for every dataset, it searches the entirety of tig_bc, but - // it doesn't matter much because not much time is spent here. - - let tinc = Instant::now(); - let mut fail = false; - for li in 0..ctl.origin_info.n() { - if ctl.origin_info.gex_path[li].len() > 0 && !ctl.gen_opt.allow_inconsistent { - let vdj = &vdj_cells[li]; - let gex = &gex_info.gex_cell_barcodes[li]; - let (mut heavy, mut light) = (vec![false; vdj.len()], vec![false; vdj.len()]); - let mut exid = vec![0; vdj.len()]; - let mut inex = vec![false; vdj.len()]; - for i in 0..exact_clonotypes.len() { - let ex = &exact_clonotypes[i]; - for j in 0..ex.clones.len() { - let p = bin_position(&vdj, &ex.clones[j][0].barcode); - if p >= 0 { - inex[p as usize] = true; - exid[p as usize] = i; - } - } - } - let mut numi = vec![0; vdj.len()]; - for i in 0..tig_bc.len() { - if tig_bc[i][0].dataset_index == li { - let p = bin_position(&vdj, &tig_bc[i][0].barcode); - if p >= 0 { - for j in 0..tig_bc[i].len() { - numi[p as usize] += tig_bc[i][j].umi_count; - if tig_bc[i][j].left { - heavy[p as usize] = true; - } else { - light[p as usize] = true; - } - } - } - } - } - let mut x = Vec::<(usize, bool, usize)>::new(); - for i in 0..vdj.len() { - if heavy[i] && light[i] { - x.push((numi[i], bin_member(&gex, &vdj[i]), i)); - } - } - 
reverse_sort(&mut x); - let mut used = vec![false; exact_clonotypes.len()]; - let (mut total, mut good) = (0, 0); - for i in 0..x.len() { - let m = x[i].2; - if inex[m] && used[exid[m]] { - continue; - } - total += 1; - if x[i].1 { - good += 1; - } - if inex[m] { - used[exid[m]] = true; - } - if total == 100 { - break; - } - } - if total >= 1 { - let bino = binomial_sum(total, good, 0.7); - if bino < 0.00002 { - fail = true; - eprint!( - "\nThe VDJ dataset with path\n{}\nand the GEX dataset with path\n\ - {}\nshow insufficient sharing of barcodes. ", - ctl.origin_info.dataset_path[li], ctl.origin_info.gex_path[li], - ); - eprintln!( - "Of the {} VDJ cells that were tested,\nonly {} were GEX cells.", - total, good - ); - } - } - } - } - if fail { - eprintln!( - "\nThis test is restricted to VDJ cells having both chain types, uses at most \ - one cell\nper exact subclonotype, and uses up to 100 cells having the highest \ - UMI counts." - ); - eprintln!( - "\nThe data suggest a laboratory or informatic mixup. If you believe \ - that this is not the case,\nyou can force enclone to run by adding \ - the argument ALLOW_INCONSISTENT to the command line.\n" - ); - std::process::exit(1); - } - ctl.perf_stats(&tinc, "testing for inconsistency"); - - // Filter out some foursie artifacts. 
- - if ctl.clono_filt_opt.weak_foursies { - let t = Instant::now(); - let mut to_delete = vec![false; exact_clonotypes.len()]; - let mut twosies = Vec::<(Vec<u8>, Vec<u8>)>::new(); - for i in 0..exact_clonotypes.len() { - let ex = &exact_clonotypes[i]; - if ex.share.len() == 2 && (ex.share[0].left ^ ex.share[1].left) && ex.ncells() >= 10 { - twosies.push((ex.share[0].seq.clone(), ex.share[1].seq.clone())); - } - } - unique_sort(&mut twosies); - for i in 0..exact_clonotypes.len() { - let ex = &exact_clonotypes[i]; - if ex.share.len() == 4 { - for i1 in 0..4 { - for i2 in i1 + 1..4 { - if ex.share[i1].left ^ ex.share[i2].left { - let p = (ex.share[i1].seq.clone(), ex.share[i2].seq.clone()); - if bin_member(&twosies, &p) { - to_delete[i] = true; - } - } - } - } - } - } - erase_if(&mut exact_clonotypes, &to_delete); - ctl.perf_stats(&t, "filtering foursies"); - } - - // Look for insertions (experimental). - - find_insertions(&ctl, &exact_clonotypes); - - // Build info about clonotypes. Note that this edits the V reference sequence to perform - // an indel in some cases. - - let tinfo = Instant::now(); - let mut info: Vec<CloneInfo> = build_info(&refdata, &ctl, &mut exact_clonotypes); - ctl.perf_stats(&tinfo, "building info"); - - // Derive consensus sequences for alternate alleles of V segments. Then create donor - // reference sequences for Loupe. 
- - let talt = Instant::now(); - let alt_refs : Vec<(usize,usize,DnaString)> // {(donor, ref id, alt seq)} - = find_alleles( &refdata, &ctl, &exact_clonotypes ); - ctl.perf_stats(&talt, "finding alt alleles"); - if ctl.gen_opt.dref_file.len() > 0 { - let f = File::create(&ctl.gen_opt.dref_file); - let mut f = BufWriter::new(f.unwrap()); - let mut count = 0; - for i in 0..alt_refs.len() { - let donor = alt_refs[i].0; - let ref_id = alt_refs[i].1; - if i > 0 && (donor != alt_refs[i - 1].0 || ref_id != alt_refs[i - 1].1) { - count = 0; - } - let alt_seq = &alt_refs[i].2; - fwriteln!( - f, - ">{}:{}:{}:{} (reference record id : donor name : allele number : gene name)\n{}", - refdata.id[ref_id], - ctl.origin_info.donor_id[donor], - count + 1, - refdata.name[ref_id], - alt_seq.to_string() - ); - count += 1; - } - } - let tdonor = Instant::now(); - let drefs = make_donor_refs(&alt_refs, &refdata); - ctl.perf_stats(&tdonor, "making donor refs"); - - // Update reference sequences for V segments by substituting in alt alleles if better. - - sub_alts(&ctl, &alt_refs, &mut info, &mut exact_clonotypes); - - // Form equivalence relation on exact subclonotypes. - - let mut join_info = Vec::<(usize, usize, bool, Vec<u8>)>::new(); - let eq: EquivRel = join_exacts( - is_bcr, - &refdata, - &ctl, - &exact_clonotypes, - &info, - &mut join_info, - ); - /* - if ctl.comp { - if ctl.clono_filt_opt.ncells_low < ctl.clono_filt_opt.ncells_high { - println!(""); - } - } - */ - - // Lookup for heavy chain reuse (special purpose experimental option). - - lookup_heavy_chain_reuse(&ctl, &exact_clonotypes, &info, &eq); - - // For B cells, filter based on UMI counts. More details in heuristics.html. - // Find all clonotypes having one cell which has two chains, - // one heavy and one light. Get the sum of the chain UMI counts for this cell. - // - // For each cell, let umish be the umi count for its heavy chain having the most umis, and - // similarly define umisl. Let umitot = umish + umisl. 
- // - // If every cell in a clonotype would have been deleted, first find the exact subclonotype for - // which the sum of its umitot values is greatest, and then in it, find the cell having - // highest umitot value. Protect this cell, so long as it has at least two chains. - - let tumi = Instant::now(); - let mut orbits = Vec::<Vec<i32>>::new(); - let mut reps = Vec::<i32>::new(); - eq.orbit_reps(&mut reps); - if is_tcr || !ctl.clono_filt_opt.umi_filt { - for i in 0..reps.len() { - let mut o = Vec::<i32>::new(); - eq.orbit(reps[i], &mut o); - orbits.push(o); - } - } - if !is_tcr - && (ctl.gen_opt.baseline || ctl.clono_filt_opt.umi_filt || ctl.clono_filt_opt.umi_filt_mark) - { - let mut umis = vec![Vec::<usize>::new(); ctl.origin_info.n()]; - for i in 0..reps.len() { - let mut o = Vec::<i32>::new(); - eq.orbit(reps[i], &mut o); - if o.solo() { - let x: &CloneInfo = &info[o[0] as usize]; - let ex = &exact_clonotypes[x.clonotype_index]; - if ex.ncells() == 1 && ex.share.duo() && ex.share[0].left != ex.share[1].left { - umis[ex.clones[0][0].dataset_index] - .push(ex.clones[0][0].umi_count + ex.clones[0][1].umi_count); - } - } - } - let mut nu = vec![0; ctl.origin_info.n()]; - let mut umin = vec![0.0; ctl.origin_info.n()]; - for l in 0..ctl.origin_info.n() { - umis[l].sort(); - nu[l] = umis[l].len(); - if ctl.gen_opt.baseline { - println!( - "\n{} umi counts for dataset {} = {}", - nu[l], - l + 1, - ctl.origin_info.dataset_id[l] - ); - } - if nu[l] > 0 { - let n10 = umis[l][nu[l] / 10] as f64; - let n50 = umis[l][nu[l] / 2] as f64; - umin[l] = n10.min(n50 - (4.0 * n50.sqrt())); - } - if nu[l] > 0 && ctl.gen_opt.baseline { - println!("1% ==> {}", umis[l][umis[l].len() / 100]); - println!("2% ==> {}", umis[l][umis[l].len() / 50]); - println!("5% ==> {}", umis[l][umis[l].len() / 20]); - println!("10% ==> {}", umis[l][umis[l].len() / 10]); - println!("20% ==> {}", umis[l][umis[l].len() / 5]); - println!("50% ==> {}", umis[l][umis[l].len() / 2]); - println!("umin = 
{:.2}", umin[l]); - } - } - if ctl.clono_filt_opt.umi_filt || ctl.clono_filt_opt.umi_filt_mark { - const MIN_BASELINE_CELLS: usize = 20; - for i in 0..reps.len() { - let mut o = Vec::<i32>::new(); - eq.orbit(reps[i], &mut o); - let mut ncells = 0; - for j in 0..o.len() { - let x: &CloneInfo = &info[o[j] as usize]; - let ex = &exact_clonotypes[x.clonotype_index]; - ncells += ex.ncells(); - } - let mut nbads = 0; - if ncells >= 2 { - let mut to_deletex = vec![false; o.len()]; - let (mut best_ex, mut best_ex_sum) = (0, 0); - let (mut best_cell, mut best_cell_count) = (0, 0); - let mut baselined = true; - let mut protected = false; - for pass in 1..=3 { - if pass == 2 { - if nbads == 0 { - protected = true; - } else { - let p = 0.1; - let bound = 0.01; - - // Find probability of observing nbads or more events of probability - // p in a sample of size ncells, and if that is at least bound, - // don't delete any cells (except onesies). - - if binomial_sum(ncells, ncells - nbads, 1.0 - p) >= bound { - protected = true; - } - } - } - for j in 0..o.len() { - let x: &CloneInfo = &info[o[j] as usize]; - let ex = &mut exact_clonotypes[x.clonotype_index]; - let mut to_delete = vec![false; ex.ncells()]; - let mut ex_sum = 0; - for k in 0..ex.ncells() { - let li = ex.clones[k][0].dataset_index; - if nu[li] >= MIN_BASELINE_CELLS { - let (mut umish, mut umisl) = (0, 0); - for l in 0..ex.share.len() { - if ex.share[l].left { - umish = max(umish, ex.clones[k][l].umi_count); - } else { - umisl = max(umish, ex.clones[k][l].umi_count); - } - } - let umitot = umish + umisl; - if pass == 1 { - ex_sum += umitot; - } - if pass == 2 - && j == best_ex - && umitot > best_cell_count - && ex.share.len() > 1 - { - best_cell = k; - best_cell_count = umitot; - } - if (umitot as f64) < umin[li] { - if pass == 1 { - nbads += 1; - } else if pass == 3 && protected { - if ex.share.len() == 1 { - to_delete[k] = true; - if ctl.clono_filt_opt.umi_filt_mark { - ex.clones[k][0].marked = true; - } - } - } 
else if pass == 3 { - if !baselined - || (best_ex, best_cell) != (j, k) - || ex.share.len() == 1 - { - to_delete[k] = true; - if ctl.clono_filt_opt.umi_filt_mark { - ex.clones[k][0].marked = true; - } - } - } - } - } else { - baselined = false; - } - } - if pass == 1 && ex_sum > best_ex_sum { - best_ex = j; - best_ex_sum = ex_sum; - } - if pass == 3 && ctl.clono_filt_opt.umi_filt { - erase_if(&mut ex.clones, &to_delete); - } - } - } - for j in 0..o.len() { - let x: &CloneInfo = &info[o[j] as usize]; - let ex = &mut exact_clonotypes[x.clonotype_index]; - if ex.ncells() == 0 { - to_deletex[j] = true; - } - } - erase_if(&mut o, &to_deletex); - } - if ctl.clono_filt_opt.umi_filt && !o.is_empty() { - orbits.push(o.clone()); - } - } - } - } - - // Filter B cells based on UMI count ratios. This assumes V..J identity to filter. - - if is_bcr && (ctl.clono_filt_opt.umi_ratio_filt || ctl.clono_filt_opt.umi_ratio_filt_mark) { - const MIN_UMI_RATIO: usize = 500; - let mut orbits2 = Vec::<Vec<i32>>::new(); - 'orbit: for i in 0..orbits.len() { - let mut ncells = 0; - let mut o = orbits[i].clone(); - for j in 0..o.len() { - let x: &CloneInfo = &info[o[j] as usize]; - let ex = &exact_clonotypes[x.clonotype_index]; - ncells += ex.ncells(); - } - let mut nbads = 0; - for pass in 1..=2 { - if pass == 2 { - if nbads == 0 { - orbits2.push(o.clone()); - continue 'orbit; - } else { - let p = 0.1; - let bound = 0.01; - - // Find probability of observing nbads or more events of probability - // p in a sample of size ncells, and if that is at least bound, - // don't delete any cells. 
- - if binomial_sum(ncells, ncells - nbads, 1.0 - p) >= bound { - orbits2.push(o.clone()); - continue 'orbit; - } - } - } - let mut to_deletex = vec![false; o.len()]; - let mut z = Vec::<(Vec<u8>, usize, usize, usize, usize)>::new(); - let mut to_delete = Vec::<Vec<bool>>::new(); - for j in 0..o.len() { - let x: &CloneInfo = &info[o[j] as usize]; - let ex = &mut exact_clonotypes[x.clonotype_index]; - to_delete.push(vec![false; ex.ncells()]); - for k in 0..ex.ncells() { - let mut tot = 0; - for m in 0..ex.clones[k].len() { - tot += ex.clones[k][m].umi_count; - } - for m in 0..ex.clones[k].len() { - z.push(( - ex.share[m].seq.clone(), - ex.clones[k][m].umi_count, - j, - k, - tot, - )); - } - } - } - reverse_sort(&mut z); - let mut j = 0; - while j < z.len() { - let k = next_diff1_5(&z, j as i32) as usize; - for l in j..k { - if z[j].1 >= MIN_UMI_RATIO * z[l].4 { - to_delete[z[l].2][z[l].3] = true; - } - } - j = k; - } - for j in 0..o.len() { - let x: &CloneInfo = &info[o[j] as usize]; - let ex = &mut exact_clonotypes[x.clonotype_index]; - for l in 0..ex.ncells() { - if to_delete[j][l] { - if ctl.clono_filt_opt.umi_ratio_filt_mark { - ex.clones[l][0].marked = true; - } - nbads += 1; - } - } - if pass == 2 && ctl.clono_filt_opt.umi_ratio_filt { - erase_if(&mut ex.clones, &to_delete[j]); - if ex.ncells() == 0 { - to_deletex[j] = true; - } - } - } - if pass == 2 { - if ctl.clono_filt_opt.umi_ratio_filt { - erase_if(&mut o, &to_deletex); - if !o.is_empty() { - orbits2.push(o.clone()); - } - } - } - } - } - if ctl.clono_filt_opt.umi_ratio_filt { - orbits = orbits2; - } - } - - // Remove cells that are not called cells by GEX or feature barcodes. 
- - let mut orbits2 = Vec::<Vec<i32>>::new(); - for i in 0..orbits.len() { - let mut o = orbits[i].clone(); - let mut to_deletex = vec![false; o.len()]; - for j in 0..o.len() { - let x: &CloneInfo = &info[o[j] as usize]; - let ex = &mut exact_clonotypes[x.clonotype_index]; - let mut to_delete = vec![false; ex.ncells()]; - for k in 0..ex.ncells() { - let li = ex.clones[k][0].dataset_index; - let bc = &ex.clones[k][0].barcode; - if ctl.gen_opt.cellranger { - if gex_cells_specified[li] && !bin_member(&gex_cells[li], &bc) { - to_delete[k] = true; - } - } else if !ctl.clono_filt_opt.ngex && ctl.origin_info.gex_path[li].len() > 0 { - let gbc = &gex_info.gex_cell_barcodes[li]; - if !bin_member(&gbc, &bc) { - to_delete[k] = true; - } - } - } - erase_if(&mut ex.clones, &to_delete); - if ex.ncells() == 0 { - to_deletex[j] = true; - } - } - erase_if(&mut o, &to_deletex); - if !o.is_empty() { - orbits2.push(o.clone()); - } - } - orbits = orbits2; - - // Filter using constraints imposed by FCELL. - - if !ctl.clono_filt_opt.fcell.is_empty() { - let mut orbits2 = Vec::<Vec<i32>>::new(); - for i in 0..orbits.len() { - let mut o = orbits[i].clone(); - let mut to_deletex = vec![false; o.len()]; - for j in 0..o.len() { - let x: &CloneInfo = &info[o[j] as usize]; - let ex = &mut exact_clonotypes[x.clonotype_index]; - let mut to_delete = vec![false; ex.ncells()]; - for k in 0..ex.ncells() { - let li = ex.clones[k][0].dataset_index; - let bc = &ex.clones[k][0].barcode; - let mut keep = true; - for x in ctl.clono_filt_opt.fcell.iter() { - let (var, val) = (&x.0, &x.1); - let mut ok = false; - let alt = &ctl.origin_info.alt_bc_fields[li]; - let mut specified = false; - for j in 0..alt.len() { - if alt[j].0 == *var { - if alt[j].1.contains_key(&bc.clone()) { - specified = true; - let given_val = &alt[j].1[&bc.clone()]; - if given_val == val { - ok = true; - } - } - } - } - if !specified && val.len() == 0 { - ok = true; - } - if !ok { - keep = false; - } - } - if !keep { - to_delete[k] = 
true; - } - } - erase_if(&mut ex.clones, &to_delete); - if ex.ncells() == 0 { - to_deletex[j] = true; - } - } - erase_if(&mut o, &to_deletex); - if !o.is_empty() { - orbits2.push(o.clone()); - } - } - orbits = orbits2; - } - - // Check for disjoint orbits. This is an incomplete test. - - let mut orbits2 = Vec::<Vec<i32>>::new(); - for i in 0..orbits.len() { - let o = orbits[i].clone(); - let mut eqx = EquivRel::new(o.len() as i32); - for i1 in 0..o.len() { - for i2 in i1 + 1..o.len() { - if eqx.class_id(i1 as i32) != eqx.class_id(i2 as i32) { - let x1: &CloneInfo = &info[o[i1] as usize]; - let x2: &CloneInfo = &info[o[i2] as usize]; - if x1.clonotype_index == x2.clonotype_index { - eqx.join(i1 as i32, i2 as i32); - } else { - let ex1 = &exact_clonotypes[x1.clonotype_index]; - let ex2 = &exact_clonotypes[x2.clonotype_index]; - 'cloop: for m1 in 0..ex1.nchains() { - for m2 in 0..ex2.nchains() { - if ex1.share[m1].seq_del.len() == ex2.share[m2].seq_del.len() - && ex1.share[m1].cdr3_aa.len() == ex2.share[m2].cdr3_aa.len() - { - eqx.join(i1 as i32, i2 as i32); - break 'cloop; - } - } - } - } - } - } - } - if eqx.norbits() == 1 { - orbits2.push(o.clone()); - } else { - let mut repsx = Vec::<i32>::new(); - eqx.orbit_reps(&mut repsx); - for j in 0..repsx.len() { - let mut ox = Vec::<i32>::new(); - eqx.orbit(repsx[j], &mut ox); - let mut o2 = Vec::<i32>::new(); - for k in 0..ox.len() { - o2.push(o[ox[k] as usize]); - } - orbits2.push(o2); - } - } - } - orbits = orbits2; - - // Mark VDJ noncells. - - if ctl.clono_filt_opt.non_cell_mark { - for i in 0..exact_clonotypes.len() { - let ex = &mut exact_clonotypes[i]; - for j in 0..ex.clones.len() { - let di = ex.clones[j][0].dataset_index; - if !bin_member(&vdj_cells[di], &ex.clones[j][0].barcode) { - ex.clones[j][0].marked = true; - } - } - } - } - - ctl.perf_stats(&tumi, "umi filtering and such"); - - // Load the GEX and FB data. 
- - let tdi = Instant::now(); - let mut d_readers = Vec::<Option<hdf5::Reader>>::new(); - let mut ind_readers = Vec::<Option<hdf5::Reader>>::new(); - for li in 0..ctl.origin_info.n() { - if ctl.origin_info.gex_path[li].len() > 0 && !gex_info.gex_matrices[li].initialized() { - // THE FOLLOWING LINE HAS BEEN OBSERVED TO FAIL SPORADICALLY. THIS HAS BEEN - // OBSERVED ONCE. THE FAIL WAS THAT - // called `Option::unwrap()` on a `None` value. - - d_readers.push(Some(gex_info.h5_data[li].as_ref().unwrap().as_reader())); - ind_readers.push(Some(gex_info.h5_indices[li].as_ref().unwrap().as_reader())); - } else { - d_readers.push(None); - ind_readers.push(None); - } - } - let mut h5_data = Vec::<(usize, Vec<u32>, Vec<u32>)>::new(); - for li in 0..ctl.origin_info.n() { - h5_data.push((li, Vec::new(), Vec::new())); - } - h5_data.par_iter_mut().for_each(|res| { - let li = res.0; - if ctl.origin_info.gex_path[li].len() > 0 - && !gex_info.gex_matrices[li].initialized() - && ctl.gen_opt.h5_pre - { - res.1 = d_readers[li].as_ref().unwrap().read_raw().unwrap(); - res.2 = ind_readers[li].as_ref().unwrap().read_raw().unwrap(); - } - }); - ctl.perf_stats(&tdi, "setting up readers"); - - // Find and print clonotypes. - - let torb = Instant::now(); - let mut pics = Vec::<String>::new(); - let mut exacts = Vec::<Vec<usize>>::new(); // ugly reuse of name - let mut rsi = Vec::<ColInfo>::new(); // ditto - let mut out_datas = Vec::<Vec<HashMap<String, String>>>::new(); - let mut tests = Vec::<usize>::new(); - let mut controls = Vec::<usize>::new(); - print_clonotypes( - &refdata, - &drefs, - &ctl, - &exact_clonotypes, - &info, - &orbits, - &gex_info, - &vdj_cells, - &d_readers, - &ind_readers, - &h5_data, - &mut pics, - &mut exacts, - &mut rsi, - &mut out_datas, - &mut tests, - &mut controls, - ); - ctl.perf_stats(&torb, "making orbits"); - - // Tail code. 
- - let ttail = Instant::now(); - tail_code( - &tall, - &refdata, - &pics, - &exacts, - &rsi, - &exact_clonotypes, - &ctl, - &mut out_datas, - &join_info, - &gex_info, - &tests, - &controls, - &h5_data, - &d_readers, - &ind_readers, - &drefs, - ); - ctl.perf_stats(&ttail, "in tail code"); - - // Report computational performance. - - let delta; - unsafe { - delta = elapsed(&tall) - WALLCLOCK; - } - ctl.perf_stats(&tall, "total"); - if ctl.comp { - println!("used {:.2} seconds unaccounted for", delta); - } - - let (mut cpu_all_stop, mut cpu_this_stop) = (0, 0); - if print_cpu || print_cpu_info { - let f = open_for_read!["/proc/stat"]; - for line in f.lines() { - let s = line.unwrap(); - let mut t = s.after("cpu"); - while t.starts_with(' ') { - t = t.after(" "); - } - cpu_all_stop = t.before(" ").force_usize(); - break; - } - let f = open_for_read![&format!("/proc/{}/stat", std::process::id())]; - for line in f.lines() { - let s = line.unwrap(); - let fields = s.split(' ').collect::<Vec<&str>>(); - cpu_this_stop = fields[13].force_usize(); - } - let (this_used, all_used) = (cpu_this_stop - cpu_this_start, cpu_all_stop - cpu_all_start); - if print_cpu { - println!("{}", this_used); - } else { - println!( - "used cpu = {} = {:.1}% of total", - this_used, - percent_ratio(this_used, all_used) - ); - } - } - - println!(""); - // It's not totally clear that the exit below actually saves time. Would need more testing. - if !ctl.gen_opt.cellranger { - std::process::exit(0); - } -} diff --git a/enclone_main/tests/enclone_test.rs b/enclone_main/tests/enclone_test.rs deleted file mode 100644 index 638c2cd78..000000000 --- a/enclone_main/tests/enclone_test.rs +++ /dev/null @@ -1,1634 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -#![allow(unused_imports, dead_code)] - -// There are three categories of tests here: -// 1. basic tests (feature = basic), runs without additional data requirements -// 2. 
nonbasic tests, requires extended dataset distributed with enclone -// 3. speed test (feature = cpu), requires non-public datasets. - -use ansi_escape::*; -use enclone::html::*; -use enclone::misc3::parse_bsv; -use enclone::run_test::*; -use enclone_core::testlist::*; -use enclone_proto::proto_io::{read_proto, ClonotypeIter}; -use enclone_proto::types::EncloneOutputs; -use failure::Error; -use file_lock::FileLock; -use flate2::read::GzDecoder; -use io_utils::*; -use itertools::Itertools; -use perf_stats::*; -use pretty_trace::*; -use rayon::prelude::*; -use serde_json::Value; -use sha2::{Digest, Sha256}; -use stats_utils::*; -use std::cmp::min; -use std::collections::{HashMap, HashSet}; -use std::env; -use std::fs::{metadata, read_dir, read_to_string, remove_dir_all, remove_file, File}; -use std::io; -use std::io::prelude::*; -use std::io::{BufRead, BufReader, BufWriter, Read, Write}; -use std::process::{Command, Stdio}; -use std::thread; -use std::time; -use std::time::{Duration, Instant}; -use string_utils::*; -use vector_utils::*; - -const LOUPE_OUT_FILENAME: &str = "testx/__test_proto"; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -fn valid_link(link: &str) -> bool { - use attohttpc::*; - let req = attohttpc::get(link.clone()).read_timeout(Duration::new(10, 0)); - let response = req.send(); - if response.is_err() { - return false; - } else { - let response = response.unwrap(); - if response.is_success() { - return true; - } - return false; - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Make sure all help pages have been edited. 
- -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_help_pages_edited() { - let all = read_dir("../pages/auto").unwrap(); - for f in all { - let f = f.unwrap().path(); - let f = f.to_str().unwrap(); - if f.contains(".help.") { - let mut edited = false; - let h = open_for_read![&format!("{}", f)]; - for line in h.lines() { - let s = line.unwrap(); - if s.contains("googletag") { - edited = true; - } - } - if !edited { - eprintln!( - "\nThe page {} has not been edited. Please run ./build.\n", - f - ); - std::process::exit(1); - } - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Make sure that two css files still exist. These can never be deleted because they are -// accessed by certain html output of enclone. These files could be out in the wild and we -// don't want to break them. - -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_css_existence() { - let _ = include_str!["../../pages/enclone.css"]; - let _ = include_str!["../../pages/enclone_css_v2.css"]; -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Make sure that if sync_master was run, nothing would change. -// -// A bit ugly because of duplicated code. 
- -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_sync_master() { - let mut version = HashMap::<String, String>::new(); - let f = open_for_read!["../master.toml"]; - for line in f.lines() { - let s = line.unwrap(); - if !s.starts_with('#') && s.contains("=") { - version.insert(s.before(" = ").to_string(), s.after(" = ").to_string()); - } - } - let all = read_dir("..").unwrap(); - for f in all { - let f = f.unwrap().path(); - let f = f.to_str().unwrap(); - let toml = format!("{}/Cargo.toml", f); - if path_exists(&toml) { - let g = open_for_read![&toml]; - for line in g.lines() { - let s = line.unwrap(); - if s.contains(" =") { - let cratex = s.before(" =").to_string(); - if version.contains_key(&cratex) { - let t = format!("{} = {}", cratex, version[&cratex]); - if t != s { - eprintln!("\nFound change in {}.\nold: {}\nnew: {}", toml, s, t); - eprintln!("You probably need to run sync_to_master\n"); - std::process::exit(1); - } - } - } - } - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Run the download command on the landing page and make sure it works. -// -// Runs with "small", and passes second argument so we can put outputs in a defined place. -// -// There are two passes. The first pass tests the copy of install.sh that is one master, and -// the second pass tests the local version. -// -// This only runs internally because running this test bumps the download count that GitHub -// tracks, and we need to track the correction for our own "downloads" via this test. -// -// Increments an internal counter. 
- -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_curl_command() { - let mut internal_run = false; - for (key, value) in env::vars() { - if (key == "HOST" || key == "HOSTNAME") && value.ends_with(".fuzzplex.com") { - internal_run = true; - } - } - if internal_run { - fn get_github_release_counts() -> HashMap<String, isize> { - let mut m = HashMap::<String, isize>::new(); - let o = Command::new("curl") - .arg("https://api.github.com/repos/10XGenomics/enclone/releases") - .output() - .expect("failed to execute github http"); - let mut tag_name = String::new(); - let mx = String::from_utf8(o.stdout).unwrap(); - for s in mx.lines() { - if s.contains("tag_name") { - tag_name = s.between("v", "\"").to_string(); - } else if s.contains("download_count") { - let count = s.between(": ", ",").force_i64(); - m.insert(tag_name.clone(), count as isize); - } - } - m - } - if !path_exists("testx/outputs") { - eprintln!( - "\ntest_curl_command:\n\ - You need to create the directory enclone_main/testx/outputs.\n\ - If you run \"./build\" this will be done for you.\n" - ); - std::process::exit(1); - } - for pass in 1..=2 { - for f in ["enclone", "bin", ".profile", ".subversion"].iter() { - let g = format!("testx/outputs/{}", f); - if path_exists(&g) { - if !metadata(&g).unwrap().is_dir() { - remove_file(&g).unwrap(); - } else { - remove_dir_all(&g).unwrap(); - } - } - } - // let counts1 = get_github_release_counts(); - let command; - let version; - if pass == 1 { - command = "curl -sSf -L bit.ly/enclone_install | sh -s small testx/outputs"; - version = "master"; - } else { - command = "cat ../install.sh | sh -s small testx/outputs"; - version = "local"; - } - let o = Command::new("sh").arg("-c").arg(&command).output().unwrap(); - if o.status.code().unwrap() != 0 { - eprintln!( - "\nAttempt to run enclone install command using {} version of \ - install.sh failed.\n", - version - ); - eprint!("stdout:\n{}", strme(&o.stdout)); - eprint!("stderr:\n{}", 
strme(&o.stderr)); - std::process::exit(1); - } - let req = [ - "bin/enclone", - "enclone/datasets/123085/outs/all_contig_annotations.json.lz4", - "enclone/datasets_small_checksum", - "enclone/version", - ]; - for f in req.iter() { - if !path_exists(&format!("testx/outputs/{}", f)) { - eprintln!( - "\nAttempt to run enclone install command using {} version of \ - install.sh failed to fetch {}.\n", - version, f - ); - std::process::exit(1); - } - } - if path_exists("testx/outputs/.subversion") { - eprintln!( - "\nAttempt to run enclone install command using {} version of \ - install.sh created .subversion.\n", - version - ); - std::process::exit(1); - } - let z = open_for_read!["testx/outputs/enclone/version"]; - let mut version = String::new(); - for line in z.lines() { - version = line.unwrap(); - version = version.after("v").to_string(); - } - for f in ["enclone", "bin", ".profile", ".subversion"].iter() { - let g = format!("testx/outputs/{}", f); - if path_exists(&g) { - if *f == ".profile" { - remove_file(&g).unwrap(); - } else { - remove_dir_all(&g).unwrap(); - } - } - } - - // Increment download count. - // - // Not absolutely sure the locking mechanism used here is correct. - // - // The sleep time here was empirically determined to be enough so that GitHub has - // time to increment the release count. - - // thread::sleep(time::Duration::from_millis(1000)); - // let counts2 = get_github_release_counts(); - // if counts1.contains_key(&version) && counts2.contains_key(&version) { - // Test to see if the count on GitHub was incremented. If not, there is nothing - // to do. It seems that GitHub is erratic in incrementing the count. Or maybe - // this is not the case. It's not clear because we decided it was erratic before - // we fixed a bug. - // let delta = counts2[&version] - counts1[&version]; - // if delta >= 1 { - let count_file = "/mnt/assembly/vdj/internal_download_count"; - if !path_exists(&count_file) { - eprintln!( - "\nCan't find the file {}. 
Has something been moved?\n", - count_file - ); - std::process::exit(1); - } - let mut filelock = match FileLock::lock(&count_file, true, true) { - Ok(lock) => lock, - Err(err) => panic!("Error getting write lock: {}", err), - }; - let mut log = Vec::<u8>::new(); - let mut found = false; - { - let f = open_for_read![&count_file]; - for line in f.lines() { - let s = line.unwrap(); - if s.starts_with(&format!("{} = ", version)) { - fwriteln!(log, "{} = {}", version, s.after("= ").force_usize() + 1); - found = true; - } else { - fwriteln!(log, "{}", s); - } - } - } - if !found { - fwriteln!(log, "{} = 1", version); - } - filelock.file.write_all(&log).unwrap(); - // } - // } - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Make sure that the dataset checksum files are current. - -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_datasets_sha256() { - let sha_command1 = format!( - "git -C ../enclone-data write-tree --prefix=big_inputs/version{}", - TEST_FILES_VERSION - ); - let sha_command2 = "cat ../datasets_medium_checksum"; - let sha1 = Command::new("sh") - .arg("-c") - .arg(&sha_command1) - .output() - .unwrap(); - let sha1_status = sha1.status.code().unwrap(); - if sha1_status != 0 { - eprintln!( - "\nsha_command1 = {}\nfailed for datasets_medium_checksum\n", - sha_command1 - ); - std::process::exit(1); - } - let sha1 = sha1.stdout; - let sha2 = Command::new("sh") - .arg("-c") - .arg(&sha_command2) - .output() - .unwrap() - .stdout; - if sha1 != sha2 { - eprintln!( - "\nThe file datasets_medium_checksum is not current. 
You can update it by typing\n\ - ./build\ndatasets_medium_checksum = {}\ncomputed sha = {}", - strme(&sha2), - strme(&sha1), - ); - std::process::exit(1); - } - let sha_command1 = format!( - "git -C ../enclone-data write-tree --prefix=big_inputs/version{}/123085", - TEST_FILES_VERSION - ); - let sha_command2 = "cat ../datasets_small_checksum"; - let sha1 = Command::new("sh") - .arg("-c") - .arg(&sha_command1) - .output() - .unwrap() - .stdout; - let sha2 = Command::new("sh") - .arg("-c") - .arg(&sha_command2) - .output() - .unwrap() - .stdout; - if sha1 != sha2 { - eprintln!( - "\nThe file datasets_small_checksum is not current. You can update it by typing\n\ - ./build\ndatasets_small_checksum = {}\ncomputed sha = {}", - strme(&sha2), - strme(&sha1), - ); - std::process::exit(1); - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// SPEED (AND NOT BASIC) -// calibrated for bespin1, and requires linux -// cargo test --test enclone_test --features cpu -- --nocapture -// from enclone_main directory -// or just ./speed from root directory - -#[cfg(not(feature = "basic"))] -#[cfg(feature = "cpu")] -#[test] -fn test_cpu() { - // Introductory comments. - - println!( - "\nSPEED TESTS\n\n\ - • These are calibrated for a particular server, bespin1 at \ - 10x Genomics. If this code is run\nusing a different server, or if that server is \ - changed, the tests will need to be recalibrated.\n\ - • These tests also use 10x Genomics datasets that are not distributed publicly\n\ - (although perhaps could be).\n\ - • Finally note that the datasets \ - themselves could be changed without changing this code,\nand that could affect results." - ); - println!( - "\nThese tests are expected to fail intermittently simply because of stochastic variation\n\ - in computational performance (or competing load on the server). Note also that they\n\ - depend on files being in a cached state." - ); - - // Speed test 1. 
- - let it = 1; - let test = "BI=10 NCROSS NGEX NOPRINT PRINT_CPU NCORES EXPECT_OK EXPECT_NULL NO_PRE NFORCE"; - let expect = 7700; - let percent_dev = 6.0; - println!("\nSpeed test 1"); - println!( - "\nThis tests cpu cycles. If the code is parallelized better, this test may get \n\ - slower. Such changes should be accepted if they reduce wallclock." - ); - let mut out = String::new(); - let mut ok = false; - let mut log = String::new(); - let mut cpu_all_start = 0; - { - let f = open_for_read!["/proc/stat"]; - for line in f.lines() { - let s = line.unwrap(); - let mut t = s.after("cpu"); - while t.starts_with(' ') { - t = t.after(" "); - } - cpu_all_start = t.before(" ").force_usize(); - break; - } - } - run_test( - env!("CARGO_BIN_EXE_enclone"), - it, - &test, - "cpu", - &mut ok, - &mut log, - &mut out, - ); - let this_used = out.before("\n").force_usize(); - let mut cpu_all_stop = 0; - { - let f = open_for_read!["/proc/stat"]; - for line in f.lines() { - let s = line.unwrap(); - let mut t = s.after("cpu"); - while t.starts_with(' ') { - t = t.after(" "); - } - cpu_all_stop = t.before(" ").force_usize(); - break; - } - } - let all_used = cpu_all_stop - cpu_all_start; - let dev = 100.0 * (this_used as f64 - expect as f64) / (expect as f64); - println!( - "\nused cpu = {} = {:.1}% of total, dev = {:.1}%\n", - this_used, - percent_ratio(this_used, all_used), - dev - ); - if dev.abs() > percent_dev { - eprintln!("cpu deviation exceeded max of {}%\n", percent_dev); - std::process::exit(1); - } - - // Speed test 2. - - let it = 2; - let test = - "BI=1-2,5-12 MIX_DONORS NOPRINT PRINT_CPU NCORES EXPECT_OK EXPECT_NULL NO_PRE NFORCE"; - let expect = 59.0; - let percent_dev = 6.0; - println!("Speed test 2"); - println!( - "\nThis tests wall clock. 
It is thus particularly susceptible to competing load \ - on the server.\nIt will also be very slow unless it has been run recently so files \ - are in cache.\nThis test takes about a minute and may trigger a warning from cargo \ - after 60 seconds.\n" - ); - let t = Instant::now(); - let mut out = String::new(); - let mut ok = false; - let mut log = String::new(); - run_test( - env!("CARGO_BIN_EXE_enclone"), - it, - &test, - "cpu", - &mut ok, - &mut log, - &mut out, - ); - let this_used = elapsed(&t); - let dev = 100.0 * (this_used as f64 - expect as f64) / (expect as f64); - println!( - "\nused wallclock = {:.1} seconds, dev = {:.1}%\n", - this_used, dev - ); - if dev.abs() > percent_dev { - eprintln!("cpu deviation exceeded max of {}%\n", percent_dev); - std::process::exit(1); - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Test licenses of included packages and their dependencies. -// -// The following rules are applied: -// 1. If the license field in Cargo.toml is set to MIT or ISC or Zlib or WTFPL or MPL-2.0 -// or CC0-1.0, or is a logical expression for which one of those is sufficient, then there is -// no problem. Note that for MPL-2.0, we inform people how to get the source code for -// dependent crates. -// 2. If both license and license_field are null, then there is no problem. -// 3. If the license field is Apache-2.0, or a logical expression for which that is sufficient, -// and there is no NOTICE file, then there is no problem. Note that we include the -// Apache-2.0 license as part of this repo in third_party. -// 4. If the package is owned by 10x, then there is no problem. -// 5. arrayref and cloudabi OK because we've included the license for it. -// 6. fuchsia-cprng OK because Cargo.toml refers to a BSD-style license, in a file LICENSE, -// at https://fuchsia.googlesource.com/fuchsia/+/master/LICENSE, which we include in -// third_party under fuchsia. -// 7. 
ring OK because we acknowledge OpenSSL in the file acknowledgements and because we include -// the ring license. -// 8. webpki OK because we include the webpki license and also that for chromium. - -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_licenses() { - const ACCEPTABLE_LICENSE_TYPES: [&str; 6] = - ["MIT", "ISC", "Zlib", "WTFPL", "MPL-2.0", "CC0-1.0"]; - const A2: &str = "Apache-2.0"; - const ACCEPTABLE_10X_PACKAGES: [&str; 6] = [ - "enclone", - "enclone_print", - "enclone_tail", - "enclone_versions", - "exons", - "vdj_ann", - ]; - const ACCEPTABLE_OTHER_PACKAGES: [&str; 5] = - ["arrayref", "cloudabi", "fuchsia-cprng", "ring", "webpki"]; - let new = Command::new("cargo-license").arg("-d").arg("-j").output(); - if new.is_err() { - eprintln!( - "\nFailed to execute cargo-license. This means that either you have not \ - installed cargo-license,\nor that you have not added it to your PATH. \ - To install it, type:\n\ - cargo install cargo-license\n\ - When it is done installing, it will tell you where it put the binary, and you\n\ - should add that path to your PATH.\n\ - You can also avoid this test entirely by running instead \ - \"cd enclone; cargo test basic -- --nocapture\".\n" - ); - std::process::exit(1); - } - let lic = &new.unwrap().stdout; - let mut f = &lic[..]; - let mut fails = Vec::<String>::new(); - loop { - match read_vector_entry_from_json(&mut f) { - None => break, - Some(x) => { - let v: Value = serde_json::from_str(strme(&x)).unwrap(); - let package = v["name"].to_string().between("\"", "\"").to_string(); - let version = v["version"].to_string().between("\"", "\"").to_string(); - let mut license = String::new(); - if v.get("license").is_some() { - license = v["license"].to_string(); - if license.contains('"') { - license = license.between("\"", "\"").to_string(); - } - } - let mut license_file = String::new(); - if v.get("license_file").is_some() { - license_file = v["license_file"].to_string(); - if 
license_file.contains('"') { - license_file = license_file.between("\"", "\"").to_string(); - } - } - if license == "null" && license_file == "null" { - continue; - } - let mut repo = String::new(); - if v.get("repository").is_some() { - repo = v["repository"].to_string(); - if repo.contains('"') { - repo = repo.between("\"", "\"").to_string(); - } - } - let mut ok = false; - for y in ACCEPTABLE_10X_PACKAGES.iter() { - if package == *y { - ok = true; - } - } - for y in ACCEPTABLE_OTHER_PACKAGES.iter() { - if package == *y { - ok = true; - } - } - for y in ACCEPTABLE_LICENSE_TYPES.iter() { - if license == *y { - ok = true; - } - if license.ends_with(&format!(" OR {}", y)) { - ok = true; - } - if license.starts_with(&format!("{} OR ", y)) { - ok = true; - } - } - if !ok { - let (mut x1, mut x2) = (false, false); - if repo.starts_with("https://github.com") { - let f1 = format!("{}/blob/master/Cargo.toml", repo); - if valid_link(&f1) { - x1 = true; - } - let f2 = format!("{}/blob/master/NOTICE", repo); - if valid_link(&f2) { - x2 = true; - } - } - let a2 = license == A2 - || license.ends_with(&format!(" OR {}", A2)) - || license.starts_with(&format!("{} OR ", A2)); - if a2 && x1 && !x2 { - continue; - } - fails.push(format!("{}, {}, {}, {}", package, version, license, repo)); - } - } - } - } - if fails.len() > 0 { - fails.sort(); - let mut msg = format!("\nLicense check failed. The following packages had problems:\n"); - for i in 0..fails.len() { - msg += &format!("{}. {}\n", i + 1, fails[i]); - } - eprintln!("{}", msg); - std::process::exit(1); - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Test that files are rustfmt'ed. 
- -#[cfg(not(feature = "cpu"))] -#[test] -fn test_formatting() { - let new = Command::new("cargo-fmt") - .arg("--all") - .arg("--") - .arg("--check") - .output() - .expect(&format!("failed to execute test_formatting")); - if new.status.code().unwrap() != 0 { - eprintln!("\nYou need to run rustfmt.\n"); - eprintln!("{}\n", strme(&new.stdout)); - std::process::exit(1); - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// The following is a single test, containing many subtests, each of which is a regression test -// for a given enclone command line. -// -// If you ever need to change the output of all tests, use the main program -// update_all_main_tests.rs in enclone/src/bin. Note that there is some duplicated code there. - -#[cfg(not(feature = "cpu"))] -#[test] -fn test_enclone() { - PrettyTrace::new().on(); - let t = Instant::now(); - // id ok output - let mut results = Vec::<(usize, bool, String)>::new(); - for i in 0..TESTS.len() { - results.push((i, false, String::new())); - } - results.par_iter_mut().for_each(|res| { - let it = res.0; - let test = TESTS[it].to_string(); - let mut out = String::new(); - run_test( - env!("CARGO_BIN_EXE_enclone"), - it, - &test, - "test", - &mut res.1, - &mut res.2, - &mut out, - ); - }); - for i in 0..results.len() { - print!("{}", results[i].2); - if !results[i].1 { - std::process::exit(1); - } - } - println!( - "\ntotal time for {} enclone subtests = {:.2} seconds\n", - TESTS.len(), - elapsed(&t) - ); -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Regression tests using the extended public dataset collection. 
- -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_extended() { - PrettyTrace::new().on(); - let t = Instant::now(); - // id ok output - let mut results = Vec::<(usize, bool, String)>::new(); - for i in 0..EXTENDED_TESTS.len() { - results.push((i, false, String::new())); - } - results.par_iter_mut().for_each(|res| { - let it = res.0; - let test = EXTENDED_TESTS[it].to_string(); - let mut out = String::new(); - run_test( - env!("CARGO_BIN_EXE_enclone"), - it, - &test, - "ext_test", - &mut res.1, - &mut res.2, - &mut out, - ); - }); - for i in 0..results.len() { - print!("{}", results[i].2); - if !results[i].1 { - std::process::exit(1); - } - } - println!( - "\nextended tests total time for {} enclone subtests = {:.2} seconds\n", - EXTENDED_TESTS.len(), - elapsed(&t) - ); -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Regression tests for internal features. - -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_internal() { - PrettyTrace::new().on(); - let t = Instant::now(); - // id ok output - let mut results = Vec::<(usize, bool, String)>::new(); - for i in 0..INTERNAL_TESTS.len() { - results.push((i, false, String::new())); - } - results.par_iter_mut().for_each(|res| { - let it = res.0; - let test = INTERNAL_TESTS[it].to_string(); - let mut out = String::new(); - run_test( - env!("CARGO_BIN_EXE_enclone"), - it, - &test, - "internal_test", - &mut res.1, - &mut res.2, - &mut out, - ); - }); - for i in 0..results.len() { - print!("{}", results[i].2); - if !results[i].1 { - std::process::exit(1); - } - } - println!( - "\ninternal tests total time for {} enclone subtests = {:.2} seconds\n", - INTERNAL_TESTS.len(), - elapsed(&t) - ); -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Test site for broken links and spellcheck. 
-// -// Two approaches for checking broken links left in place for now, to delete one, and the -// corresponding crate from Cargo.toml. -// -// This looks for -// ▓<a href="..."▓ -// ▓http:...[, '")}<#\n]▓ -// ▓https:...[, '")}<#\n]▓ -// ▓<img src="..."▓. -// (These also test termination by ". ".) -// SHOULD also look for at least: -// ▓ href="..."▓ -// ▓ href='...'▓. - -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_for_broken_links_and_spellcheck() { - extern crate attohttpc; - use std::time::Duration; - - // Set up dictionary exceptions. We should rewrite the code to avoid looking in certain - // places and reduce the dictionary exceptions accordingly. - - let extra_words = "amazonaws barcode barcodes barcoding bcn cdiff chmod clonotype clonotypes \ - clonotyping codebase colorn contig contigs cred crispr \ - csv ctrlc cvar cvars datalayer dejavusansmono dref dyiid enclone executables false fcell \ - foursie foursies \ - genomics germline github githubusercontent google googletagmanager grok gz html \ - hypermutation hypermutations igh ighm igkc imgt \ - indel indels inkt jsdelivr json levenshtein linux loh lvars macbook mait metadata mkdir \ - moresies multiomic ncbi ncross NEWICK Newick \ - nopager noprint nqual nwhitef oligos onesie onesies parseable pbmc pcell phylip \ - plasmablast preinstalled prepends redownloads samtools screenshot segn sloooooooow \ - spacebar stackexchange standalone stdout subclonotype \ - subclonotypes svg thresholding timepoint tracebacks trb twosie ubuntu \ - umi umis underperforming unicode untarring vdj website wget wikimedia \ - wikipedia workaround workflow xf xhtml xkcd xxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxx zenodo zx"; - let extra_words = extra_words.split(' ').collect::<Vec<&str>>(); - - // Set up dictionary. 
- - let dictionary0 = read_to_string("../enclone-data/english_wordlist").unwrap(); - let dictionary0 = dictionary0.split('\n').collect::<Vec<&str>>(); - let mut dictionary = Vec::<String>::new(); - for w in dictionary0.iter() { - let mut x = w.to_string(); - x.make_ascii_lowercase(); - dictionary.push(x); - } - for w in extra_words { - dictionary.push(w.to_string()); - } - unique_sort(&mut dictionary); - - // Find html pages on site. - - let mut htmls = vec!["../index.html".to_string()]; - let pages = read_dir("../pages").unwrap(); - for page in pages { - let page = page.unwrap().path(); - let page = page.to_str().unwrap(); - if page.ends_with(".html") { - htmls.push(format!("{}", page)); - } - } - let auto = read_dir("../pages/auto").unwrap(); - for page in auto { - let page = page.unwrap().path(); - let page = page.to_str().unwrap(); - if page.ends_with(".html") { - htmls.push(format!("{}", page)); - } - } - - // Hardcoded exceptions to link testing, because of slowness. - - let mut tested = HashSet::<String>::new(); - tested.insert("https://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".to_string()); - tested.insert("http://www.w3.org/1999/xhtml".to_string()); - - // Hardcode exception for funny svn URL. - - tested.insert("https://github.com/10XGenomics/enclone/trunk".to_string()); - - // Test each html. - - for x in htmls { - let f = open_for_read![x]; - let depth = x.matches('/').count(); - for line in f.lines() { - let mut s = line.unwrap(); - - // Test spelling. Case insensitive. 
- - let mut s0 = s.replace(',', " "); - s0 = s0.replace('.', " "); - s0 = s0.replace(';', " "); - let words = s0.split(' ').collect::<Vec<&str>>(); - for i in 0..words.len() { - let mut ok = true; - let w = words[i].to_string(); - for c in w.chars() { - if !c.is_ascii_alphabetic() { - ok = false; - } - } - if w.is_empty() || !ok { - continue; - } - let mut wl = w.clone(); - wl.make_ascii_lowercase(); - if !bin_member(&dictionary, &wl.to_string()) { - eprintln!( - "\nthe word \"{}\" in file {} isn't in the dictionary\n", - w, x - ); - std::process::exit(1); - } - } - - // Check links. - - let mut links = Vec::<String>::new(); - let mut chars = Vec::<char>::new(); - for c in s.chars() { - chars.push(c); - } - let mut i = 0; - let terminators = vec![',', ' ', '\'', '"', ')', '}', '<', '#', '\n']; - while i < chars.len() { - let http = chars[i..].starts_with(&vec!['h', 't', 't', 'p', ':']); - let https = chars[i..].starts_with(&vec!['h', 't', 't', 'p', 's', ':']); - if http || https { - for j in i + 5..chars.len() { - if terminators.contains(&chars[j]) - || (chars[j] == '.' && j < chars.len() - 1 && chars[j + 1] == ' ') - { - let mut link = String::new(); - for k in i..j { - link.push(chars[k]); - } - if !tested.contains(&link.to_string()) { - links.push(link.clone()); - tested.insert(link.to_string()); - } - i = j - 1; - break; - } - } - } - i += 1; - } - let s2 = s.clone(); - while s.contains("<a href=\"") { - let link = s.between("<a href=\"", "\""); - if tested.contains(&link.to_string()) { - s = s.after("<a href=\"").to_string(); - continue; - } - tested.insert(link.to_string()); - - // Allow mailto to enclone. - - if link == "mailto:enclone@10xgenomics.com" { - s = s.after("<a href=\"").to_string(); - continue; - } - - // Otherwise if not http..., assume it's a file path. 
- - if !link.starts_with("http") { - let mut link = link.to_string(); - if link.contains('#') { - link = link.before("#").to_string(); - } - let mut z = link.clone(); - for _ in 0..depth - 1 { - if !z.starts_with("../") { - eprintln!("something wrong with file {} on page {}", link, x); - std::process::exit(1); - } - z = z.after("../").to_string(); - } - z = format!("../{}", z); - if !path_exists(&z) { - eprintln!("failed to find file {} on page {}", link, x); - std::process::exit(1); - } - s = s.after("<a href=\"").to_string(); - continue; - } - - // And finally do http.... - - links.push(link.to_string()); - s = s.after("<a href=\"").to_string(); - } - s = s2; - while s.contains("<img src=\"") { - let path = s.between("<img src=\"", "\""); - if tested.contains(&path.to_string()) { - s = s.after("<img src=\"").to_string(); - continue; - } - tested.insert(path.to_string()); - let path = path.to_string(); - let mut z = path.clone(); - for _ in 0..depth - 1 { - if !path.starts_with("../") { - eprintln!("something wrong with file {} on page {}", path, x); - std::process::exit(1); - } - z = z.after("../").to_string(); - } - z = format!("../{}", z); - if !path_exists(&z) { - eprintln!("failed to find file {} on page {}", path, x); - std::process::exit(1); - } - s = s.after("<img src=\"").to_string(); - } - for link in links { - // Temporary workaround. - - if link == "https://10xgenomics.github.io/enclone/install.sh" { - continue; - } - - // eprintln!("checking link \"{}\"", link); - - // Approach 1 to testing if link works. This seemed to hang once in spite of - // the timeout. 
- - use attohttpc::*; - const LINK_RETRIES: usize = 5; - for i in 0..LINK_RETRIES { - if i > 0 { - thread::sleep(time::Duration::from_millis(100)); - eprintln!("retrying link {}, attempt {}", link, i); - } - let req = attohttpc::get(link.clone()).read_timeout(Duration::new(10, 0)); - let response = req.send(); - if response.is_err() { - eprintln!("\ncould not read link {} on page {}\n", link, x); - if i == LINK_RETRIES - 1 { - std::process::exit(1); - } - } else { - let response = response.unwrap(); - if response.is_success() { - break; - } - eprintln!("\ncould not read link {} on page {}\n", link, x); - if i == LINK_RETRIES - 1 { - std::process::exit(1); - } - } - } - - // Approach 2 to testing if link works. This may not have a timeout and does - // not auto retry like approach 1. Also may not compile anymore. - - /* - use reqwest::StatusCode; - let req = reqwest::blocking::get(link); - if req.is_err() { - eprintln!("\ncould not read link {} on page {}\n", link, x); - std::process::exit(1); - } - if req.unwrap().status() == StatusCode::NOT_FOUND { - eprintln!("\ncould not read link {} on page {}\n", link, x); - std::process::exit(1); - } - */ - } - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// NOT BASIC - -// Test site examples to make sure they are what they claim to be, and that the -// merged html files are correct. 
- -#[cfg(not(feature = "basic"))] -#[cfg(not(feature = "cpu"))] -#[test] -fn test_site_examples() { - for i in 0..SITE_EXAMPLES.len() { - let example_name = SITE_EXAMPLES[i].0; - let test = SITE_EXAMPLES[i].1; - let in_file = format!("../{}", example_name); - let in_stuff = read_to_string(&in_file).expect(&format!("couldn't find {}", in_file)); - let args = parse_bsv(&test); - let new = Command::new(env!("CARGO_BIN_EXE_enclone")) - .args(&args) - .output() - .expect(&format!("failed to execute test_site_examples")); - if new.status.code() != Some(0) { - eprint!( - "\nenclone_site_examples: example {} failed to execute, stderr =\n{}", - i + 1, - strme(&new.stderr), - ); - std::process::exit(1); - } - let out_stuff = strme(&new.stdout); - if in_stuff != out_stuff { - eprintln!("\nThe output for site example {} has changed.\n", i + 1); - eprintln!("stderr:\n{}", strme(&new.stderr)); - let old_lines = in_stuff.split('\n').collect::<Vec<&str>>(); - let new_lines = out_stuff.split('\n').collect::<Vec<&str>>(); - eprintln!( - "old stdout has {} lines; new stdout has {} lines", - old_lines.len(), - new_lines.len(), - ); - for i in 0..min(old_lines.len(), new_lines.len()) { - if old_lines[i] != new_lines[i] { - eprintln!( - "first different stdout line is line {}\nold = {}\nnew = {}", - i + 1, - old_lines[i], - new_lines[i], - ); - break; - } - } - let save = format!("testx/outputs/{}", example_name.rev_after("/")); - { - let mut f = open_for_write_new![&save]; - fwrite!(f, "{}", out_stuff); - } - let mut in_filex = in_file.clone(); - if in_filex.starts_with("../") { - in_filex = in_filex.after("../").to_string(); - } - eprintln!("\nPlease diff {} enclone_main/{}.", in_filex, save); - eprintln!( - "\nPossibly this could be because you're running \"cargo t\" in an \ - environment without the\n\ - extended dataset collection. 
Possibly you should run \ - \"cd enclone; cargo test basic -- --nocapture\" instead.\n\n\ - Otherwise, if you're satisfied with the new output, you can update using\n\n\ - enclone {} > {}.\n", - args.iter().format(" "), - example_name - ); - std::process::exit(1); - } - } - - insert_html( - "../pages/index.html.src", - "testx/outputs/index.html", - true, - 0, - ); - insert_html( - "../pages/expanded.html.src", - "testx/outputs/expanded.html", - true, - 2, - ); - let new_index = read_to_string("testx/outputs/index.html").unwrap(); - if read_to_string("../index.html").unwrap() != new_index { - eprintln!("\nContent of index.html has changed."); - { - let mut f = open_for_write_new!["testx/outputs/index.html.new"]; - fwrite!(f, "{}", new_index); - } - eprintln!("Please diff index.html enclone_main/testx/outputs/index.html.new.\n"); - std::process::exit(1); - } - /* - if read_to_string("../pages/auto/expanded.html").unwrap() - != edit_html(&read_to_string("testx/outputs/expanded.html").unwrap()) - */ - if read_to_string("../pages/auto/expanded.html").unwrap() - != read_to_string("testx/outputs/expanded.html").unwrap() - { - eprintln!("\nContent of expanded.html has changed.\n"); - std::process::exit(1); - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Test that examples are what we claim they are. 
- -#[cfg(not(feature = "cpu"))] -#[test] -fn test_enclone_examples() { - PrettyTrace::new().on(); - for t in 0..EXAMPLES.len() { - let testn = format!("{}", EXAMPLES[t]); - let out_file = format!("../enclone_help/src/example{}", t + 1); - let old = read_to_string(&out_file).unwrap(); - let args = testn.split(' ').collect::<Vec<&str>>(); - let mut new = Command::new(env!("CARGO_BIN_EXE_enclone")); - let mut new = new.arg(format!( - "PRE=../enclone-data/big_inputs/version{}", - TEST_FILES_VERSION - )); - for i in 0..args.len() { - new = new.arg(&args[i]); - } - let new = new - .arg("FORCE_EXTERNAL") - .output() - .expect(&format!("failed to execute test_enclone_examples")); - let new2 = stringme(&new.stdout); - if new.status.code() != Some(0) { - eprint!( - "\nenclone_test_examples: example{} failed to execute, stderr =\n{}", - t + 1, - strme(&new.stderr), - ); - std::process::exit(1); - } - if old != new2 { - eprintln!( - "\nenclone_test_examples: the file example{} is not up to date\n", - t + 1 - ); - eprintln!("old output =\n{}", old); - eprintln!("new output =\n{}\n", new2); - std::process::exit(1); - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Test that references to the dataset version in README.md are current. - -#[cfg(not(feature = "cpu"))] -#[test] -fn test_version_number_in_readme() { - PrettyTrace::new().on(); - let readme = read_to_string("../README.md").unwrap(); - let fields = readme.split('/').collect::<Vec<&str>>(); - for x in fields { - if x.starts_with("version") { - let y = x.after("version"); - if y.parse::<usize>().is_ok() { - let v = y.force_usize(); - assert_eq!(v, TEST_FILES_VERSION as usize); - } - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Test that the DejaVuSansMono definition in enclone_css_v2.css has not changed. 
We put this here -// because that definition has to be manually tested, and we don't want it accidentally changed -// and broken. This is really gross, but it's not clear how to do it better. -// -// Absolutely hideous implementation to verify that -// cat ../pages/enclone_css_v2.css | head -36 = "2474276863 1467". -// -// Only works with high probability. - -#[cfg(not(feature = "cpu"))] -#[test] -fn test_dejavu() { - PrettyTrace::new().on(); - let mut cat_output_child = Command::new("cat") - .arg("../pages/enclone_css_v2.css") - .stdout(Stdio::piped()) - .spawn() - .unwrap(); - if let Some(cat_output) = cat_output_child.stdout.take() { - let mut head_output_child = Command::new("head") - .arg("-36") - .stdin(cat_output) - .stdout(Stdio::piped()) - .spawn() - .unwrap(); - cat_output_child.wait().unwrap(); - if let Some(head_output) = head_output_child.stdout.take() { - let cksum_output_child = Command::new("cksum") - .stdin(head_output) - .stdout(Stdio::piped()) - .spawn() - .unwrap(); - let cksum_stdout = cksum_output_child.wait_with_output().unwrap(); - head_output_child.wait().unwrap(); - let cksum = String::from_utf8(cksum_stdout.stdout).unwrap(); - // println!("cksum = {}", cksum); - assert!(cksum == "2474276863 1467\n".to_string()); - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Test that help output hasn't changed. 
- -#[cfg(not(feature = "cpu"))] -#[test] -fn test_help_output() { - PrettyTrace::new().on(); - let pages = vec![ - "setup", - "main", - "quick", - "how", - "command", - "glossary", - "example1", - "example2", - "input", - "input_tech", - "parseable", - "filter", - "special", - "lvars", - "cvars", - "amino", - "display", - "indels", - "color", - "faq", - "developer", - "all", - ]; - for p in pages { - let mut command = format!("enclone help {}", p); - if p == "setup" { - command = "enclone help".to_string(); - } else if p == "main" { - command = "enclone".to_string(); - } - let out_file = format!("../pages/auto/help.{}.html", p); - let old = read_to_string(&out_file).unwrap(); - let mut new = Command::new(env!("CARGO_BIN_EXE_enclone")); - let mut new = new.arg("HTML"); - if p == "setup" { - new = new.arg("help"); - } else if p == "main" { - } else { - new = new.arg("help"); - new = new.arg(p); - } - new = new.arg("STABLE_DOC"); - new = new.arg("NOPAGER"); - let new = new - .arg("FORCE_EXTERNAL") - .output() - .expect(&format!("failed to execute test_help_output")); - if new.status.code() != Some(0) { - eprintln!("Attempt to run {} failed.\n", command); - std::process::exit(1); - } - let new2 = edit_html(&stringme(&new.stdout)); - if old != new2 { - eprintme!(old.len(), new2.len()); - eprintln!( - "\nHelp test failed on {}.\n\ - You need to update help output by typing \"./build\", \ - assuming that the change is expected.\n", - p - ); - std::process::exit(1); - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Test that enclone help all HTML works (without STABLE_DOC). 
- -#[cfg(not(feature = "cpu"))] -#[test] -fn test_help_no_stable() { - PrettyTrace::new().on(); - let mut new = Command::new(env!("CARGO_BIN_EXE_enclone")); - let mut new = new.arg("help"); - new = new.arg("all"); - new = new.arg("HTML"); - new = new.arg("NOPAGER"); - let new = new - .arg("FORCE_EXTERNAL") - .output() - .expect(&format!("failed to execute test_help_output")); - if new.status.code() != Some(0) { - eprintln!("Attempt to run enclone help all without STABLE_DOC failed.\n"); - std::process::exit(1); - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Test that PREBUILD works. - -#[cfg(not(feature = "cpu"))] -#[test] -fn test_enclone_prebuild() { - PrettyTrace::new().on(); - let t = Instant::now(); - let mb = format!( - "../enclone-data/big_inputs/version{}/123749/outs/raw_feature_bc_matrix/feature_barcode_matrix.bin", - TEST_FILES_VERSION - ); - if path_exists(&mb) { - remove_file(&mb).unwrap(); - } - - // First pass: run with NH5. 
- - let test_id = 48; - let it = test_id - 1; - let testn = format!("{} NH5", TESTS[it]); - let out_file = format!("testx/inputs/outputs/enclone_test{}_output", test_id); - let old = read_to_string(&out_file).unwrap(); - let args = testn.split(' ').collect::<Vec<&str>>(); - let mut new = Command::new(env!("CARGO_BIN_EXE_enclone")); - let mut new = new.arg(format!( - "PRE=../enclone-data/big_inputs/version{}", - TEST_FILES_VERSION - )); - for i in 0..args.len() { - new = new.arg(&args[i]); - } - // dubious use of expect: - let new = new - .arg("FORCE_EXTERNAL") - .output() - .expect(&format!("failed to execute test_enclone_prebuild")); - // let new_err = strme(&new.stderr).split('\n').collect::<Vec<&str>>(); - let new2 = stringme(&new.stdout); - if old != new2 { - eprintln!( - "\nenclone_test_prebuild: first pass output has changed.\n\ - You may want to add more info to this failure message.\n" - ); - eprintln!("old output =\n{}\n", old); - eprintln!("new output =\n{}\n", new2); - std::process::exit(1); - } - if !path_exists(&format!( - "../enclone-data/big_inputs/version{}/123749/outs/feature_barcode_matrix.bin", - TEST_FILES_VERSION - )) { - panic!("\nenclone_test_prebuild: did not create feature_barcode_matrix.bin."); - } - - // Second pass: run without PREBUILD but using the feature_barcode_matrix.bin that the first - // pass created. 
- - let testn = TESTS[it]; - let args = testn.split(' ').collect::<Vec<&str>>(); - let mut new = Command::new(env!("CARGO_BIN_EXE_enclone")); - let mut new = new.arg(format!( - "PRE=../enclone-data/big_inputs/version{}", - TEST_FILES_VERSION - )); - for i in 0..args.len() { - new = new.arg(&args[i]); - } - // dubious use of expect: - let new = new - .arg("FORCE_EXTERNAL") - .output() - .expect(&format!("failed to execute enclone_test_prebuild")); - // let new_err = strme(&new.stderr).split('\n').collect::<Vec<&str>>(); - let new2 = stringme(&new.stdout); - if old != new2 { - eprintln!( - "\nenclone_test_prebuild: second pass output has changed.\n\ - You may want to add more info to this failure message.\n\ - And don't forget to remove feature_barcode_matrix.bin.\n" - ); - eprintln!("new output =\n{}\n", new2); - std::process::exit(1); - } - - // Clean up: delete feature_barcode_matrix.bin. - - std::fs::remove_file(&format!( - "../enclone-data/big_inputs/version{}/123749/outs/feature_barcode_matrix.bin", - TEST_FILES_VERSION - )) - .unwrap(); - println!("\nused {:.2} seconds in enclone_test_prebuild", elapsed(&t)); -} - -fn check_enclone_outs_consistency(enclone_outs: &EncloneOutputs) { - let uref_items = &enclone_outs.universal_reference.items; - for cl in &enclone_outs.clonotypes { - for cl_chain in &cl.chains { - assert!(uref_items.len() > cl_chain.v_idx as usize); - assert!(uref_items.len() > cl_chain.j_idx as usize); - assert_eq!(uref_items[cl_chain.v_idx as usize].region, 1); - assert_eq!(uref_items[cl_chain.j_idx as usize].region, 3); - if let Some(d_idx) = cl_chain.d_idx { - assert!(uref_items.len() > d_idx as usize); - assert_eq!(uref_items[d_idx as usize].region, 2); - } - if let Some(u_idx) = cl_chain.u_idx { - assert!(uref_items.len() > u_idx as usize); - assert_eq!(uref_items[u_idx as usize].region, 0); - } - for ex_cl in &cl.exact_clonotypes { - for ex_cl_chain in ex_cl.chains.iter().map(|info| &info.chain) { - if let Some(c_region_idx) = 
ex_cl_chain.c_region_idx { - assert!(uref_items.len() > c_region_idx as usize); - assert_eq!(uref_items[c_region_idx as usize].region, 4); - } - } - } - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// This test runs enclone for two test inputs, with LOUPE output -// turned on. It will then read both the bincode and proto file created -// and asserts that we get the same data structure either way. -// -// It also tests to make sure that the LOUPE output is unchanged. If it changed for a good -// reason, update the output file. Otherwise perhaps something has gone wrong! - -#[cfg(not(feature = "cpu"))] -#[test] -fn test_proto_write() -> Result<(), Error> { - let tests = vec!["BCR=123085", "TCR=101287"]; - let pre_arg = format!( - "PRE=../enclone-data/big_inputs/version{}", - TEST_FILES_VERSION - ); - - let bin_file = format!("{}.binary", LOUPE_OUT_FILENAME); - let proto_file = format!("{}.proto", LOUPE_OUT_FILENAME); - - let binary_arg = format!("BINARY={}", bin_file); - let proto_arg = format!("PROTO={}", proto_file); - for t in tests.iter() { - // FIXME: It would be nicer to use the enclone API here - std::process::Command::new(env!("CARGO_BIN_EXE_enclone")) - .args(&[&pre_arg, *t, &binary_arg, &proto_arg]) - .output() - .expect(&format!("failed to execute enclone for test_proto_write")); - - // Test to make sure proto and bin are consistent. - - let outputs_proto = read_proto(&proto_file)?; - let outputs_bin: EncloneOutputs = io_utils::read_obj(&bin_file); - if outputs_proto != outputs_bin { - eprintln!("\noutputs_proto is not equal to outputs_bin\n"); - std::process::exit(1); - } - - // Test to make sure that the clonotype iterator works - - let clonotypes: Vec<_> = ClonotypeIter::from_file(&proto_file).unwrap().collect(); - assert!(clonotypes == outputs_proto.clonotypes); - - // Check consistency - - check_enclone_outs_consistency(&outputs_proto); - - // Test to make sure output is unchanged. 
- - let oldx = format!("testx/inputs/{}.binary.sha256", t.after("=")); - let mut fold = std::fs::File::open(&oldx)?; - let mut cksum_old = Vec::<u8>::new(); - fold.read_to_end(&mut cksum_old)?; - let mut fnew = std::fs::File::open(&bin_file)?; - let mut cksum_new = Vec::<u8>::new(); - fnew.read_to_end(&mut cksum_new)?; - let cksum_new = format!("{:x}", sha2::Sha256::digest(&cksum_new)); - std::fs::remove_file(&proto_file)?; - std::fs::remove_file(&bin_file)?; - if strme(&cksum_old) != cksum_new { - eprintln!( - "\nThe binary output of enclone on {} has changed. If this is expected,\n\ - please run the command\n\ - echo -n {} > enclone_main/testx/inputs/{}.binary.sha256", - t, - &cksum_new, - t.after("=") - ); - std::process::exit(1); - } - } - Ok(()) -} diff --git a/enclone_main/testx/.gitignore b/enclone_main/testx/.gitignore deleted file mode 100644 index 1ff7457e8..000000000 --- a/enclone_main/testx/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/__test_proto.proto -/__test_proto.bin diff --git a/enclone_main/testx/inputs/101287.binary.sha256 b/enclone_main/testx/inputs/101287.binary.sha256 deleted file mode 100644 index dc6f4a7e5..000000000 --- a/enclone_main/testx/inputs/101287.binary.sha256 +++ /dev/null @@ -1 +0,0 @@ -6d8cf757a12f465230fb1a5671517c22d7871e1fa124ebcd4237a84d88531ea0 \ No newline at end of file diff --git a/enclone_main/testx/inputs/123077_cells.csv b/enclone_main/testx/inputs/123077_cells.csv deleted file mode 100644 index a4105420e..000000000 --- a/enclone_main/testx/inputs/123077_cells.csv +++ /dev/null @@ -1,2846 +0,0 @@ -barcode,T,keeper -AAACCTGAGCGTTGCC-1,◯, -AAACCTGAGTACACCT-1,◯, -AAACCTGCAATGAAAC-1,◯, -AAACCTGCATCGTCGG-1,◯, -AAACCTGGTGGTCTCG-1,◯, -AAACCTGTCACCACCT-1,◯, -AAACGGGAGAAACGCC-1,◯, -AAACGGGGTACGCTGC-1,◯, -AAACGGGGTCTCGTTC-1,◯, -AAACGGGGTTCGGGCT-1,◯, -AAACGGGGTTTGCATG-1,◯, -AAACGGGTCATCTGTT-1,◯, -AAACGGGTCTGCCAGG-1,◯, -AAAGATGCAATAGCAA-1,◯, -AAAGATGCAGGAACGT-1,◯, -AAAGATGGTACTCTCC-1,◯, -AAAGATGGTAGAGTGC-1,◯, -AAAGATGGTCGGCTCA-1,◯, 
-AAAGATGTCCTGCAGG-1,◯, -AAAGCAAAGATCCCAT-1,◯, -AAAGCAACATGTTCCC-1,◯, -AAAGCAATCCTGCAGG-1,◯, -AAAGTAGAGACCTAGG-1,◯, -AAAGTAGAGCAGATCG-1,◯, -AAAGTAGAGCTAACAA-1,◯, -AAAGTAGAGGCGCTCT-1,◯, -AAAGTAGCAAGTTCTG-1,◯, -AAAGTAGCACGAAGCA-1,◯, -AAAGTAGCATGCAACT-1,◯, -AAAGTAGCATTTCAGG-1,◯, -AAAGTAGGTAAACCTC-1,◯, -AAAGTAGGTGCGCTTG-1,◯, -AAAGTAGTCGGCCGAT-1,◯, -AAATGCCAGGTAGCTG-1,◯, -AAATGCCCAATGGAGC-1,◯, -AAATGCCCAGGGTATG-1,◯, -AAATGCCCATAGTAAG-1,◯, -AAATGCCGTCATGCAT-1,◯, -AAATGCCGTCGCGAAA-1,◯, -AAATGCCGTTGCCTCT-1,◯, -AAATGCCTCATGTGGT-1,◯, -AAATGCCTCGCCTGTT-1,◯, -AACACGTAGATGAGAG-1,◯, -AACACGTCAACAACCT-1,◯, -AACACGTCAGGTCCAC-1,◯, -AACACGTCAGTGGAGT-1,◯, -AACACGTGTAAGAGGA-1,◯, -AACACGTGTTCGTGAT-1,◯, -AACACGTGTTCGTTGA-1,◯, -AACACGTTCTGTTGAG-1,◯, -AACCATGAGTTTCCTT-1,◯, -AACCATGCACCCTATC-1,◯, -AACCATGTCACTATTC-1,◯, -AACCATGTCGTGGACC-1,◯, -AACCGCGAGCTGTTCA-1,◯, -AACCGCGCACTGCCAG-1,◯, -AACCGCGCAGTATCTG-1,◯, -AACGTTGGTCCGAAGA-1,◯, -AACGTTGGTTCACCTC-1,◯, -AACGTTGTCTGTTGAG-1,◯, -AACTCAGAGTACTTGC-1,◯, -AACTCAGCACCACCAG-1,◯, -AACTCAGCAGCAGTTT-1,◯, -AACTCAGTCCTGTACC-1,◯, -AACTCCCAGAGAACAG-1,◯, -AACTCCCAGTCCGGTC-1,◯, -AACTCCCAGTGGTCCC-1,◯, -AACTCCCCACCAGTTA-1,◯, -AACTCCCGTCCATGAT-1,◯, -AACTCCCTCACGCATA-1,◯, -AACTCCCTCATCGCTC-1,◯, -AACTCCCTCTGCAAGT-1,◯, -AACTCTTCATTCTTAC-1,◯, -AACTCTTGTAGCCTCG-1,◯, -AACTCTTGTCACTGGC-1,◯, -AACTCTTGTCCAGTTA-1,◯, -AACTCTTGTCCGTGAC-1,◯, -AACTCTTGTCCTCCAT-1,◯, -AACTCTTGTTAGGGTG-1,◯, -AACTCTTGTTCAGACT-1,◯, -AACTCTTTCTGCTGTC-1,◯, -AACTGGTAGATCGGGT-1,◯, -AACTGGTAGTGGGCTA-1,◯, -AACTGGTCAAGCCATT-1,◯, -AACTGGTCAAGCTGAG-1,◯, -AACTGGTCAGATGGCA-1,◯, -AACTGGTGTACCGTAT-1,◯, -AACTGGTGTGCCTGGT-1,◯, -AACTGGTTCATAAAGG-1,◯, -AACTGGTTCCTACAGA-1,◯, -AACTGGTTCGACGGAA-1,◯, -AACTGGTTCGCCAAAT-1,◯, -AACTTTCCAAGCTGGA-1,◯, -AACTTTCCACAGACAG-1,◯, -AACTTTCCACCTATCC-1,◯, -AACTTTCCATGCATGT-1,◯, -AACTTTCTCAACGGGA-1,◯, -AAGACCTAGATCGATA-1,◯, -AAGACCTAGCACGCCT-1,◯, -AAGACCTCATGCTGGC-1,◯, -AAGACCTTCACCGGGT-1,◯, -AAGACCTTCTTACCGC-1,◯, -AAGCCGCAGAAACGAG-1,◯, -AAGCCGCAGAGTAATC-1,◯, 
-AAGCCGCAGCTTTGGT-1,◯, -AAGCCGCAGTGTACCT-1,◯, -AAGCCGCCAAACGTGG-1,◯, -AAGCCGCGTCCCGACA-1,◯, -AAGCCGCGTCTGCAAT-1,◯, -AAGCCGCTCAACGAAA-1,◯, -AAGGAGCAGAATTCCC-1,◯, -AAGGAGCAGATCTGAA-1,◯, -AAGGAGCAGTGCGATG-1,◯, -AAGGAGCCACTATCTT-1,◯, -AAGGAGCGTCTCAACA-1,◯, -AAGGAGCTCGATCCCT-1,◯, -AAGGAGCTCGCTGATA-1,◯, -AAGGCAGGTCACCTAA-1,◯, -AAGGCAGTCCCTTGCA-1,◯, -AAGGCAGTCCGCAAGC-1,◯, -AAGGCAGTCTGAAAGA-1,◯, -AAGGCAGTCTGCGACG-1,◯, -AAGGCAGTCTTGTATC-1,◯, -AAGGTTCAGCGGCTTC-1,◯, -AAGGTTCAGGGCACTA-1,◯, -AAGGTTCAGTACCGGA-1,◯, -AAGGTTCTCTCAAGTG-1,◯, -AAGTCTGAGCAAATCA-1,◯, -AAGTCTGAGTTATCGC-1,◯, -AAGTCTGCACATCTTT-1,◯, -AAGTCTGCATCTATGG-1,◯, -AAGTCTGGTTACGACT-1,◯, -AAGTCTGTCAACGGCC-1,◯, -AAGTCTGTCGTTGCCT-1,◯, -AATCCAGAGCGATGAC-1,◯, -AATCCAGAGTTACCCA-1,◯, -AATCCAGAGTTTGCGT-1,◯, -AATCCAGCACCGAAAG-1,◯, -AATCCAGCACGCTTTC-1,◯, -AATCCAGCAGGAATGC-1,◯, -AATCCAGGTATGCTTG-1,◯, -AATCCAGGTTCTGGTA-1,◯, -AATCCAGTCAAGATCC-1,◯, -AATCCAGTCAGTGCAT-1,◯, -AATCGGTAGGCAATTA-1,◯, -AATCGGTCACACGCTG-1,◯, -AATCGGTTCCCTTGCA-1,◯, -ACACCAACAATACGCT-1,◯, -ACACCAAGTGGTTTCA-1,◯, -ACACCCTAGCCAGTAG-1,◯, -ACACCCTAGGAATTAC-1,◯, -ACACCCTAGGTTACCT-1,◯, -ACACCCTCATAGACTC-1,◯, -ACACCCTCATTCCTGC-1,◯, -ACACCCTGTAAATGAC-1,◯, -ACACCCTGTAGAGTGC-1,◯, -ACACCCTGTTCACGGC-1,◯, -ACACCCTTCATCATTC-1,◯, -ACACCCTTCGCAGGCT-1,◯, -ACACCGGAGATCCCGC-1,◯, -ACACCGGAGTATTGGA-1,◯, -ACACCGGAGTGGTAGC-1,◯, -ACACCGGAGTGTACCT-1,◯, -ACACCGGCACACTGCG-1,◯, -ACACCGGCACTTCTGC-1,◯, -ACACCGGCATAAAGGT-1,◯, -ACACCGGGTCGCTTCT-1,◯, -ACACCGGGTTGGACCC-1,◯, -ACACTGAAGTGTTGAA-1,◯, -ACACTGACAAACCTAC-1,◯, -ACACTGAGTCTTTCAT-1,◯, -ACACTGATCAATAAGG-1,◯, -ACAGCCGAGCGATCCC-1,◯, -ACAGCCGAGTACGCCC-1,◯, -ACAGCCGAGTTGCAGG-1,◯, -ACAGCCGCAGTGGGAT-1,◯, -ACAGCCGGTCCCTACT-1,◯, -ACAGCCGTCCCAAGTA-1,◯, -ACAGCTAAGCCGCCTA-1,◯, -ACAGCTACACCAGGCT-1,◯, -ACAGCTAGTAGCGATG-1,◯, -ACAGCTAGTCCAACTA-1,◯, -ACAGCTAGTGGGTCAA-1,◯, -ACAGCTATCAGGCAAG-1,◯, -ACAGCTATCCATTCTA-1,◯, -ACATACGAGAAGGTGA-1,◯, -ACATACGAGTCAAGGC-1,◯, -ACATACGGTCTCAACA-1,◯, -ACATACGTCATCGCTC-1,◯, -ACATCAGCAAGAGGCT-1,◯, 
-ACATCAGCAGAGTGTG-1,◯, -ACATCAGGTGCCTGGT-1,◯, -ACATCAGGTGCTTCTC-1,◯, -ACATCAGGTTACGGAG-1,◯, -ACATCAGTCAGCACAT-1,◯, -ACATCAGTCCTCATTA-1,◯, -ACATCAGTCTAACTTC-1,◯, -ACATGGTAGAGCTGCA-1,◯, -ACATGGTCAAACGTGG-1,◯, -ACATGGTCAGTGGAGT-1,◯, -ACATGGTGTATAGTAG-1,◯, -ACATGGTGTATATCCG-1,◯, -ACATGGTTCGGTTAAC-1,◯, -ACCAGTAAGAAACGAG-1,◯, -ACCAGTAAGTGAATTG-1,◯, -ACCAGTACAAGTAATG-1,◯, -ACCAGTACATCGGACC-1,◯, -ACCAGTAGTAATCACC-1,◯, -ACCAGTAGTTTAGCTG-1,◯, -ACCCACTAGGTGCTTT-1,◯, -ACCCACTGTAAGCACG-1,◯, -ACCCACTGTGCACTTA-1,◯, -ACCCACTGTTCAGACT-1,◯, -ACCCACTTCTACTATC-1,◯, -ACCGTAACATACTACG-1,◯, -ACCGTAAGTTGCGCAC-1,◯, -ACCGTAATCCCAAGAT-1,◯, -ACCTTTAAGGGAGTAA-1,◯, -ACCTTTAAGGGTTCCC-1,◯, -ACCTTTAGTGGCAAAC-1,◯, -ACCTTTAGTGGCCCTA-1,◯, -ACCTTTATCAATCACG-1,◯, -ACCTTTATCTGCCAGG-1,◯, -ACGAGCCAGACACTAA-1,◯, -ACGAGCCAGTGGCACA-1,◯, -ACGAGCCGTGTTCGAT-1,◯, -ACGAGCCTCTAACTCT-1,◯, -ACGAGGAAGAGACGAA-1,◯, -ACGAGGAAGCTAGTGG-1,◯, -ACGAGGACACACCGAC-1,◯, -ACGAGGACATCCGGGT-1,◯, -ACGAGGAGTATGAATG-1,◯, -ACGAGGAGTGAAAGAG-1,◯, -ACGAGGAGTTCCCGAG-1,◯, -ACGAGGAGTTCCCTTG-1,◯, -ACGATACAGGATGTAT-1,◯, -ACGATACAGTACGATA-1,◯, -ACGATACGTCATGCCG-1,◯, -ACGATACGTGAGCGAT-1,◯, -ACGATGTCAAACGCGA-1,◯, -ACGATGTCATCATCCC-1,◯, -ACGATGTGTCCGACGT-1,◯, -ACGATGTGTTTGTGTG-1,◯, -ACGATGTTCACAACGT-1,◯, -ACGATGTTCTTGAGAC-1,◯, -ACGCAGCAGGTACTCT-1,◯, -ACGCAGCGTAGGAGTC-1,◯, -ACGCAGCTCCAAGCCG-1,◯, -ACGCCAGTCGTCTGAA-1,◯, -ACGCCGACAATCCGAT-1,◯, -ACGCCGAGTCTAGTCA-1,◯, -ACGCCGAGTTATGTGC-1,◯, -ACGCCGATCATATCGG-1,◯, -ACGGAGAAGGCTATCT-1,◯, -ACGGAGACAAGTTGTC-1,◯, -ACGGAGATCACCACCT-1,◯, -ACGGAGATCGGAAACG-1,◯, -ACGGCCAAGTGGGCTA-1,◯, -ACGGCCACAAGCCGTC-1,◯, -ACGGCCACAATAGCGG-1,◯, -ACGGCCACACAACGTT-1,◯, -ACGGCCACACATAACC-1,◯, -ACGGCCACACCAACCG-1,◯, -ACGGCCAGTACACCGC-1,◯, -ACGGCCATCATAACCG-1,◯, -ACGGCCATCCTGCTTG-1,◯, -ACGGCCATCTAACTGG-1,◯, -ACGGCCATCTGAGGGA-1,◯, -ACGGCCATCTTTACAC-1,◯, -ACGGGCTAGTTCGATC-1,◯, -ACGGGCTCATTCTCAT-1,◯, -ACGGGCTGTCCCGACA-1,◯, -ACGGGCTTCATTGCGA-1,◯, -ACGGGCTTCGAATGCT-1,◯, -ACGGGCTTCTGATTCT-1,◯, -ACGGGTCCAGGGATTG-1,◯, 
-ACGGGTCTCTTCCTTC-1,◯, -ACGTCAAAGACAGAGA-1,◯, -ACGTCAAAGAGACGAA-1,◯, -ACGTCAAAGATAGCAT-1,◯, -ACGTCAAAGCGAAGGG-1,◯, -ACGTCAAAGGGCTTGA-1,◯, -ACGTCAAGTAGTGAAT-1,◯, -ACGTCAAGTCAGAGGT-1,◯, -ACGTCAAGTCGGATCC-1,◯, -ACGTCAATCACCTTAT-1,◯, -ACGTCAATCACTGGGC-1,◯, -ACGTCAATCTTACCGC-1,◯, -ACTATCTAGCGCTTAT-1,◯, -ACTATCTCAGTTCCCT-1,◯, -ACTATCTTCACAACGT-1,◯, -ACTATCTTCACCATAG-1,◯, -ACTATCTTCGAACTGT-1,◯, -ACTGAACAGAAACGCC-1,◯, -ACTGAACCAAATTGCC-1,◯, -ACTGAACGTAGAAAGG-1,◯, -ACTGAACGTGTCTGAT-1,◯, -ACTGAACGTTGAGTTC-1,◯, -ACTGAACTCAACGAAA-1,◯, -ACTGAGTAGAGTGAGA-1,◯, -ACTGAGTAGATGGCGT-1,◯, -ACTGAGTCACGACGAA-1,◯, -ACTGAGTCAGCTGTGC-1,◯, -ACTGAGTCAGGGCATA-1,◯, -ACTGAGTCAGGTGGAT-1,◯, -ACTGAGTGTAGGACAC-1,◯, -ACTGAGTGTCTAAACC-1,◯, -ACTGAGTGTTCGTCTC-1,◯, -ACTGATGAGTGAAGAG-1,◯, -ACTGATGCAATCTGCA-1,◯, -ACTGATGGTACCGAGA-1,◯, -ACTGATGGTTCAGGCC-1,◯, -ACTGATGTCAACGCTA-1,◯, -ACTGATGTCAACGGGA-1,◯, -ACTGATGTCGGAAACG-1,◯, -ACTGATGTCTCCAGGG-1,◯, -ACTGCTCAGAGTAATC-1,◯, -ACTGCTCAGATGCCTT-1,◯, -ACTGCTCAGCCTATGT-1,◯, -ACTGCTCCAAAGCAAT-1,◯, -ACTGCTCCAGTCACTA-1,◯, -ACTGCTCGTCTCCACT-1,◯, -ACTGCTCGTCTCCCTA-1,◯, -ACTGCTCGTGATGTCT-1,◯, -ACTGTCCAGCGTTGCC-1,◯, -ACTGTCCAGGACATTA-1,◯, -ACTGTCCAGGGCATGT-1,◯, -ACTGTCCCACTACAGT-1,◯, -ACTGTCCGTGTCGCTG-1,◯, -ACTGTCCGTTCGAATC-1,◯, -ACTTACTAGTCTCAAC-1,◯, -ACTTACTCAAGGGTCA-1,◯, -ACTTACTCATGAGCGA-1,◯, -ACTTACTGTATGAATG-1,◯, -ACTTACTGTCTTGCGG-1,◯, -ACTTACTTCACAACGT-1,◯, -ACTTACTTCATGTAGC-1,◯, -ACTTACTTCTCTGCTG-1,◯, -ACTTACTTCTTAACCT-1,◯, -ACTTTCAAGAATAGGG-1,◯, -ACTTTCAAGCAGCGTA-1,◯, -ACTTTCAAGCAGGCTA-1,◯, -ACTTTCACATCCCACT-1,◯, -ACTTTCAGTGCGCTTG-1,◯, -ACTTTCATCTCCAGGG-1,◯, -AGAATAGAGAGACGAA-1,◯, -AGAATAGAGCAGCCTC-1,◯, -AGAATAGAGTACGCCC-1,◯, -AGAATAGCACACCGAC-1,◯, -AGAATAGCACAGTCGC-1,◯, -AGAATAGGTCCGTTAA-1,◯, -AGAATAGGTGCAGTAG-1,◯, -AGAATAGTCCCTCTTT-1,◯, -AGAATAGTCGGCGCTA-1,◯, -AGAATAGTCTCTAAGG-1,◯, -AGACGTTAGAGTGACC-1,◯, -AGACGTTAGGGATACC-1,◯, -AGACGTTCAAGGGTCA-1,◯, -AGACGTTCAGCTGTAT-1,◯, -AGACGTTTCGGATGGA-1,◯, -AGACGTTTCTAACTGG-1,◯, -AGAGCGAAGTTCCACA-1,◯, 
-AGAGCGACATACAGCT-1,◯, -AGAGCGAGTACAGACG-1,◯, -AGAGCGAGTGTGAAAT-1,◯, -AGAGCGAGTTACCAGT-1,◯, -AGAGCTTAGAGTAAGG-1,◯, -AGAGCTTAGATCCCAT-1,◯, -AGAGCTTCAGCTGCAC-1,◯, -AGAGCTTGTAGCGATG-1,◯, -AGAGCTTGTCTCCATC-1,◯, -AGAGCTTGTCTGCGGT-1,◯, -AGAGCTTGTTCTGTTT-1,◯, -AGAGCTTTCCAAAGTC-1,◯, -AGAGCTTTCTCGGACG-1,◯, -AGAGTGGTCACGCGGT-1,◯, -AGAGTGGTCTTACCTA-1,◯, -AGATCTGAGATCACGG-1,◯, -AGATCTGCAGAGTGTG-1,◯, -AGATCTGGTCAACTGT-1,◯, -AGATCTGTCGACGGAA-1,◯, -AGATTGCCAAGCGATG-1,◯, -AGATTGCCACAGATTC-1,◯, -AGATTGCTCCAAACTG-1,◯, -AGCAGCCAGATGCCTT-1,◯, -AGCAGCCAGTATCTCG-1,◯, -AGCAGCCCAATCGGTT-1,◯, -AGCAGCCGTATATGGA-1,◯, -AGCAGCCGTTAAGTAG-1,◯, -AGCATACAGAACAACT-1,◯, -AGCATACAGCACGCCT-1,◯, -AGCATACAGGCTCATT-1,◯, -AGCATACGTCTAGCGC-1,◯, -AGCATACTCGGAGGTA-1,◯, -AGCATACTCGGTGTTA-1,◯, -AGCCTAAAGGACAGCT-1,◯, -AGCCTAACAAACCCAT-1,◯, -AGCCTAACATCATCCC-1,◯, -AGCCTAAGTCCGAGTC-1,◯, -AGCCTAAGTTCCGTCT-1,◯, -AGCGGTCAGGGTCGAT-1,◯, -AGCGGTCCAAGCGATG-1,◯, -AGCGGTCGTCAACTGT-1,◯, -AGCGGTCGTGAAAGAG-1,◯, -AGCGGTCGTTAGTGGG-1,◯, -AGCGGTCGTTCGTGAT-1,◯, -AGCGTATAGTGGCACA-1,◯, -AGCGTATCACGTCTCT-1,◯, -AGCGTATGTATAGTAG-1,◯, -AGCGTATGTCCAGTTA-1,◯, -AGCGTATGTTTGACAC-1,◯, -AGCGTCGCATGTAAGA-1,◯, -AGCGTCGCATTTCACT-1,◯, -AGCGTCGGTCTCGTTC-1,◯, -AGCGTCGTCCTGCAGG-1,◯, -AGCTCCTAGCACCGTC-1,◯, -AGCTCCTAGCGTAATA-1,◯, -AGCTCCTCACAGGAGT-1,◯, -AGCTCCTCAGTCTTCC-1,◯, -AGCTCTCAGAGATGAG-1,◯, -AGCTCTCCAAGGTTTC-1,◯, -AGCTCTCCAGAAGCAC-1,◯, -AGCTCTCGTGCAACGA-1,◯, -AGCTCTCTCACCCTCA-1,◯, -AGCTTGAAGCCATCGC-1,◯, -AGCTTGACAATCAGAA-1,◯, -AGCTTGACACTACAGT-1,◯, -AGCTTGACACTCAGGC-1,◯, -AGCTTGACAGCTCCGA-1,◯, -AGCTTGACATACTACG-1,◯, -AGCTTGAGTAAATACG-1,◯, -AGCTTGAGTTGATTGC-1,◯, -AGCTTGATCACCCTCA-1,◯, -AGCTTGATCCAAACTG-1,◯, -AGCTTGATCCACGCAG-1,◯, -AGCTTGATCCGCAAGC-1,◯, -AGCTTGATCCGTCATC-1,◯, -AGCTTGATCCTCGCAT-1,◯, -AGGCCACAGCGAAGGG-1,◯, -AGGCCACAGGCCCGTT-1,◯, -AGGCCACCAGCCAATT-1,◯, -AGGCCACGTACTCGCG-1,◯, -AGGCCACGTCTTGCGG-1,◯, -AGGCCGTAGACCGGAT-1,◯, -AGGCCGTAGCCATCGC-1,◯, -AGGCCGTCAACCGCCA-1,◯, -AGGCCGTGTAGCAAAT-1,◯, -AGGCCGTGTCTACCTC-1,◯, 
-AGGCCGTGTCTTTCAT-1,◯, -AGGCCGTTCACAACGT-1,◯, -AGGGAGTAGCGCCTTG-1,◯, -AGGGAGTCAGCGATCC-1,◯, -AGGGAGTCATGTAGTC-1,◯, -AGGGAGTCATTACGAC-1,◯, -AGGGAGTGTGCAGACA-1,◯, -AGGGAGTTCAACGGCC-1,◯, -AGGGAGTTCTCAACTT-1,◯, -AGGGATGAGCTAGTTC-1,◯, -AGGGATGGTGCACCAC-1,◯, -AGGGATGTCCTGCAGG-1,◯, -AGGGATGTCGGATGGA-1,◯, -AGGGTGAAGACATAAC-1,◯, -AGGGTGAAGACCTAGG-1,◯, -AGGGTGACAATTCCTT-1,◯, -AGGGTGACACAGTCGC-1,◯, -AGGGTGAGTAGCACGA-1,◯, -AGGGTGAGTAGCGCAA-1,◯, -AGGGTGAGTTCCGTCT-1,◯, -AGGTCATAGATCTGCT-1,◯, -AGGTCATAGGTAGCCA-1,◯, -AGGTCATCACACCGCA-1,◯, -AGGTCATCATCCTAGA-1,◯, -AGGTCATGTAAATACG-1,◯, -AGGTCATGTCAGAATA-1,◯, -AGGTCATGTGTTGAGG-1,◯, -AGGTCATTCCTGCCAT-1,◯, -AGGTCATTCTGAGGGA-1,◯, -AGGTCATTCTGCTTGC-1,◯, -AGGTCCGAGGTACTCT-1,◯, -AGGTCCGAGTTACGGG-1,◯, -AGGTCCGGTCTGCAAT-1,◯, -AGGTCCGGTTCGTGAT-1,◯, -AGGTCCGTCAGGCCCA-1,◯, -AGTAGTCAGAGCTGGT-1,◯, -AGTAGTCCACCAGATT-1,◯, -AGTAGTCCACCTTGTC-1,◯, -AGTAGTCCATAGGATA-1,◯, -AGTAGTCGTGGCTCCA-1,◯, -AGTCTTTAGATGCCTT-1,◯, -AGTCTTTAGGAGCGTT-1,◯, -AGTCTTTAGTCGATAA-1,◯, -AGTCTTTAGTGTCTCA-1,◯, -AGTCTTTGTCCATGAT-1,◯, -AGTCTTTGTGATAAGT-1,◯, -AGTCTTTTCAAGAAGT-1,◯, -AGTCTTTTCGCAAACT-1,◯, -AGTGAGGAGTCATCCA-1,◯, -AGTGGGAAGAAGGGTA-1,◯, -AGTGGGACAATCTACG-1,◯, -AGTGGGACAGTCAGAG-1,◯, -AGTGGGAGTCTAGCCG-1,◯, -AGTGGGAGTGCACGAA-1,◯, -AGTGGGATCCAAACAC-1,◯, -AGTGTCACACCCAGTG-1,◯, -AGTGTCACACGAAATA-1,◯, -AGTGTCACACTTCGAA-1,◯, -AGTGTCACATAAAGGT-1,◯, -AGTGTCACATCTACGA-1,◯, -AGTGTCACATTGGGCC-1,◯, -AGTGTCACATTTGCTT-1,◯, -AGTGTCAGTGGTCTCG-1,◯, -AGTGTCATCCACTCCA-1,◯, -AGTTGGTAGCCCTAAT-1,◯, -AGTTGGTAGTGGGCTA-1,◯, -AGTTGGTGTCACCCAG-1,◯, -AGTTGGTGTCTAGTCA-1,◯, -AGTTGGTTCCGCTGTT-1,◯, -AGTTGGTTCTAGAGTC-1,◯, -ATAACGCAGACGACGT-1,◯, -ATAACGCAGCCCTAAT-1,◯, -ATAACGCAGTTGCAGG-1,◯, -ATAACGCCAAGTAGTA-1,◯, -ATAACGCCAAGTCTGT-1,◯, -ATAACGCCAGCAGTTT-1,◯, -ATAACGCCATGCGCAC-1,◯, -ATAACGCGTGACGCCT-1,◯, -ATAACGCTCTTGAGAC-1,◯, -ATAAGAGAGAGTGAGA-1,◯, -ATAAGAGAGATATGGT-1,◯, -ATAAGAGCAAACCCAT-1,◯, -ATAAGAGCAGTGGGAT-1,◯, -ATAAGAGGTAGCTCCG-1,◯, -ATAAGAGGTATATCCG-1,◯, -ATAAGAGTCAAGGCTT-1,◯, 
-ATAAGAGTCCGTTGCT-1,◯, -ATAAGAGTCTGGTTCC-1,◯, -ATAGACCCATCGACGC-1,◯, -ATAGACCCATTGGCGC-1,◯, -ATAGACCGTAATCACC-1,◯, -ATAGACCGTATGGTTC-1,◯, -ATAGACCGTTAGTGGG-1,◯, -ATAGACCTCGTGACAT-1,◯, -ATAGACCTCTGTTGAG-1,◯, -ATAGACCTCTTACCTA-1,◯, -ATCACGATCATAGCAC-1,◯, -ATCACGATCATTGCCC-1,◯, -ATCACGATCTCGCATC-1,◯, -ATCATCTAGAGACTAT-1,◯, -ATCATCTAGCGGATCA-1,◯, -ATCATCTCATTGAGCT-1,◯, -ATCATCTTCTGCGACG-1,◯, -ATCATGGAGGTGCAAC-1,◯, -ATCATGGCATCGACGC-1,◯, -ATCATGGGTCTAAAGA-1,◯, -ATCATGGGTGAGGGAG-1,◯, -ATCATGGTCGACAGCC-1,◯, -ATCATGGTCTTCAACT-1,◯, -ATCCACCAGATAGTCA-1,◯, -ATCCACCAGCGTGAAC-1,◯, -ATCCACCCACTAAGTC-1,◯, -ATCCACCCAGTCTTCC-1,◯, -ATCCACCGTTGATTGC-1,◯, -ATCCACCTCAACACAC-1,◯, -ATCCACCTCACTCCTG-1,◯, -ATCCGAAAGAATAGGG-1,◯, -ATCCGAAAGGTTACCT-1,◯, -ATCCGAACACGGATAG-1,◯, -ATCCGAACAGACAAAT-1,◯, -ATCCGAACAGCATACT-1,◯, -ATCCGAACAGCTCGAC-1,◯, -ATCCGAATCACAGTAC-1,◯, -ATCCGAATCACCGTAA-1,◯, -ATCCGAATCTCAAGTG-1,◯, -ATCGAGTAGTGCTGCC-1,◯, -ATCGAGTAGTTGTAGA-1,◯, -ATCGAGTCAAGTCTAC-1,◯, -ATCGAGTCACCCTATC-1,◯, -ATCGAGTGTGACCAAG-1,◯, -ATCGAGTTCCTAGAAC-1,◯, -ATCTACTAGCAGACTG-1,◯, -ATCTACTAGCATGGCA-1,◯, -ATCTACTCACGGCTAC-1,◯, -ATCTACTCAGCTGCTG-1,◯, -ATCTACTGTCTTCTCG-1,◯, -ATCTGCCAGCGCCTCA-1,◯, -ATCTGCCCAATGTAAG-1,◯, -ATCTGCCCAGTCAGCC-1,◯, -ATCTGCCGTCAGGACA-1,◯, -ATCTGCCGTCTAGCCG-1,◯, -ATCTGCCGTGTCAATC-1,◯, -ATCTGCCGTTTGCATG-1,◯, -ATGAGGGAGTACGATA-1,◯, -ATGAGGGGTCGGCATC-1,◯, -ATGAGGGGTGGTACAG-1,◯, -ATGAGGGGTTCGCGAC-1,◯, -ATGAGGGTCTGAGGGA-1,◯, -ATGCGATAGAATAGGG-1,◯, -ATGCGATAGGTCGGAT-1,◯, -ATGCGATAGGTTACCT-1,◯, -ATGCGATCATTCTTAC-1,◯, -ATGGGAGCAAAGTGCG-1,◯, -ATGGGAGCAATCCAAC-1,◯, -ATGGGAGCACGTTGGC-1,◯, -ATGGGAGGTAGAGGAA-1,◯, -ATGGGAGGTTCTGGTA-1,◯, -ATGGGAGTCGTAGGAG-1,◯, -ATGGGAGTCTAACTCT-1,◯, -ATGGGAGTCTTAGCCC-1,◯, -ATGTGTGAGAGTTGGC-1,◯, -ATGTGTGAGATACACA-1,◯, -ATGTGTGAGATCACGG-1,◯, -ATGTGTGCATACTACG-1,◯, -ATGTGTGGTGTTGAGG-1,◯, -ATGTGTGTCCGAACGC-1,◯, -ATGTGTGTCCGATATG-1,◯, -ATGTGTGTCGTTTATC-1,◯, -ATTACTCAGGTGCTAG-1,◯, -ATTACTCCAATCTACG-1,◯, -ATTACTCGTTCCATGA-1,◯, -ATTATCCAGATTACCC-1,◯, 
-ATTATCCAGTGCCAGA-1,◯, -ATTATCCCAATGAAAC-1,◯, -ATTCTACGTCCGTCAG-1,◯, -ATTCTACTCAACGAAA-1,◯, -ATTCTACTCATCTGCC-1,◯, -ATTCTACTCGCTAGCG-1,◯, -ATTCTACTCGGTGTCG-1,◯, -ATTGGACAGCTCAACT-1,◯, -ATTGGACAGCTGCGAA-1,◯, -ATTGGACCAAAGGAAG-1,◯, -ATTGGACCAACTGCGC-1,◯, -ATTGGACCAAGGGTCA-1,◯, -ATTGGACCAGGTCTCG-1,◯, -ATTGGACCATGTCCTC-1,◯, -ATTGGACGTATATGGA-1,◯, -ATTGGACGTCAACATC-1,◯, -ATTGGACGTTCGTCTC-1,◯, -ATTGGACTCGCACTCT-1,◯, -ATTGGTGAGTGGGATC-1,◯, -ATTGGTGAGTTGTAGA-1,◯, -ATTGGTGCAGGAACGT-1,◯, -ATTGGTGCAGTCAGCC-1,◯, -ATTGGTGCATGAACCT-1,◯, -ATTGGTGTCAGTTTGG-1,◯, -ATTGGTGTCATCATTC-1,◯, -ATTTCTGCACAAGACG-1,◯, -ATTTCTGCACGACGAA-1,◯, -ATTTCTGCAGGTGGAT-1,◯, -ATTTCTGGTTATGTGC-1,◯, -CAACCAAAGGGATACC-1,◯, -CAACCAAGTACATCCA-1,◯, -CAACCTCAGAATTCCC-1,◯, -CAACCTCAGAGATGAG-1,◯, -CAACCTCAGCTCCCAG-1,◯, -CAACCTCAGTTACCCA-1,◯, -CAACCTCAGTTTAGGA-1,◯, -CAACCTCCAATGGATA-1,◯, -CAACCTCCAGCTATTG-1,◯, -CAACCTCCAGTTCATG-1,◯, -CAACCTCTCCTTCAAT-1,◯, -CAACTAGGTCATGCCG-1,◯, -CAACTAGGTCGACTGC-1,◯, -CAACTAGGTGCATCTA-1,◯, -CAAGAAACACATGACT-1,◯, -CAAGAAACACCGATAT-1,◯, -CAAGAAAGTTATTCTC-1,◯, -CAAGAAATCTGGCGTG-1,◯, -CAAGATCAGAGGTTAT-1,◯, -CAAGATCAGCCGCCTA-1,◯, -CAAGATCAGGATTCGG-1,◯, -CAAGATCAGGGTTTCT-1,◯, -CAAGATCAGTGTCCAT-1,◯, -CAAGATCGTTCGTTGA-1,◯, -CAAGATCTCCCAAGAT-1,◯, -CAAGATCTCCTTCAAT-1,◯, -CAAGATCTCGAATCCA-1,◯, -CAAGATCTCGTTGACA-1,◯, -CAAGATCTCTCTGAGA-1,◯, -CAAGGCCAGCGATATA-1,◯, -CAAGGCCAGGACAGAA-1,◯, -CAAGGCCAGGCTAGGT-1,◯, -CAAGGCCAGGCTCATT-1,◯, -CAAGGCCCACCCTATC-1,◯, -CAAGGCCGTAATCACC-1,◯, -CAAGGCCGTACTCAAC-1,◯, -CAAGGCCGTCGAACAG-1,◯, -CAAGGCCTCCATGAAC-1,◯, -CAAGGCCTCCTAGAAC-1,◯, -CAAGGCCTCGAACTGT-1,◯, -CAAGTTGAGCCGCCTA-1,◯, -CAAGTTGAGTCAAGCG-1,◯, -CAAGTTGCACGAAGCA-1,◯, -CAAGTTGGTAGAAAGG-1,◯, -CAAGTTGGTATAGGTA-1,◯, -CAAGTTGGTGAGTGAC-1,◯, -CAAGTTGGTTACCAGT-1,◯, -CAAGTTGGTTCCACTC-1,◯, -CAAGTTGTCGGCGCAT-1,◯, -CACAAACAGAGAACAG-1,◯, -CACAAACGTTAAAGTG-1,◯, -CACAAACGTTACTGAC-1,◯, -CACACAACAGGCAGTA-1,◯, -CACACAACAGTCCTTC-1,◯, -CACACAAGTCATATCG-1,◯, -CACACAATCAGAGCTT-1,◯, -CACACCTCATGTTGAC-1,◯, 
-CACACCTGTAGTAGTA-1,◯, -CACACCTGTTGATTGC-1,◯, -CACACTCAGACACGAC-1,◯, -CACACTCAGGCCCTCA-1,◯, -CACACTCCACCGTTGG-1,◯, -CACACTCGTCAATACC-1,◯, -CACACTCGTCAGGACA-1,◯, -CACACTCTCACCCTCA-1,◯, -CACAGGCAGGTCATCT-1,◯, -CACAGGCGTATAAACG-1,◯, -CACAGGCGTCGGGTCT-1,◯, -CACAGGCGTGCGGTAA-1,◯, -CACAGGCTCGGTGTTA-1,◯, -CACAGTAAGATCACGG-1,◯, -CACAGTACACCCAGTG-1,◯, -CACAGTACACCGAAAG-1,◯, -CACAGTAGTCACACGC-1,◯, -CACAGTAGTGAGTATA-1,◯, -CACAGTAGTTCCCGAG-1,◯, -CACAGTATCTCTGTCG-1,◯, -CACAGTATCTTGCAAG-1,◯, -CACATAGAGAACAACT-1,◯, -CACATAGAGAGTACAT-1,◯, -CACATAGAGCAGGCTA-1,◯, -CACATAGAGGGATACC-1,◯, -CACATAGCAGCTCGAC-1,◯, -CACATAGTCGCACTCT-1,◯, -CACATTTAGGTGCTTT-1,◯, -CACATTTCACCTGGTG-1,◯, -CACATTTCATTAGCCA-1,◯, -CACATTTGTGATAAGT-1,◯, -CACATTTTCCCAAGTA-1,◯, -CACATTTTCGTCTGAA-1,◯, -CACCACTAGGAGTTTA-1,◯, -CACCACTCAAAGGAAG-1,◯, -CACCACTCAAATACAG-1,◯, -CACCACTGTGCACCAC-1,◯, -CACCACTGTTGGAGGT-1,◯, -CACCACTTCCGCGCAA-1,◯, -CACCACTTCGTAGGTT-1,◯, -CACCAGGAGAAAGTGG-1,◯, -CACCAGGAGCATCATC-1,◯, -CACCAGGAGGATGTAT-1,◯, -CACCAGGCAGATGGGT-1,◯, -CACCAGGGTTACCGAT-1,◯, -CACCAGGTCAGGTTCA-1,◯, -CACCTTGAGAGTACCG-1,◯, -CACCTTGAGAGTGAGA-1,◯, -CACCTTGAGCGTTGCC-1,◯, -CACCTTGAGGGCACTA-1,◯, -CACCTTGGTAGTACCT-1,◯, -CACCTTGGTCACCTAA-1,◯, -CACTCCAAGACGCACA-1,◯, -CACTCCAAGATGTTAG-1,◯, -CACTCCAAGTCATCCA-1,◯, -CACTCCAGTAGGCTGA-1,◯, -CACTCCAGTCAACTGT-1,◯, -CACTCCAGTCGCATAT-1,◯, -CACTCCAGTTCGAATC-1,◯, -CACTCCATCAGTCAGT-1,◯, -CAGAATCAGGTAGCTG-1,◯, -CAGAATCCAATCTGCA-1,◯, -CAGAATCCACTCGACG-1,◯, -CAGAATCCAGGCGATA-1,◯, -CAGAATCGTTTACTCT-1,◯, -CAGAATCTCCCGGATG-1,◯, -CAGAGAGAGACCCACC-1,◯, -CAGAGAGAGCGATATA-1,◯, -CAGAGAGAGGCAAAGA-1,◯, -CAGAGAGGTTAGTGGG-1,◯, -CAGAGAGTCCTTTCGG-1,◯, -CAGATCAAGATACACA-1,◯, -CAGATCAAGCTCAACT-1,◯, -CAGATCACATACGCCG-1,◯, -CAGATCAGTACACCGC-1,◯, -CAGATCATCAACCAAC-1,◯, -CAGATCATCACGCGGT-1,◯, -CAGCAGCCAAGCCGTC-1,◯, -CAGCAGCCACCGTTGG-1,◯, -CAGCAGCCAGGAATCG-1,◯, -CAGCAGCCAGGTCCAC-1,◯, -CAGCAGCGTACGCACC-1,◯, -CAGCAGCGTCCATGAT-1,◯, -CAGCAGCTCATAAAGG-1,◯, -CAGCATACAAGCCATT-1,◯, -CAGCATATCAAAGTAG-1,◯, 
-CAGCATATCATCTGTT-1,◯, -CAGCATATCGCTAGCG-1,◯, -CAGCCGAAGACAGACC-1,◯, -CAGCCGAAGCAGCCTC-1,◯, -CAGCCGAAGGTAGCCA-1,◯, -CAGCCGACACATTTCT-1,◯, -CAGCCGAGTTGCTCCT-1,◯, -CAGCCGATCCGCAGTG-1,◯, -CAGCCGATCTCTGAGA-1,◯, -CAGCGACAGTTCGATC-1,◯, -CAGCGACCAGATCGGA-1,◯, -CAGCGACGTGTGAAAT-1,◯, -CAGCGACTCAGCAACT-1,◯, -CAGCGACTCCTCAACC-1,◯, -CAGCGACTCTACTTAC-1,◯, -CAGCGACTCTATCCTA-1,◯, -CAGCGACTCTGCGGCA-1,◯, -CAGCTAAAGTACGACG-1,◯, -CAGCTAAGTAGAGTGC-1,◯, -CAGCTAAGTTTGTGTG-1,◯, -CAGCTAATCCCACTTG-1,◯, -CAGCTGGCAAGTTGTC-1,◯, -CAGCTGGCAATGGAAT-1,◯, -CAGCTGGTCGGTTCGG-1,◯, -CAGCTGGTCTGACCTC-1,◯, -CAGGTGCAGACTGGGT-1,◯, -CAGGTGCAGATCCCAT-1,◯, -CAGGTGCCACTCAGGC-1,◯, -CAGGTGCGTTACGTCA-1,◯, -CAGGTGCTCGGCGCAT-1,◯, -CAGTAACAGTATGACA-1,◯, -CAGTAACCACGGTAAG-1,◯, -CAGTAACCATGGTCAT-1,◯, -CAGTAACGTCGCCATG-1,◯, -CAGTAACGTCTGCGGT-1,◯, -CAGTAACGTTACCAGT-1,◯, -CAGTAACGTTCGCTAA-1,◯, -CAGTAACTCCCTGACT-1,◯, -CAGTAACTCGAGAACG-1,◯, -CAGTAACTCGCGTTTC-1,◯, -CAGTCCTAGCCAGTTT-1,◯, -CAGTCCTAGGAGTCTG-1,◯, -CAGTCCTAGGTCGGAT-1,◯, -CAGTCCTGTCCGAACC-1,◯, -CAGTCCTTCGCCTGTT-1,◯, -CAGTCCTTCTCTAAGG-1,◯, -CAGTCCTTCTGAAAGA-1,◯, -CATATGGAGAGGTTGC-1,◯, -CATATGGAGATCTGCT-1,◯, -CATATGGAGCGATGAC-1,◯, -CATATGGAGGATGGAA-1,◯, -CATATGGCACCAGGCT-1,◯, -CATATGGCAGTCACTA-1,◯, -CATATGGCATATACCG-1,◯,yes -CATATGGGTTGCGTTA-1,◯, -CATATGGTCAACACCA-1,◯, -CATATGGTCAGTTGAC-1,◯, -CATATTCAGCTAACTC-1,◯, -CATATTCCAGACACTT-1,◯, -CATATTCGTACTCTCC-1,◯, -CATATTCGTGGTAACG-1,◯, -CATATTCGTTAGAACA-1,◯, -CATATTCTCGCTTAGA-1,◯, -CATCAAGAGATCTGAA-1,◯, -CATCAAGAGCTGCAAG-1,◯, -CATCAAGCAGGCTCAC-1,◯, -CATCAAGGTGGGTATG-1,◯, -CATCAAGTCACCTCGT-1,◯, -CATCAAGTCATGTCTT-1,◯, -CATCAAGTCGCAGGCT-1,◯, -CATCAAGTCGCCAAAT-1,◯, -CATCAGAAGATTACCC-1,◯, -CATCAGAAGTGAATTG-1,◯, -CATCAGACATATACCG-1,◯, -CATCAGACATTATCTC-1,◯, -CATCAGAGTAGCGATG-1,◯, -CATCAGAGTAGCGCTC-1,◯, -CATCAGAGTTTGTTTC-1,◯, -CATCAGATCAGTTCGA-1,◯, -CATCAGATCGTAGATC-1,◯, -CATCCACAGGTTCCTA-1,◯, -CATCCACGTCCAGTGC-1,◯, -CATCGAACACCCATGG-1,◯, -CATCGAACACTGTCGG-1,◯, -CATCGAACAGCTGTGC-1,◯, -CATCGAACAGGGTACA-1,◯, 
-CATCGAACAGTGACAG-1,◯, -CATCGAAGTAGCGATG-1,◯, -CATCGAAGTCTTGCGG-1,◯, -CATCGAAGTGGCAAAC-1,◯, -CATCGAAGTTCGTCTC-1,◯, -CATCGAATCAGCTTAG-1,◯, -CATCGAATCTGACCTC-1,◯, -CATCGGGAGCCACGTC-1,◯, -CATCGGGCACACATGT-1,◯, -CATCGGGGTGCAGTAG-1,◯, -CATGACAAGCACGCCT-1,◯, -CATGACAGTCGAACAG-1,◯, -CATGACAGTGGTCTCG-1,◯, -CATGACAGTTCACCTC-1,◯, -CATGACATCTTCTGGC-1,◯, -CATGCCTAGGCTAGAC-1,◯, -CATGCCTAGTAGGTGC-1,◯, -CATGCCTCAGAAGCAC-1,◯, -CATGCCTGTAGCGCAA-1,◯, -CATGCCTTCACCAGGC-1,◯, -CATGCCTTCCTATTCA-1,◯, -CATGGCGAGCGTGTCC-1,◯, -CATGGCGGTAGCGTCC-1,◯, -CATGGCGGTGTGTGCC-1,◯, -CATGGCGTCGTTTATC-1,◯, -CATTATCAGCTATGCT-1,◯, -CATTATCGTAAGAGGA-1,◯, -CATTATCGTCACTGGC-1,◯, -CATTATCGTTAAAGAC-1,◯, -CATTCGCAGTCATGCT-1,◯, -CATTCGCAGTGTACGG-1,◯, -CATTCGCGTAGAGGAA-1,◯, -CATTCGCTCCCTCAGT-1,◯, -CATTCGCTCGCAGGCT-1,◯, -CCAATCCAGGCAGTCA-1,◯, -CCAATCCCAGACTCGC-1,◯, -CCAATCCGTGATGCCC-1,◯, -CCAATCCGTGCCTGGT-1,◯, -CCAATCCTCACCTCGT-1,◯, -CCAATCCTCAGAGCTT-1,◯, -CCAATCCTCTTATCTG-1,◯, -CCACCTAAGCCCTAAT-1,◯, -CCACCTAAGTTGCAGG-1,◯, -CCACCTACACACCGAC-1,◯, -CCACCTACATAGGATA-1,◯, -CCACCTAGTGGAAAGA-1,◯, -CCACCTATCTATGTGG-1,◯, -CCACCTATCTCGCATC-1,◯, -CCACCTATCTTCCTTC-1,◯, -CCACGGACACGGCTAC-1,◯, -CCACGGACATCCCATC-1,◯, -CCACGGACATTCTCAT-1,◯, -CCACGGATCACCTCGT-1,◯, -CCACTACAGGGTTCCC-1,◯, -CCACTACCAAGTTAAG-1,◯, -CCACTACCACCTCGTT-1,◯, -CCACTACCATTAACCG-1,◯, -CCACTACGTCTTTCAT-1,◯, -CCACTACGTGTCAATC-1,◯, -CCACTACTCCAGTATG-1,◯, -CCAGCGAAGAGACTAT-1,◯, -CCAGCGAAGAGTACAT-1,◯, -CCAGCGAAGGCCATAG-1,◯, -CCAGCGAAGGCTCAGA-1,◯, -CCAGCGAGTGGTCCGT-1,◯, -CCAGCGATCATCGGAT-1,◯, -CCATGTCAGACGCAAC-1,◯, -CCATGTCAGCAGCGTA-1,◯, -CCATGTCAGCGTCTAT-1,◯, -CCATGTCAGGCTAGCA-1,◯, -CCATGTCCAAGACACG-1,◯, -CCATGTCCAAGCGAGT-1,◯, -CCATGTCCAATCGAAA-1,◯, -CCATGTCCATGTAGTC-1,◯, -CCATGTCGTACTCGCG-1,◯, -CCATGTCGTCTGGTCG-1,◯, -CCATTCGAGGAGTAGA-1,◯, -CCATTCGAGTAGTGCG-1,◯, -CCATTCGCAGATGGGT-1,◯, -CCATTCGCAGTCAGCC-1,◯, -CCATTCGCAGTTTACG-1,◯, -CCATTCGGTAAGGGCT-1,◯, -CCATTCGTCGTCTGCT-1,◯, -CCCAATCAGCACCGCT-1,◯, -CCCAATCAGCCCTAAT-1,◯, -CCCAATCAGGTAGCTG-1,◯, 
-CCCAATCCAGCGTCCA-1,◯, -CCCAATCGTCTCCATC-1,◯, -CCCAATCGTTCACGGC-1,◯, -CCCAATCGTTCAGGCC-1,◯, -CCCAATCTCGCTTAGA-1,◯, -CCCAATCTCTTGCAAG-1,◯, -CCCAGTTAGAAAGTGG-1,◯, -CCCAGTTCATACTACG-1,◯, -CCCAGTTGTAGGAGTC-1,◯, -CCCAGTTGTATATGGA-1,◯, -CCCATACAGAACAACT-1,◯, -CCCATACCAGCCTATA-1,◯, -CCCATACCATGTAAGA-1,◯, -CCCATACGTAGGGTAC-1,◯, -CCCATACGTCTAGAGG-1,◯, -CCCTCCTAGTCCGGTC-1,◯, -CCCTCCTCAAGTAGTA-1,◯, -CCCTCCTCACCTGGTG-1,◯, -CCCTCCTCAGTAACGG-1,◯, -CCCTCCTCATCCTAGA-1,◯, -CCCTCCTGTGAAGGCT-1,◯, -CCCTCCTGTGGAAAGA-1,◯, -CCCTCCTGTTGAGTTC-1,◯, -CCCTCCTTCCGAAGAG-1,◯, -CCCTCCTTCGCTTAGA-1,◯, -CCCTCCTTCTACGAGT-1,◯, -CCGGGATAGATGTTAG-1,◯, -CCGGGATGTCAGCTAT-1,◯, -CCGGGATGTGATGATA-1,◯, -CCGGGATGTTACCGAT-1,◯, -CCGGTAGAGCTTATCG-1,◯, -CCGGTAGAGTCGCCGT-1,◯, -CCGGTAGGTACCGAGA-1,◯, -CCGGTAGTCATGTGGT-1,◯, -CCGGTAGTCCCGGATG-1,◯, -CCGTACTCAGGCTCAC-1,◯, -CCGTACTGTATAAACG-1,◯, -CCGTACTGTCTTGCGG-1,◯, -CCGTACTGTTTAAGCC-1,◯, -CCGTACTTCAAACAAG-1,◯, -CCGTACTTCCTCAATT-1,◯, -CCGTGGAAGCGAGAAA-1,◯, -CCGTGGAAGTCATCCA-1,◯, -CCGTGGACACATGACT-1,◯, -CCGTGGACAGAGCCAA-1,◯, -CCGTGGAGTAGTGAAT-1,◯, -CCGTGGAGTGACTACT-1,◯, -CCGTGGATCAATCACG-1,◯, -CCGTGGATCACCAGGC-1,◯, -CCGTGGATCACCATAG-1,◯, -CCGTGGATCTCGCTTG-1,◯, -CCGTTCACAACAACCT-1,◯, -CCGTTCACACATTCGA-1,◯, -CCGTTCACATTGGGCC-1,◯, -CCGTTCAGTTGCCTCT-1,◯, -CCGTTCATCGGTTCGG-1,◯, -CCGTTCATCGTTTAGG-1,◯, -CCTAAAGCAAGCGCTC-1,◯, -CCTAAAGGTCACTGGC-1,◯, -CCTAAAGTCCTTCAAT-1,◯, -CCTACACAGGTGTTAA-1,◯, -CCTACACGTCGAAAGC-1,◯, -CCTACACTCCTAGTGA-1,◯, -CCTACCACAGGTGGAT-1,◯, -CCTACCAGTAGCACGA-1,◯, -CCTACCAGTAGCTGCC-1,◯, -CCTAGCTAGAATGTGT-1,◯, -CCTAGCTAGACGCTTT-1,◯, -CCTAGCTAGCAGCCTC-1,◯, -CCTAGCTAGCGTCTAT-1,◯, -CCTAGCTAGGATGGAA-1,◯, -CCTAGCTCACGCCAGT-1,◯, -CCTAGCTGTGACGCCT-1,◯, -CCTAGCTGTTCAGCGC-1,◯, -CCTAGCTTCCTTAATC-1,◯, -CCTAGCTTCGAACTGT-1,◯, -CCTATTAAGCGTGTCC-1,◯, -CCTCAGTCAGCTGTGC-1,◯, -CCTCAGTGTAAACGCG-1,◯, -CCTCAGTGTACTCAAC-1,◯, -CCTCTGAAGAAGATTC-1,◯, -CCTCTGAAGACAAGCC-1,◯, -CCTCTGAAGCCACGTC-1,◯, -CCTCTGAAGGCAGTCA-1,◯, -CCTCTGACAGCGATCC-1,◯, -CCTCTGAGTACGCACC-1,◯, 
-CCTTACGCACTACAGT-1,◯, -CCTTACGGTTCCACAA-1,◯, -CCTTACGTCATTGCGA-1,◯, -CCTTACGTCGTACCGG-1,◯, -CCTTCCCAGTCGAGTG-1,◯, -CCTTCCCCAGGAACGT-1,◯, -CCTTCCCGTTCGCGAC-1,◯, -CCTTCGAAGCACACAG-1,◯, -CCTTCGAAGCTAAGAT-1,◯, -CCTTCGACAATCTACG-1,◯, -CCTTCGACACGAAAGC-1,◯, -CCTTTCTAGAGATGAG-1,◯, -CCTTTCTAGATGCGAC-1,◯, -CCTTTCTAGGAGTAGA-1,◯, -CCTTTCTAGGTTACCT-1,◯, -CCTTTCTTCACGCGGT-1,◯, -CCTTTCTTCAGGATCT-1,◯, -CCTTTCTTCATAACCG-1,◯, -CGAACATAGGACACCA-1,◯, -CGAACATCAAGAAAGG-1,◯, -CGAACATTCAGAGGTG-1,◯, -CGAATGTAGCTCCTTC-1,◯, -CGAATGTCAGCATGAG-1,◯, -CGAATGTGTATTAGCC-1,◯, -CGAATGTGTCTGCAAT-1,◯, -CGAATGTTCGAGGTAG-1,◯, -CGAATGTTCTTTAGTC-1,◯, -CGACCTTAGGGAAACA-1,◯, -CGACCTTAGTAACCCT-1,◯, -CGACCTTAGTGGCACA-1,◯, -CGACCTTCAAAGGCGT-1,◯, -CGACCTTCAAGTCATC-1,◯, -CGACCTTCAATGGATA-1,◯, -CGACCTTCATCATCCC-1,◯, -CGACCTTGTATCGCAT-1,◯, -CGACCTTGTTTCGCTC-1,◯, -CGACCTTTCAACGCTA-1,◯, -CGACCTTTCAGAGCTT-1,◯, -CGACTTCAGCTAGTCT-1,◯, -CGACTTCAGGGCTTGA-1,◯, -CGACTTCCACTCAGGC-1,◯, -CGACTTCCATGCATGT-1,◯, -CGACTTCCATGCGCAC-1,◯, -CGACTTCGTCGGGTCT-1,◯, -CGACTTCGTGAACCTT-1,◯, -CGACTTCTCACGATGT-1,◯, -CGACTTCTCATGTCCC-1,◯, -CGAGAAGCAAGGCTCC-1,◯, -CGAGAAGCAGTAGAGC-1,◯, -CGAGAAGTCACCTCGT-1,◯, -CGAGCACAGGACGAAA-1,◯, -CGAGCACGTATAGTAG-1,◯, -CGAGCACTCAAACAAG-1,◯, -CGAGCCACACATTTCT-1,◯, -CGAGCCACATCTATGG-1,◯, -CGAGCCACATGCAACT-1,◯, -CGAGCCAGTAAGAGAG-1,◯, -CGAGCCATCATATCGG-1,◯, -CGATCGGCACAGACAG-1,◯, -CGATCGGGTCAGAATA-1,◯, -CGATCGGGTGTGGCTC-1,◯, -CGATCGGGTTTGTGTG-1,◯, -CGATGGCAGGGTGTTG-1,◯, -CGATGGCGTTTACTCT-1,◯, -CGATGTATCGAATGCT-1,◯, -CGATGTATCGTGGACC-1,◯, -CGATTGAAGATCGATA-1,◯, -CGATTGAAGCTGAAAT-1,◯, -CGATTGAAGTCACGCC-1,◯, -CGATTGACAGTTTACG-1,◯, -CGATTGAGTATAGGTA-1,◯, -CGATTGATCCGGGTGT-1,◯, -CGCCAAGAGCTAGTGG-1,◯, -CGCCAAGCACCGCTAG-1,◯, -CGCCAAGGTATAAACG-1,◯, -CGCGGTAAGGCGTACA-1,◯, -CGCGGTAAGGCTCTTA-1,◯, -CGCGGTAAGTTAACGA-1,◯, -CGCGGTAGTCAAAGAT-1,◯, -CGCGGTAGTCTTTCAT-1,◯, -CGCGGTAGTGATGATA-1,◯, -CGCGGTATCACCGTAA-1,◯, -CGCGGTATCATCATTC-1,◯, -CGCGGTATCGTCCAGG-1,◯, -CGCGGTATCTGTTGAG-1,◯, -CGCGTTTAGCCAGTTT-1,◯, 
-CGCGTTTAGCTTATCG-1,◯, -CGCGTTTAGTGTACGG-1,◯, -CGCGTTTGTAGCGTCC-1,◯, -CGCGTTTGTCCGTTAA-1,◯, -CGCGTTTTCCTCTAGC-1,◯, -CGCGTTTTCTTGTCAT-1,◯, -CGCTATCAGTCACGCC-1,◯, -CGCTATCCAAAGTGCG-1,◯, -CGCTATCCAAGCGAGT-1,◯, -CGCTATCCATGGGAAC-1,◯, -CGCTATCGTCAGCTAT-1,◯, -CGCTATCGTCGTGGCT-1,◯, -CGCTATCGTGACTCAT-1,◯, -CGCTATCTCAGGTAAA-1,◯, -CGCTGGACAAGCGTAG-1,◯, -CGCTGGAGTCTGCGGT-1,◯, -CGCTGGAGTTCAGCGC-1,◯, -CGCTGGATCAGCACAT-1,◯, -CGCTGGATCATTGCCC-1,◯, -CGCTTCAAGAGACTTA-1,◯, -CGCTTCAAGGCGACAT-1,◯, -CGCTTCAGTCTAGCGC-1,◯, -CGCTTCAGTTCTGTTT-1,◯, -CGGACACAGGCTCAGA-1,◯, -CGGACACAGTATTGGA-1,◯, -CGGACACAGTCGTTTG-1,◯, -CGGACACCACGTCAGC-1,◯, -CGGACACCACTATCTT-1,◯, -CGGACACCATTAACCG-1,◯, -CGGACACGTGTGACGA-1,◯, -CGGACACTCATACGGT-1,◯, -CGGACACTCTGGGCCA-1,◯, -CGGACGTAGAATTGTG-1,◯, -CGGACGTAGATCACGG-1,◯, -CGGACGTCAAGCCATT-1,◯, -CGGACGTCACATAACC-1,◯, -CGGACGTGTAGGGTAC-1,◯, -CGGACGTGTCAAAGCG-1,◯, -CGGACGTGTCATCCCT-1,◯, -CGGACGTTCGCCCTTA-1,◯, -CGGACTGAGTGAACAT-1,◯, -CGGACTGGTTACCGAT-1,◯, -CGGACTGTCAATCTCT-1,◯, -CGGAGCTAGACTCGGA-1,◯, -CGGAGCTAGATCCTGT-1,◯, -CGGAGCTAGGTTCCTA-1,◯, -CGGAGCTCAGATCGGA-1,◯, -CGGAGCTCATCGACGC-1,◯, -CGGAGCTGTCCTAGCG-1,◯, -CGGAGCTGTCTGCGGT-1,◯, -CGGAGCTGTGTCCTCT-1,◯, -CGGAGCTTCTAGAGTC-1,◯, -CGGAGTCAGGGCTTCC-1,◯, -CGGAGTCAGTGCGATG-1,◯, -CGGAGTCAGTGGCACA-1,◯, -CGGAGTCCAGCCTATA-1,◯, -CGGAGTCCATTCCTGC-1,◯, -CGGAGTCGTAGCGTCC-1,◯, -CGGAGTCGTCCGTGAC-1,◯, -CGGAGTCGTGACGGTA-1,◯, -CGGCTAGCACAACGTT-1,◯, -CGGCTAGCACAAGCCC-1,◯, -CGGCTAGCAGGTGGAT-1,◯, -CGGCTAGGTATAGGGC-1,◯, -CGGCTAGGTTGAGGTG-1,◯, -CGGCTAGTCACATGCA-1,◯, -CGGCTAGTCTACCTGC-1,◯, -CGGGTCAAGTCAATAG-1,◯, -CGGGTCACACATCCAA-1,◯, -CGGGTCACATGCATGT-1,◯, -CGGGTCACATTCACTT-1,◯, -CGGGTCACATTCGACA-1,◯, -CGGGTCAGTAAGGGAA-1,◯, -CGGGTCAGTTTAGGAA-1,◯, -CGGGTCATCCTTAATC-1,◯, -CGGGTCATCGGCCGAT-1,◯, -CGGGTCATCTCGTATT-1,◯, -CGGTTAAAGAGGGATA-1,◯, -CGGTTAAAGCCTATGT-1,◯, -CGGTTAAAGGTTCCTA-1,◯, -CGGTTAACAAGACACG-1,◯, -CGGTTAACATTGGCGC-1,◯, -CGGTTAAGTCGCATAT-1,◯, -CGGTTAAGTGGACGAT-1,◯, -CGGTTAATCAAGATCC-1,◯, -CGGTTAATCGATAGAA-1,◯, 
-CGTAGCGCACGGCGTT-1,◯, -CGTAGCGGTAGCGCAA-1,◯, -CGTAGCGGTCATCGGC-1,◯, -CGTAGGCGTGATAAAC-1,◯, -CGTAGGCTCCTGTAGA-1,◯, -CGTCACTAGACAAGCC-1,◯, -CGTCACTAGTGATCGG-1,◯, -CGTCACTCATTCTCAT-1,◯, -CGTCACTGTAGGCATG-1,◯, -CGTCAGGAGATGGGTC-1,◯, -CGTCAGGCAAGTCTAC-1,◯, -CGTCAGGCAGTTTACG-1,◯, -CGTCAGGGTCCGAACC-1,◯, -CGTCAGGTCATCTGTT-1,◯, -CGTCAGGTCGCTAGCG-1,◯, -CGTCAGGTCGGACAAG-1,◯, -CGTCCATAGAAACCGC-1,◯, -CGTCCATAGATCCTGT-1,◯, -CGTCCATGTTAAGGGC-1,◯, -CGTCCATTCAACGGCC-1,◯, -CGTCCATTCATGGTCA-1,◯, -CGTCCATTCCGTTGCT-1,◯, -CGTCTACCAAGCGTAG-1,◯, -CGTCTACCAAGTTCTG-1,◯, -CGTCTACGTAGCGATG-1,◯, -CGTCTACTCGAACTGT-1,◯, -CGTGAGCAGGGAACGG-1,◯, -CGTGAGCCAGACTCGC-1,◯, -CGTGAGCTCTTGAGGT-1,◯, -CGTGTAAAGCTGAAAT-1,◯, -CGTGTAACAGCTCGAC-1,◯, -CGTGTAAGTACAGACG-1,◯, -CGTGTAAGTCAAAGCG-1,◯, -CGTGTAAGTCCATCCT-1,◯, -CGTGTAAGTTCTGAAC-1,◯, -CGTGTAATCGCATGGC-1,◯, -CGTGTCTAGACCCACC-1,◯, -CGTGTCTAGATCCGAG-1,◯, -CGTGTCTAGCGTGTCC-1,◯, -CGTGTCTAGGTCGGAT-1,◯, -CGTGTCTCACATAACC-1,◯, -CGTGTCTCATGGTCTA-1,◯, -CGTGTCTCATTAGCCA-1,◯, -CGTGTCTGTTCCCTTG-1,◯, -CGTGTCTGTTCTCATT-1,◯, -CGTGTCTGTTGTGGCC-1,◯, -CGTTAGAAGCAGGTCA-1,◯, -CGTTAGAAGGTGACCA-1,◯, -CGTTAGAAGTGCCATT-1,◯, -CGTTAGAAGTGGTAGC-1,◯, -CGTTAGACACGGTAGA-1,◯, -CGTTAGACATCGGACC-1,◯, -CGTTAGAGTTACCAGT-1,◯, -CGTTAGATCAGAGACG-1,◯, -CGTTCTGAGCAGATCG-1,◯, -CGTTCTGAGCGTTCCG-1,◯, -CGTTCTGAGGAATTAC-1,◯, -CGTTCTGCAGCATACT-1,◯, -CGTTCTGCATATGAGA-1,◯, -CGTTCTGGTCGCCATG-1,◯, -CGTTCTGGTTGGACCC-1,◯, -CGTTCTGGTTTAAGCC-1,◯, -CGTTGGGAGCCACGCT-1,◯, -CGTTGGGCACAACGTT-1,◯, -CGTTGGGCAGACGCAA-1,◯, -CGTTGGGCATAGACTC-1,◯, -CGTTGGGGTCGCGGTT-1,◯, -CGTTGGGGTCGTTGTA-1,◯, -CGTTGGGGTGCCTTGG-1,◯, -CGTTGGGTCACCCGAG-1,◯, -CGTTGGGTCAGCGACC-1,◯, -CGTTGGGTCGCGATCG-1,◯, -CTAACTTCAGCATGAG-1,◯, -CTAACTTGTACCATCA-1,◯, -CTAACTTGTCGTGGCT-1,◯, -CTAACTTTCAGGATCT-1,◯, -CTAACTTTCTTAACCT-1,◯, -CTAAGACAGACTAAGT-1,◯, -CTAAGACAGCGTTTAC-1,◯, -CTAAGACAGGCCCGTT-1,◯, -CTAAGACAGGGTTCCC-1,◯, -CTAAGACAGTTCGCGC-1,◯, -CTAAGACGTATAGTAG-1,◯, -CTAAGACTCAAGATCC-1,◯, -CTAAGACTCGGTCTAA-1,◯, -CTAAGACTCTCTGTCG-1,◯, 
-CTAAGACTCTGGTGTA-1,◯, -CTAAGACTCTTCCTTC-1,◯, -CTAATGGAGATGTGTA-1,◯, -CTAATGGCATCGGACC-1,◯, -CTAATGGCATTTCACT-1,◯, -CTAATGGGTACCGGCT-1,◯, -CTAATGGTCAACGAAA-1,◯, -CTAATGGTCAATCACG-1,◯, -CTAATGGTCATTCACT-1,◯, -CTAATGGTCCATGCTC-1,◯, -CTAATGGTCCGTTGTC-1,◯, -CTACACCCAGCGTTCG-1,◯, -CTACACCCATGACATC-1,◯, -CTACACCGTAGCGTAG-1,◯, -CTACACCGTGCGAAAC-1,◯, -CTACACCTCCTTGACC-1,◯, -CTACATTAGTTCGCAT-1,◯, -CTACATTCAAGGCTCC-1,◯, -CTACATTCACCGGAAA-1,◯, -CTACATTCAGCTGTTA-1,◯, -CTACATTGTCTCATCC-1,◯, -CTACCCAAGAAACCAT-1,◯, -CTACCCAAGGCAGTCA-1,◯, -CTACCCACAGCCTTTC-1,◯, -CTACCCACATTAGCCA-1,◯, -CTACCCAGTATAGGTA-1,◯, -CTACCCAGTGTATGGG-1,◯, -CTACCCATCGCCGTGA-1,◯, -CTACGTCAGTCAAGGC-1,◯, -CTACGTCCACCAGCAC-1,◯, -CTACGTCGTAGCTAAA-1,◯, -CTACGTCGTTGACGTT-1,◯, -CTACGTCTCATCGATG-1,◯, -CTACGTCTCCTGTACC-1,◯, -CTAGAGTGTAGATTAG-1,◯, -CTAGAGTGTATTAGCC-1,◯, -CTAGAGTGTCAAAGAT-1,◯, -CTAGAGTGTTCACGGC-1,◯, -CTAGAGTTCTCAAGTG-1,◯, -CTAGCCTAGCTCAACT-1,◯, -CTAGCCTAGGGAACGG-1,◯, -CTAGCCTCAAACGCGA-1,◯, -CTAGCCTTCTACTATC-1,◯, -CTAGTGAAGGGCTTCC-1,◯, -CTAGTGAAGTGTGGCA-1,◯, -CTAGTGAGTGCCTGCA-1,◯, -CTAGTGATCACTGGGC-1,◯, -CTCACACAGATGGGTC-1,◯, -CTCACACAGTTCGATC-1,◯, -CTCACACCACCAGATT-1,◯, -CTCACACCAGGCAGTA-1,◯, -CTCACACCATACGCCG-1,◯, -CTCACACGTCGACTGC-1,◯, -CTCACACGTGTTCGAT-1,◯, -CTCACACTCTATCCTA-1,◯, -CTCAGAAAGCCGTCGT-1,◯, -CTCAGAAAGTCATGCT-1,◯, -CTCAGAAAGTTAGCGG-1,◯, -CTCAGAAGTGTTAAGA-1,◯, -CTCATTAAGGCATGGT-1,◯, -CTCATTACATGCCACG-1,◯, -CTCATTAGTCCGAACC-1,◯, -CTCATTAGTTGCGCAC-1,◯, -CTCATTATCGCCGTGA-1,◯, -CTCATTATCTGGTTCC-1,◯, -CTCCTAGAGAAGATTC-1,◯, -CTCCTAGAGATGGGTC-1,◯, -CTCCTAGAGCAACGGT-1,◯, -CTCCTAGAGCGAAGGG-1,◯, -CTCCTAGAGGACAGCT-1,◯, -CTCCTAGCAGACGCCT-1,◯, -CTCCTAGCAGGCAGTA-1,◯, -CTCCTAGGTCTCAACA-1,◯, -CTCCTAGGTGCTCTTC-1,◯, -CTCCTAGGTTATCCGA-1,◯, -CTCGAAAAGAGTACAT-1,◯, -CTCGAAAAGCGATGAC-1,◯, -CTCGAAACACTAAGTC-1,◯, -CTCGAAAGTGACTACT-1,◯, -CTCGAAAGTGTTCTTT-1,◯, -CTCGAAATCGCGGATC-1,◯, -CTCGAGGAGTAGATGT-1,◯, -CTCGAGGCACACCGAC-1,◯, -CTCGAGGCAGCTGGCT-1,◯, -CTCGAGGCATGGGACA-1,◯, -CTCGAGGGTGTGAAAT-1,◯, 
-CTCGAGGTCACTCTTA-1,◯, -CTCGGAGAGCCCTAAT-1,◯, -CTCGGAGAGGCTAGCA-1,◯, -CTCGGAGGTCTAAACC-1,◯, -CTCGGAGTCTGCTGTC-1,◯, -CTCGGGACAAAGGCGT-1,◯, -CTCGGGACACAGACAG-1,◯, -CTCGGGACAGATGGCA-1,◯, -CTCGGGACAGCAGTTT-1,◯, -CTCGGGACAGGTGGAT-1,◯, -CTCGGGAGTGATGATA-1,◯, -CTCGGGATCTAACCGA-1,◯, -CTCGTACAGCCCAGCT-1,◯, -CTCGTACAGTATCTCG-1,◯, -CTCGTACCACGGCCAT-1,◯, -CTCGTACGTCTGGAGA-1,◯, -CTCGTCAAGGCGCTCT-1,◯, -CTCGTCAAGGCTACGA-1,◯, -CTCGTCAAGGGATGGG-1,◯, -CTCGTCACAGACGTAG-1,◯, -CTCGTCACATGCCTTC-1,◯, -CTCGTCATCACTTACT-1,◯, -CTCGTCATCAGGCAAG-1,◯, -CTCTAATAGCTGATAA-1,◯, -CTCTAATAGTCCTCCT-1,◯, -CTCTAATCAAGGCTCC-1,◯, -CTCTAATCACGGACAA-1,◯, -CTCTAATGTAGGACAC-1,◯, -CTCTAATTCATGGTCA-1,◯, -CTCTACGAGTGCGTGA-1,◯, -CTCTACGCAATAGAGT-1,◯, -CTCTACGCACCAGCAC-1,◯, -CTCTACGCAGGCAGTA-1,◯, -CTCTACGCATCCCATC-1,◯, -CTCTACGGTCAGGACA-1,◯, -CTCTACGGTGCCTTGG-1,◯, -CTCTACGTCCAATGGT-1,◯, -CTCTGGTAGATCCCAT-1,◯, -CTCTGGTAGATCCCGC-1,◯, -CTCTGGTAGGAGCGTT-1,◯, -CTCTGGTCATTCGACA-1,◯, -CTCTGGTGTCCTCTTG-1,◯, -CTCTGGTTCAGCTCGG-1,◯, -CTCTGGTTCTCTGAGA-1,◯, -CTGAAACAGTGCAAGC-1,◯, -CTGAAACAGTGGGTTG-1,◯, -CTGAAACAGTTGTAGA-1,◯, -CTGAAACCATATGGTC-1,◯, -CTGAAACGTCGAATCT-1,◯, -CTGAAACGTGTTTGGT-1,◯, -CTGAAACGTTAAGGGC-1,◯, -CTGAAACTCCCTCTTT-1,◯, -CTGAAACTCGGTCCGA-1,◯, -CTGAAGTAGTGTTTGC-1,◯, -CTGAAGTGTACCCAAT-1,◯, -CTGAAGTTCTGTCAAG-1,◯, -CTGATAGAGTACGTTC-1,◯, -CTGATAGAGTTGCAGG-1,◯, -CTGATAGGTCCGAACC-1,◯, -CTGATAGTCTGCCCTA-1,◯, -CTGATAGTCTTGTTTG-1,◯, -CTGATCCAGAGACTTA-1,◯, -CTGATCCCAAGCCGCT-1,◯, -CTGATCCGTTGATTCG-1,◯, -CTGATCCTCAATACCG-1,◯, -CTGATCCTCTTAGAGC-1,◯, -CTGCCTAAGCGAGAAA-1,◯, -CTGCCTACACTCGACG-1,◯, -CTGCCTATCCACTCCA-1,◯, -CTGCCTATCGCGGATC-1,◯, -CTGCGGATCGTGGTCG-1,◯, -CTGCTGTAGAGAACAG-1,◯, -CTGCTGTAGCGATCCC-1,◯, -CTGCTGTCACACCGCA-1,◯, -CTGCTGTGTAGAGCTG-1,◯, -CTGCTGTGTCATCCCT-1,◯, -CTGCTGTGTCGTTGTA-1,◯, -CTGCTGTTCGTCCAGG-1,◯, -CTGGTCTAGTTTCCTT-1,◯, -CTGGTCTCACAACGTT-1,◯, -CTGGTCTGTCTTCAAG-1,◯, -CTGGTCTTCAGCTCGG-1,◯, -CTGGTCTTCCGTAGGC-1,◯, -CTGTGCTGTAAGCACG-1,◯, -CTGTGCTGTGGTGTAG-1,◯, -CTGTGCTGTTCCCTTG-1,◯, 
-CTGTGCTTCTACCAGA-1,◯, -CTGTTTAAGGAATCGC-1,◯, -CTGTTTACAAGTTCTG-1,◯, -CTGTTTACACTAAGTC-1,◯, -CTGTTTACAGAGTGTG-1,◯, -CTGTTTAGTTAGGGTG-1,◯, -CTTAACTAGCAATCTC-1,◯, -CTTAACTCAATAGCAA-1,◯, -CTTAACTCAGGACCCT-1,◯, -CTTAACTGTCGTGGCT-1,◯, -CTTAACTGTCTAGAGG-1,◯, -CTTAACTGTTACTGAC-1,◯, -CTTAACTTCCAGTAGT-1,◯, -CTTAACTTCGCAAACT-1,◯, -CTTACCGAGGGCATGT-1,◯, -CTTACCGAGTCCGGTC-1,◯, -CTTACCGCACTGTCGG-1,◯, -CTTACCGGTCATTAGC-1,◯, -CTTACCGTCATCGGAT-1,◯, -CTTAGGACATGGTTGT-1,◯, -CTTAGGAGTCGTCTTC-1,◯, -CTTAGGAGTGCGAAAC-1,◯, -CTTAGGAGTTATCACG-1,◯, -CTTAGGATCACTCTTA-1,◯, -CTTCTCTAGAGGGATA-1,◯, -CTTCTCTAGCGATCCC-1,◯, -CTTCTCTAGGAGTACC-1,◯, -CTTCTCTCAACTGCGC-1,◯, -CTTCTCTCATCCCACT-1,◯, -CTTCTCTCATCGGACC-1,◯, -CTTCTCTCATGTTGAC-1,◯, -CTTCTCTGTAACGCGA-1,◯, -CTTCTCTGTCTCACCT-1,◯, -CTTCTCTTCAATACCG-1,◯, -CTTCTCTTCACAGGCC-1,◯, -CTTCTCTTCCGCATCT-1,◯, -CTTCTCTTCCTAGTGA-1,◯, -CTTCTCTTCGATAGAA-1,◯, -CTTCTCTTCTCGCTTG-1,◯, -CTTGGCTAGTACGTTC-1,◯, -CTTGGCTCACACGCTG-1,◯, -CTTGGCTCAGTTCCCT-1,◯, -CTTGGCTTCGCAAGCC-1,◯, -CTTTGCGAGACACGAC-1,◯, -CTTTGCGAGCAGGTCA-1,◯, -CTTTGCGAGGATGCGT-1,◯, -CTTTGCGAGTGCCAGA-1,◯, -CTTTGCGAGTGTACGG-1,◯, -CTTTGCGTCGCCTGTT-1,◯, -GAAACTCAGTGTACTC-1,◯, -GAAACTCGTCGGCTCA-1,◯, -GAAACTCGTTCATGGT-1,◯, -GAAACTCTCCCTGACT-1,◯, -GAAACTCTCTGGTGTA-1,◯, -GAAATGAAGAATGTTG-1,◯, -GAAATGAAGAATTCCC-1,◯, -GAAATGAAGTCCAGGA-1,◯, -GAAATGAAGTCCGGTC-1,◯, -GAAATGACAAATCCGT-1,◯, -GAAATGACAAGCTGTT-1,◯, -GAAATGACATTTCACT-1,◯, -GAAATGAGTGTGAAAT-1,◯, -GAAATGATCACATAGC-1,◯, -GAAATGATCATCGATG-1,◯, -GAACATCAGCGAAGGG-1,◯, -GAACATCAGCTGAACG-1,◯, -GAACATCAGGTGCAAC-1,◯, -GAACATCAGTCAATAG-1,◯, -GAACATCAGTCTCGGC-1,◯, -GAACATCCATCGATGT-1,◯, -GAACATCCATGCTAGT-1,◯, -GAACATCGTAAAGTCA-1,◯, -GAACATCGTCGCATAT-1,◯, -GAACATCTCAAACCAC-1,◯, -GAACCTAAGGTAGCTG-1,◯, -GAACCTAGTCCATCCT-1,◯, -GAACCTATCAGCATGT-1,◯, -GAACCTATCAGGTAAA-1,◯, -GAACCTATCGCGGATC-1,◯, -GAACGGAAGCTCTCGG-1,◯, -GAACGGATCTGGCGAC-1,◯, -GAAGCAGAGAATAGGG-1,◯, -GAAGCAGCAATAGAGT-1,◯, -GAAGCAGTCCATGAAC-1,◯, -GAAGCAGTCTCTGAGA-1,◯, -GAATAAGAGCAACGGT-1,◯, 
-GAATAAGAGCTGAACG-1,◯, -GAATAAGAGTTAGCGG-1,◯, -GAATAAGCAGGTGCCT-1,◯, -GAATAAGTCAGTCCCT-1,◯, -GAATAAGTCCCTGACT-1,◯, -GAATGAAAGACCTTTG-1,◯, -GAATGAAAGCGATCCC-1,◯, -GAATGAAAGGCTCAGA-1,◯, -GAATGAAAGTGAATTG-1,◯, -GAATGAACAAGGGTCA-1,◯, -GAATGAACACATTAGC-1,◯, -GAATGAACAGGATCGA-1,◯, -GAATGAACATAGTAAG-1,◯, -GAATGAAGTGAAGGCT-1,◯, -GAATGAATCGTTGCCT-1,◯, -GACACGCAGAGCTGGT-1,◯, -GACACGCAGGTAAACT-1,◯, -GACACGCAGTAGCCGA-1,◯, -GACACGCAGTGTACCT-1,◯, -GACACGCCAAAGCAAT-1,◯, -GACACGCCATACAGCT-1,◯, -GACACGCGTTCCATGA-1,◯, -GACAGAGAGTGTCTCA-1,◯, -GACAGAGCATTAGCCA-1,◯, -GACAGAGGTGATGATA-1,◯, -GACCAATAGCGTAATA-1,◯, -GACCAATCAAGTCATC-1,◯, -GACCAATCAAGTCTAC-1,◯, -GACCAATCATTAGGCT-1,◯, -GACCAATGTCTTCTCG-1,◯, -GACCAATGTGCCTGGT-1,◯, -GACCTGGAGCAAATCA-1,◯, -GACCTGGAGTGGCACA-1,◯, -GACCTGGCATCCAACA-1,◯, -GACCTGGGTATGAAAC-1,◯, -GACCTGGGTGCGCTTG-1,◯, -GACCTGGTCTTCAACT-1,◯, -GACGCGTAGAGACTTA-1,◯, -GACGCGTAGTACCGGA-1,◯, -GACGCGTGTCTCACCT-1,◯, -GACGCGTGTGTAACGG-1,◯, -GACGCGTGTTGTGGCC-1,◯, -GACGCGTTCGTTGCCT-1,◯, -GACGGCTAGTTCGCGC-1,◯, -GACGGCTGTCATATGC-1,◯, -GACGGCTGTGTGAAAT-1,◯, -GACGGCTTCGTCGTTC-1,◯, -GACGTGCAGACGCACA-1,◯, -GACGTGCCAGGTGGAT-1,◯, -GACGTGCGTAGGGACT-1,◯, -GACGTGCGTCTCATCC-1,◯, -GACGTGCGTTAAGATG-1,◯, -GACGTGCGTTCGGCAC-1,◯, -GACGTTAAGACAAGCC-1,◯, -GACGTTAAGACCTTTG-1,◯, -GACGTTAAGAGTACCG-1,◯, -GACGTTAAGCAGCCTC-1,◯, -GACGTTACACGACGAA-1,◯, -GACGTTACAGGTCTCG-1,◯, -GACGTTAGTTTACTCT-1,◯, -GACGTTATCATCACCC-1,◯, -GACGTTATCCGCTGTT-1,◯, -GACGTTATCGTCACGG-1,◯, -GACGTTATCTGTCAAG-1,◯, -GACTAACAGCTAACTC-1,◯, -GACTAACAGCTGGAAC-1,◯, -GACTAACCAATGAAAC-1,◯, -GACTAACTCCGTTGCT-1,◯, -GACTAACTCCTGTAGA-1,◯, -GACTACAAGGCCCTCA-1,◯, -GACTACACATCGACGC-1,◯, -GACTACAGTCTAGGTT-1,◯, -GACTACAGTTAAGTAG-1,◯, -GACTGCGAGAATTGTG-1,◯, -GACTGCGAGAGGGCTT-1,◯, -GACTGCGCAGGTGCCT-1,◯, -GACTGCGGTCTCTTTA-1,◯, -GACTGCGTCCCGACTT-1,◯, -GAGCAGAAGGCTATCT-1,◯, -GAGCAGAAGGGTGTTG-1,◯, -GAGCAGAGTACCATCA-1,◯, -GAGCAGATCAGGTAAA-1,◯, -GAGCAGATCTTACCGC-1,◯, -GAGGTGAAGTGGAGAA-1,◯, -GAGGTGAGTACAGTGG-1,◯, -GAGGTGAGTTGCTCCT-1,◯, 
-GAGTCCGAGCGGCTTC-1,◯, -GAGTCCGAGTCATGCT-1,◯, -GAGTCCGAGTTATCGC-1,◯, -GAGTCCGCACAACGCC-1,◯, -GAGTCCGCAGACAGGT-1,◯, -GAGTCCGTCGATAGAA-1,◯, -GAGTCCGTCTCTGCTG-1,◯, -GATCAGTAGACGCACA-1,◯, -GATCAGTAGCTGGAAC-1,◯, -GATCAGTGTACAAGTA-1,◯, -GATCAGTGTCATATGC-1,◯, -GATCAGTGTTCCACGG-1,◯, -GATCAGTTCACAGGCC-1,◯, -GATCAGTTCTGCGTAA-1,◯, -GATCGATAGAAACCTA-1,◯, -GATCGATAGAGGTTGC-1,◯, -GATCGATAGATGGGTC-1,◯, -GATCGATAGGTGTTAA-1,◯, -GATCGATGTCTAGCCG-1,◯, -GATCGATGTGGGTCAA-1,◯, -GATCGATTCGTTACGA-1,◯, -GATCGATTCTACTCAT-1,◯, -GATCGCGAGCCACGCT-1,◯, -GATCGCGAGGCATTGG-1,◯, -GATCGCGCATCACCCT-1,◯, -GATCGCGGTAACGACG-1,◯, -GATCGCGGTGCCTGCA-1,◯, -GATCGTACAGGTCTCG-1,◯, -GATCGTAGTACCGTTA-1,◯, -GATCGTATCACGACTA-1,◯, -GATCGTATCATCTGTT-1,◯, -GATCGTATCCTGCTTG-1,◯, -GATCTAGAGCGTAATA-1,◯, -GATCTAGAGTAGCCGA-1,◯, -GATCTAGAGTCCAGGA-1,◯, -GATCTAGAGTTATCGC-1,◯, -GATCTAGCAAGGACAC-1,◯, -GATCTAGCACGGTAGA-1,◯, -GATCTAGCAGGGTTAG-1,◯, -GATCTAGTCAGTTGAC-1,◯, -GATGAAACAAAGCGGT-1,◯, -GATGAAACACGAGAGT-1,◯, -GATGAAACACTCGACG-1,◯, -GATGAAAGTATGCTTG-1,◯, -GATGAAAGTTCGTCTC-1,◯, -GATGAAATCACAAACC-1,◯, -GATGAAATCGTTGCCT-1,◯, -GATGAGGAGAAACCGC-1,◯, -GATGAGGAGGAGTCTG-1,◯, -GATGAGGAGTACGTAA-1,◯, -GATGAGGAGTCCCACG-1,◯, -GATGAGGAGTTTGCGT-1,◯, -GATGAGGCACGGCTAC-1,◯, -GATGAGGGTAGCTAAA-1,◯, -GATGAGGTCCTAGTGA-1,◯, -GATGCTAAGATATGCA-1,◯, -GATGCTAAGTAGGTGC-1,◯, -GATGCTACAATACGCT-1,◯, -GATGCTACATCAGTAC-1,◯, -GATGCTATCCACGTTC-1,◯, -GATTCAGAGATGTAAC-1,◯, -GATTCAGAGCTGCGAA-1,◯, -GATTCAGCAAGGTGTG-1,◯, -GATTCAGCAAGTTGTC-1,◯, -GATTCAGGTCTCTCTG-1,◯, -GATTCAGGTTTGGGCC-1,◯, -GCAAACTAGAATCTCC-1,◯, -GCAAACTAGTACCGGA-1,◯, -GCAAACTCAAGGTTTC-1,◯, -GCAAACTGTAATCACC-1,◯, -GCAAACTGTCATATGC-1,◯, -GCAAACTGTCGCGGTT-1,◯, -GCAATCAAGAAGAAGC-1,◯, -GCAATCAGTCCTCTTG-1,◯, -GCAATCAGTTGATTGC-1,◯, -GCAATCATCCCTAATT-1,◯, -GCACATAGTTATTCTC-1,◯, -GCACTCTAGACTGTAA-1,◯, -GCACTCTAGCAACGGT-1,◯, -GCACTCTCAAGCGAGT-1,◯, -GCACTCTCAGCAGTTT-1,◯, -GCACTCTCATTCTTAC-1,◯, -GCACTCTGTACAGTTC-1,◯, -GCACTCTTCCAACCAA-1,◯, -GCAGCCACACGAAGCA-1,◯, -GCAGCCACACTTGGAT-1,◯, 
-GCAGTTAGTAAGGGCT-1,◯, -GCAGTTAGTGGACGAT-1,◯, -GCAGTTATCAATCACG-1,◯, -GCAGTTATCATCGCTC-1,◯, -GCAGTTATCTGGAGCC-1,◯, -GCATACAAGCCAGGAT-1,◯, -GCATACAAGCGATAGC-1,◯, -GCATACAGTAGGAGTC-1,◯, -GCATACATCACCGGGT-1,◯, -GCATACATCATCACCC-1,◯, -GCATGATAGAGCTTCT-1,◯, -GCATGATCAGTCCTTC-1,◯, -GCATGATGTTGCGCAC-1,◯, -GCATGATTCACCGGGT-1,◯, -GCATGATTCCCAAGAT-1,◯, -GCATGATTCCCATTAT-1,◯, -GCATGATTCGTTGACA-1,◯, -GCATGATTCTAGAGTC-1,◯, -GCATGCGAGATGAGAG-1,◯, -GCATGCGAGGGTGTTG-1,◯, -GCATGCGGTAGCACGA-1,◯, -GCATGTAAGATCTGCT-1,◯, -GCATGTACAATGCCAT-1,◯, -GCATGTACAGCCTTGG-1,◯, -GCATGTAGTAGGAGTC-1,◯, -GCATGTAGTCGATTGT-1,◯, -GCCAAATAGGTGATAT-1,◯, -GCCAAATAGTTAACGA-1,◯, -GCCAAATGTCGATTGT-1,◯, -GCCAAATGTGATGCCC-1,◯, -GCCAAATGTTACAGAA-1,◯, -GCCTCTAAGCTGAAAT-1,◯, -GCCTCTACATTAGCCA-1,◯, -GCCTCTAGTTAAAGTG-1,◯, -GCGACCACACATCCGG-1,◯, -GCGACCACACGTCTCT-1,◯, -GCGACCACATCGGAAG-1,◯, -GCGACCAGTTACCAGT-1,◯, -GCGACCATCGAGCCCA-1,◯, -GCGAGAACAGATCGGA-1,◯, -GCGAGAACAGTGAGTG-1,◯, -GCGAGAATCGGCTTGG-1,◯, -GCGCAACCAGCTGTAT-1,◯, -GCGCAACTCGCGCCAA-1,◯, -GCGCAACTCTGATTCT-1,◯, -GCGCAGTAGACCTAGG-1,◯, -GCGCAGTAGAGTAATC-1,◯, -GCGCAGTCAGGTGGAT-1,◯, -GCGCAGTGTGATGTCT-1,◯, -GCGCAGTGTGCAACGA-1,◯, -GCGCAGTTCCCAACGG-1,◯, -GCGCAGTTCCGCAGTG-1,◯, -GCGCAGTTCGCAGGCT-1,◯, -GCGCCAACAGCCAGAA-1,◯, -GCGCCAACAGTCCTTC-1,◯, -GCGCCAAGTGATGCCC-1,◯, -GCGCCAAGTGTCTGAT-1,◯, -GCGCCAAGTTTACTCT-1,◯, -GCGCCAATCAACCATG-1,◯, -GCGCCAATCAGCTCTC-1,◯, -GCGCGATAGACATAAC-1,◯, -GCGCGATAGAGTACCG-1,◯, -GCGCGATAGCCTATGT-1,◯, -GCGCGATAGGCCGAAT-1,◯, -GCGCGATCATACGCTA-1,◯, -GCGCGATGTACACCGC-1,◯, -GCGGGTTAGACCTAGG-1,◯, -GCGGGTTCAATGAATG-1,◯, -GCGGGTTCAATGGATA-1,◯, -GCGGGTTCAGATGAGC-1,◯, -GCGGGTTCATGAACCT-1,◯, -GCGGGTTGTAGCAAAT-1,◯, -GCGGGTTGTCAACTGT-1,◯, -GCGGGTTTCCAGTAGT-1,◯, -GCGGGTTTCTAGAGTC-1,◯, -GCTCCTAAGAGTACAT-1,◯, -GCTCCTAGTAACGACG-1,◯, -GCTCCTAGTTACTGAC-1,◯, -GCTCCTATCTCCAACC-1,◯, -GCTCTGTAGCGCCTTG-1,◯, -GCTCTGTCAATACGCT-1,◯, -GCTCTGTCATGCTGGC-1,◯, -GCTCTGTGTACAGCAG-1,◯, -GCTCTGTTCGTCTGAA-1,◯, -GCTGCAGAGATATACG-1,◯, -GCTGCAGCACGAAACG-1,◯, 
-GCTGCAGGTCGCATAT-1,◯, -GCTGCAGGTTCGCGAC-1,◯, -GCTGCAGTCAAGCCTA-1,◯, -GCTGCAGTCCAAGTAC-1,◯, -GCTGCAGTCCACGAAT-1,◯, -GCTGCGACAGATGAGC-1,◯, -GCTGCGAGTTGGTAAA-1,◯, -GCTGCGATCGCCTGAG-1,◯, -GCTGCTTAGAGTCTGG-1,◯, -GCTGCTTAGTGAACAT-1,◯, -GCTGCTTCAAGAGGCT-1,◯, -GCTGCTTGTCAACATC-1,◯, -GCTGCTTGTCGTCTTC-1,◯, -GCTGCTTGTGAGTGAC-1,◯, -GCTGCTTGTGTCTGAT-1,◯, -GCTGCTTGTGTTCGAT-1,◯, -GCTGCTTTCCTATGTT-1,◯, -GCTGGGTAGATATGGT-1,◯, -GCTGGGTAGATGCGAC-1,◯, -GCTGGGTGTCGATTGT-1,◯, -GCTTCCAGTACCCAAT-1,◯, -GCTTCCAGTCTGCCAG-1,◯, -GCTTCCAGTCTTGTCC-1,◯, -GCTTCCATCGGTGTTA-1,◯, -GCTTGAAAGAGTAATC-1,◯, -GCTTGAACACGAGAGT-1,◯, -GCTTGAACATGTCGAT-1,◯, -GCTTGAAGTCAAACTC-1,◯, -GCTTGAATCACTCTTA-1,◯, -GCTTGAATCAGCATGT-1,◯, -GCTTGAATCCTATTCA-1,◯, -GCTTGAATCTCTGAGA-1,◯, -GGAAAGCAGGCAGTCA-1,◯, -GGAAAGCCACCGAAAG-1,◯, -GGAAAGCGTAAATGTG-1,◯, -GGAAAGCTCAGATAAG-1,◯, -GGAAAGCTCCCGGATG-1,◯, -GGAAAGCTCCTCCTAG-1,◯, -GGAAAGCTCGGATGTT-1,◯, -GGAAAGCTCGTACCGG-1,◯, -GGAAAGCTCTAGCACA-1,◯, -GGAAAGCTCTCGAGTA-1,◯, -GGAACTTAGCCATCGC-1,◯, -GGAACTTAGCGTAATA-1,◯, -GGAACTTAGTCTTGCA-1,◯, -GGAACTTCAAGACGTG-1,◯, -GGAACTTTCCCATTTA-1,◯, -GGAACTTTCGTAGATC-1,◯, -GGAATAAAGCGACGTA-1,◯, -GGAATAAAGTACGATA-1,◯, -GGAATAAGTTATTCTC-1,◯, -GGAATAATCATCGCTC-1,◯, -GGAATAATCGATCCCT-1,◯, -GGACAAGGTAACGCGA-1,◯, -GGACAAGGTGCAACTT-1,◯, -GGACAAGTCTTGAGGT-1,◯, -GGACAGACAATGACCT-1,◯, -GGACAGACAGAGCCAA-1,◯, -GGACAGAGTAGCGTCC-1,◯, -GGACAGAGTGCCTGTG-1,◯, -GGACAGAGTGCTGTAT-1,◯, -GGACAGAGTTCTCATT-1,◯, -GGACATTGTATGGTTC-1,◯, -GGACATTGTTGTCGCG-1,◯, -GGACATTTCCAGAAGG-1,◯, -GGACATTTCTAAGCCA-1,◯, -GGACATTTCTCTTGAT-1,◯, -GGACGTCCATAACCTG-1,◯, -GGACGTCCATTTGCCC-1,◯, -GGACGTCGTCGCATAT-1,◯, -GGACGTCGTTTGGGCC-1,◯, -GGAGCAACAACTGCTA-1,◯, -GGAGCAAGTAACGCGA-1,◯, -GGAGCAAGTTACCAGT-1,◯, -GGAGCAATCCACGAAT-1,◯, -GGATGTTAGGCGCTCT-1,◯, -GGATGTTCACCGATAT-1,◯, -GGATGTTCAGGAATGC-1,◯, -GGATGTTCATTCCTCG-1,◯, -GGATGTTTCAACACTG-1,◯, -GGATGTTTCAGTCAGT-1,◯, -GGATGTTTCTGTTGAG-1,◯, -GGATGTTTCTTGCAAG-1,◯, -GGATGTTTCTTGCATT-1,◯, -GGATTACAGCTAAGAT-1,◯, -GGATTACCAACAACCT-1,◯, 
-GGATTACCAGACACTT-1,◯, -GGATTACGTAGGGACT-1,◯, -GGATTACGTTAGGGTG-1,◯, -GGATTACTCATTCACT-1,◯, -GGCAATTAGAGACTTA-1,◯, -GGCAATTCACAGGCCT-1,◯, -GGCAATTCAGTTCATG-1,◯, -GGCAATTTCAATACCG-1,◯, -GGCAATTTCACATGCA-1,◯, -GGCAATTTCCCAGGTG-1,◯, -GGCAATTTCTTTACAC-1,◯, -GGCAATTTCTTTCCTC-1,◯, -GGCCGATAGGACCACA-1,◯, -GGCCGATAGTGGACGT-1,◯, -GGCCGATCATAAGACA-1,◯, -GGCCGATCATGACATC-1,◯, -GGCCGATGTCCCTTGT-1,◯, -GGCGACTTCAGCTGGC-1,◯, -GGCGACTTCTTCGGTC-1,◯, -GGCGTGTGTGATGTGG-1,◯, -GGCGTGTGTTGGTTTG-1,◯, -GGCGTGTTCCTTTCGG-1,◯, -GGCGTGTTCTACCTGC-1,◯, -GGCTCGACAGGACGTA-1,◯, -GGCTCGACATTTCAGG-1,◯, -GGCTCGAGTCGAGATG-1,◯, -GGCTCGAGTTGATTCG-1,◯, -GGCTCGATCCAGAAGG-1,◯, -GGCTCGATCGAACGGA-1,◯, -GGCTGGTAGCTACCGC-1,◯, -GGCTGGTGTCGAACAG-1,◯, -GGCTGGTGTTCAGGCC-1,◯, -GGCTGGTTCAACACAC-1,◯, -GGCTGGTTCGCGCCAA-1,◯, -GGGAATGAGGTAGCCA-1,◯, -GGGAATGCAATGGACG-1,◯, -GGGAATGGTAAGAGGA-1,◯, -GGGAATGGTAATCACC-1,◯, -GGGAATGGTTGGTGGA-1,◯, -GGGACCTCAACGATGG-1,◯, -GGGACCTCAATAACGA-1,◯, -GGGACCTGTTTGACTG-1,◯, -GGGACCTTCATGTCCC-1,◯, -GGGACCTTCGGCCGAT-1,◯, -GGGAGATAGCTCCTTC-1,◯, -GGGAGATAGGATGGAA-1,◯, -GGGAGATCATGACGGA-1,◯, -GGGAGATGTCTAGTGT-1,◯, -GGGATGAAGAAACGAG-1,◯, -GGGATGAAGCTCTCGG-1,◯, -GGGATGAAGGATTCGG-1,◯, -GGGATGAAGGTACTCT-1,◯, -GGGATGACAATGCCAT-1,◯, -GGGATGACACTTCGAA-1,◯, -GGGATGACAGTTAACC-1,◯, -GGGATGAGTATGGTTC-1,◯, -GGGATGAGTCCAGTTA-1,◯, -GGGATGAGTCCGAACC-1,◯, -GGGATGAGTGCATCTA-1,◯, -GGGATGAGTTCAGGCC-1,◯, -GGGCACTAGCACCGTC-1,◯, -GGGCACTAGCCGATTT-1,◯, -GGGCACTCACACATGT-1,◯, -GGGCACTCATTCCTGC-1,◯, -GGGCACTGTACGACCC-1,◯, -GGGCACTGTTTGTGTG-1,◯, -GGGCACTTCCACTGGG-1,◯, -GGGCATCAGGCTCTTA-1,◯, -GGGCATCAGTATCGAA-1,◯, -GGGCATCAGTCGCCGT-1,◯, -GGGCATCTCTTCAACT-1,◯, -GGGCATCTCTTCTGGC-1,◯, -GGGTCTGCATCCCATC-1,◯, -GGGTCTGGTATAGGTA-1,◯, -GGGTCTGTCGTACGGC-1,◯, -GGGTCTGTCGTCCAGG-1,◯, -GGGTTGCCACCATGTA-1,◯, -GGGTTGCGTAAGGATT-1,◯, -GGGTTGCTCAAGATCC-1,◯, -GGTATTGAGGCTAGGT-1,◯, -GGTATTGAGTGCGTGA-1,◯, -GGTATTGCAACTGGCC-1,◯, -GGTATTGCACAAGACG-1,◯, -GGTATTGCACGTAAGG-1,◯, -GGTATTGTCACCGGGT-1,◯, -GGTGAAGAGCGATCCC-1,◯, 
-GGTGAAGCACCAGGCT-1,◯, -GGTGAAGGTCTGCCAG-1,◯, -GGTGAAGGTTCACGGC-1,◯, -GGTGAAGTCATATCGG-1,◯, -GGTGAAGTCCGGGTGT-1,◯, -GGTGAAGTCCTTGGTC-1,◯, -GGTGAAGTCTAACTGG-1,◯, -GGTGCGTAGAACAACT-1,◯, -GGTGCGTAGACCCACC-1,◯, -GGTGCGTAGGGTTTCT-1,◯, -GGTGCGTAGTTTAGGA-1,◯, -GGTGCGTCACAGGTTT-1,◯, -GGTGCGTCACTCTGTC-1,◯, -GGTGCGTGTACCGGCT-1,◯, -GGTGCGTTCCTCCTAG-1,◯, -GGTGCGTTCGTCCAGG-1,◯, -GGTGCGTTCTTCATGT-1,◯, -GGTGTTAAGTGCCATT-1,◯, -GGTGTTACAAAGCAAT-1,◯, -GGTGTTACAGAAGCAC-1,◯, -GGTGTTACATCCCACT-1,◯, -GGTGTTACATGGTAGG-1,◯, -GGTGTTAGTAATTGGA-1,◯, -GGTGTTAGTGGTTTCA-1,◯, -GGTGTTATCTTATCTG-1,◯, -GGTGTTATCTTGTACT-1,◯, -GTAACGTAGAAGGCCT-1,◯, -GTAACGTAGTCGATAA-1,◯, -GTAACGTCAAGTAGTA-1,◯, -GTAACGTCAGTCAGAG-1,◯, -GTAACGTCAGTGGAGT-1,◯, -GTAACGTCATGGTAGG-1,◯, -GTAACGTGTCCGCTGA-1,◯, -GTAACGTGTTACCGAT-1,◯, -GTAACGTGTTCGTGAT-1,◯, -GTAACGTTCCACGACG-1,◯, -GTAACGTTCCCACTTG-1,◯, -GTAACTGAGACTCGGA-1,◯, -GTAACTGAGGTGTTAA-1,◯, -GTAACTGCAAGAGGCT-1,◯, -GTAACTGCACACAGAG-1,◯, -GTAACTGGTATTAGCC-1,◯, -GTACGTAAGATCGATA-1,◯, -GTACGTAAGTGGGATC-1,◯, -GTACTCCAGCACAGGT-1,◯, -GTACTCCAGTCCGGTC-1,◯, -GTACTCCGTGTTTGTG-1,◯, -GTACTCCTCCACTGGG-1,◯, -GTACTTTAGATCCTGT-1,◯, -GTACTTTAGGATGGTC-1,◯, -GTACTTTCACATCTTT-1,◯, -GTACTTTCATTACCTT-1,◯, -GTACTTTGTAAAGTCA-1,◯, -GTACTTTGTAGAAAGG-1,◯, -GTACTTTGTATCACCA-1,◯, -GTACTTTGTCCATCCT-1,◯, -GTACTTTGTTCAGTAC-1,◯, -GTACTTTGTTCTCATT-1,◯, -GTAGGCCAGGTCATCT-1,◯, -GTAGTCAAGCGGATCA-1,◯, -GTAGTCACAATGGATA-1,◯, -GTAGTCACACAAGTAA-1,◯, -GTAGTCAGTTAAGTAG-1,◯, -GTAGTCATCACGCGGT-1,◯, -GTAGTCATCATGGTCA-1,◯, -GTAGTCATCATGTCTT-1,◯, -GTAGTCATCCTAAGTG-1,◯, -GTAGTCATCGCTGATA-1,◯, -GTATCTTAGGCCCTTG-1,◯, -GTATCTTCACCAGTTA-1,◯, -GTATCTTGTAATCGTC-1,◯, -GTATCTTGTGTGGTTT-1,◯, -GTATCTTGTTTCGCTC-1,◯, -GTATCTTTCAACACGT-1,◯, -GTATCTTTCCGGCACA-1,◯, -GTATTCTCAAGGACAC-1,◯, -GTATTCTCACAGTCGC-1,◯, -GTATTCTGTAGCGATG-1,◯, -GTATTCTGTTTAGGAA-1,◯, -GTCAAGTAGAGCTATA-1,◯, -GTCAAGTCAACTTGAC-1,◯, -GTCAAGTCATCCTTGC-1,◯, -GTCAAGTCATGCCTTC-1,◯, -GTCAAGTCATGCTGGC-1,◯, -GTCAAGTGTTAAGTAG-1,◯, -GTCAAGTTCAAACAAG-1,◯, 
-GTCAAGTTCATTTGGG-1,◯, -GTCACAAAGACTTGAA-1,◯, -GTCACAAAGCTGGAAC-1,◯, -GTCACAACACAGGCCT-1,◯, -GTCACAACATAGACTC-1,◯, -GTCACAAGTCAGATAA-1,◯, -GTCACGGAGCTACCTA-1,◯, -GTCACGGAGCTTATCG-1,◯, -GTCACGGAGGGTCGAT-1,◯, -GTCACGGCAAGTAGTA-1,◯, -GTCACGGCACGGATAG-1,◯, -GTCACGGGTCAACTGT-1,◯, -GTCACGGGTGGTCTCG-1,◯, -GTCACGGGTGTAACGG-1,◯, -GTCACGGTCATCGATG-1,◯, -GTCATTTAGATGTCGG-1,◯, -GTCATTTAGGCATTGG-1,◯, -GTCATTTCACATTCGA-1,◯, -GTCATTTTCCGAAGAG-1,◯, -GTCATTTTCTACTCAT-1,◯, -GTCCTCATCACTATTC-1,◯, -GTCCTCATCCTTGGTC-1,◯, -GTCGGGTAGGTGATTA-1,◯, -GTCGGGTCACACATGT-1,◯, -GTCGGGTCACACGCTG-1,◯, -GTCGGGTCAGGCGATA-1,◯, -GTCGGGTGTGATAAGT-1,◯, -GTCGGGTTCTACCAGA-1,◯, -GTCGGGTTCTACTATC-1,◯, -GTCGTAAAGCAATCTC-1,◯, -GTCGTAAAGGTTCCTA-1,◯, -GTCGTAAAGTGTACCT-1,◯, -GTCGTAACAGGCTGAA-1,◯, -GTCGTAAGTCCGTTAA-1,◯, -GTCGTAATCAACTCTT-1,◯, -GTCGTAATCAGCTGGC-1,◯, -GTCGTAATCCACTCCA-1,◯, -GTCGTAATCGTCCAGG-1,◯, -GTCTCGTAGCGCCTCA-1,◯, -GTCTCGTAGCTAAACA-1,◯, -GTCTCGTTCGTTTAGG-1,◯, -GTCTTCGAGCGAGAAA-1,◯, -GTCTTCGGTACATGTC-1,◯, -GTCTTCGGTCTAAACC-1,◯, -GTCTTCGGTGGCGAAT-1,◯, -GTCTTCGGTGTTGGGA-1,◯, -GTCTTCGTCGACAGCC-1,◯, -GTCTTCGTCGGCTACG-1,◯, -GTCTTCGTCTTATCTG-1,◯, -GTGAAGGAGAAACCAT-1,◯, -GTGAAGGAGGTGCTTT-1,◯, -GTGAAGGTCAGTTGAC-1,◯, -GTGAAGGTCGTACGGC-1,◯, -GTGCAGCAGATGAGAG-1,◯, -GTGCAGCAGCGTGAGT-1,◯, -GTGCAGCCACATGACT-1,◯, -GTGCAGCCACGGTGTC-1,◯, -GTGCAGCGTGGCCCTA-1,◯, -GTGCAGCTCTGTACGA-1,◯, -GTGCATAAGGAGCGAG-1,◯, -GTGCATACAAGCTGAG-1,◯, -GTGCATATCATGCATG-1,◯, -GTGCATATCCGTCAAA-1,◯, -GTGCGGTAGAAGGCCT-1,◯, -GTGCGGTAGGCTAGCA-1,◯, -GTGCGGTCACGGATAG-1,◯, -GTGCGGTGTCCTGCTT-1,◯, -GTGCGGTGTCGCCATG-1,◯, -GTGCGGTTCAGTGTTG-1,◯, -GTGCGGTTCATACGGT-1,◯, -GTGCGGTTCCTTGGTC-1,◯, -GTGCGGTTCTCGGACG-1,◯, -GTGCTTCGTCAGGACA-1,◯, -GTGCTTCGTTGTGGCC-1,◯, -GTGCTTCTCACCTTAT-1,◯, -GTGCTTCTCAGGCAAG-1,◯, -GTGCTTCTCCACGTTC-1,◯, -GTGGGTCAGAGCCTAG-1,◯, -GTGGGTCAGGTGTTAA-1,◯, -GTGGGTCCATGTCGAT-1,◯, -GTGGGTCGTCTCCCTA-1,◯, -GTGTGCGCAGGGCATA-1,◯, -GTGTGCGGTAACGCGA-1,◯, -GTGTGCGGTAGCGTCC-1,◯, -GTGTGCGTCACAATGC-1,◯, -GTGTTAGAGAGCTATA-1,◯, 
-GTGTTAGAGTGTTGAA-1,◯, -GTGTTAGCACAGACTT-1,◯, -GTGTTAGCACGGCTAC-1,◯, -GTGTTAGCATGCAACT-1,◯, -GTGTTAGCATGCGCAC-1,◯, -GTGTTAGGTTAGTGGG-1,◯, -GTTAAGCTCAAAGACA-1,◯, -GTTACAGCATTACCTT-1,◯, -GTTACAGGTCCGCTGA-1,◯, -GTTACAGGTCTAAAGA-1,◯, -GTTACAGGTTCTGAAC-1,◯, -GTTCATTAGAAGGTTT-1,◯, -GTTCATTAGTGGAGAA-1,◯, -GTTCATTCAAGTCTGT-1,◯, -GTTCATTCACTCAGGC-1,◯, -GTTCATTGTAAGGGAA-1,◯, -GTTCATTGTCCGAACC-1,◯, -GTTCATTTCAGCAACT-1,◯, -GTTCATTTCCGAACGC-1,◯, -GTTCATTTCGTGGACC-1,◯, -GTTCGGGAGGATGTAT-1,◯, -GTTCGGGAGGGTCTCC-1,◯, -GTTCGGGGTACAGCAG-1,◯, -GTTCGGGTCGGTGTCG-1,◯, -GTTCGGGTCTGCAGTA-1,◯, -GTTCTCGAGGACTGGT-1,◯, -GTTCTCGAGGAGTTGC-1,◯, -GTTCTCGAGTACGCGA-1,◯, -GTTCTCGCATAGTAAG-1,◯, -GTTCTCGGTCCGAAGA-1,◯, -GTTCTCGTCACCGGGT-1,◯, -GTTTCTACACAACGCC-1,◯, -GTTTCTACACAGATTC-1,◯, -GTTTCTACACATGGGA-1,◯, -GTTTCTATCTTGAGGT-1,◯, -TAAACCGAGACTTTCG-1,◯, -TAAACCGCAATGACCT-1,◯, -TAAACCGCAGTATAAG-1,◯, -TAAACCGGTATAGGTA-1,◯, -TAAACCGGTCACTGGC-1,◯, -TAAACCGGTGGACGAT-1,◯, -TAAGAGAAGCGTCTAT-1,◯, -TAAGAGAAGGAGTAGA-1,◯, -TAAGAGAAGGCGACAT-1,◯, -TAAGAGATCTCGATGA-1,◯, -TAAGCGTAGAATGTGT-1,◯, -TAAGCGTAGGACATTA-1,◯, -TAAGCGTGTCGCGGTT-1,◯, -TAAGCGTTCGGCTTGG-1,◯, -TAAGTGCAGTAGCCGA-1,◯, -TAAGTGCCAAGTAGTA-1,◯, -TAAGTGCCATAGGATA-1,◯, -TAAGTGCGTACCCAAT-1,◯, -TAAGTGCTCACAACGT-1,◯, -TAAGTGCTCATTGCGA-1,◯, -TACACGAAGAGACGAA-1,◯, -TACACGAAGCCAGAAC-1,◯, -TACACGAAGGTGCACA-1,◯, -TACACGAAGTTAAGTG-1,◯, -TACACGACAGTATGCT-1,◯, -TACACGAGTCACACGC-1,◯, -TACACGAGTTCCGGCA-1,◯, -TACACGATCATAGCAC-1,◯, -TACACGATCTTAGCCC-1,◯, -TACAGTGAGATAGCAT-1,◯, -TACAGTGAGTACGCGA-1,◯, -TACAGTGCATCACCCT-1,◯, -TACAGTGGTCTAGCCG-1,◯, -TACAGTGTCCAAGTAC-1,◯, -TACAGTGTCTAAGCCA-1,◯, -TACAGTGTCTTGTCAT-1,◯, -TACCTATAGGAGTAGA-1,◯, -TACCTATAGGCCATAG-1,◯, -TACCTATAGGGTCGAT-1,◯, -TACCTATCACATCTTT-1,◯, -TACCTATCAGGCTGAA-1,◯, -TACCTATGTACAGACG-1,◯, -TACCTATTCTTGCAAG-1,◯, -TACCTTAAGGTTCCTA-1,◯, -TACCTTAAGTTGAGTA-1,◯, -TACCTTAGTGTCTGAT-1,◯, -TACCTTAGTTACGCGC-1,◯, -TACCTTATCGAGAACG-1,◯, -TACGGATAGAATTGTG-1,◯, -TACGGATAGGACAGAA-1,◯, -TACGGATCAAAGTGCG-1,◯, 
-TACGGATCAGGCTCAC-1,◯, -TACGGATTCCTATTCA-1,◯, -TACGGGCAGAATTCCC-1,◯, -TACGGGCGTATCACCA-1,◯, -TACGGGCTCCTGCTTG-1,◯, -TACGGTAAGCGCTTAT-1,◯, -TACGGTAAGTAGCGGT-1,◯, -TACGGTACAGTATCTG-1,◯, -TACGGTAGTAGCGATG-1,◯, -TACGGTAGTCTCACCT-1,◯, -TACGGTATCTTGTTTG-1,◯, -TACTCATAGGCAATTA-1,◯, -TACTCATGTCCGTGAC-1,◯, -TACTCGCAGCGCTTAT-1,◯, -TACTCGCAGGACACCA-1,◯, -TACTCGCCAAGACACG-1,◯, -TACTCGCGTGCTCTTC-1,◯, -TACTCGCGTGTAACGG-1,◯, -TACTCGCTCAATCTCT-1,◯, -TACTCGCTCCGTACAA-1,◯, -TACTTACAGAATCTCC-1,◯, -TACTTACAGGCGATAC-1,◯, -TACTTACAGTGACTCT-1,◯, -TACTTACCAAGGTGTG-1,◯, -TACTTACCAGACACTT-1,◯, -TACTTACGTTACGTCA-1,◯, -TACTTACGTTCCAACA-1,◯, -TACTTACTCCTCAATT-1,◯, -TACTTACTCTAACTTC-1,◯, -TACTTACTCTGGGCCA-1,◯, -TACTTGTAGTTGTAGA-1,◯, -TACTTGTCAACTGGCC-1,◯, -TACTTGTCACCTATCC-1,◯, -TACTTGTCATCCAACA-1,◯, -TACTTGTGTAGAGCTG-1,◯, -TACTTGTGTAGGACAC-1,◯, -TACTTGTTCAACGAAA-1,◯, -TACTTGTTCACCAGGC-1,◯, -TACTTGTTCTGGTATG-1,◯, -TAGACCAAGGATGCGT-1,◯, -TAGACCAGTACGACCC-1,◯, -TAGACCATCGACCAGC-1,◯, -TAGAGCTAGCGGATCA-1,◯, -TAGAGCTCATGGAATA-1,◯, -TAGAGCTGTAGGACAC-1,◯, -TAGAGCTTCCGTTGTC-1,◯, -TAGCCGGAGAATCTCC-1,◯, -TAGCCGGAGCACAGGT-1,◯, -TAGCCGGAGCCGCCTA-1,◯, -TAGCCGGCACCATGTA-1,◯, -TAGCCGGCAGTAAGCG-1,◯, -TAGGCATAGACTACAA-1,◯, -TAGGCATCACTACAGT-1,◯, -TAGGCATGTCTAGTGT-1,◯, -TAGGCATGTGTAAGTA-1,◯, -TAGGCATTCAACCAAC-1,◯, -TAGTGGTCAGGACGTA-1,◯, -TAGTGGTCATTAGCCA-1,◯, -TAGTGGTCATTGTGCA-1,◯, -TAGTGGTGTACCGAGA-1,◯, -TAGTGGTGTACGCTGC-1,◯, -TAGTGGTGTCTAAAGA-1,◯, -TAGTGGTTCCTAAGTG-1,◯, -TAGTGGTTCGGAAATA-1,◯, -TAGTTGGAGACTACAA-1,◯, -TAGTTGGAGAGTACCG-1,◯, -TAGTTGGAGTGGGATC-1,◯, -TAGTTGGCAATGTTGC-1,◯, -TAGTTGGGTTATGTGC-1,◯, -TAGTTGGTCTAACTTC-1,◯, -TATCAGGAGTACCGGA-1,◯, -TATCAGGCAATAGAGT-1,◯, -TATCAGGCACGGCGTT-1,◯, -TATCAGGGTACCCAAT-1,◯, -TATCAGGGTCGTTGTA-1,◯, -TATCAGGGTTCGTGAT-1,◯, -TATCAGGTCATCACCC-1,◯, -TATCTCAAGTTCGATC-1,◯, -TATCTCAGTAGCTGCC-1,◯, -TATCTCAGTAGGGACT-1,◯, -TATCTCATCAAGGTAA-1,◯, -TATCTCATCCGTCATC-1,◯, -TATGCCCAGCGCCTCA-1,◯, -TATGCCCAGCTAGGCA-1,◯, -TATGCCCCAATAGCGG-1,◯, -TATGCCCCACCATCCT-1,◯, 
-TATGCCCCAGGCGATA-1,◯, -TATGCCCCAGTAGAGC-1,◯, -TATGCCCGTTCGCGAC-1,◯, -TATGCCCTCAAGCCTA-1,◯, -TATGCCCTCGAGGTAG-1,◯, -TATGCCCTCTATCGCC-1,◯, -TATTACCAGTGTGGCA-1,◯, -TATTACCCACCCTATC-1,◯, -TATTACCGTCTTTCAT-1,◯, -TATTACCGTGAGGGAG-1,◯, -TATTACCGTGTTAAGA-1,◯, -TATTACCTCAACCATG-1,◯, -TATTACCTCTTAACCT-1,◯, -TCAACGAAGTGGTCCC-1,◯, -TCAACGAGTAACGTTC-1,◯, -TCAACGAGTAGCGATG-1,◯, -TCAACGAGTCTGCAAT-1,◯, -TCAACGATCCAACCAA-1,◯, -TCAATCTAGGTGACCA-1,◯, -TCAATCTCACCGATAT-1,◯, -TCAATCTGTCTTCGTC-1,◯, -TCAATCTTCAAGGTAA-1,◯, -TCAATCTTCAGATAAG-1,◯, -TCACAAGAGGATGGAA-1,◯, -TCACAAGCATCACCCT-1,◯, -TCACAAGCATTCACTT-1,◯, -TCACAAGGTACTCGCG-1,◯, -TCACAAGGTGCACCAC-1,◯, -TCACAAGGTGTGCGTC-1,◯, -TCACAAGTCAACGAAA-1,◯, -TCACGAAAGATCGATA-1,◯, -TCACGAACACATTCGA-1,◯, -TCACGAACAGAGCCAA-1,◯, -TCAGATGAGCCATCGC-1,◯, -TCAGATGCAAGGTTTC-1,◯, -TCAGATGCAGGACCCT-1,◯, -TCAGATGGTTACGCGC-1,◯, -TCAGATGTCATCGCTC-1,◯, -TCAGCAAAGCTAACAA-1,◯, -TCAGCAACAGTCAGAG-1,◯, -TCAGCAACATAACCTG-1,◯, -TCAGCTCAGCGTGAGT-1,◯, -TCAGCTCCAGCGTTCG-1,◯, -TCAGCTCGTTACAGAA-1,◯, -TCAGCTCGTTGGTTTG-1,◯, -TCAGCTCTCACCTTAT-1,◯, -TCAGCTCTCGGTGTTA-1,◯, -TCAGCTCTCTATCCTA-1,◯, -TCAGGATAGAAGGGTA-1,◯, -TCAGGATAGCACACAG-1,◯, -TCAGGATGTTGAGGTG-1,◯, -TCAGGATGTTTGACAC-1,◯, -TCAGGATTCACGACTA-1,◯, -TCAGGATTCATGCTCC-1,◯, -TCAGGATTCGGCGCTA-1,◯, -TCAGGTAAGCAAATCA-1,◯, -TCAGGTACAATACGCT-1,◯, -TCAGGTACAATGGAAT-1,◯, -TCAGGTACAGGCTGAA-1,◯, -TCAGGTAGTAAACACA-1,◯, -TCAGGTATCAAGGCTT-1,◯, -TCAGGTATCAGCGATT-1,◯, -TCATTACCAAACAACA-1,◯, -TCATTACCACCGGAAA-1,◯, -TCATTACCAGACGCCT-1,◯, -TCATTACGTAGTAGTA-1,◯, -TCATTACTCATGTAGC-1,◯, -TCATTACTCCCTCAGT-1,◯, -TCATTACTCCGTTGCT-1,◯, -TCATTACTCCTTGACC-1,◯, -TCATTACTCGCCTGTT-1,◯, -TCATTACTCGTGGTCG-1,◯, -TCATTACTCTTCATGT-1,◯, -TCATTTGAGATCGGGT-1,◯, -TCATTTGCACATCCGG-1,◯, -TCATTTGCACCAGGCT-1,◯, -TCATTTGCATCTCGCT-1,◯, -TCATTTGCATGTAAGA-1,◯, -TCATTTGGTAGGAGTC-1,◯, -TCATTTGGTTATGCGT-1,◯, -TCATTTGGTTCCCTTG-1,◯, -TCCACACCAGCTATTG-1,◯, -TCCACACCATGTAAGA-1,◯, -TCCACACGTACCCAAT-1,◯, -TCCACACGTCTAGTCA-1,◯, -TCCCGATAGTGAACGC-1,◯, 
-TCCCGATCACACAGAG-1,◯, -TCCCGATCACGGACAA-1,◯, -TCCCGATGTAATCACC-1,◯, -TCCCGATGTCGATTGT-1,◯, -TCCCGATTCCGCATAA-1,◯, -TCCCGATTCTGCTGTC-1,◯, -TCGAGGCAGATCCGAG-1,◯, -TCGAGGCAGTACGCCC-1,◯, -TCGAGGCCAAGTCATC-1,◯, -TCGAGGCGTACCTACA-1,◯, -TCGAGGCGTCTGCGGT-1,◯, -TCGAGGCTCTCTGTCG-1,◯, -TCGCGAGAGCCCAACC-1,◯, -TCGCGAGAGCTTTGGT-1,◯, -TCGCGAGCATGGTCTA-1,◯, -TCGCGAGTCGAACTGT-1,◯, -TCGCGTTAGAAGATTC-1,◯, -TCGCGTTAGTTCGCAT-1,◯, -TCGCGTTCAGAGCCAA-1,◯, -TCGCGTTTCGTCCAGG-1,◯, -TCGGGACAGATGGGTC-1,◯, -TCGGGACAGGATTCGG-1,◯, -TCGGGACAGGCTAGAC-1,◯, -TCGGGACCAACTGCGC-1,◯, -TCGGGACCACACGCTG-1,◯, -TCGGGACCATCCTTGC-1,◯, -TCGGGACGTGAGTGAC-1,◯, -TCGGGACTCCGAGCCA-1,◯, -TCGGGACTCCGCGCAA-1,◯, -TCGGTAAAGTCTCCTC-1,◯, -TCGGTAAAGTGGAGTC-1,◯, -TCGGTAAGTTACGACT-1,◯, -TCGGTAATCCGCATCT-1,◯, -TCGGTAATCTCGTTTA-1,◯, -TCGTACCAGACCACGA-1,◯, -TCGTACCAGGATCGCA-1,◯, -TCGTACCAGGGAAACA-1,◯, -TCGTACCCAACACCTA-1,◯, -TCGTACCCACCGTTGG-1,◯, -TCGTACCTCCAAACAC-1,◯, -TCGTACCTCCAAGCCG-1,◯, -TCGTACCTCTTGAGAC-1,◯, -TCGTAGATCGCCTGAG-1,◯, -TCTATTGAGGAGTAGA-1,◯, -TCTATTGAGGTTACCT-1,◯, -TCTATTGCAACGATGG-1,◯, -TCTATTGCATATGCTG-1,◯, -TCTATTGGTCTTGTCC-1,◯, -TCTATTGTCCCATTTA-1,◯, -TCTATTGTCTTGCAAG-1,◯, -TCTCATAAGAGACTAT-1,◯, -TCTCATAAGGATCGCA-1,◯, -TCTCATAAGTGAACGC-1,◯, -TCTCATAGTTCGTTGA-1,◯, -TCTCATATCCGAATGT-1,◯, -TCTCATATCGCACTCT-1,◯, -TCTCTAAAGCCGCCTA-1,◯, -TCTCTAACAAGTTGTC-1,◯, -TCTCTAAGTCTTCAAG-1,◯, -TCTCTAATCATAAAGG-1,◯, -TCTCTAATCCGCAAGC-1,◯, -TCTCTAATCGTGGTCG-1,◯, -TCTCTAATCTATCCCG-1,◯, -TCTCTAATCTTATCTG-1,◯, -TCTGAGAAGCCACGCT-1,◯, -TCTGAGAAGGGCTTCC-1,◯, -TCTGAGATCAACGGCC-1,◯, -TCTGAGATCAGCTCTC-1,◯, -TCTGAGATCGCCAAAT-1,◯, -TCTGGAACACGAGGTA-1,◯, -TCTGGAACAGATGAGC-1,◯, -TCTTCGGCAAGCGATG-1,◯, -TCTTCGGCACATCCAA-1,◯, -TCTTCGGCAGTGAGTG-1,◯, -TCTTCGGTCCTACAGA-1,◯, -TCTTTCCAGCAAATCA-1,◯, -TCTTTCCAGCCTCGTG-1,◯, -TCTTTCCAGGGTCGAT-1,◯, -TCTTTCCAGGGTCTCC-1,◯, -TCTTTCCAGGGTGTGT-1,◯, -TCTTTCCCAATAGCAA-1,◯, -TCTTTCCCACCACCAG-1,◯, -TCTTTCCGTAAGAGGA-1,◯, -TCTTTCCGTTCCGGCA-1,◯, -TCTTTCCTCTCTTGAT-1,◯, -TGAAAGAAGAAGGACA-1,◯, 
-TGAAAGAAGGAACTGC-1,◯, -TGAAAGAAGGCTCAGA-1,◯, -TGAAAGAAGTAGTGCG-1,◯, -TGAAAGACAACTGCTA-1,◯, -TGAAAGACAAGGTTCT-1,◯, -TGAAAGACATTGGTAC-1,◯, -TGAAAGATCAATCTCT-1,◯, -TGACAACCACAGAGGT-1,◯, -TGACAACCACGAAACG-1,◯, -TGACAACGTTAAGATG-1,◯, -TGACAACGTTGAGTTC-1,◯, -TGACAACTCAACACCA-1,◯, -TGACAACTCATGCATG-1,◯, -TGACGGCCAAGCGATG-1,◯, -TGACGGCCAGTGACAG-1,◯, -TGACGGCGTACTTCTT-1,◯, -TGACGGCGTATCAGTC-1,◯, -TGACGGCGTCATACTG-1,◯, -TGACGGCGTGCAGACA-1,◯, -TGACGGCTCAGTGTTG-1,◯, -TGACTAGAGAGACGAA-1,◯, -TGACTAGGTCTCCCTA-1,◯, -TGACTAGGTGACTCAT-1,◯, -TGACTAGTCATCGGAT-1,◯, -TGACTAGTCATTCACT-1,◯, -TGACTAGTCGAATCCA-1,◯, -TGACTAGTCTCGTATT-1,◯, -TGACTTTAGCCCAGCT-1,◯, -TGACTTTAGCTAAGAT-1,◯, -TGACTTTAGGGCTTGA-1,◯, -TGACTTTAGGTGATAT-1,◯, -TGACTTTGTCCGTTAA-1,◯, -TGACTTTGTTGGTGGA-1,◯, -TGACTTTTCCGCGTTT-1,◯, -TGACTTTTCTACTCAT-1,◯, -TGAGAGGGTCAGATAA-1,◯, -TGAGAGGGTCGAAAGC-1,◯, -TGAGCATCACTTAACG-1,◯, -TGAGCCGAGGGTGTGT-1,◯, -TGAGCCGCACTTAAGC-1,◯, -TGAGCCGCATGACGGA-1,◯, -TGAGGGAAGAATGTTG-1,◯, -TGAGGGAAGATGGGTC-1,◯, -TGAGGGAGTGATGTCT-1,◯, -TGAGGGAGTTGATTCG-1,◯, -TGAGGGATCACGCGGT-1,◯, -TGAGGGATCTCAAACG-1,◯, -TGATTTCCAGGACCCT-1,◯, -TGATTTCTCCAGAGGA-1,◯, -TGCACCTAGAATTCCC-1,◯, -TGCACCTAGAGACTTA-1,◯, -TGCACCTAGAGTCTGG-1,◯, -TGCACCTAGGGAACGG-1,◯, -TGCACCTAGTCGTACT-1,◯, -TGCACCTCATGAAGTA-1,◯, -TGCACCTGTAAACGCG-1,◯, -TGCACCTGTCTCCATC-1,◯, -TGCACCTTCATTGCCC-1,◯, -TGCACCTTCCAGATCA-1,◯, -TGCACCTTCTGGCGAC-1,◯, -TGCCAAAAGGAGTTTA-1,◯, -TGCCAAACAAAGGTGC-1,◯, -TGCCAAACAAGTTGTC-1,◯, -TGCCAAACATTGGTAC-1,◯, -TGCCAAATCAAGGTAA-1,◯, -TGCCAAATCACCGTAA-1,◯, -TGCCAAATCGGAAACG-1,◯, -TGCCCATAGCTAGGCA-1,◯, -TGCCCATAGCTCTCGG-1,◯, -TGCCCATAGGTGGGTT-1,◯, -TGCCCATAGTCTCAAC-1,◯, -TGCCCATCAAGCCCAC-1,◯, -TGCCCATCAATAGCAA-1,◯, -TGCCCATCACGCTTTC-1,◯, -TGCCCATGTCTTCTCG-1,◯, -TGCCCATTCCTTGCCA-1,◯, -TGCCCATTCGACGGAA-1,◯, -TGCCCTAAGGTGATTA-1,◯, -TGCCCTACATGCTAGT-1,◯, -TGCCCTACATGTAAGA-1,◯, -TGCCCTATCCACTCCA-1,◯, -TGCCCTATCGCCGTGA-1,◯, -TGCCCTATCTGCAAGT-1,◯, -TGCGCAGGTAAATACG-1,◯, -TGCGCAGGTTATGTGC-1,◯, -TGCGCAGTCGAGAACG-1,◯, 
-TGCGCAGTCGAGCCCA-1,◯, -TGCGGGTCATCTGGTA-1,◯, -TGCGGGTTCAGCGACC-1,◯, -TGCGTGGAGAAGAAGC-1,◯, -TGCGTGGCACTTCGAA-1,◯, -TGCGTGGCAGACGCTC-1,◯, -TGCGTGGGTAGCGTCC-1,◯, -TGCGTGGGTAGGAGTC-1,◯, -TGCGTGGGTCTAGAGG-1,◯, -TGCGTGGGTGACTCAT-1,◯, -TGCGTGGGTGATGTCT-1,◯, -TGCGTGGTCGTTTAGG-1,◯, -TGCTACCAGATGTTAG-1,◯, -TGCTACCAGTTAACGA-1,◯, -TGCTACCGTGAGGGAG-1,◯, -TGCTACCGTGCACCAC-1,◯, -TGCTACCGTGGCTCCA-1,◯, -TGCTACCTCAGTTGAC-1,◯, -TGCTGCTAGAAGGTGA-1,◯, -TGCTGCTAGCGTGTCC-1,◯, -TGCTGCTAGTGGTCCC-1,◯, -TGCTGCTGTCGAAAGC-1,◯, -TGCTGCTGTGGCGAAT-1,◯, -TGCTGCTTCGCTGATA-1,◯, -TGGACGCAGAGCTGGT-1,◯, -TGGACGCAGATCCGAG-1,◯, -TGGACGCGTCGATTGT-1,◯, -TGGACGCGTCTCTCTG-1,◯, -TGGACGCTCATTTGGG-1,◯, -TGGACGCTCCGCTGTT-1,◯, -TGGACGCTCCTATGTT-1,◯, -TGGCCAGAGAGAACAG-1,◯, -TGGCCAGAGCTAAACA-1,◯, -TGGCCAGCAGGGTTAG-1,◯, -TGGCCAGGTTACGCGC-1,◯, -TGGCCAGTCGTGACAT-1,◯, -TGGCGCAAGGGATGGG-1,◯, -TGGCGCAGTAAGCACG-1,◯, -TGGCGCAGTTAGATGA-1,◯, -TGGCGCAGTTTACTCT-1,◯, -TGGCGCATCGATGAGG-1,◯, -TGGCTGGAGAGACTTA-1,◯, -TGGCTGGAGGCAATTA-1,◯, -TGGCTGGCAGGGTTAG-1,◯, -TGGCTGGCAGTAACGG-1,◯, -TGGCTGGGTCTTCGTC-1,◯, -TGGCTGGTCAGGCGAA-1,◯, -TGGCTGGTCTAACCGA-1,◯, -TGGCTGGTCTACTTAC-1,◯, -TGGCTGGTCTTTACAC-1,◯, -TGGGAAGCAAGAAGAG-1,◯, -TGGGAAGCATATACCG-1,◯, -TGGGAAGTCAACCAAC-1,◯, -TGGGAAGTCTCAAACG-1,◯, -TGGGCGTAGTCCAGGA-1,◯, -TGGGCGTCACTTAACG-1,◯, -TGGGCGTGTAAACACA-1,◯, -TGGGCGTGTACAGTGG-1,◯, -TGGGCGTGTCCTAGCG-1,◯, -TGGGCGTGTGTTCTTT-1,◯, -TGGGCGTTCCCGGATG-1,◯, -TGGGCGTTCTGCAGTA-1,◯, -TGGTTAGAGGGCATGT-1,◯, -TGGTTAGCACCATCCT-1,◯, -TGGTTAGGTATTAGCC-1,◯, -TGGTTAGGTTAAGAAC-1,◯, -TGGTTAGGTTGTTTGG-1,◯, -TGGTTCCAGCCATCGC-1,◯, -TGGTTCCCAAGCCTAT-1,◯, -TGGTTCCCACCCAGTG-1,◯, -TGGTTCCCACGACTCG-1,◯, -TGGTTCCGTTCTCATT-1,◯, -TGGTTCCTCACAGTAC-1,◯, -TGGTTCCTCTTCATGT-1,◯, -TGTATTCAGCGTGAAC-1,◯, -TGTATTCAGGTTACCT-1,◯, -TGTATTCCATGCTGGC-1,◯, -TGTATTCGTGACTCAT-1,◯, -TGTATTCTCCAAACAC-1,◯, -TGTATTCTCGTTTAGG-1,◯, -TGTATTCTCTCAAACG-1,◯, -TGTCCCAAGACTAGAT-1,◯, -TGTCCCAAGACTTGAA-1,◯, -TGTCCCAAGTGCGATG-1,◯, -TGTCCCACACAGACTT-1,◯, -TGTCCCACAGGATTGG-1,◯, 
-TGTCCCACAGTACACT-1,◯, -TGTCCCACATAGTAAG-1,◯, -TGTCCCAGTCATCCCT-1,◯, -TGTCCCATCGTTACAG-1,◯, -TGTCCCATCTAGCACA-1,◯, -TGTGGTAAGTCAAGGC-1,◯, -TGTGGTATCAAGGCTT-1,◯, -TGTGGTATCCTCAACC-1,◯, -TGTGGTATCTTCTGGC-1,◯, -TGTGTTTAGAGGTTGC-1,◯, -TGTGTTTAGGACGAAA-1,◯, -TGTGTTTAGTGGGCTA-1,◯, -TGTGTTTCAGTCAGCC-1,◯, -TGTGTTTCATGTAGTC-1,◯, -TGTGTTTGTGAAAGAG-1,◯, -TGTGTTTTCGATCCCT-1,◯, -TGTGTTTTCTCCGGTT-1,◯, -TGTGTTTTCTTGTCAT-1,◯, -TGTTCCGAGACGCTTT-1,◯, -TGTTCCGAGATTACCC-1,◯, -TTAACTCCAAGCCGTC-1,◯, -TTAACTCCAAGCGAGT-1,◯, -TTAACTCGTAGCCTCG-1,◯, -TTAACTCGTTCGCGAC-1,◯, -TTAACTCTCCGTAGGC-1,◯, -TTAACTCTCTCGCATC-1,◯, -TTAGGACAGCGATATA-1,◯, -TTAGGACAGCGTAATA-1,◯, -TTAGGACGTCGCTTTC-1,◯, -TTAGGACTCACATAGC-1,◯, -TTAGGACTCTTTACAC-1,◯, -TTAGTTCAGGGATACC-1,◯, -TTAGTTCAGTGTGAAT-1,◯, -TTAGTTCCAGACAAAT-1,◯, -TTAGTTCCATGCCACG-1,◯, -TTAGTTCGTCTGGAGA-1,◯, -TTAGTTCTCAACGGGA-1,◯, -TTAGTTCTCGCCTGAG-1,◯, -TTATGCTAGTGGTAGC-1,◯, -TTATGCTGTACCGAGA-1,◯, -TTATGCTGTCTGATCA-1,◯, -TTATGCTGTCTTGATG-1,◯, -TTATGCTGTGCACCAC-1,◯, -TTATGCTTCCAAAGTC-1,◯, -TTCCCAGAGGCTCATT-1,◯, -TTCCCAGAGTGTCCAT-1,◯, -TTCCCAGCAATCCGAT-1,◯, -TTCCCAGCAGTCCTTC-1,◯, -TTCCCAGGTGATGCCC-1,◯, -TTCCCAGGTGTGTGCC-1,◯, -TTCCCAGGTTACGTCA-1,◯, -TTCCCAGTCGGAATCT-1,◯, -TTCGAAGAGTAGTGCG-1,◯, -TTCGAAGCAGGAATCG-1,◯, -TTCGAAGCAGGGTACA-1,◯, -TTCGAAGCATATGCTG-1,◯, -TTCGAAGTCAGGTTCA-1,◯, -TTCGAAGTCTCTTGAT-1,◯, -TTCGGTCAGAAGAAGC-1,◯, -TTCGGTCAGGTACTCT-1,◯, -TTCGGTCCAACACCTA-1,◯, -TTCGGTCCATGACATC-1,◯, -TTCGGTCGTCCGAATT-1,◯, -TTCTACAAGACTAGGC-1,◯, -TTCTACAAGATAGGAG-1,◯, -TTCTACAAGCCGATTT-1,◯, -TTCTACAGTCTAGGTT-1,◯, -TTCTACAGTCTAGTCA-1,◯, -TTCTACAGTTCAGTAC-1,◯, -TTCTACATCAACGAAA-1,◯, -TTCTCAAAGACTCGGA-1,◯, -TTCTCAAAGCGATTCT-1,◯, -TTCTCAACAAAGCAAT-1,◯, -TTCTCAAGTGCACGAA-1,◯, -TTCTCCTAGATCCCGC-1,◯, -TTCTCCTCAAACAACA-1,◯, -TTCTCCTCATCATCCC-1,◯, -TTCTCCTGTGCAACGA-1,◯, -TTCTCCTTCCGCATCT-1,◯, -TTCTCCTTCTGTACGA-1,◯, -TTCTTAGCAATAACGA-1,◯, -TTCTTAGCACATGTGT-1,◯, -TTCTTAGCACCCAGTG-1,◯, -TTCTTAGCATACGCTA-1,◯, -TTCTTAGGTTCGGGCT-1,◯, -TTGAACGAGGATGGAA-1,◯, 
-TTGAACGCAAGTCTAC-1,◯, -TTGAACGGTTCAACCA-1,◯, -TTGAACGTCAACACTG-1,◯, -TTGAACGTCACCCGAG-1,◯, -TTGAACGTCCTCATTA-1,◯, -TTGAACGTCTCGTTTA-1,◯, -TTGACTTAGCTTTGGT-1,◯, -TTGACTTAGGGATACC-1,◯, -TTGACTTCAATACGCT-1,◯, -TTGACTTGTTGGTTTG-1,◯, -TTGACTTTCTGTTTGT-1,◯, -TTGCCGTAGCTGCGAA-1,◯, -TTGCCGTCACTAGTAC-1,◯, -TTGCCGTCATGCAATC-1,◯, -TTGCCGTCATTGGCGC-1,◯, -TTGCCGTTCGAATGGG-1,◯, -TTGCCGTTCGGTCCGA-1,◯, -TTGCCGTTCGTTGACA-1,◯, -TTGCCGTTCTTGAGGT-1,◯, -TTGCGTCAGTCTTGCA-1,◯, -TTGCGTCCAACACCTA-1,◯, -TTGCGTCCAGCAGTTT-1,◯, -TTGCGTCCAGCATGAG-1,◯, -TTGCGTCCAGTGGAGT-1,◯, -TTGCGTCTCACTTACT-1,◯, -TTGGAACAGAAGGCCT-1,◯, -TTGGAACAGATGGGTC-1,◯, -TTGGAACGTGCCTGGT-1,◯, -TTGGAACGTGTGCGTC-1,◯, -TTGGCAAAGCCGGTAA-1,◯, -TTGGCAAAGTTTAGGA-1,◯, -TTGGCAACAAAGGCGT-1,◯, -TTGGCAACAAGGTTTC-1,◯, -TTGGCAACACCTCGTT-1,◯, -TTGGCAAGTACTTAGC-1,◯, -TTGGCAATCAGCATGT-1,◯, -TTGTAGGAGCTAGGCA-1,◯, -TTGTAGGAGTGAACAT-1,◯, -TTGTAGGAGTTTGCGT-1,◯, -TTGTAGGGTCAGAATA-1,◯, -TTGTAGGTCCCAAGAT-1,◯, -TTTACTGAGGTGATAT-1,◯, -TTTACTGCATGTTCCC-1,◯, -TTTACTGGTAGTAGTA-1,◯, -TTTACTGTCGCCAAAT-1,◯, -TTTATGCAGGACAGAA-1,◯, -TTTATGCAGTCGAGTG-1,◯, -TTTATGCCATGCTGGC-1,◯, -TTTATGCGTGCATCTA-1,◯, -TTTATGCGTTGCCTCT-1,◯, -TTTCCTCTCGAGAACG-1,◯, -TTTGCGCAGACAGGCT-1,◯, -TTTGCGCAGCGCCTCA-1,◯, -TTTGCGCAGTGTCCCG-1,◯, -TTTGCGCTCATCGATG-1,◯, -TTTGCGCTCCATGCTC-1,◯, -TTTGGTTAGTAGGCCA-1,◯, -TTTGGTTAGTGGAGTC-1,◯, -TTTGGTTCACCGGAAA-1,◯, -TTTGGTTCATCATCCC-1,◯, -TTTGGTTGTGTGGTTT-1,◯, -TTTGGTTGTTAAGATG-1,◯, -TTTGGTTGTTCATGGT-1,◯, -TTTGGTTGTTCGCTAA-1,◯, -TTTGGTTTCGCCATAA-1,◯, -TTTGTCAAGCTCAACT-1,◯, -TTTGTCAAGGCTATCT-1,◯, -TTTGTCACAGTCACTA-1,◯, -TTTGTCACATACGCCG-1,◯, -TTTGTCACATTCCTGC-1,◯, -TTTGTCAGTATTCTCT-1,◯, -TTTGTCAGTCTTGCGG-1,◯, -TTTGTCAGTGTAATGA-1,◯, -TTTGTCATCCCAAGAT-1,◯, -TTTGTCATCCTGCTTG-1,◯, diff --git a/enclone_main/testx/inputs/123085.binary.sha256 b/enclone_main/testx/inputs/123085.binary.sha256 deleted file mode 100644 index 7021b6b7a..000000000 --- a/enclone_main/testx/inputs/123085.binary.sha256 +++ /dev/null @@ -1 +0,0 @@ 
-28e124e5a2d523e58ec62939f42e8448015c1b584222449a131f437eb0806298 \ No newline at end of file diff --git a/enclone_main/testx/inputs/128024_cells.csv b/enclone_main/testx/inputs/128024_cells.csv deleted file mode 100644 index 23435b0f1..000000000 --- a/enclone_main/testx/inputs/128024_cells.csv +++ /dev/null @@ -1,5346 +0,0 @@ -barcode,T -AAACCTGAGAGACTAT-1,◯ -AAACCTGAGCTACCGC-1,◯ -AAACCTGAGTGGGTTG-1,◯ -AAACCTGCACGTAAGG-1,◯ -AAACCTGCAGATTGCT-1,◯ -AAACCTGCATAAAGGT-1,◯ -AAACCTGGTAGCTAAA-1,◯ -AAACCTGGTTGCCTCT-1,◯ -AAACCTGTCATGTAGC-1,◯ -AAACCTGTCCAACCAA-1,◯ -AAACCTGTCCTTGCCA-1,◯ -AAACCTGTCGCCAGCA-1,◯ -AAACCTGTCGTAGGTT-1,◯ -AAACGGGAGAAGAAGC-1,◯ -AAACGGGAGAGAACAG-1,◯ -AAACGGGCAATGCCAT-1,◯ -AAACGGGCATCGTCGG-1,◯ -AAACGGGGTATCGCAT-1,◯ -AAACGGGTCAATACCG-1,◯ -AAACGGGTCCACGCAG-1,◯ -AAACGGGTCCGCTGTT-1,◯ -AAAGATGAGGCGCTCT-1,◯ -AAAGATGAGTGTCTCA-1,◯ -AAAGATGCAATCGGTT-1,◯ -AAAGATGCACTTCGAA-1,◯ -AAAGATGCAGCGAACA-1,◯ -AAAGATGCAGCGTAAG-1,◯ -AAAGATGCATACGCTA-1,◯ -AAAGATGCATTAGCCA-1,◯ -AAAGATGGTAGCGTAG-1,◯ -AAAGATGGTGATGTGG-1,◯ -AAAGCAAAGCAGCCTC-1,◯ -AAAGCAAAGGCACATG-1,◯ -AAAGCAAAGGGTATCG-1,◯ -AAAGCAACACCCTATC-1,◯ -AAAGCAACACGGATAG-1,◯ -AAAGCAACACTTCGAA-1,◯ -AAAGCAACAGCATGAG-1,◯ -AAAGCAAGTATCTGCA-1,◯ -AAAGCAATCTTGACGA-1,◯ -AAAGCAATCTTTACGT-1,◯ -AAAGTAGAGAAACCGC-1,◯ -AAAGTAGAGACCACGA-1,◯ -AAAGTAGAGTAACCCT-1,◯ -AAAGTAGGTACCATCA-1,◯ -AAAGTAGTCACATGCA-1,◯ -AAAGTAGTCAGCCTAA-1,◯ -AAAGTAGTCCGCGGTA-1,◯ -AAAGTAGTCTAACCGA-1,◯ -AAATGCCAGGACAGCT-1,◯ -AAATGCCAGGCTAGCA-1,◯ -AAATGCCAGTACGACG-1,◯ -AAATGCCCAACACGCC-1,◯ -AAATGCCCAATGGAGC-1,◯ -AAATGCCCACCAGATT-1,◯ -AAATGCCCACTGAAGG-1,◯ -AAATGCCCAGTGAGTG-1,◯ -AAATGCCGTAAGGATT-1,◯ -AAATGCCGTCTGCGGT-1,◯ -AAATGCCGTGCCTGTG-1,◯ -AAATGCCGTTCAGCGC-1,◯ -AAATGCCTCCGCGCAA-1,◯ -AAATGCCTCCGCGGTA-1,◯ -AACACGTAGAGACTTA-1,◯ -AACACGTAGGTACTCT-1,◯ -AACACGTAGTTGAGAT-1,◯ -AACACGTCAATTCCTT-1,◯ -AACACGTCACGAAAGC-1,◯ -AACACGTGTCATATCG-1,◯ -AACACGTGTGTTTGGT-1,◯ -AACCATGAGGGTCGAT-1,◯ -AACCATGAGTCATCCA-1,◯ -AACCATGCAAAGAATC-1,◯ -AACCATGCACGCGAAA-1,◯ -AACCATGGTAAACACA-1,◯ 
-AACCATGGTCGTTGTA-1,◯ -AACCATGGTGGTCCGT-1,◯ -AACCATGTCAAACGGG-1,◯ -AACCATGTCCTCTAGC-1,◯ -AACCATGTCCTGCAGG-1,◯ -AACCATGTCGAACGGA-1,◯ -AACCGCGAGCGATAGC-1,◯ -AACCGCGAGGACCACA-1,◯ -AACCGCGCAAGCGATG-1,◯ -AACCGCGCATAAAGGT-1,◯ -AACCGCGCATACGCTA-1,◯ -AACCGCGCATGAGCGA-1,◯ -AACCGCGCATTCTCAT-1,◯ -AACCGCGGTGTCGCTG-1,◯ -AACCGCGGTTATCACG-1,◯ -AACCGCGTCAACACAC-1,◯ -AACCGCGTCCTCAATT-1,◯ -AACGTTGAGATCCGAG-1,◯ -AACGTTGAGCAAATCA-1,◯ -AACGTTGAGCACACAG-1,◯ -AACGTTGAGTTCGCGC-1,◯ -AACGTTGAGTTTGCGT-1,◯ -AACGTTGGTACGACCC-1,◯ -AACGTTGGTATGCTTG-1,◯ -AACGTTGGTCAAGCGA-1,◯ -AACGTTGGTGACAAAT-1,◯ -AACGTTGGTGCATCTA-1,◯ -AACGTTGTCCTTTACA-1,◯ -AACGTTGTCGAATCCA-1,◯ -AACGTTGTCTCAAACG-1,◯ -AACTCAGAGGAGTCTG-1,◯ -AACTCAGCAGGCGATA-1,◯ -AACTCAGGTCGCCATG-1,◯ -AACTCAGTCCAGTATG-1,◯ -AACTCAGTCTGTTTGT-1,◯ -AACTCCCAGCTAAACA-1,◯ -AACTCCCAGTACACCT-1,◯ -AACTCCCAGTCTCCTC-1,◯ -AACTCCCAGTGAACAT-1,◯ -AACTCCCCACGGTTTA-1,◯ -AACTCCCGTACTCTCC-1,◯ -AACTCCCGTAGGACAC-1,◯ -AACTCCCGTCTCTCTG-1,◯ -AACTCCCTCACTTCAT-1,◯ -AACTCCCTCCTCGCAT-1,◯ -AACTCCCTCGTAGGTT-1,◯ -AACTCCCTCTGCAGTA-1,◯ -AACTCTTCAAGCCATT-1,◯ -AACTCTTCAGTAACGG-1,◯ -AACTCTTCATCTCGCT-1,◯ -AACTCTTGTATTACCG-1,◯ -AACTCTTGTGCTGTAT-1,◯ -AACTCTTGTTAAAGAC-1,◯ -AACTCTTGTTCGCTAA-1,◯ -AACTCTTGTTTAGCTG-1,◯ -AACTCTTTCCACGTGG-1,◯ -AACTCTTTCCTTGACC-1,◯ -AACTCTTTCTCGCATC-1,◯ -AACTGGTAGATCCGAG-1,◯ -AACTGGTAGTCGATAA-1,◯ -AACTGGTCAACTGCGC-1,◯ -AACTGGTGTAGGCATG-1,◯ -AACTGGTGTCGAACAG-1,◯ -AACTGGTGTTCGTGAT-1,◯ -AACTGGTTCATGTGGT-1,◯ -AACTGGTTCGGACAAG-1,◯ -AACTGGTTCGTTGCCT-1,◯ -AACTTTCAGACAGAGA-1,◯ -AACTTTCAGCGTGAAC-1,◯ -AACTTTCAGGCTCATT-1,◯ -AACTTTCCACTGAAGG-1,◯ -AACTTTCCACTTCTGC-1,◯ -AACTTTCCAGCTTAAC-1,◯ -AACTTTCCATGCAATC-1,◯ -AACTTTCGTATTACCG-1,◯ -AACTTTCGTCATATGC-1,◯ -AACTTTCGTCCGACGT-1,◯ -AACTTTCGTGTTGGGA-1,◯ -AACTTTCTCCAAACAC-1,◯ -AACTTTCTCCTTTACA-1,◯ -AACTTTCTCGTGTAGT-1,◯ -AAGACCTCAACAACCT-1,◯ -AAGACCTCAGCTATTG-1,◯ -AAGACCTCATCCAACA-1,◯ -AAGACCTCATGTCCTC-1,◯ -AAGACCTGTCAAACTC-1,◯ -AAGACCTGTCGCGGTT-1,◯ -AAGACCTGTCTAAACC-1,◯ -AAGACCTGTGCAACTT-1,◯ -AAGACCTGTGTAAGTA-1,◯ 
-AAGACCTGTGTCCTCT-1,◯ -AAGACCTGTTCATGGT-1,◯ -AAGACCTTCAGTTCGA-1,◯ -AAGACCTTCCTTTCTC-1,◯ -AAGACCTTCTAACCGA-1,◯ -AAGACCTTCTGCTTGC-1,◯ -AAGACCTTCTTTACGT-1,◯ -AAGCCGCAGGACCACA-1,◯ -AAGCCGCAGGTGGGTT-1,◯ -AAGCCGCCACCGATAT-1,◯ -AAGCCGCCATAAAGGT-1,◯ -AAGCCGCGTAATCACC-1,◯ -AAGCCGCGTACATCCA-1,◯ -AAGCCGCGTCTTGATG-1,◯ -AAGCCGCGTTTGGCGC-1,◯ -AAGCCGCTCAAGATCC-1,◯ -AAGCCGCTCCAAACTG-1,◯ -AAGCCGCTCCTCATTA-1,◯ -AAGCCGCTCGGCTACG-1,◯ -AAGCCGCTCTGTCTAT-1,◯ -AAGGAGCAGCCTATGT-1,◯ -AAGGAGCAGCTACCGC-1,◯ -AAGGAGCAGTAAGTAC-1,◯ -AAGGAGCCATCTATGG-1,◯ -AAGGAGCGTAACGCGA-1,◯ -AAGGAGCGTCTAGCGC-1,◯ -AAGGAGCGTGAACCTT-1,◯ -AAGGAGCGTGCAACTT-1,◯ -AAGGAGCGTTAGGGTG-1,◯ -AAGGAGCTCACCACCT-1,◯ -AAGGAGCTCACGCATA-1,◯ -AAGGAGCTCATAAAGG-1,◯ -AAGGAGCTCCTCGCAT-1,◯ -AAGGAGCTCCTTTACA-1,◯ -AAGGAGCTCTGATTCT-1,◯ -AAGGAGCTCTTGCATT-1,◯ -AAGGCAGAGGCAATTA-1,◯ -AAGGCAGAGGGCTTGA-1,◯ -AAGGCAGCACAGGAGT-1,◯ -AAGGCAGCATGGGAAC-1,◯ -AAGGCAGGTCGGCTCA-1,◯ -AAGGCAGGTCTAAAGA-1,◯ -AAGGCAGGTCTCACCT-1,◯ -AAGGCAGGTTGTGGAG-1,◯ -AAGGCAGTCAACACAC-1,◯ -AAGGCAGTCTACTATC-1,◯ -AAGGTTCAGACACTAA-1,◯ -AAGGTTCAGGCCCGTT-1,◯ -AAGGTTCAGGCTATCT-1,◯ -AAGGTTCGTTAAGAAC-1,◯ -AAGGTTCTCATAGCAC-1,◯ -AAGGTTCTCGCCCTTA-1,◯ -AAGTCTGCAAACAACA-1,◯ -AAGTCTGCAAGCCATT-1,◯ -AAGTCTGCACATGGGA-1,◯ -AAGTCTGCAGCGTAAG-1,◯ -AAGTCTGCATATACCG-1,◯ -AAGTCTGGTAACGCGA-1,◯ -AAGTCTGGTACAGACG-1,◯ -AAGTCTGGTACAGTGG-1,◯ -AAGTCTGGTAGCGCAA-1,◯ -AAGTCTGGTCCAACTA-1,◯ -AAGTCTGGTGTAACGG-1,◯ -AAGTCTGGTGTTGGGA-1,◯ -AAGTCTGTCACCACCT-1,◯ -AAGTCTGTCGACAGCC-1,◯ -AAGTCTGTCGTACCGG-1,◯ -AATCCAGAGGTGATAT-1,◯ -AATCCAGAGTAGGCCA-1,◯ -AATCCAGCACCTCGGA-1,◯ -AATCCAGGTAAGTGGC-1,◯ -AATCCAGGTAGGCTGA-1,◯ -AATCCAGGTCCGTGAC-1,◯ -AATCCAGTCAGGCGAA-1,◯ -AATCCAGTCCTGTACC-1,◯ -AATCGGTAGACTAGAT-1,◯ -AATCGGTAGCCTCGTG-1,◯ -AATCGGTAGTGATCGG-1,◯ -AATCGGTCACCACGTG-1,◯ -AATCGGTCAGGGAGAG-1,◯ -AATCGGTCATGTTCCC-1,◯ -AATCGGTGTATCAGTC-1,◯ -AATCGGTGTCTTCTCG-1,◯ -AATCGGTGTGAACCTT-1,◯ -AATCGGTTCAAGGTAA-1,◯ -AATCGGTTCCAGAAGG-1,◯ -AATCGGTTCCTGCTTG-1,◯ -AATCGGTTCTTACCTA-1,◯ -ACACCAAAGGGTCTCC-1,◯ -ACACCAAAGGTAGCTG-1,◯ 
-ACACCAAAGTGTTGAA-1,◯ -ACACCAACACGCATCG-1,◯ -ACACCAACACTCGACG-1,◯ -ACACCAACAGGATTGG-1,◯ -ACACCAACATATGAGA-1,◯ -ACACCAACATGTTCCC-1,◯ -ACACCAAGTCGCCATG-1,◯ -ACACCAAGTCGTCTTC-1,◯ -ACACCAATCAACACAC-1,◯ -ACACCAATCAACACCA-1,◯ -ACACCAATCAGTGCAT-1,◯ -ACACCAATCGCAAACT-1,◯ -ACACCCTAGACCACGA-1,◯ -ACACCCTAGCTCCCAG-1,◯ -ACACCCTCACAACGTT-1,◯ -ACACCCTCACAGGAGT-1,◯ -ACACCCTCAGAAGCAC-1,◯ -ACACCCTCAGACGCCT-1,◯ -ACACCCTCATCCGGGT-1,◯ -ACACCCTTCACAGTAC-1,◯ -ACACCCTTCACCCGAG-1,◯ -ACACCCTTCAGCGACC-1,◯ -ACACCCTTCAGCTCGG-1,◯ -ACACCGGAGAATGTTG-1,◯ -ACACCGGAGATCTGAA-1,◯ -ACACCGGAGCCAGTAG-1,◯ -ACACCGGAGTGGGCTA-1,◯ -ACACCGGCAAAGTCAA-1,◯ -ACACCGGCAATAACGA-1,◯ -ACACCGGGTAAGTAGT-1,◯ -ACACCGGGTACCCAAT-1,◯ -ACACCGGGTCTAGTCA-1,◯ -ACACCGGGTGTAAGTA-1,◯ -ACACCGGGTTGATTGC-1,◯ -ACACCGGTCGTTACGA-1,◯ -ACACTGAAGAGTGACC-1,◯ -ACACTGAAGTGGACGT-1,◯ -ACACTGACAATCACAC-1,◯ -ACACTGACAGCCTTGG-1,◯ -ACACTGAGTAGAAAGG-1,◯ -ACACTGAGTCTCCATC-1,◯ -ACACTGAGTCTCTCGT-1,◯ -ACACTGAGTGATAAGT-1,◯ -ACACTGAGTGATGTCT-1,◯ -ACACTGAGTGCAGGTA-1,◯ -ACACTGAGTTACGTCA-1,◯ -ACACTGAGTTATGCGT-1,◯ -ACACTGATCGGAAACG-1,◯ -ACACTGATCGTCCAGG-1,◯ -ACAGCCGAGACCCACC-1,◯ -ACAGCCGAGCAATATG-1,◯ -ACAGCCGCAAGAGGCT-1,◯ -ACAGCCGCACAAGCCC-1,◯ -ACAGCCGGTATGAAAC-1,◯ -ACAGCCGGTCCGAACC-1,◯ -ACAGCCGGTCGAGTTT-1,◯ -ACAGCCGGTCTAGTGT-1,◯ -ACAGCCGGTTTGGGCC-1,◯ -ACAGCCGTCACCTTAT-1,◯ -ACAGCCGTCAGAGCTT-1,◯ -ACAGCCGTCAGCACAT-1,◯ -ACAGCCGTCCAGAAGG-1,◯ -ACAGCCGTCGGCGCTA-1,◯ -ACAGCCGTCTGAAAGA-1,◯ -ACAGCCGTCTGCAAGT-1,◯ -ACAGCCGTCTGTCAAG-1,◯ -ACAGCTAAGATGTGTA-1,◯ -ACAGCTACACAGAGGT-1,◯ -ACAGCTACAGCGTCCA-1,◯ -ACAGCTACAGGAATCG-1,◯ -ACAGCTACATGCTAGT-1,◯ -ACAGCTAGTTCGCTAA-1,◯ -ACATACGAGCTAAGAT-1,◯ -ACATACGAGGAATGGA-1,◯ -ACATACGCACAGCGTC-1,◯ -ACATACGCAGCTGTAT-1,◯ -ACATACGCATCGGAAG-1,◯ -ACATACGCATTAGGCT-1,◯ -ACATACGGTCCCGACA-1,◯ -ACATACGTCAAGATCC-1,◯ -ACATACGTCACTTACT-1,◯ -ACATACGTCAGAGGTG-1,◯ -ACATACGTCCTAGTGA-1,◯ -ACATACGTCTCGAGTA-1,◯ -ACATCAGAGAAACGAG-1,◯ -ACATCAGAGACAGGCT-1,◯ -ACATCAGAGCGTTGCC-1,◯ -ACATCAGCACCCAGTG-1,◯ -ACATCAGCATAGTAAG-1,◯ -ACATCAGGTAAGAGGA-1,◯ 
-ACATCAGGTCTCCCTA-1,◯ -ACATCAGGTCTCGTTC-1,◯ -ACATCAGGTGCTCTTC-1,◯ -ACATCAGGTTACGACT-1,◯ -ACATCAGGTTTGGCGC-1,◯ -ACATCAGTCAACACAC-1,◯ -ACATCAGTCTGCTGCT-1,◯ -ACATCAGTCTTATCTG-1,◯ -ACATGGTAGCGTAGTG-1,◯ -ACATGGTAGTCTTGCA-1,◯ -ACATGGTCAGCTGCAC-1,◯ -ACATGGTCATTAGCCA-1,◯ -ACATGGTGTAGATTAG-1,◯ -ACATGGTGTCTAACGT-1,◯ -ACATGGTGTGTTCGAT-1,◯ -ACATGGTGTTTGTTTC-1,◯ -ACATGGTTCACCCGAG-1,◯ -ACATGGTTCACCGGGT-1,◯ -ACATGGTTCAGTGCAT-1,◯ -ACATGGTTCAGTGTTG-1,◯ -ACCAGTAAGACACGAC-1,◯ -ACCAGTAAGACAGGCT-1,◯ -ACCAGTAAGACTAGAT-1,◯ -ACCAGTAAGACTGGGT-1,◯ -ACCAGTAAGCCCAATT-1,◯ -ACCAGTAAGCCGTCGT-1,◯ -ACCAGTAAGGAGTAGA-1,◯ -ACCAGTAAGGCGTACA-1,◯ -ACCAGTAAGTACGTAA-1,◯ -ACCAGTACAACACGCC-1,◯ -ACCAGTACAAGTAGTA-1,◯ -ACCAGTACACGGATAG-1,◯ -ACCAGTACAGGATTGG-1,◯ -ACCAGTACATCACAAC-1,◯ -ACCAGTACATGGTCAT-1,◯ -ACCAGTAGTCAAAGAT-1,◯ -ACCAGTAGTGAAAGAG-1,◯ -ACCAGTAGTTTCGCTC-1,◯ -ACCAGTATCACGACTA-1,◯ -ACCAGTATCGTACGGC-1,◯ -ACCCACTAGCGATAGC-1,◯ -ACCCACTAGCGTGTCC-1,◯ -ACCCACTAGTCTCAAC-1,◯ -ACCCACTCAATCAGAA-1,◯ -ACCCACTTCGACGGAA-1,◯ -ACCCACTTCGGAGCAA-1,◯ -ACCCACTTCGTCTGCT-1,◯ -ACCGTAAAGGATGGTC-1,◯ -ACCGTAAAGGCGCTCT-1,◯ -ACCGTAAAGTGGTCCC-1,◯ -ACCGTAACAAGCTGGA-1,◯ -ACCGTAACACCTATCC-1,◯ -ACCGTAAGTCGAAAGC-1,◯ -ACCGTAAGTTCCACTC-1,◯ -ACCGTAATCGCCATAA-1,◯ -ACCTTTAAGAAGGTGA-1,◯ -ACCTTTAAGAGGGCTT-1,◯ -ACCTTTAAGATGCCTT-1,◯ -ACCTTTAAGCTAGGCA-1,◯ -ACCTTTAAGGCTACGA-1,◯ -ACCTTTACAAATTGCC-1,◯ -ACCTTTACAGATAATG-1,◯ -ACCTTTAGTGACGGTA-1,◯ -ACCTTTAGTGCTGTAT-1,◯ -ACCTTTATCGCCATAA-1,◯ -ACGAGCCAGGGCTTCC-1,◯ -ACGAGCCCATGCGCAC-1,◯ -ACGAGCCGTACTTGAC-1,◯ -ACGAGCCTCCTAGAAC-1,◯ -ACGAGCCTCGTCCGTT-1,◯ -ACGAGGAAGAGTTGGC-1,◯ -ACGAGGAAGGCAAAGA-1,◯ -ACGAGGAAGGCCCTTG-1,◯ -ACGAGGAAGTACGATA-1,◯ -ACGAGGAAGTACGTAA-1,◯ -ACGAGGACACCAGGCT-1,◯ -ACGAGGACATGTTGAC-1,◯ -ACGAGGAGTAGCTCCG-1,◯ -ACGAGGAGTTCCAACA-1,◯ -ACGAGGATCACCACCT-1,◯ -ACGAGGATCACTTATC-1,◯ -ACGATACAGATCTGCT-1,◯ -ACGATACAGTGGTAAT-1,◯ -ACGATACAGTGTACGG-1,◯ -ACGATACCACATTAGC-1,◯ -ACGATACCACCCATGG-1,◯ -ACGATACCAGATCTGT-1,◯ -ACGATACCAGGAATGC-1,◯ -ACGATACGTACCGAGA-1,◯ -ACGATACGTAGCGATG-1,◯ 
-ACGATACGTCTAGAGG-1,◯ -ACGATACTCAGTGCAT-1,◯ -ACGATACTCAGTTCGA-1,◯ -ACGATACTCTGATTCT-1,◯ -ACGATGTAGCTACCGC-1,◯ -ACGATGTAGCTAGGCA-1,◯ -ACGATGTCAAACGTGG-1,◯ -ACGATGTGTCGTGGCT-1,◯ -ACGATGTGTGTGTGCC-1,◯ -ACGATGTGTTCCATGA-1,◯ -ACGATGTTCAGGTAAA-1,◯ -ACGATGTTCCCAGGTG-1,◯ -ACGATGTTCGCGGATC-1,◯ -ACGATGTTCTGCCCTA-1,◯ -ACGCAGCAGAACTCGG-1,◯ -ACGCAGCAGAATGTTG-1,◯ -ACGCAGCAGCTGATAA-1,◯ -ACGCAGCAGGCTATCT-1,◯ -ACGCAGCAGGGAAACA-1,◯ -ACGCAGCCAAGAAAGG-1,◯ -ACGCAGCCACAGTCGC-1,◯ -ACGCAGCCATCAGTCA-1,◯ -ACGCAGCCATCTACGA-1,◯ -ACGCAGCGTCCATGAT-1,◯ -ACGCAGCGTCGGATCC-1,◯ -ACGCAGCGTGCCTGGT-1,◯ -ACGCAGCGTTGCCTCT-1,◯ -ACGCAGCTCAATACCG-1,◯ -ACGCAGCTCGTACGGC-1,◯ -ACGCAGCTCGTCACGG-1,◯ -ACGCAGCTCGTGTAGT-1,◯ -ACGCCAGAGCTGAACG-1,◯ -ACGCCAGAGGACCACA-1,◯ -ACGCCAGAGGCCATAG-1,◯ -ACGCCAGAGTGCAAGC-1,◯ -ACGCCAGAGTTAAGTG-1,◯ -ACGCCAGCACAAGCCC-1,◯ -ACGCCAGCAGGACCCT-1,◯ -ACGCCAGCATACGCTA-1,◯ -ACGCCAGGTCGTGGCT-1,◯ -ACGCCAGTCCGAACGC-1,◯ -ACGCCAGTCCGTAGTA-1,◯ -ACGCCAGTCCTATGTT-1,◯ -ACGCCAGTCGAACGGA-1,◯ -ACGCCGAAGAAACCGC-1,◯ -ACGCCGAAGTCAAGGC-1,◯ -ACGCCGACAGCCTGTG-1,◯ -ACGCCGACATGTAAGA-1,◯ -ACGCCGAGTAGGCTGA-1,◯ -ACGCCGATCAACGGGA-1,◯ -ACGCCGATCAAGAAGT-1,◯ -ACGCCGATCAGCTCGG-1,◯ -ACGCCGATCATAACCG-1,◯ -ACGCCGATCGGTTCGG-1,◯ -ACGGAGACAACACGCC-1,◯ -ACGGAGACAGTATGCT-1,◯ -ACGGAGACATGACGGA-1,◯ -ACGGAGAGTTCGTTGA-1,◯ -ACGGAGATCAACGGCC-1,◯ -ACGGAGATCACTCCTG-1,◯ -ACGGAGATCGCATGGC-1,◯ -ACGGAGATCGTGGGAA-1,◯ -ACGGCCAAGACTACAA-1,◯ -ACGGCCACAAGTAGTA-1,◯ -ACGGCCACACTTCGAA-1,◯ -ACGGCCACATTGAGCT-1,◯ -ACGGCCAGTAAGTGGC-1,◯ -ACGGCCAGTCTTCAAG-1,◯ -ACGGCCAGTGACGGTA-1,◯ -ACGGCCATCTAGAGTC-1,◯ -ACGGCCATCTTGACGA-1,◯ -ACGGGCTAGGCGTACA-1,◯ -ACGGGCTAGTCGATAA-1,◯ -ACGGGCTCAACACCCG-1,◯ -ACGGGCTCAGCTATTG-1,◯ -ACGGGCTCATCAGTCA-1,◯ -ACGGGCTGTACATCCA-1,◯ -ACGGGCTGTCATACTG-1,◯ -ACGGGCTGTCTGCCAG-1,◯ -ACGGGCTGTTCCACAA-1,◯ -ACGGGCTGTTCTGAAC-1,◯ -ACGGGCTTCTTGTTTG-1,◯ -ACGGGTCAGATTACCC-1,◯ -ACGGGTCAGCAACGGT-1,◯ -ACGGGTCAGCTCCCAG-1,◯ -ACGGGTCAGGATATAC-1,◯ -ACGGGTCCAATGGATA-1,◯ -ACGGGTCCACATTTCT-1,◯ -ACGGGTCGTATATGGA-1,◯ -ACGGGTCGTTACGACT-1,◯ 
-ACGGGTCGTTGGTTTG-1,◯ -ACGGGTCTCCGTTGTC-1,◯ -ACGGGTCTCTGTGCAA-1,◯ -ACGTCAACACGAAACG-1,◯ -ACGTCAACAGGGTATG-1,◯ -ACGTCAACAGGTCCAC-1,◯ -ACGTCAATCTCCTATA-1,◯ -ACGTCAATCTCGCTTG-1,◯ -ACGTCAATCTGGGCCA-1,◯ -ACTATCTAGTTCGCGC-1,◯ -ACTATCTCAGATGGGT-1,◯ -ACTATCTCAGGGCATA-1,◯ -ACTATCTCATGCTAGT-1,◯ -ACTATCTCATTGTGCA-1,◯ -ACTATCTGTCTGCCAG-1,◯ -ACTATCTGTGCAACTT-1,◯ -ACTATCTTCACGAAGG-1,◯ -ACTATCTTCAGCCTAA-1,◯ -ACTATCTTCATACGGT-1,◯ -ACTATCTTCGCTTAGA-1,◯ -ACTATCTTCTACTCAT-1,◯ -ACTGAACAGAATCTCC-1,◯ -ACTGAACAGCTCTCGG-1,◯ -ACTGAACAGCTGGAAC-1,◯ -ACTGAACAGGCTAGGT-1,◯ -ACTGAACAGTGTTTGC-1,◯ -ACTGAACCAGCTCCGA-1,◯ -ACTGAACCAGTGAGTG-1,◯ -ACTGAACGTGAAATCA-1,◯ -ACTGAACGTGTAACGG-1,◯ -ACTGAACGTGTAATGA-1,◯ -ACTGAACGTTTAAGCC-1,◯ -ACTGAACTCAAGCCTA-1,◯ -ACTGAACTCCGAACGC-1,◯ -ACTGAGTAGCCCGAAA-1,◯ -ACTGAGTAGGCTCATT-1,◯ -ACTGAGTCAACACCCG-1,◯ -ACTGAGTTCCTGCAGG-1,◯ -ACTGAGTTCTTCGGTC-1,◯ -ACTGATGAGACAAGCC-1,◯ -ACTGATGAGCAATCTC-1,◯ -ACTGATGAGCCCTAAT-1,◯ -ACTGATGCACCATGTA-1,◯ -ACTGATGCAGCAGTTT-1,◯ -ACTGATGCATCAGTAC-1,◯ -ACTGATGCATTCCTGC-1,◯ -ACTGATGGTGCAACTT-1,◯ -ACTGATGTCAGCCTAA-1,◯ -ACTGATGTCCACGTTC-1,◯ -ACTGATGTCCGAAGAG-1,◯ -ACTGCTCAGAGACTAT-1,◯ -ACTGCTCAGCACCGTC-1,◯ -ACTGCTCAGTACGTTC-1,◯ -ACTGCTCCACGAAATA-1,◯ -ACTGCTCCACTTAAGC-1,◯ -ACTGCTCCATTGGGCC-1,◯ -ACTGCTCGTAGCAAAT-1,◯ -ACTGCTCGTAGTAGTA-1,◯ -ACTGCTCGTGAGTATA-1,◯ -ACTGCTCGTGCCTGGT-1,◯ -ACTGCTCGTTTAAGCC-1,◯ -ACTGCTCTCAGAGCTT-1,◯ -ACTGCTCTCGCGGATC-1,◯ -ACTGTCCAGATCTGAA-1,◯ -ACTGTCCAGGTGCACA-1,◯ -ACTGTCCAGGTGCTTT-1,◯ -ACTGTCCCAAGCCATT-1,◯ -ACTGTCCCATGCGCAC-1,◯ -ACTGTCCCATTAGCCA-1,◯ -ACTGTCCTCAACACAC-1,◯ -ACTGTCCTCCTTTCTC-1,◯ -ACTGTCCTCGACCAGC-1,◯ -ACTGTCCTCTGGCGAC-1,◯ -ACTTACTAGACTGTAA-1,◯ -ACTTACTAGATGGCGT-1,◯ -ACTTACTAGGAGCGAG-1,◯ -ACTTACTAGTTGCAGG-1,◯ -ACTTACTGTTAGGGTG-1,◯ -ACTTACTGTTGCGTTA-1,◯ -ACTTACTTCATTCACT-1,◯ -ACTTACTTCGTTTGCC-1,◯ -ACTTGTTAGCTAGTGG-1,◯ -ACTTGTTCAATGGATA-1,◯ -ACTTGTTCACCATGTA-1,◯ -ACTTGTTCACGTCTCT-1,◯ -ACTTGTTGTATAGTAG-1,◯ -ACTTGTTGTCTCCACT-1,◯ -ACTTGTTGTCTTTCAT-1,◯ -ACTTGTTTCCTGTAGA-1,◯ -ACTTTCAAGGACAGAA-1,◯ 
-ACTTTCAAGTGTACTC-1,◯ -ACTTTCACAAGCCTAT-1,◯ -ACTTTCACAGCGAACA-1,◯ -ACTTTCAGTGCGAAAC-1,◯ -ACTTTCAGTTATGCGT-1,◯ -ACTTTCAGTTTAAGCC-1,◯ -ACTTTCATCATAAAGG-1,◯ -ACTTTCATCTGCCCTA-1,◯ -AGAATAGAGTCGCCGT-1,◯ -AGAATAGCAAAGTGCG-1,◯ -AGAATAGCAAGCGTAG-1,◯ -AGAATAGCAGCTGCAC-1,◯ -AGAATAGCATTTGCCC-1,◯ -AGAATAGCATTTGCTT-1,◯ -AGAATAGGTAGGCTGA-1,◯ -AGAATAGGTCTTGTCC-1,◯ -AGAATAGTCACATAGC-1,◯ -AGAATAGTCCTAGAAC-1,◯ -AGAATAGTCTGTTGAG-1,◯ -AGACGTTAGACTAGGC-1,◯ -AGACGTTAGAGTAAGG-1,◯ -AGACGTTAGGTACTCT-1,◯ -AGACGTTAGTGGGATC-1,◯ -AGACGTTCAACTGGCC-1,◯ -AGACGTTCAAGCGTAG-1,◯ -AGACGTTCAATGTAAG-1,◯ -AGACGTTCAGTAAGCG-1,◯ -AGACGTTCATGCAACT-1,◯ -AGACGTTCATGCTGGC-1,◯ -AGACGTTCATTAGCCA-1,◯ -AGACGTTGTCATTAGC-1,◯ -AGACGTTGTGACCAAG-1,◯ -AGACGTTGTTCGCGAC-1,◯ -AGACGTTTCTGTCAAG-1,◯ -AGAGCGAAGCCACCTG-1,◯ -AGAGCGAAGTACACCT-1,◯ -AGAGCGAAGTGCTGCC-1,◯ -AGAGCGACACATGGGA-1,◯ -AGAGCGACACCATCCT-1,◯ -AGAGCGACACGGTGTC-1,◯ -AGAGCGACACGGTTTA-1,◯ -AGAGCGACATCATCCC-1,◯ -AGAGCGACATGGTCAT-1,◯ -AGAGCGAGTAGAAAGG-1,◯ -AGAGCGAGTAGGACAC-1,◯ -AGAGCGAGTGAGTGAC-1,◯ -AGAGCGATCTGTTGAG-1,◯ -AGAGCTTAGAGATGAG-1,◯ -AGAGCTTAGATGCCTT-1,◯ -AGAGCTTAGCTGCCCA-1,◯ -AGAGCTTCAGGGTACA-1,◯ -AGAGCTTGTAACGCGA-1,◯ -AGAGCTTGTCGCGGTT-1,◯ -AGAGCTTGTGCGATAG-1,◯ -AGAGCTTGTTAAGGGC-1,◯ -AGAGCTTGTTCAGCGC-1,◯ -AGAGCTTGTTTGTGTG-1,◯ -AGAGCTTTCAACGGGA-1,◯ -AGAGCTTTCATATCGG-1,◯ -AGAGCTTTCGGAGGTA-1,◯ -AGAGCTTTCGTTTGCC-1,◯ -AGAGTGGAGTAGGTGC-1,◯ -AGAGTGGCACGCGAAA-1,◯ -AGAGTGGCACTGTGTA-1,◯ -AGAGTGGCAGCCTGTG-1,◯ -AGAGTGGCAGTGACAG-1,◯ -AGAGTGGCATTCTCAT-1,◯ -AGAGTGGGTCTAAAGA-1,◯ -AGAGTGGTCAGTTCGA-1,◯ -AGAGTGGTCATAACCG-1,◯ -AGAGTGGTCGAGAACG-1,◯ -AGAGTGGTCGCATGAT-1,◯ -AGATCTGAGATCTGAA-1,◯ -AGATCTGAGGCCCTCA-1,◯ -AGATCTGCAGCTTAAC-1,◯ -AGATCTGCATCACCCT-1,◯ -AGATCTGGTATATCCG-1,◯ -AGATCTGGTCATTAGC-1,◯ -AGATCTGTCACCCGAG-1,◯ -AGATCTGTCAGCGATT-1,◯ -AGATCTGTCAGTTCGA-1,◯ -AGATCTGTCATCGGAT-1,◯ -AGATCTGTCCTATGTT-1,◯ -AGATCTGTCTGATTCT-1,◯ -AGATTGCAGCGTCAAG-1,◯ -AGATTGCAGGCTACGA-1,◯ -AGATTGCCAATGTAAG-1,◯ -AGATTGCCACATCCAA-1,◯ -AGATTGCCAGACGCTC-1,◯ -AGATTGCCAGCTTCGG-1,◯ 
-AGATTGCGTATTCTCT-1,◯ -AGATTGCTCAGAGGTG-1,◯ -AGATTGCTCCCGGATG-1,◯ -AGATTGCTCGGATGTT-1,◯ -AGATTGCTCTGGTATG-1,◯ -AGCAGCCAGTAGATGT-1,◯ -AGCAGCCCAGCCTTGG-1,◯ -AGCAGCCCAGTTCATG-1,◯ -AGCAGCCCATCCTAGA-1,◯ -AGCAGCCGTACATGTC-1,◯ -AGCAGCCGTATGGTTC-1,◯ -AGCAGCCGTCTGATTG-1,◯ -AGCAGCCGTGGTGTAG-1,◯ -AGCAGCCGTTCCACAA-1,◯ -AGCAGCCTCCACGAAT-1,◯ -AGCAGCCTCCTTTACA-1,◯ -AGCAGCCTCGAATGCT-1,◯ -AGCATACAGACTGTAA-1,◯ -AGCATACAGACTTGAA-1,◯ -AGCATACAGGGTATCG-1,◯ -AGCATACCATTCGACA-1,◯ -AGCATACGTCCGAATT-1,◯ -AGCATACGTTTCCACC-1,◯ -AGCATACTCCACGACG-1,◯ -AGCATACTCTCTGAGA-1,◯ -AGCCTAACAAACAACA-1,◯ -AGCCTAACAAGTCTGT-1,◯ -AGCCTAACACGGCCAT-1,◯ -AGCCTAACACTTAAGC-1,◯ -AGCCTAACATCGACGC-1,◯ -AGCCTAAGTCGATTGT-1,◯ -AGCCTAATCCGCAAGC-1,◯ -AGCCTAATCCTGTACC-1,◯ -AGCCTAATCGGACAAG-1,◯ -AGCGGTCAGGACTGGT-1,◯ -AGCGGTCAGTTTGCGT-1,◯ -AGCGGTCTCACCCGAG-1,◯ -AGCGGTCTCAGCACAT-1,◯ -AGCGGTCTCGAATGCT-1,◯ -AGCGTATAGACCTAGG-1,◯ -AGCGTATAGGGTTCCC-1,◯ -AGCGTATAGTCATCCA-1,◯ -AGCGTATCAATGGAGC-1,◯ -AGCGTATCACTCTGTC-1,◯ -AGCGTATCAGGTGCCT-1,◯ -AGCGTATGTCCCTTGT-1,◯ -AGCGTATGTGGGTCAA-1,◯ -AGCGTATTCAAGGCTT-1,◯ -AGCGTATTCAATACCG-1,◯ -AGCGTATTCATCGGAT-1,◯ -AGCGTATTCTACTCAT-1,◯ -AGCGTCGAGACAAGCC-1,◯ -AGCGTCGAGAGACTTA-1,◯ -AGCGTCGAGAGTACCG-1,◯ -AGCGTCGAGGCAAAGA-1,◯ -AGCGTCGCAGCGTAAG-1,◯ -AGCGTCGCAGTCCTTC-1,◯ -AGCGTCGCATCTCGCT-1,◯ -AGCGTCGGTAAATACG-1,◯ -AGCGTCGGTTCAGGCC-1,◯ -AGCGTCGTCAAAGTAG-1,◯ -AGCGTCGTCATATCGG-1,◯ -AGCGTCGTCCACGTGG-1,◯ -AGCTCCTAGACTTGAA-1,◯ -AGCTCCTAGAGGGCTT-1,◯ -AGCTCCTAGGGAACGG-1,◯ -AGCTCCTAGTGAAGAG-1,◯ -AGCTCCTCAACACCTA-1,◯ -AGCTCCTCAGGTGCCT-1,◯ -AGCTCCTCATATACGC-1,◯ -AGCTCCTCATCCGTGG-1,◯ -AGCTCCTGTGGTGTAG-1,◯ -AGCTCCTTCAACTCTT-1,◯ -AGCTCCTTCATCACCC-1,◯ -AGCTCCTTCATCTGTT-1,◯ -AGCTCCTTCGTAGATC-1,◯ -AGCTCCTTCTACTCAT-1,◯ -AGCTCCTTCTGCTGCT-1,◯ -AGCTCTCAGAGACTTA-1,◯ -AGCTCTCAGCGCCTCA-1,◯ -AGCTCTCAGGTGTGGT-1,◯ -AGCTCTCAGTGCTGCC-1,◯ -AGCTCTCCACATCCGG-1,◯ -AGCTCTCCATTACCTT-1,◯ -AGCTCTCGTCGGCATC-1,◯ -AGCTCTCGTTCCTCCA-1,◯ -AGCTCTCTCTAGCACA-1,◯ -AGCTCTCTCTCAACTT-1,◯ -AGCTCTCTCTCTGAGA-1,◯ -AGCTTGAAGCTCTCGG-1,◯ 
-AGCTTGAAGGATCGCA-1,◯ -AGCTTGACAAACGCGA-1,◯ -AGCTTGACAAGCCATT-1,◯ -AGCTTGACACCACCAG-1,◯ -AGCTTGACACCACGTG-1,◯ -AGCTTGACAGCCTTTC-1,◯ -AGCTTGAGTACCAGTT-1,◯ -AGCTTGAGTATCAGTC-1,◯ -AGCTTGAGTATTCTCT-1,◯ -AGCTTGAGTCCTGCTT-1,◯ -AGCTTGAGTCGCGAAA-1,◯ -AGCTTGATCAGCACAT-1,◯ -AGCTTGATCCGAACGC-1,◯ -AGCTTGATCTGTCTCG-1,◯ -AGGCCACAGCCTCGTG-1,◯ -AGGCCACCATTATCTC-1,◯ -AGGCCACGTAAGTGTA-1,◯ -AGGCCACGTACTTAGC-1,◯ -AGGCCACGTCAATGTC-1,◯ -AGGCCACGTCGAAAGC-1,◯ -AGGCCACTCACCGTAA-1,◯ -AGGCCACTCAGTGCAT-1,◯ -AGGCCACTCCCTTGTG-1,◯ -AGGCCACTCCTTTCGG-1,◯ -AGGCCGTAGGCGCTCT-1,◯ -AGGCCGTAGTGCCAGA-1,◯ -AGGCCGTCAAACGTGG-1,◯ -AGGCCGTCAAGTCATC-1,◯ -AGGCCGTCACAGATTC-1,◯ -AGGCCGTCATGTCGAT-1,◯ -AGGCCGTGTAATCGTC-1,◯ -AGGCCGTGTACAGTTC-1,◯ -AGGCCGTGTAGCCTAT-1,◯ -AGGCCGTGTCCGAGTC-1,◯ -AGGCCGTGTTAGAACA-1,◯ -AGGCCGTTCTCCAGGG-1,◯ -AGGGAGTAGCTGATAA-1,◯ -AGGGAGTAGTTGCAGG-1,◯ -AGGGAGTCACAAGTAA-1,◯ -AGGGAGTCAGATCTGT-1,◯ -AGGGAGTCATTAGCCA-1,◯ -AGGGAGTGTAGCGTCC-1,◯ -AGGGAGTTCAGCTGGC-1,◯ -AGGGAGTTCCGCATCT-1,◯ -AGGGATGAGAAGAAGC-1,◯ -AGGGATGAGAGTCGGT-1,◯ -AGGGATGAGCCGGTAA-1,◯ -AGGGATGCAATGGAGC-1,◯ -AGGGATGCACCTGGTG-1,◯ -AGGGATGCAGGTCGTC-1,◯ -AGGGATGGTACAGTTC-1,◯ -AGGGATGGTAGCGCAA-1,◯ -AGGGATGGTAGGGTAC-1,◯ -AGGGATGGTCTCAACA-1,◯ -AGGGATGGTGATAAGT-1,◯ -AGGGATGTCAGGTTCA-1,◯ -AGGGATGTCCAGTAGT-1,◯ -AGGGATGTCCGGCACA-1,◯ -AGGGATGTCTCATTCA-1,◯ -AGGGTGAAGCACCGTC-1,◯ -AGGGTGACAAGCGTAG-1,◯ -AGGGTGACAAGTAGTA-1,◯ -AGGGTGACAATAACGA-1,◯ -AGGGTGACAGTCAGCC-1,◯ -AGGGTGACATGCCCGA-1,◯ -AGGGTGAGTCATCGGC-1,◯ -AGGGTGAGTCTCATCC-1,◯ -AGGGTGAGTTATCCGA-1,◯ -AGGGTGAGTTCGTTGA-1,◯ -AGGGTGAGTTTCGCTC-1,◯ -AGGGTGATCAGCTCGG-1,◯ -AGGGTGATCGTAGGTT-1,◯ -AGGGTGATCTGTCTCG-1,◯ -AGGGTGATCTTTAGGG-1,◯ -AGGGTGATCTTTCCTC-1,◯ -AGGTCATAGCAGCGTA-1,◯ -AGGTCATAGCTACCGC-1,◯ -AGGTCATAGCTCCTTC-1,◯ -AGGTCATAGGCCATAG-1,◯ -AGGTCATAGTTCCACA-1,◯ -AGGTCATCACCTCGGA-1,◯ -AGGTCATGTACCGGCT-1,◯ -AGGTCATGTGTCCTCT-1,◯ -AGGTCATTCTGCTGCT-1,◯ -AGGTCCGAGAATAGGG-1,◯ -AGGTCCGAGACATAAC-1,◯ -AGGTCCGAGAGGTACC-1,◯ -AGGTCCGAGCGATATA-1,◯ -AGGTCCGAGGTGTGGT-1,◯ -AGGTCCGAGTACGCCC-1,◯ 
-AGGTCCGCAGATGGCA-1,◯ -AGGTCCGCAGCCTTGG-1,◯ -AGGTCCGGTAGAAAGG-1,◯ -AGGTCCGGTGAGGGTT-1,◯ -AGGTCCGGTGGTACAG-1,◯ -AGGTCCGTCAACGGCC-1,◯ -AGGTCCGTCCGCGCAA-1,◯ -AGTAGTCCAACCGCCA-1,◯ -AGTAGTCCAATGAATG-1,◯ -AGTAGTCCACAGAGGT-1,◯ -AGTAGTCCAGCATACT-1,◯ -AGTAGTCCATAACCTG-1,◯ -AGTAGTCGTTAAGATG-1,◯ -AGTAGTCTCACCGTAA-1,◯ -AGTCTTTAGATTACCC-1,◯ -AGTCTTTAGTACTTGC-1,◯ -AGTCTTTCAGTAACGG-1,◯ -AGTCTTTGTATAGGGC-1,◯ -AGTCTTTGTATATCCG-1,◯ -AGTCTTTGTCAATACC-1,◯ -AGTCTTTGTCGACTGC-1,◯ -AGTCTTTGTGTTTGTG-1,◯ -AGTCTTTGTTTCGCTC-1,◯ -AGTCTTTTCAGTTAGC-1,◯ -AGTCTTTTCCTCATTA-1,◯ -AGTCTTTTCCTGCTTG-1,◯ -AGTCTTTTCCTTGCCA-1,◯ -AGTGAGGAGCTGCCCA-1,◯ -AGTGAGGCAAAGGTGC-1,◯ -AGTGAGGCAACACCCG-1,◯ -AGTGAGGGTATTAGCC-1,◯ -AGTGAGGGTCTAGCCG-1,◯ -AGTGAGGGTTTAGCTG-1,◯ -AGTGAGGTCAGTTGAC-1,◯ -AGTGGGAAGCGCTTAT-1,◯ -AGTGGGAAGGCGTACA-1,◯ -AGTGGGAAGTGGGATC-1,◯ -AGTGGGACACGCATCG-1,◯ -AGTGGGAGTAGCAAAT-1,◯ -AGTGGGAGTAGCGTGA-1,◯ -AGTGGGAGTCTCTTTA-1,◯ -AGTGGGATCGTAGATC-1,◯ -AGTGTCAAGAGCTATA-1,◯ -AGTGTCAAGTACGACG-1,◯ -AGTGTCACAAGGCTCC-1,◯ -AGTGTCACACCTGGTG-1,◯ -AGTGTCACATTCCTCG-1,◯ -AGTGTCAGTACCAGTT-1,◯ -AGTGTCAGTATAATGG-1,◯ -AGTGTCAGTCGACTAT-1,◯ -AGTGTCAGTTCTGTTT-1,◯ -AGTGTCAGTTGCGCAC-1,◯ -AGTGTCATCCACGACG-1,◯ -AGTGTCATCCACGTTC-1,◯ -AGTGTCATCCTTCAAT-1,◯ -AGTGTCATCGCCCTTA-1,◯ -AGTGTCATCGCGTTTC-1,◯ -AGTGTCATCGTCCGTT-1,◯ -AGTGTCATCTATCCCG-1,◯ -AGTTGGTAGAGCAATT-1,◯ -AGTTGGTAGCCGCCTA-1,◯ -AGTTGGTAGCTGTCTA-1,◯ -AGTTGGTAGGCATTGG-1,◯ -AGTTGGTAGGGAAACA-1,◯ -AGTTGGTAGTACGCGA-1,◯ -AGTTGGTAGTGAAGTT-1,◯ -AGTTGGTCACGTCTCT-1,◯ -AGTTGGTCAGATTGCT-1,◯ -AGTTGGTCAGCTGCTG-1,◯ -AGTTGGTCAGGTCCAC-1,◯ -AGTTGGTGTCCAGTAT-1,◯ -AGTTGGTTCAAAGTAG-1,◯ -AGTTGGTTCAATAAGG-1,◯ -AGTTGGTTCCGTTGTC-1,◯ -AGTTGGTTCCTGCAGG-1,◯ -AGTTGGTTCGTTGCCT-1,◯ -ATAACGCAGAGTAATC-1,◯ -ATAACGCAGCAGGCTA-1,◯ -ATAACGCAGGGATGGG-1,◯ -ATAACGCCAACAACCT-1,◯ -ATAACGCCACGAAACG-1,◯ -ATAACGCGTGAACCTT-1,◯ -ATAACGCGTTTGCATG-1,◯ -ATAACGCTCACCACCT-1,◯ -ATAAGAGAGAAGATTC-1,◯ -ATAAGAGAGACAGAGA-1,◯ -ATAAGAGAGGCCGAAT-1,◯ -ATAAGAGAGTACGATA-1,◯ -ATAAGAGAGTTAGGTA-1,◯ -ATAAGAGCACCAGATT-1,◯ 
-ATAAGAGCACCCAGTG-1,◯ -ATAAGAGCAGTATGCT-1,◯ -ATAAGAGCATAGACTC-1,◯ -ATAAGAGCATCGATGT-1,◯ -ATAAGAGGTCCTCCAT-1,◯ -ATAAGAGGTCTCTTAT-1,◯ -ATAAGAGGTTCAACCA-1,◯ -ATAAGAGTCTAACTCT-1,◯ -ATAAGAGTCTAGAGTC-1,◯ -ATAAGAGTCTCGATGA-1,◯ -ATAGACCAGACTAGGC-1,◯ -ATAGACCAGATCTGAA-1,◯ -ATAGACCAGCTGATAA-1,◯ -ATAGACCAGGCAGGTT-1,◯ -ATAGACCAGTGTGGCA-1,◯ -ATAGACCCAAACTGTC-1,◯ -ATAGACCCAACGATGG-1,◯ -ATAGACCCAACTTGAC-1,◯ -ATAGACCCACGGTAGA-1,◯ -ATAGACCCAGACAGGT-1,◯ -ATAGACCGTAACGCGA-1,◯ -ATAGACCGTCAGTGGA-1,◯ -ATAGACCTCAGCGATT-1,◯ -ATAGACCTCCTGCAGG-1,◯ -ATAGACCTCTTAGAGC-1,◯ -ATCACGACAAAGCAAT-1,◯ -ATCACGACACGAGAGT-1,◯ -ATCACGACATCGTCGG-1,◯ -ATCACGATCTGTACGA-1,◯ -ATCATCTCATGGGAAC-1,◯ -ATCATCTGTACCAGTT-1,◯ -ATCATCTTCCGTACAA-1,◯ -ATCATGGAGATGTAAC-1,◯ -ATCATGGAGCAGATCG-1,◯ -ATCATGGAGCTCCTTC-1,◯ -ATCATGGAGGCGACAT-1,◯ -ATCATGGAGGCGCTCT-1,◯ -ATCATGGAGTTAAGTG-1,◯ -ATCATGGCAACCGCCA-1,◯ -ATCATGGCACGAAATA-1,◯ -ATCATGGCAGACAAAT-1,◯ -ATCATGGGTACCATCA-1,◯ -ATCATGGGTCCGCTGA-1,◯ -ATCATGGGTCTCTCTG-1,◯ -ATCATGGTCAGTTGAC-1,◯ -ATCATGGTCTGCGTAA-1,◯ -ATCATGGTCTTGTTTG-1,◯ -ATCCACCAGCGCTTAT-1,◯ -ATCCACCCACAAGACG-1,◯ -ATCCACCCACCAGGTC-1,◯ -ATCCACCCACCTGGTG-1,◯ -ATCCACCCAGGAATGC-1,◯ -ATCCACCCATGCCCGA-1,◯ -ATCCACCGTAAACGCG-1,◯ -ATCCACCGTCAGAATA-1,◯ -ATCCACCGTGGACGAT-1,◯ -ATCCACCGTTCCATGA-1,◯ -ATCCACCGTTCTGAAC-1,◯ -ATCCACCTCGGTCTAA-1,◯ -ATCCACCTCGTAGATC-1,◯ -ATCCACCTCTTGTATC-1,◯ -ATCCGAAAGTACACCT-1,◯ -ATCCGAAAGTTCCACA-1,◯ -ATCCGAACAAGACACG-1,◯ -ATCCGAAGTCGGCATC-1,◯ -ATCCGAAGTGCGGTAA-1,◯ -ATCCGAAGTGCTGTAT-1,◯ -ATCCGAAGTGGCGAAT-1,◯ -ATCCGAAGTGTTGGGA-1,◯ -ATCCGAATCTGGCGAC-1,◯ -ATCCGAATCTGTCCGT-1,◯ -ATCCGAATCTTCAACT-1,◯ -ATCGAGTAGCTGAACG-1,◯ -ATCGAGTAGGATATAC-1,◯ -ATCGAGTCAAGGTTTC-1,◯ -ATCGAGTCATTGGGCC-1,◯ -ATCGAGTGTAAGTAGT-1,◯ -ATCGAGTGTCATGCCG-1,◯ -ATCGAGTTCATAACCG-1,◯ -ATCGAGTTCGATCCCT-1,◯ -ATCGAGTTCGGCGCTA-1,◯ -ATCTACTAGAATTGTG-1,◯ -ATCTACTAGCGATATA-1,◯ -ATCTACTCAGACAGGT-1,◯ -ATCTACTCAGCCAATT-1,◯ -ATCTACTCATTACGAC-1,◯ -ATCTACTGTGCGATAG-1,◯ -ATCTACTGTTCCCGAG-1,◯ -ATCTACTGTTGTACAC-1,◯ -ATCTACTTCAGGCCCA-1,◯ 
-ATCTACTTCAGTTAGC-1,◯ -ATCTACTTCCATGAAC-1,◯ -ATCTACTTCCTGCTTG-1,◯ -ATCTACTTCTAACCGA-1,◯ -ATCTACTTCTCCGGTT-1,◯ -ATCTACTTCTGGTATG-1,◯ -ATCTGCCAGCCACTAT-1,◯ -ATCTGCCCAACACCTA-1,◯ -ATCTGCCCACGCGAAA-1,◯ -ATCTGCCCACTCTGTC-1,◯ -ATCTGCCCAGCTGTGC-1,◯ -ATCTGCCCATCGATGT-1,◯ -ATCTGCCGTATCAGTC-1,◯ -ATCTGCCGTCGAACAG-1,◯ -ATCTGCCGTCTGCAAT-1,◯ -ATCTGCCGTGAAGGCT-1,◯ -ATCTGCCGTTACGACT-1,◯ -ATCTGCCTCACAGTAC-1,◯ -ATCTGCCTCACGGTTA-1,◯ -ATCTGCCTCCTCAATT-1,◯ -ATCTGCCTCGCGTAGC-1,◯ -ATCTGCCTCGTAGGTT-1,◯ -ATCTGCCTCGTCGTTC-1,◯ -ATGAGGGAGTACGCCC-1,◯ -ATGAGGGAGTACGCGA-1,◯ -ATGAGGGAGTGATCGG-1,◯ -ATGAGGGAGTGTTGAA-1,◯ -ATGAGGGCATATGGTC-1,◯ -ATGAGGGGTCCGTGAC-1,◯ -ATGAGGGTCAACGAAA-1,◯ -ATGAGGGTCAATCTCT-1,◯ -ATGAGGGTCGGTGTTA-1,◯ -ATGCGATAGCCTTGAT-1,◯ -ATGCGATAGGACCACA-1,◯ -ATGCGATAGTCCAGGA-1,◯ -ATGCGATCAAGTTGTC-1,◯ -ATGCGATCACATCTTT-1,◯ -ATGCGATCAGACGCTC-1,◯ -ATGCGATGTCCAAGTT-1,◯ -ATGCGATGTCCCGACA-1,◯ -ATGCGATGTGGGTATG-1,◯ -ATGCGATTCTATCCCG-1,◯ -ATGCGATTCTATCGCC-1,◯ -ATGGGAGAGAGTAATC-1,◯ -ATGGGAGAGTACGCCC-1,◯ -ATGGGAGAGTTACGGG-1,◯ -ATGGGAGCACAGGCCT-1,◯ -ATGGGAGCAGGACGTA-1,◯ -ATGGGAGGTAGTACCT-1,◯ -ATGGGAGGTGTTTGGT-1,◯ -ATGGGAGGTTAAAGTG-1,◯ -ATGGGAGGTTCTGGTA-1,◯ -ATGGGAGTCCGAATGT-1,◯ -ATGGGAGTCGGCATCG-1,◯ -ATGGGAGTCTACTCAT-1,◯ -ATGGGAGTCTCTTGAT-1,◯ -ATGTGTGAGTTTGCGT-1,◯ -ATGTGTGCAAAGTGCG-1,◯ -ATGTGTGCACCATCCT-1,◯ -ATGTGTGCAGACGCCT-1,◯ -ATGTGTGGTAGCAAAT-1,◯ -ATGTGTGGTCGCATCG-1,◯ -ATGTGTGGTCTCCATC-1,◯ -ATGTGTGGTTCCCGAG-1,◯ -ATGTGTGTCAATCTCT-1,◯ -ATTACTCAGACTTTCG-1,◯ -ATTACTCAGTACGCCC-1,◯ -ATTACTCCAAGAGGCT-1,◯ -ATTACTCCAATCGGTT-1,◯ -ATTACTCGTAACGACG-1,◯ -ATTACTCGTAAGAGAG-1,◯ -ATTACTCGTAGGCATG-1,◯ -ATTACTCTCCGCGCAA-1,◯ -ATTACTCTCCGTTGCT-1,◯ -ATTATCCAGACTAGGC-1,◯ -ATTATCCAGATCTGAA-1,◯ -ATTATCCAGATGGGTC-1,◯ -ATTATCCAGCGATATA-1,◯ -ATTATCCCACCGCTAG-1,◯ -ATTATCCCATCGGTTA-1,◯ -ATTATCCGTCGAATCT-1,◯ -ATTATCCGTCTAGCCG-1,◯ -ATTATCCGTCTCGTTC-1,◯ -ATTATCCGTGCATCTA-1,◯ -ATTATCCTCCTATGTT-1,◯ -ATTATCCTCCTCGCAT-1,◯ -ATTATCCTCGGCTACG-1,◯ -ATTATCCTCGGCTTGG-1,◯ -ATTCTACAGTTCGATC-1,◯ -ATTCTACTCCATGAAC-1,◯ 
-ATTCTACTCGCTAGCG-1,◯ -ATTGGACAGCACCGCT-1,◯ -ATTGGACAGGGCACTA-1,◯ -ATTGGACCAAATTGCC-1,◯ -ATTGGACCACCTGGTG-1,◯ -ATTGGACCAGTAGAGC-1,◯ -ATTGGACCATGAAGTA-1,◯ -ATTGGACTCCGTAGGC-1,◯ -ATTGGACTCCTAGGGC-1,◯ -ATTGGACTCCTAGTGA-1,◯ -ATTGGTGAGAAGGCCT-1,◯ -ATTGGTGAGAGTCGGT-1,◯ -ATTGGTGAGATGCCAG-1,◯ -ATTGGTGAGATGCCTT-1,◯ -ATTGGTGAGCCAGAAC-1,◯ -ATTGGTGAGGTAAACT-1,◯ -ATTGGTGCATCACGAT-1,◯ -ATTGGTGCATGCAATC-1,◯ -ATTGGTGGTACCGTTA-1,◯ -ATTGGTGGTGAAGGCT-1,◯ -ATTGGTGTCAAAGTAG-1,◯ -ATTGGTGTCATTGCGA-1,◯ -ATTGGTGTCGAGAGCA-1,◯ -ATTGGTGTCGTCCAGG-1,◯ -ATTGGTGTCTAACCGA-1,◯ -ATTTCTGAGCAAATCA-1,◯ -ATTTCTGAGCTAGTCT-1,◯ -ATTTCTGAGGACATTA-1,◯ -ATTTCTGCAGACAAAT-1,◯ -ATTTCTGCAGTATGCT-1,◯ -ATTTCTGCATGGTCAT-1,◯ -ATTTCTGGTACGAAAT-1,◯ -ATTTCTGGTCTGCGGT-1,◯ -ATTTCTGTCGGCATCG-1,◯ -ATTTCTGTCGTCCGTT-1,◯ -CAACCAAAGACCGGAT-1,◯ -CAACCAAAGCACCGTC-1,◯ -CAACCAAAGGACGAAA-1,◯ -CAACCAAAGGAGCGTT-1,◯ -CAACCAACAAGCCATT-1,◯ -CAACCAACACTTAAGC-1,◯ -CAACCAACAGCTGTTA-1,◯ -CAACCAAGTGAGTATA-1,◯ -CAACCAATCAGAGGTG-1,◯ -CAACCAATCAGTTAGC-1,◯ -CAACCAATCCCTGACT-1,◯ -CAACCTCAGAGAGCTC-1,◯ -CAACCTCCAACCGCCA-1,◯ -CAACCTCCATGATCCA-1,◯ -CAACCTCGTAGAAAGG-1,◯ -CAACCTCGTCCCGACA-1,◯ -CAACCTCGTCGGGTCT-1,◯ -CAACCTCGTGTGCCTG-1,◯ -CAACCTCTCACAGTAC-1,◯ -CAACCTCTCGACGGAA-1,◯ -CAACTAGAGACTAAGT-1,◯ -CAACTAGAGATGTAAC-1,◯ -CAACTAGAGCATCATC-1,◯ -CAACTAGAGGAGTTGC-1,◯ -CAACTAGCAATGGATA-1,◯ -CAACTAGGTAAACACA-1,◯ -CAACTAGGTTGTCGCG-1,◯ -CAACTAGGTTTGACTG-1,◯ -CAACTAGTCGACGGAA-1,◯ -CAACTAGTCGCCATAA-1,◯ -CAAGAAAAGATAGCAT-1,◯ -CAAGAAAAGGACACCA-1,◯ -CAAGAAACACTGAAGG-1,◯ -CAAGAAACATAGGATA-1,◯ -CAAGAAACATTGCGGC-1,◯ -CAAGAAAGTGCTCTTC-1,◯ -CAAGAAAGTGTCGCTG-1,◯ -CAAGAAAGTGTTGGGA-1,◯ -CAAGAAATCCAAACAC-1,◯ -CAAGAAATCCGCATCT-1,◯ -CAAGATCAGGATGGTC-1,◯ -CAAGATCAGGCAATTA-1,◯ -CAAGATCAGGTCATCT-1,◯ -CAAGATCCAATCGAAA-1,◯ -CAAGATCCACCAGGCT-1,◯ -CAAGATCCACCGAATT-1,◯ -CAAGATCCACGAAGCA-1,◯ -CAAGATCCAGAGCCAA-1,◯ -CAAGATCGTACCGAGA-1,◯ -CAAGATCTCAGTCAGT-1,◯ -CAAGGCCAGTTAACGA-1,◯ -CAAGGCCCACAACGCC-1,◯ -CAAGGCCCACTTACGA-1,◯ -CAAGGCCCAGCTTAAC-1,◯ -CAAGGCCGTCGAGATG-1,◯ 
-CAAGGCCTCTACCTGC-1,◯ -CAAGTTGAGGCGACAT-1,◯ -CAAGTTGAGTTAACGA-1,◯ -CAAGTTGCAATGAAAC-1,◯ -CAAGTTGCACATCCAA-1,◯ -CAAGTTGCACGACGAA-1,◯ -CAAGTTGCAGGTGCCT-1,◯ -CAAGTTGCATGTCTCC-1,◯ -CAAGTTGGTCTGCGGT-1,◯ -CAAGTTGGTTAAGACA-1,◯ -CAAGTTGGTTGATTGC-1,◯ -CAAGTTGTCCGGCACA-1,◯ -CAAGTTGTCCGTCAAA-1,◯ -CAAGTTGTCGAGAGCA-1,◯ -CAAGTTGTCGCCAAAT-1,◯ -CAAGTTGTCTTTCCTC-1,◯ -CACAAACAGACTCGGA-1,◯ -CACAAACAGAGGTTAT-1,◯ -CACAAACAGCCACGCT-1,◯ -CACAAACAGGTTACCT-1,◯ -CACAAACCAAATACAG-1,◯ -CACAAACCAAGTACCT-1,◯ -CACAAACCAGATCCAT-1,◯ -CACAAACGTACGACCC-1,◯ -CACAAACGTAGAGTGC-1,◯ -CACAAACGTCCGCTGA-1,◯ -CACAAACTCTCGAGTA-1,◯ -CACACAAAGAGCAATT-1,◯ -CACACAAAGAGCTATA-1,◯ -CACACAAAGATGAGAG-1,◯ -CACACAAAGCGTAGTG-1,◯ -CACACAACAGCTGTAT-1,◯ -CACACAACAGTCACTA-1,◯ -CACACAAGTACACCGC-1,◯ -CACACAAGTATTCGTG-1,◯ -CACACAAGTCTCCACT-1,◯ -CACACAAGTGTAACGG-1,◯ -CACACAATCGGCGCAT-1,◯ -CACACAATCTGCTTGC-1,◯ -CACACAATCTGTCTCG-1,◯ -CACACCTAGCCGCCTA-1,◯ -CACACCTAGGCCCTCA-1,◯ -CACACCTCAGGATTGG-1,◯ -CACACCTCATCAGTCA-1,◯ -CACACCTTCAACGAAA-1,◯ -CACACCTTCAACTCTT-1,◯ -CACACCTTCAGCATGT-1,◯ -CACACCTTCAGCTCGG-1,◯ -CACACCTTCGAGAACG-1,◯ -CACACCTTCTTATCTG-1,◯ -CACACTCAGATCCTGT-1,◯ -CACACTCCAAATTGCC-1,◯ -CACACTCCAAGACACG-1,◯ -CACACTCCACGGACAA-1,◯ -CACACTCGTGAGGGTT-1,◯ -CACACTCGTGGACGAT-1,◯ -CACACTCGTGTAAGTA-1,◯ -CACACTCGTTGGGACA-1,◯ -CACACTCTCCTAGGGC-1,◯ -CACAGGCAGAAGGACA-1,◯ -CACAGGCAGTACGTTC-1,◯ -CACAGGCAGTGTGGCA-1,◯ -CACAGGCCACAGGAGT-1,◯ -CACAGGCCACTTCGAA-1,◯ -CACAGGCGTACATGTC-1,◯ -CACAGGCGTCTGGAGA-1,◯ -CACAGGCGTGAGCGAT-1,◯ -CACAGGCGTTGCGCAC-1,◯ -CACAGGCTCAGCGATT-1,◯ -CACAGGCTCATGCATG-1,◯ -CACAGGCTCGTTGCCT-1,◯ -CACAGGCTCTTGACGA-1,◯ -CACAGTAAGAAGAAGC-1,◯ -CACAGTAAGATGGGTC-1,◯ -CACAGTAAGCCATCGC-1,◯ -CACAGTACATGTTCCC-1,◯ -CACAGTAGTCAAACTC-1,◯ -CACAGTAGTCGAGATG-1,◯ -CACAGTATCACCACCT-1,◯ -CACAGTATCATAACCG-1,◯ -CACAGTATCCTTTCTC-1,◯ -CACAGTATCGCCGTGA-1,◯ -CACAGTATCGGAAATA-1,◯ -CACAGTATCGTTTAGG-1,◯ -CACAGTATCTTGTATC-1,◯ -CACATAGAGAAAGTGG-1,◯ -CACATAGAGAACTGTA-1,◯ -CACATAGAGAGACTAT-1,◯ -CACATAGAGTGAAGAG-1,◯ -CACATAGAGTGTACGG-1,◯ 
-CACATAGCAAGTCTGT-1,◯ -CACATAGGTGAGTGAC-1,◯ -CACATAGTCACCGTAA-1,◯ -CACATAGTCCACGCAG-1,◯ -CACATAGTCCCAAGTA-1,◯ -CACATTTAGAGTGACC-1,◯ -CACATTTAGCCACGTC-1,◯ -CACATTTAGTACGATA-1,◯ -CACATTTCACTGCCAG-1,◯ -CACATTTCAGCCTTGG-1,◯ -CACATTTCATGTTCCC-1,◯ -CACATTTCATTATCTC-1,◯ -CACATTTGTAAGTAGT-1,◯ -CACATTTTCACGAAGG-1,◯ -CACATTTTCAGGCCCA-1,◯ -CACATTTTCATGTCTT-1,◯ -CACCACTAGTGGACGT-1,◯ -CACCACTCAAAGTGCG-1,◯ -CACCACTCAGGATTGG-1,◯ -CACCACTCATCGATTG-1,◯ -CACCACTGTCCTCCAT-1,◯ -CACCACTTCGCACTCT-1,◯ -CACCACTTCTACCAGA-1,◯ -CACCACTTCTATCCTA-1,◯ -CACCAGGAGGTGTTAA-1,◯ -CACCAGGCACCACCAG-1,◯ -CACCAGGCACCTCGTT-1,◯ -CACCAGGCACGAAGCA-1,◯ -CACCAGGCACGTAAGG-1,◯ -CACCAGGCAGTGAGTG-1,◯ -CACCAGGGTCAGTGGA-1,◯ -CACCAGGGTGAAGGCT-1,◯ -CACCAGGTCGCCTGAG-1,◯ -CACCAGGTCTGTCAAG-1,◯ -CACCTTGAGAGTCTGG-1,◯ -CACCTTGAGCTAAACA-1,◯ -CACCTTGAGTCAAGGC-1,◯ -CACCTTGCAAGCCTAT-1,◯ -CACCTTGCACTGTTAG-1,◯ -CACCTTGGTTGTGGAG-1,◯ -CACCTTGTCACCCTCA-1,◯ -CACCTTGTCTAACTTC-1,◯ -CACCTTGTCTTAACCT-1,◯ -CACCTTGTCTTCGGTC-1,◯ -CACTCCAAGACGCTTT-1,◯ -CACTCCAAGATGTAAC-1,◯ -CACTCCAAGTATCGAA-1,◯ -CACTCCAAGTCAAGCG-1,◯ -CACTCCACACCGATAT-1,◯ -CACTCCACATGGTCAT-1,◯ -CACTCCACATTGGGCC-1,◯ -CACTCCAGTCCCGACA-1,◯ -CACTCCAGTGAGGGTT-1,◯ -CACTCCAGTTAGATGA-1,◯ -CACTCCAGTTTGTTTC-1,◯ -CACTCCATCAGCATGT-1,◯ -CACTCCATCGCGGATC-1,◯ -CAGAATCAGCAACGGT-1,◯ -CAGAATCAGCTGGAAC-1,◯ -CAGAATCAGTAATCCC-1,◯ -CAGAATCAGTGTCCCG-1,◯ -CAGAATCCACCCATGG-1,◯ -CAGAATCCAGGTGGAT-1,◯ -CAGAATCCAGGTTTCA-1,◯ -CAGAATCGTACGAAAT-1,◯ -CAGAATCGTAGGAGTC-1,◯ -CAGAATCGTTAAGAAC-1,◯ -CAGAATCTCACCCTCA-1,◯ -CAGAATCTCTCTTATG-1,◯ -CAGAGAGAGATGGGTC-1,◯ -CAGAGAGAGCCACTAT-1,◯ -CAGAGAGCAAGGGTCA-1,◯ -CAGAGAGCAGGCGATA-1,◯ -CAGAGAGCATTTCACT-1,◯ -CAGAGAGGTCACCCAG-1,◯ -CAGAGAGTCATCACCC-1,◯ -CAGAGAGTCCTACAGA-1,◯ -CAGAGAGTCGCTAGCG-1,◯ -CAGAGAGTCTTGTACT-1,◯ -CAGATCAAGCAAATCA-1,◯ -CAGATCAAGCCTATGT-1,◯ -CAGATCAAGGTGTTAA-1,◯ -CAGATCAAGTGTCCAT-1,◯ -CAGATCAAGTTCCACA-1,◯ -CAGATCACACCTCGTT-1,◯ -CAGATCACAGTCCTTC-1,◯ -CAGATCAGTCAAACTC-1,◯ -CAGATCAGTGTAAGTA-1,◯ -CAGCAGCAGTGCAAGC-1,◯ -CAGCAGCCACTCGACG-1,◯ 
-CAGCAGCCAGTCGATT-1,◯ -CAGCAGCCATGTCGAT-1,◯ -CAGCAGCGTCCGAATT-1,◯ -CAGCAGCGTTAAGAAC-1,◯ -CAGCAGCGTTGGGACA-1,◯ -CAGCAGCTCCGCATCT-1,◯ -CAGCAGCTCCTCATTA-1,◯ -CAGCATAAGATCGATA-1,◯ -CAGCATAAGGCGTACA-1,◯ -CAGCATAAGTACGCCC-1,◯ -CAGCATAAGTAGTGCG-1,◯ -CAGCATAAGTTCCACA-1,◯ -CAGCATACACAAGTAA-1,◯ -CAGCATACACCTTGTC-1,◯ -CAGCATACAGCTTAAC-1,◯ -CAGCATACATGAAGTA-1,◯ -CAGCATACATGGGACA-1,◯ -CAGCATAGTCTGGTCG-1,◯ -CAGCATAGTCTTCGTC-1,◯ -CAGCATATCTATCGCC-1,◯ -CAGCCGAAGAGACTAT-1,◯ -CAGCCGAAGAGAGCTC-1,◯ -CAGCCGAAGGCGCTCT-1,◯ -CAGCCGACAAGTAGTA-1,◯ -CAGCCGACAGGTCGTC-1,◯ -CAGCCGAGTAGCAAAT-1,◯ -CAGCCGAGTCAAACTC-1,◯ -CAGCCGAGTTGGTAAA-1,◯ -CAGCCGATCGGATGGA-1,◯ -CAGCCGATCTACGAGT-1,◯ -CAGCGACAGAGGGATA-1,◯ -CAGCGACAGCTCCCAG-1,◯ -CAGCGACCACACAGAG-1,◯ -CAGCGACCACATTTCT-1,◯ -CAGCGACCAGGCGATA-1,◯ -CAGCGACGTAAGAGGA-1,◯ -CAGCGACGTAGCGCAA-1,◯ -CAGCGACGTGGAAAGA-1,◯ -CAGCGACTCATTATCC-1,◯ -CAGCGACTCCAAATGC-1,◯ -CAGCGACTCGAATGCT-1,◯ -CAGCGACTCTGTACGA-1,◯ -CAGCTAACACGGCGTT-1,◯ -CAGCTAACAGTCGATT-1,◯ -CAGCTAAGTAAGAGAG-1,◯ -CAGCTAAGTCATACTG-1,◯ -CAGCTAAGTGGCGAAT-1,◯ -CAGCTAATCAAACAAG-1,◯ -CAGCTAATCAGGCAAG-1,◯ -CAGCTAATCGTAGGTT-1,◯ -CAGCTAATCTCATTCA-1,◯ -CAGCTAATCTTGTCAT-1,◯ -CAGCTGGAGATCGGGT-1,◯ -CAGCTGGAGATGCCAG-1,◯ -CAGCTGGAGGATGGAA-1,◯ -CAGCTGGCAACGCACC-1,◯ -CAGCTGGCAAGCCCAC-1,◯ -CAGCTGGCACACAGAG-1,◯ -CAGCTGGCACGCATCG-1,◯ -CAGCTGGCATTGGCGC-1,◯ -CAGCTGGGTATATGGA-1,◯ -CAGCTGGGTCTGCCAG-1,◯ -CAGCTGGGTGAGTATA-1,◯ -CAGCTGGGTTATCCGA-1,◯ -CAGCTGGTCATAACCG-1,◯ -CAGGTGCCACACCGAC-1,◯ -CAGGTGCCACACCGCA-1,◯ -CAGGTGCCACCAGTTA-1,◯ -CAGGTGCCACGAAACG-1,◯ -CAGGTGCCAGCCAATT-1,◯ -CAGGTGCCATGCCCGA-1,◯ -CAGGTGCGTAGAAAGG-1,◯ -CAGGTGCGTGTGACCC-1,◯ -CAGGTGCTCAGATAAG-1,◯ -CAGGTGCTCGAATGGG-1,◯ -CAGTAACAGAGGGATA-1,◯ -CAGTAACAGGAGTACC-1,◯ -CAGTAACAGTACATGA-1,◯ -CAGTAACCAAGTTGTC-1,◯ -CAGTAACCATGTCCTC-1,◯ -CAGTAACCATTAGGCT-1,◯ -CAGTAACGTAGCTGCC-1,◯ -CAGTAACGTATTAGCC-1,◯ -CAGTAACGTGCAGTAG-1,◯ -CAGTAACGTTAAAGAC-1,◯ -CAGTAACTCAGTCAGT-1,◯ -CAGTAACTCTATCGCC-1,◯ -CAGTCCTAGAAGGTGA-1,◯ -CAGTCCTAGAGCCTAG-1,◯ -CAGTCCTCATGTTCCC-1,◯ 
-CAGTCCTCATTTGCTT-1,◯ -CAGTCCTGTAATCGTC-1,◯ -CAGTCCTGTCTAGGTT-1,◯ -CAGTCCTGTTCCACAA-1,◯ -CAGTCCTTCCCTAATT-1,◯ -CAGTCCTTCGGTTAAC-1,◯ -CAGTCCTTCTGACCTC-1,◯ -CATATGGCAATGAATG-1,◯ -CATATGGCAATGTTGC-1,◯ -CATATGGGTCAAGCGA-1,◯ -CATATGGGTTACGACT-1,◯ -CATATGGTCGCAAGCC-1,◯ -CATATGGTCGCCAAAT-1,◯ -CATATGGTCTTCGAGA-1,◯ -CATATGGTCTTCGGTC-1,◯ -CATATTCAGATGGCGT-1,◯ -CATATTCAGCCACGTC-1,◯ -CATATTCCACATCCAA-1,◯ -CATATTCGTAATCACC-1,◯ -CATATTCGTCTCTTAT-1,◯ -CATATTCTCAACACCA-1,◯ -CATATTCTCAACGAAA-1,◯ -CATATTCTCGCATGGC-1,◯ -CATATTCTCTCAACTT-1,◯ -CATATTCTCTTGAGAC-1,◯ -CATCAAGAGATATGCA-1,◯ -CATCAAGAGCTGAAAT-1,◯ -CATCAAGAGGACGAAA-1,◯ -CATCAAGAGTGAAGTT-1,◯ -CATCAAGAGTGACTCT-1,◯ -CATCAAGCAAATACAG-1,◯ -CATCAAGCACGCCAGT-1,◯ -CATCAAGCACTTACGA-1,◯ -CATCAAGCAGACACTT-1,◯ -CATCAAGCAGACGTAG-1,◯ -CATCAAGCAGATGAGC-1,◯ -CATCAAGCAGTATCTG-1,◯ -CATCAAGGTCTACCTC-1,◯ -CATCAAGGTTCAGGCC-1,◯ -CATCAAGTCTACCAGA-1,◯ -CATCAAGTCTGCCAGG-1,◯ -CATCAGAAGATGCGAC-1,◯ -CATCAGAAGCGTGAGT-1,◯ -CATCAGAAGGAGCGTT-1,◯ -CATCAGAAGGTGTTAA-1,◯ -CATCAGAAGGTTCCTA-1,◯ -CATCAGAAGTTGTCGT-1,◯ -CATCAGACAGTGGAGT-1,◯ -CATCAGACATTATCTC-1,◯ -CATCAGACATTTCAGG-1,◯ -CATCAGAGTCACTGGC-1,◯ -CATCAGAGTGTTGGGA-1,◯ -CATCAGAGTTATCCGA-1,◯ -CATCAGATCACTTATC-1,◯ -CATCAGATCCAGAAGG-1,◯ -CATCAGATCCTTCAAT-1,◯ -CATCAGATCTCGGACG-1,◯ -CATCCACAGATGGCGT-1,◯ -CATCCACAGATGGGTC-1,◯ -CATCCACAGTATGACA-1,◯ -CATCCACCATCCTAGA-1,◯ -CATCCACGTAAACACA-1,◯ -CATCCACGTACAGACG-1,◯ -CATCCACGTCAGAAGC-1,◯ -CATCCACTCCGAATGT-1,◯ -CATCCACTCGAATGCT-1,◯ -CATCCACTCGAGAGCA-1,◯ -CATCCACTCGTTTGCC-1,◯ -CATCGAAAGAGACTAT-1,◯ -CATCGAAAGCGGATCA-1,◯ -CATCGAAAGCTGCAAG-1,◯ -CATCGAACAAGCCTAT-1,◯ -CATCGAACACATGACT-1,◯ -CATCGAACACGTCAGC-1,◯ -CATCGAACACTGTGTA-1,◯ -CATCGAACAGTAAGAT-1,◯ -CATCGAACATCTATGG-1,◯ -CATCGAAGTCGGCTCA-1,◯ -CATCGAAGTGTTTGTG-1,◯ -CATCGAAGTTGTTTGG-1,◯ -CATCGAATCTTGAGAC-1,◯ -CATCGAATCTTTACGT-1,◯ -CATCGGGAGGATGCGT-1,◯ -CATCGGGCAAATCCGT-1,◯ -CATCGGGCAAGCCTAT-1,◯ -CATCGGGCAGCTTAAC-1,◯ -CATCGGGCATCTCCCA-1,◯ -CATCGGGCATGCGCAC-1,◯ -CATCGGGGTCGATTGT-1,◯ -CATCGGGGTGATAAGT-1,◯ 
-CATCGGGTCAAACCAC-1,◯ -CATCGGGTCGTCGTTC-1,◯ -CATGACAAGAACTGTA-1,◯ -CATGACAAGACAGACC-1,◯ -CATGACAAGGAGTTGC-1,◯ -CATGACACAGATGGCA-1,◯ -CATGACAGTATAAACG-1,◯ -CATGACAGTGTTCTTT-1,◯ -CATGACATCGAACTGT-1,◯ -CATGACATCGGATGGA-1,◯ -CATGACATCTGAGGGA-1,◯ -CATGACATCTGGTTCC-1,◯ -CATGCCTAGACGCAAC-1,◯ -CATGCCTAGGAATTAC-1,◯ -CATGCCTAGTCAATAG-1,◯ -CATGCCTCAACGCACC-1,◯ -CATGCCTCAATAGAGT-1,◯ -CATGCCTCACTACAGT-1,◯ -CATGCCTCATGCTGGC-1,◯ -CATGCCTGTCTCTTAT-1,◯ -CATGCCTTCACATGCA-1,◯ -CATGCCTTCCGAACGC-1,◯ -CATGCCTTCCGCATCT-1,◯ -CATGCCTTCCTTCAAT-1,◯ -CATGGCGAGATCACGG-1,◯ -CATGGCGAGTGTACGG-1,◯ -CATGGCGCAATCCAAC-1,◯ -CATGGCGCATACGCTA-1,◯ -CATGGCGCATTGGGCC-1,◯ -CATGGCGGTCGGCATC-1,◯ -CATGGCGGTCTTGATG-1,◯ -CATGGCGGTGATGTCT-1,◯ -CATGGCGGTGGTAACG-1,◯ -CATGGCGTCCAGATCA-1,◯ -CATTATCAGGGCTTCC-1,◯ -CATTATCAGTTGAGTA-1,◯ -CATTATCCAAGGGTCA-1,◯ -CATTATCCAGCGTTCG-1,◯ -CATTATCGTAATTGGA-1,◯ -CATTATCGTAGCCTAT-1,◯ -CATTATCTCAGAGCTT-1,◯ -CATTATCTCGGCTACG-1,◯ -CATTATCTCTTTCCTC-1,◯ -CATTCGCAGAGAGCTC-1,◯ -CATTCGCAGCGTCAAG-1,◯ -CATTCGCAGGTTACCT-1,◯ -CATTCGCAGTGCGATG-1,◯ -CATTCGCCAATAAGCA-1,◯ -CATTCGCCACCTGGTG-1,◯ -CATTCGCCACGGTAAG-1,◯ -CATTCGCGTACCATCA-1,◯ -CATTCGCGTACCGGCT-1,◯ -CATTCGCGTGTCTGAT-1,◯ -CATTCGCTCAAGATCC-1,◯ -CATTCGCTCAGTGTTG-1,◯ -CCAATCCAGACAATAC-1,◯ -CCAATCCAGACTCGGA-1,◯ -CCAATCCAGTACACCT-1,◯ -CCAATCCAGTGGAGTC-1,◯ -CCAATCCCACTCAGGC-1,◯ -CCAATCCGTTCGTGAT-1,◯ -CCAATCCGTTCTCATT-1,◯ -CCAATCCTCCCGGATG-1,◯ -CCAATCCTCGCCTGAG-1,◯ -CCACCTAAGTAGGTGC-1,◯ -CCACCTACAGTTCCCT-1,◯ -CCACCTAGTAGGCTGA-1,◯ -CCACCTATCGAACTGT-1,◯ -CCACGGAAGAACAACT-1,◯ -CCACGGAAGAGACTTA-1,◯ -CCACGGAAGTACGTAA-1,◯ -CCACGGACAGTATCTG-1,◯ -CCACGGAGTACCGGCT-1,◯ -CCACGGATCTCATTCA-1,◯ -CCACTACAGACAGGCT-1,◯ -CCACTACAGGGATCTG-1,◯ -CCACTACCAGGCAGTA-1,◯ -CCACTACCATCAGTCA-1,◯ -CCACTACGTAACGCGA-1,◯ -CCACTACGTACACCGC-1,◯ -CCACTACGTTCAGCGC-1,◯ -CCACTACTCCTAAGTG-1,◯ -CCACTACTCCTCCTAG-1,◯ -CCACTACTCGATCCCT-1,◯ -CCACTACTCTGATACG-1,◯ -CCAGCGAAGGCTAGAC-1,◯ -CCAGCGACAACCGCCA-1,◯ -CCAGCGACAATCTACG-1,◯ -CCAGCGAGTCTGATCA-1,◯ -CCAGCGAGTGATAAGT-1,◯ 
-CCAGCGAGTGTGACCC-1,◯ -CCAGCGAGTTCGGCAC-1,◯ -CCAGCGATCAAGATCC-1,◯ -CCAGCGATCACGAAGG-1,◯ -CCAGCGATCAGAGCTT-1,◯ -CCATGTCAGCTAGTTC-1,◯ -CCATGTCAGTGTTTGC-1,◯ -CCATGTCCAAAGTCAA-1,◯ -CCATGTCCACTAAGTC-1,◯ -CCATGTCGTGCGATAG-1,◯ -CCATGTCGTTCGCGAC-1,◯ -CCATGTCGTTCGTCTC-1,◯ -CCATGTCTCTCCCTGA-1,◯ -CCATTCGAGGAACTGC-1,◯ -CCATTCGAGGGTCTCC-1,◯ -CCATTCGAGTGCGATG-1,◯ -CCATTCGCAAGTAATG-1,◯ -CCATTCGCAGTGGAGT-1,◯ -CCATTCGCATAACCTG-1,◯ -CCATTCGGTAAATACG-1,◯ -CCATTCGGTAAGTGGC-1,◯ -CCATTCGGTACAGTGG-1,◯ -CCATTCGGTACGAAAT-1,◯ -CCATTCGGTCACTGGC-1,◯ -CCATTCGGTCGCCATG-1,◯ -CCATTCGTCACCCTCA-1,◯ -CCCAATCAGAATGTTG-1,◯ -CCCAATCCACCTCGGA-1,◯ -CCCAATCGTAGGCTGA-1,◯ -CCCAGTTAGCCAGTTT-1,◯ -CCCAGTTAGCTTCGCG-1,◯ -CCCAGTTAGTGCAAGC-1,◯ -CCCAGTTCAAACGTGG-1,◯ -CCCAGTTCAGACAAGC-1,◯ -CCCAGTTGTCTTCAAG-1,◯ -CCCAGTTGTTATCCGA-1,◯ -CCCAGTTTCAATCACG-1,◯ -CCCAGTTTCGCAAACT-1,◯ -CCCATACGTAAGGGAA-1,◯ -CCCATACTCCATGAAC-1,◯ -CCCATACTCCATGAGT-1,◯ -CCCTCCTAGGCCATAG-1,◯ -CCCTCCTCACACGCTG-1,◯ -CCCTCCTCACGAGGTA-1,◯ -CCCTCCTCAGCCACCA-1,◯ -CCCTCCTCAGCTCCGA-1,◯ -CCCTCCTCATCACGAT-1,◯ -CCCTCCTGTACCTACA-1,◯ -CCCTCCTGTAGCGTCC-1,◯ -CCCTCCTGTCGAGATG-1,◯ -CCCTCCTGTCGGATCC-1,◯ -CCCTCCTTCATGCAAC-1,◯ -CCCTCCTTCCAATGGT-1,◯ -CCGGGATAGAAGGGTA-1,◯ -CCGGGATAGAATGTGT-1,◯ -CCGGGATAGCCGGTAA-1,◯ -CCGGGATAGGGCTTGA-1,◯ -CCGGGATAGGTAGCTG-1,◯ -CCGGGATCAACCGCCA-1,◯ -CCGGGATCAATGGATA-1,◯ -CCGGGATCACCGAAAG-1,◯ -CCGGGATCATGAACCT-1,◯ -CCGGGATGTCTCTCTG-1,◯ -CCGGGATTCATAGCAC-1,◯ -CCGGTAGAGACGACGT-1,◯ -CCGGTAGAGACTAAGT-1,◯ -CCGGTAGAGCTTATCG-1,◯ -CCGGTAGAGGTAGCTG-1,◯ -CCGGTAGCAAGGCTCC-1,◯ -CCGGTAGCACATAACC-1,◯ -CCGGTAGCAGTGAGTG-1,◯ -CCGGTAGGTAAACGCG-1,◯ -CCGGTAGGTACATCCA-1,◯ -CCGGTAGGTAGTACCT-1,◯ -CCGGTAGGTATGAATG-1,◯ -CCGGTAGGTCTTCGTC-1,◯ -CCGGTAGTCGACGGAA-1,◯ -CCGGTAGTCGTCTGAA-1,◯ -CCGGTAGTCGTTTAGG-1,◯ -CCGGTAGTCTCTAAGG-1,◯ -CCGTACTAGGCTAGAC-1,◯ -CCGTACTAGTGAAGAG-1,◯ -CCGTACTAGTGATCGG-1,◯ -CCGTACTCACATAACC-1,◯ -CCGTACTCACCGCTAG-1,◯ -CCGTACTGTTATGTGC-1,◯ -CCGTACTTCAATCTCT-1,◯ -CCGTACTTCACTTCAT-1,◯ -CCGTACTTCCAGAAGG-1,◯ -CCGTACTTCCCATTTA-1,◯ 
-CCGTACTTCGCCAGCA-1,◯ -CCGTACTTCTAGCACA-1,◯ -CCGTGGAAGATGCCTT-1,◯ -CCGTGGACAAACTGCT-1,◯ -CCGTGGACAGATCCAT-1,◯ -CCGTGGACAGTCAGAG-1,◯ -CCGTGGACAGTTAACC-1,◯ -CCGTGGACATTAGGCT-1,◯ -CCGTGGAGTACCGTTA-1,◯ -CCGTGGAGTCTAACGT-1,◯ -CCGTGGAGTTAAGTAG-1,◯ -CCGTGGATCAAGGTAA-1,◯ -CCGTGGATCCACTGGG-1,◯ -CCGTGGATCCAGTAGT-1,◯ -CCGTGGATCTTTACGT-1,◯ -CCGTTCAAGCACCGCT-1,◯ -CCGTTCAAGCGATATA-1,◯ -CCGTTCAAGGCGATAC-1,◯ -CCGTTCAAGGGCTTCC-1,◯ -CCGTTCACAAGGGTCA-1,◯ -CCGTTCACAATGTAAG-1,◯ -CCGTTCACACATCCAA-1,◯ -CCGTTCACAGTAACGG-1,◯ -CCGTTCACAGTTTACG-1,◯ -CCGTTCAGTGACCAAG-1,◯ -CCGTTCAGTGTTCTTT-1,◯ -CCGTTCAGTTAAGAAC-1,◯ -CCGTTCAGTTAAGTAG-1,◯ -CCGTTCATCATTATCC-1,◯ -CCGTTCATCCGTAGGC-1,◯ -CCTAAAGAGAGTAAGG-1,◯ -CCTAAAGAGCTCAACT-1,◯ -CCTAAAGCAACTGCTA-1,◯ -CCTAAAGCACTGAAGG-1,◯ -CCTAAAGGTATAGGTA-1,◯ -CCTAAAGGTCTAAAGA-1,◯ -CCTAAAGGTTCCATGA-1,◯ -CCTAAAGTCACCCGAG-1,◯ -CCTAAAGTCCGTCAAA-1,◯ -CCTAAAGTCTGGCGTG-1,◯ -CCTACACAGATCTGAA-1,◯ -CCTACACAGGAGTAGA-1,◯ -CCTACACAGTGTCTCA-1,◯ -CCTACACCAGGCTGAA-1,◯ -CCTACACCAGTAAGCG-1,◯ -CCTACACCAGTCAGCC-1,◯ -CCTACACGTGACTCAT-1,◯ -CCTACACGTGAGGGAG-1,◯ -CCTACACGTTCCACGG-1,◯ -CCTACACTCCGTCAAA-1,◯ -CCTACACTCCTTTCTC-1,◯ -CCTACACTCTGCTGTC-1,◯ -CCTACACTCTGGCGTG-1,◯ -CCTACCAAGCCCGAAA-1,◯ -CCTACCACACAGCCCA-1,◯ -CCTACCACACATGACT-1,◯ -CCTACCAGTAAGAGAG-1,◯ -CCTACCAGTCCGAATT-1,◯ -CCTACCAGTTCAGGCC-1,◯ -CCTACCATCATGTCCC-1,◯ -CCTAGCTAGCTCCCAG-1,◯ -CCTAGCTAGGTGCTAG-1,◯ -CCTAGCTAGTCTTGCA-1,◯ -CCTAGCTAGTTTCCTT-1,◯ -CCTAGCTGTCAAAGAT-1,◯ -CCTAGCTTCCTTGGTC-1,◯ -CCTAGCTTCGCTAGCG-1,◯ -CCTAGCTTCGGTTCGG-1,◯ -CCTAGCTTCTAACGGT-1,◯ -CCTAGCTTCTACTATC-1,◯ -CCTATTACAATGCCAT-1,◯ -CCTATTACACTTAAGC-1,◯ -CCTATTATCATTGCCC-1,◯ -CCTCAGTAGCCATCGC-1,◯ -CCTCAGTAGCTGAACG-1,◯ -CCTCAGTAGCTGGAAC-1,◯ -CCTCAGTAGTCAATAG-1,◯ -CCTCAGTAGTGCCAGA-1,◯ -CCTCAGTCAAAGAATC-1,◯ -CCTCAGTCAAAGGAAG-1,◯ -CCTCAGTCACCATCCT-1,◯ -CCTCAGTCAGATGGCA-1,◯ -CCTCAGTCAGCCTTTC-1,◯ -CCTCAGTGTATAGGGC-1,◯ -CCTCAGTGTCCGTTAA-1,◯ -CCTCAGTGTCCTGCTT-1,◯ -CCTCAGTGTCTTGCGG-1,◯ -CCTCAGTGTGCGATAG-1,◯ -CCTCAGTGTTCGAATC-1,◯ -CCTCAGTTCGAGCCCA-1,◯ 
-CCTCAGTTCTGATACG-1,◯ -CCTCTGAAGGGTCTCC-1,◯ -CCTCTGAAGTTTCCTT-1,◯ -CCTCTGATCAGCCTAA-1,◯ -CCTTACGAGAGCCTAG-1,◯ -CCTTACGAGATGCGAC-1,◯ -CCTTACGAGGACTGGT-1,◯ -CCTTACGCACATTCGA-1,◯ -CCTTACGCAGCTGTGC-1,◯ -CCTTACGGTAAGCACG-1,◯ -CCTTACGGTCTTGCGG-1,◯ -CCTTACGGTGTGACCC-1,◯ -CCTTACGGTTCGTGAT-1,◯ -CCTTACGTCACAGGCC-1,◯ -CCTTACGTCATCTGTT-1,◯ -CCTTACGTCGTAGGTT-1,◯ -CCTTACGTCTCTGCTG-1,◯ -CCTTCCCAGGATGCGT-1,◯ -CCTTCCCAGGCAAAGA-1,◯ -CCTTCCCCAGCTGGCT-1,◯ -CCTTCCCCAGGACGTA-1,◯ -CCTTCCCCATGGGAAC-1,◯ -CCTTCCCGTCGCATCG-1,◯ -CCTTCCCGTCGCTTTC-1,◯ -CCTTCCCGTTAGTGGG-1,◯ -CCTTCGAAGAATGTTG-1,◯ -CCTTCGAAGAGACGAA-1,◯ -CCTTCGAAGAGTAAGG-1,◯ -CCTTCGAAGATATGCA-1,◯ -CCTTCGAGTACAGACG-1,◯ -CCTTCGAGTACCGTAT-1,◯ -CCTTCGATCACCACCT-1,◯ -CCTTCGATCCCAGGTG-1,◯ -CCTTTCTAGAATGTTG-1,◯ -CCTTTCTAGACCGGAT-1,◯ -CCTTTCTAGGAATTAC-1,◯ -CCTTTCTAGGGTCGAT-1,◯ -CCTTTCTCAAAGAATC-1,◯ -CCTTTCTCAATAGCGG-1,◯ -CCTTTCTCACCAGATT-1,◯ -CCTTTCTCATCCGTGG-1,◯ -CCTTTCTGTAAGTTCC-1,◯ -CCTTTCTGTCAAAGAT-1,◯ -CCTTTCTTCCTCCTAG-1,◯ -CCTTTCTTCGGTCTAA-1,◯ -CGAACATAGGGATGGG-1,◯ -CGAACATCACCAGCAC-1,◯ -CGAACATCACCTGGTG-1,◯ -CGAACATCAGTACACT-1,◯ -CGAACATCATTAGGCT-1,◯ -CGAACATCATTGGCGC-1,◯ -CGAACATGTCACTTCC-1,◯ -CGAACATGTTGTCGCG-1,◯ -CGAACATTCAAAGTAG-1,◯ -CGAACATTCTTCTGGC-1,◯ -CGAATGTAGCACCGTC-1,◯ -CGAATGTAGCAGATCG-1,◯ -CGAATGTAGGCCCTCA-1,◯ -CGAATGTCAAGTAATG-1,◯ -CGAATGTCAATAGAGT-1,◯ -CGAATGTCAGATCCAT-1,◯ -CGAATGTGTAGTGAAT-1,◯ -CGAATGTGTCAGAAGC-1,◯ -CGAATGTGTCCCTTGT-1,◯ -CGAATGTGTCCGAGTC-1,◯ -CGAATGTGTTACGTCA-1,◯ -CGAATGTTCATCGCTC-1,◯ -CGAATGTTCTGTCTAT-1,◯ -CGACCTTCACTCGACG-1,◯ -CGACCTTCAGTCCTTC-1,◯ -CGACCTTGTAGCTCCG-1,◯ -CGACCTTGTCCGCTGA-1,◯ -CGACCTTGTGGTCTCG-1,◯ -CGACCTTTCATCGGAT-1,◯ -CGACCTTTCGGTGTTA-1,◯ -CGACCTTTCTCGGACG-1,◯ -CGACTTCAGACTAGAT-1,◯ -CGACTTCAGCCAGTTT-1,◯ -CGACTTCAGGGATCTG-1,◯ -CGACTTCAGTACCGGA-1,◯ -CGACTTCAGTGATCGG-1,◯ -CGACTTCAGTGGTAAT-1,◯ -CGACTTCCAATCTACG-1,◯ -CGACTTCCACAGCCCA-1,◯ -CGACTTCCACTCAGGC-1,◯ -CGACTTCCAGACTCGC-1,◯ -CGACTTCCAGGAATGC-1,◯ -CGACTTCCAGTTTACG-1,◯ -CGACTTCCATGGTTGT-1,◯ -CGACTTCGTACCGAGA-1,◯ 
-CGACTTCGTAGGAGTC-1,◯ -CGACTTCGTCCGTTAA-1,◯ -CGACTTCGTCTCGTTC-1,◯ -CGACTTCGTTACGGAG-1,◯ -CGACTTCTCCTTGACC-1,◯ -CGACTTCTCGCTTGTC-1,◯ -CGACTTCTCGTACCGG-1,◯ -CGACTTCTCTGTTTGT-1,◯ -CGAGAAGAGCTCCCAG-1,◯ -CGAGAAGAGGCTCATT-1,◯ -CGAGAAGAGTGAATTG-1,◯ -CGAGAAGAGTGGAGTC-1,◯ -CGAGAAGCAAACGTGG-1,◯ -CGAGAAGCAATGCCAT-1,◯ -CGAGAAGCAGAGCCAA-1,◯ -CGAGAAGCAGCAGTTT-1,◯ -CGAGAAGGTCCTCCAT-1,◯ -CGAGAAGGTTGAGTTC-1,◯ -CGAGAAGTCAGGCCCA-1,◯ -CGAGCACCAAGCTGAG-1,◯ -CGAGCACCAAGTAATG-1,◯ -CGAGCACCATGGAATA-1,◯ -CGAGCACGTCTTCGTC-1,◯ -CGAGCACGTTTAAGCC-1,◯ -CGAGCCAAGATCCTGT-1,◯ -CGAGCCAAGTGCAAGC-1,◯ -CGAGCCACATCACGTA-1,◯ -CGAGCCACATTCACTT-1,◯ -CGAGCCAGTTGTTTGG-1,◯ -CGAGCCATCATGCATG-1,◯ -CGAGCCATCGGAAATA-1,◯ -CGATCGGAGGACAGCT-1,◯ -CGATCGGAGTTTGCGT-1,◯ -CGATCGGCACATGGGA-1,◯ -CGATCGGCACCTCGTT-1,◯ -CGATCGGGTATTCGTG-1,◯ -CGATCGGGTGACAAAT-1,◯ -CGATCGGGTTAAAGAC-1,◯ -CGATCGGTCCAAAGTC-1,◯ -CGATCGGTCCGATATG-1,◯ -CGATCGGTCCTCAATT-1,◯ -CGATGGCAGCCACTAT-1,◯ -CGATGGCAGCGTAGTG-1,◯ -CGATGGCAGTTGTCGT-1,◯ -CGATGGCGTATTACCG-1,◯ -CGATGGCGTCAGAATA-1,◯ -CGATGGCGTCGCCATG-1,◯ -CGATGGCGTTATCACG-1,◯ -CGATGGCTCAACGCTA-1,◯ -CGATGGCTCTATCCTA-1,◯ -CGATGGCTCTTCCTTC-1,◯ -CGATGGCTCTTGTACT-1,◯ -CGATGTAAGATCGATA-1,◯ -CGATGTAAGCGTGAGT-1,◯ -CGATGTAAGGAGCGTT-1,◯ -CGATGTAAGTCGAGTG-1,◯ -CGATGTACAATCGGTT-1,◯ -CGATGTACACGAGAGT-1,◯ -CGATGTACACGTTGGC-1,◯ -CGATGTACACTGCCAG-1,◯ -CGATGTACATGCTGGC-1,◯ -CGATGTAGTAAAGTCA-1,◯ -CGATGTAGTAACGTTC-1,◯ -CGATGTAGTACTTCTT-1,◯ -CGATGTAGTCCAAGTT-1,◯ -CGATGTAGTCTCCACT-1,◯ -CGATGTAGTGACGGTA-1,◯ -CGATGTATCGCCAAAT-1,◯ -CGATTGAAGAGTTGGC-1,◯ -CGATTGAAGTCTCGGC-1,◯ -CGATTGACACTAAGTC-1,◯ -CGATTGAGTGATAAAC-1,◯ -CGATTGATCAAGATCC-1,◯ -CGATTGATCACAACGT-1,◯ -CGATTGATCGGAAATA-1,◯ -CGATTGATCTGTTGAG-1,◯ -CGCCAAGAGAGGGATA-1,◯ -CGCCAAGCAAAGGCGT-1,◯ -CGCCAAGCAACGATGG-1,◯ -CGCCAAGCACGGTGTC-1,◯ -CGCCAAGGTCCTGCTT-1,◯ -CGCCAAGGTGACAAAT-1,◯ -CGCCAAGTCCAACCAA-1,◯ -CGCCAAGTCCCACTTG-1,◯ -CGCCAAGTCCGCGTTT-1,◯ -CGCCAAGTCGGTTCGG-1,◯ -CGCCAAGTCTTCGAGA-1,◯ -CGCGGTAAGAACAACT-1,◯ -CGCGGTAAGATGGGTC-1,◯ -CGCGGTAAGCCACCTG-1,◯ 
-CGCGGTACACCTGGTG-1,◯ -CGCGGTACATCTACGA-1,◯ -CGCGGTAGTTATTCTC-1,◯ -CGCGGTATCCGAATGT-1,◯ -CGCGGTATCGCCGTGA-1,◯ -CGCGGTATCTTGTACT-1,◯ -CGCGTTTAGATCCTGT-1,◯ -CGCGTTTAGTGGGCTA-1,◯ -CGCGTTTCAAACTGTC-1,◯ -CGCGTTTCAACTGCGC-1,◯ -CGCGTTTGTCGAGTTT-1,◯ -CGCGTTTGTCTGCCAG-1,◯ -CGCGTTTTCAGGCCCA-1,◯ -CGCGTTTTCGCTGATA-1,◯ -CGCGTTTTCTCTGTCG-1,◯ -CGCGTTTTCTGCGTAA-1,◯ -CGCTATCAGCTCAACT-1,◯ -CGCTATCCAACTGCTA-1,◯ -CGCTATCGTACAGCAG-1,◯ -CGCTATCGTAGCTCCG-1,◯ -CGCTATCGTATTCGTG-1,◯ -CGCTATCGTGGTGTAG-1,◯ -CGCTATCGTTGACGTT-1,◯ -CGCTATCTCATGGTCA-1,◯ -CGCTATCTCCACGTGG-1,◯ -CGCTATCTCGACGGAA-1,◯ -CGCTATCTCTTAGCCC-1,◯ -CGCTGGAAGGCAAAGA-1,◯ -CGCTGGAAGGCTCTTA-1,◯ -CGCTGGAAGTACTTGC-1,◯ -CGCTGGACAATGACCT-1,◯ -CGCTGGACACAACGTT-1,◯ -CGCTGGACACTTAAGC-1,◯ -CGCTGGACATCCAACA-1,◯ -CGCTGGAGTGTGGTTT-1,◯ -CGCTGGAGTTCTCATT-1,◯ -CGCTGGATCACTGGGC-1,◯ -CGCTGGATCCCAACGG-1,◯ -CGCTGGATCCCTTGCA-1,◯ -CGCTTCAAGCGTCTAT-1,◯ -CGCTTCAAGGCTCATT-1,◯ -CGCTTCAGTATTAGCC-1,◯ -CGCTTCAGTCCGAGTC-1,◯ -CGCTTCAGTCTCCCTA-1,◯ -CGCTTCAGTGAGTATA-1,◯ -CGCTTCAGTTGCGTTA-1,◯ -CGCTTCATCAACGCTA-1,◯ -CGCTTCATCACCGGGT-1,◯ -CGCTTCATCCGCAAGC-1,◯ -CGCTTCATCGGCCGAT-1,◯ -CGGACACAGACAGAGA-1,◯ -CGGACACAGACCACGA-1,◯ -CGGACACAGCTCCTTC-1,◯ -CGGACACAGTTAACGA-1,◯ -CGGACACCAAGTACCT-1,◯ -CGGACACCAGTCGATT-1,◯ -CGGACACGTAGGCTGA-1,◯ -CGGACACGTCAAAGCG-1,◯ -CGGACACTCCCGACTT-1,◯ -CGGACGTGTACCCAAT-1,◯ -CGGACGTGTAGCACGA-1,◯ -CGGACGTGTGAGTATA-1,◯ -CGGACGTTCATACGGT-1,◯ -CGGACGTTCGGTCTAA-1,◯ -CGGACGTTCTGCGGCA-1,◯ -CGGACTGAGAGATGAG-1,◯ -CGGACTGAGCAGGCTA-1,◯ -CGGACTGAGCCCAACC-1,◯ -CGGACTGAGTACCGGA-1,◯ -CGGACTGCAGATAATG-1,◯ -CGGACTGCAGCTGTTA-1,◯ -CGGACTGGTAGAAAGG-1,◯ -CGGACTGGTAGGCATG-1,◯ -CGGACTGGTCCGTTAA-1,◯ -CGGACTGGTCTCCCTA-1,◯ -CGGACTGGTTCTGTTT-1,◯ -CGGACTGTCAAGGTAA-1,◯ -CGGACTGTCCTGTACC-1,◯ -CGGACTGTCCTGTAGA-1,◯ -CGGAGCTAGACCTAGG-1,◯ -CGGAGCTAGATCTGAA-1,◯ -CGGAGCTAGATGCGAC-1,◯ -CGGAGCTAGCCAGTTT-1,◯ -CGGAGCTAGCGTCAAG-1,◯ -CGGAGCTAGCTCCTCT-1,◯ -CGGAGCTAGTCCGTAT-1,◯ -CGGAGCTAGTGACTCT-1,◯ -CGGAGCTCACGAAACG-1,◯ -CGGAGCTCAGATGGGT-1,◯ -CGGAGCTCATTAGGCT-1,◯ 
-CGGAGCTGTACTCAAC-1,◯ -CGGAGCTTCGTATCAG-1,◯ -CGGAGCTTCTGCCAGG-1,◯ -CGGAGTCAGACCCACC-1,◯ -CGGAGTCAGACTAGGC-1,◯ -CGGAGTCAGATGAGAG-1,◯ -CGGAGTCAGCACCGCT-1,◯ -CGGAGTCAGCGAGAAA-1,◯ -CGGAGTCAGTACTTGC-1,◯ -CGGAGTCAGTCCCACG-1,◯ -CGGAGTCAGTGAACGC-1,◯ -CGGAGTCCAACACCTA-1,◯ -CGGAGTCCACCATGTA-1,◯ -CGGAGTCGTCAACATC-1,◯ -CGGAGTCGTCCGAAGA-1,◯ -CGGAGTCTCACAATGC-1,◯ -CGGAGTCTCCAGTATG-1,◯ -CGGAGTCTCGGCGCAT-1,◯ -CGGCTAGAGGATGGAA-1,◯ -CGGCTAGCAAGGACTG-1,◯ -CGGCTAGGTCAACTGT-1,◯ -CGGCTAGGTGATGATA-1,◯ -CGGCTAGGTTCTGTTT-1,◯ -CGGCTAGGTTGGTTTG-1,◯ -CGGCTAGGTTGTCGCG-1,◯ -CGGCTAGTCAGAGACG-1,◯ -CGGCTAGTCCAAAGTC-1,◯ -CGGCTAGTCCTAGTGA-1,◯ -CGGCTAGTCTAAGCCA-1,◯ -CGGCTAGTCTCTGTCG-1,◯ -CGGCTAGTCTGTTGAG-1,◯ -CGGGTCAAGAATGTGT-1,◯ -CGGGTCAAGCTAGCCC-1,◯ -CGGGTCAAGCTGCAAG-1,◯ -CGGGTCAAGGTAGCCA-1,◯ -CGGGTCACACAGCGTC-1,◯ -CGGGTCACACTTACGA-1,◯ -CGGGTCAGTAGCCTCG-1,◯ -CGGGTCAGTGAGTATA-1,◯ -CGGGTCAGTGCCTGCA-1,◯ -CGGGTCATCACAAACC-1,◯ -CGGGTCATCCACGTGG-1,◯ -CGGGTCATCTGAGTGT-1,◯ -CGGTTAAAGGTGATAT-1,◯ -CGGTTAACAGATCCAT-1,◯ -CGGTTAACAGCGTAAG-1,◯ -CGGTTAACAGGCAGTA-1,◯ -CGGTTAAGTAGGAGTC-1,◯ -CGGTTAAGTGCCTGCA-1,◯ -CGGTTAATCAGTTAGC-1,◯ -CGGTTAATCGAATGGG-1,◯ -CGGTTAATCTGATACG-1,◯ -CGGTTAATCTTCCTTC-1,◯ -CGTAGCGAGCGATGAC-1,◯ -CGTAGCGAGGGATCTG-1,◯ -CGTAGCGCAGACTCGC-1,◯ -CGTAGCGCAGATGAGC-1,◯ -CGTAGCGCAGCCTTTC-1,◯ -CGTAGCGGTCGCCATG-1,◯ -CGTAGCGGTCGCGTGT-1,◯ -CGTAGCGGTGTGCGTC-1,◯ -CGTAGCGTCAGCACAT-1,◯ -CGTAGCGTCGTAGGAG-1,◯ -CGTAGGCAGAGGTTGC-1,◯ -CGTAGGCAGGCCCTCA-1,◯ -CGTAGGCAGTAAGTAC-1,◯ -CGTAGGCAGTAGATGT-1,◯ -CGTAGGCAGTGGAGAA-1,◯ -CGTAGGCCAAGCCGCT-1,◯ -CGTAGGCCAATCCAAC-1,◯ -CGTAGGCCACACCGAC-1,◯ -CGTAGGCCAGGCTCAC-1,◯ -CGTAGGCCATTCTCAT-1,◯ -CGTAGGCGTGTATGGG-1,◯ -CGTAGGCTCACAGGCC-1,◯ -CGTAGGCTCATATCGG-1,◯ -CGTAGGCTCCGCATAA-1,◯ -CGTAGGCTCGACGGAA-1,◯ -CGTCACTCACAGCGTC-1,◯ -CGTCACTCACAGGCCT-1,◯ -CGTCACTCACGTTGGC-1,◯ -CGTCACTCAGCATGAG-1,◯ -CGTCACTCATCACCCT-1,◯ -CGTCACTGTGGAAAGA-1,◯ -CGTCACTTCAGAGCTT-1,◯ -CGTCACTTCCAGGGCT-1,◯ -CGTCACTTCCGTCAAA-1,◯ -CGTCAGGAGACGCTTT-1,◯ -CGTCAGGAGAGACGAA-1,◯ -CGTCAGGAGCAGGCTA-1,◯ 
-CGTCAGGAGTGAATTG-1,◯ -CGTCAGGAGTGTTAGA-1,◯ -CGTCAGGCAAAGCAAT-1,◯ -CGTCAGGCACATCTTT-1,◯ -CGTCAGGCACATGGGA-1,◯ -CGTCAGGCAGTATCTG-1,◯ -CGTCAGGCATGACGGA-1,◯ -CGTCAGGGTACCGTAT-1,◯ -CGTCAGGGTAGCGCAA-1,◯ -CGTCAGGGTAGCTTGT-1,◯ -CGTCAGGGTCCTAGCG-1,◯ -CGTCAGGGTTGTGGAG-1,◯ -CGTCAGGTCATGTAGC-1,◯ -CGTCAGGTCCAAATGC-1,◯ -CGTCAGGTCCACGACG-1,◯ -CGTCAGGTCCTAGAAC-1,◯ -CGTCAGGTCCTATGTT-1,◯ -CGTCAGGTCGCCTGTT-1,◯ -CGTCAGGTCTTCATGT-1,◯ -CGTCCATAGACAAAGG-1,◯ -CGTCCATAGAGGTACC-1,◯ -CGTCCATAGCCTATGT-1,◯ -CGTCCATAGTGGGTTG-1,◯ -CGTCCATCACACATGT-1,◯ -CGTCCATCACCCATTC-1,◯ -CGTCCATCAGATCCAT-1,◯ -CGTCCATCAGCAGTTT-1,◯ -CGTCCATCAGCTGTTA-1,◯ -CGTCCATCATTGGGCC-1,◯ -CGTCCATGTCTGGTCG-1,◯ -CGTCTACAGATCGGGT-1,◯ -CGTCTACAGCGACGTA-1,◯ -CGTCTACAGTCAAGGC-1,◯ -CGTCTACCAAGTCTGT-1,◯ -CGTCTACCAGAAGCAC-1,◯ -CGTCTACCAGAGCCAA-1,◯ -CGTCTACCAGGAATCG-1,◯ -CGTCTACCATATGCTG-1,◯ -CGTCTACGTCGGATCC-1,◯ -CGTCTACGTTCGTTGA-1,◯ -CGTCTACTCACCCTCA-1,◯ -CGTCTACTCAGTGCAT-1,◯ -CGTCTACTCGTAGGAG-1,◯ -CGTGAGCAGCTACCTA-1,◯ -CGTGAGCAGTCCTCCT-1,◯ -CGTGAGCCACAACGCC-1,◯ -CGTGAGCCAGGACGTA-1,◯ -CGTGAGCCAGTTTACG-1,◯ -CGTGAGCCATCGGAAG-1,◯ -CGTGAGCGTACCTACA-1,◯ -CGTGAGCGTCATATGC-1,◯ -CGTGAGCTCATAAAGG-1,◯ -CGTGAGCTCGATCCCT-1,◯ -CGTGTAAAGACAATAC-1,◯ -CGTGTAAAGGAATCGC-1,◯ -CGTGTAAAGGTCATCT-1,◯ -CGTGTAACAACTGCTA-1,◯ -CGTGTAACAGACGCTC-1,◯ -CGTGTAACATACTCTT-1,◯ -CGTGTAACATTGGGCC-1,◯ -CGTGTAAGTACCGTAT-1,◯ -CGTGTAAGTATTACCG-1,◯ -CGTGTAAGTGTTGAGG-1,◯ -CGTGTAATCAAAGTAG-1,◯ -CGTGTAATCAACGGCC-1,◯ -CGTGTAATCCGTCAAA-1,◯ -CGTGTAATCCTTGACC-1,◯ -CGTGTAATCTACCTGC-1,◯ -CGTGTCTCACGAAATA-1,◯ -CGTGTCTCAGATTGCT-1,◯ -CGTGTCTCAGCTTAAC-1,◯ -CGTGTCTGTAAACGCG-1,◯ -CGTGTCTGTACCGTTA-1,◯ -CGTGTCTGTAGGCATG-1,◯ -CGTGTCTGTCGAGTTT-1,◯ -CGTGTCTGTGGCCCTA-1,◯ -CGTGTCTTCAACTCTT-1,◯ -CGTGTCTTCCATGAGT-1,◯ -CGTTAGAAGAAAGTGG-1,◯ -CGTTAGAAGAATTCCC-1,◯ -CGTTAGAAGATAGCAT-1,◯ -CGTTAGAAGCGTCAAG-1,◯ -CGTTAGAAGGCAGGTT-1,◯ -CGTTAGAAGGCTCTTA-1,◯ -CGTTAGAAGTTCGATC-1,◯ -CGTTAGACACCCAGTG-1,◯ -CGTTAGACAGGCTCAC-1,◯ -CGTTAGAGTAGCTGCC-1,◯ -CGTTAGAGTATTACCG-1,◯ -CGTTAGAGTGTCAATC-1,◯ 
-CGTTAGATCACCTCGT-1,◯ -CGTTAGATCACGACTA-1,◯ -CGTTCTGAGAAAGTGG-1,◯ -CGTTCTGAGGACCACA-1,◯ -CGTTCTGAGGGCTCTC-1,◯ -CGTTCTGCAATGGATA-1,◯ -CGTTCTGCAGGGTATG-1,◯ -CGTTCTGCATCGGGTC-1,◯ -CGTTCTGGTGAGGGTT-1,◯ -CGTTCTGGTGGACGAT-1,◯ -CGTTCTGGTTGTACAC-1,◯ -CGTTCTGTCAGGTAAA-1,◯ -CGTTCTGTCCAAACTG-1,◯ -CGTTCTGTCCTGCAGG-1,◯ -CGTTGGGAGAAGGCCT-1,◯ -CGTTGGGAGCAGATCG-1,◯ -CGTTGGGAGGGATCTG-1,◯ -CGTTGGGCAGCCTTGG-1,◯ -CGTTGGGCATCGATGT-1,◯ -CGTTGGGGTATTAGCC-1,◯ -CGTTGGGGTCAAAGAT-1,◯ -CGTTGGGGTGTGTGCC-1,◯ -CGTTGGGTCATAAAGG-1,◯ -CGTTGGGTCCCTAATT-1,◯ -CGTTGGGTCGGTTCGG-1,◯ -CGTTGGGTCTCTAGGA-1,◯ -CGTTGGGTCTGCGTAA-1,◯ -CGTTGGGTCTTCGAGA-1,◯ -CGTTGGGTCTTGAGGT-1,◯ -CTAACTTAGAATCTCC-1,◯ -CTAACTTAGAGCCCAA-1,◯ -CTAACTTAGTTCGATC-1,◯ -CTAACTTCAGTTCATG-1,◯ -CTAACTTGTCACCCAG-1,◯ -CTAACTTGTCTGATTG-1,◯ -CTAACTTTCAACACTG-1,◯ -CTAAGACAGAAACGAG-1,◯ -CTAAGACAGCGATCCC-1,◯ -CTAAGACAGTGCAAGC-1,◯ -CTAAGACCAAGCTGTT-1,◯ -CTAAGACCAATCCAAC-1,◯ -CTAAGACCACAGCGTC-1,◯ -CTAAGACCACCATCCT-1,◯ -CTAAGACCAGATCGGA-1,◯ -CTAAGACCATCCTTGC-1,◯ -CTAAGACCATGGTTGT-1,◯ -CTAAGACGTAGCTTGT-1,◯ -CTAAGACTCGGGAGTA-1,◯ -CTAAGACTCGTTGCCT-1,◯ -CTAAGACTCTTGCCGT-1,◯ -CTAATGGAGAACTGTA-1,◯ -CTAATGGAGACTAAGT-1,◯ -CTAATGGAGCGATCCC-1,◯ -CTAATGGAGGCCCTCA-1,◯ -CTAATGGAGTAACCCT-1,◯ -CTAATGGAGTCAAGGC-1,◯ -CTAATGGCAGACTCGC-1,◯ -CTAATGGGTCCCGACA-1,◯ -CTAATGGGTGCATCTA-1,◯ -CTAATGGGTGTGGCTC-1,◯ -CTAATGGTCGTCCGTT-1,◯ -CTACACCAGCCAGTTT-1,◯ -CTACACCAGCCCAATT-1,◯ -CTACACCAGCTAGTCT-1,◯ -CTACACCCAGTATCTG-1,◯ -CTACACCGTCATCCCT-1,◯ -CTACACCGTGATGTCT-1,◯ -CTACATTAGACTGGGT-1,◯ -CTACATTAGAGCTGGT-1,◯ -CTACATTAGAGTACCG-1,◯ -CTACATTAGCGATAGC-1,◯ -CTACATTAGGACTGGT-1,◯ -CTACATTCAAGAGGCT-1,◯ -CTACATTCACAACTGT-1,◯ -CTACATTCACAAGTAA-1,◯ -CTACATTCATACTACG-1,◯ -CTACATTGTAGTGAAT-1,◯ -CTACATTGTATTAGCC-1,◯ -CTACATTTCAATACCG-1,◯ -CTACATTTCATGCATG-1,◯ -CTACATTTCCCAAGTA-1,◯ -CTACATTTCTATCCCG-1,◯ -CTACATTTCTCTTATG-1,◯ -CTACCCAAGTACATGA-1,◯ -CTACCCACAAGTTAAG-1,◯ -CTACCCACAGTTTACG-1,◯ -CTACCCACATCACCCT-1,◯ -CTACCCAGTCTGGTCG-1,◯ -CTACCCATCCTCCTAG-1,◯ -CTACCCATCCTTGCCA-1,◯ 
-CTACCCATCTTCCTTC-1,◯ -CTACGTCAGAGTTGGC-1,◯ -CTACGTCAGCCAGAAC-1,◯ -CTACGTCCAACTGCTA-1,◯ -CTACGTCGTCAGAGGT-1,◯ -CTACGTCGTCTCTTTA-1,◯ -CTACGTCGTTACTGAC-1,◯ -CTACGTCTCATCTGTT-1,◯ -CTACGTCTCCACGTTC-1,◯ -CTACGTCTCCCAAGAT-1,◯ -CTACGTCTCGCGTTTC-1,◯ -CTAGAGTAGCGCTTAT-1,◯ -CTAGAGTCAACACCCG-1,◯ -CTAGAGTCAAGCGTAG-1,◯ -CTAGAGTCACCTTGTC-1,◯ -CTAGAGTCAGCCTTTC-1,◯ -CTAGAGTGTACGCACC-1,◯ -CTAGAGTGTCCGACGT-1,◯ -CTAGAGTTCAATACCG-1,◯ -CTAGAGTTCCCAGGTG-1,◯ -CTAGAGTTCCCTTGTG-1,◯ -CTAGAGTTCGAATGCT-1,◯ -CTAGCCTAGAATTCCC-1,◯ -CTAGCCTAGGCAAAGA-1,◯ -CTAGCCTAGGCTCATT-1,◯ -CTAGCCTCAAATACAG-1,◯ -CTAGCCTCACCTCGTT-1,◯ -CTAGCCTCACTATCTT-1,◯ -CTAGCCTCAGGAACGT-1,◯ -CTAGCCTCAGGTTTCA-1,◯ -CTAGCCTTCATGGTCA-1,◯ -CTAGCCTTCCGCGGTA-1,◯ -CTAGCCTTCGAACGGA-1,◯ -CTAGTGACAATCAGAA-1,◯ -CTAGTGACATGCTGGC-1,◯ -CTAGTGAGTATGGTTC-1,◯ -CTAGTGATCAAGAAGT-1,◯ -CTAGTGATCCGGGTGT-1,◯ -CTAGTGATCCTTCAAT-1,◯ -CTAGTGATCCTTGCCA-1,◯ -CTAGTGATCTGAGTGT-1,◯ -CTCACACAGGAGCGAG-1,◯ -CTCACACAGGCATGGT-1,◯ -CTCACACAGGGATCTG-1,◯ -CTCACACAGGTACTCT-1,◯ -CTCACACAGTTAAGTG-1,◯ -CTCACACCAAGGTTTC-1,◯ -CTCACACCACCGCTAG-1,◯ -CTCACACCAGCCAGAA-1,◯ -CTCACACCAGTCAGCC-1,◯ -CTCACACCAGTCTTCC-1,◯ -CTCACACGTACATCCA-1,◯ -CTCACACTCACGACTA-1,◯ -CTCACACTCTGCAGTA-1,◯ -CTCACACTCTTGTCAT-1,◯ -CTCAGAAAGACACTAA-1,◯ -CTCAGAACACATGGGA-1,◯ -CTCAGAAGTCCGACGT-1,◯ -CTCAGAAGTCTAAACC-1,◯ -CTCAGAATCACAATGC-1,◯ -CTCATTAAGCACCGCT-1,◯ -CTCATTACACCACGTG-1,◯ -CTCATTACACCGGAAA-1,◯ -CTCATTACACGTAAGG-1,◯ -CTCATTACATGGTCAT-1,◯ -CTCATTAGTCACCCAG-1,◯ -CTCATTAGTTGATTGC-1,◯ -CTCATTATCGCCGTGA-1,◯ -CTCATTATCGGAATCT-1,◯ -CTCATTATCTACGAGT-1,◯ -CTCATTATCTCAACTT-1,◯ -CTCCTAGAGAAACCGC-1,◯ -CTCCTAGAGCAGCGTA-1,◯ -CTCCTAGAGTTACCCA-1,◯ -CTCCTAGCACCAACCG-1,◯ -CTCCTAGCAGACGCAA-1,◯ -CTCCTAGGTTACCAGT-1,◯ -CTCCTAGTCACGCATA-1,◯ -CTCCTAGTCATCTGCC-1,◯ -CTCCTAGTCGAATCCA-1,◯ -CTCCTAGTCGGGAGTA-1,◯ -CTCCTAGTCTCGATGA-1,◯ -CTCGAAAAGAGCCTAG-1,◯ -CTCGAAAAGGACAGCT-1,◯ -CTCGAAACAAGCTGTT-1,◯ -CTCGAAACACAGGTTT-1,◯ -CTCGAAATCTTTACGT-1,◯ -CTCGAGGAGGTGTGGT-1,◯ -CTCGAGGCAAGCCGTC-1,◯ -CTCGAGGCACTCGACG-1,◯ 
-CTCGAGGCAGGGATTG-1,◯ -CTCGAGGGTCGAATCT-1,◯ -CTCGAGGGTGGAAAGA-1,◯ -CTCGGAGAGGCAGGTT-1,◯ -CTCGGAGAGTCCGTAT-1,◯ -CTCGGAGTCCTACAGA-1,◯ -CTCGGAGTCGGCTTGG-1,◯ -CTCGGAGTCTAACTCT-1,◯ -CTCGGAGTCTGATTCT-1,◯ -CTCGGAGTCTTCAACT-1,◯ -CTCGGGAAGAAGAAGC-1,◯ -CTCGGGAAGGACAGCT-1,◯ -CTCGGGAAGTACACCT-1,◯ -CTCGGGACACATGGGA-1,◯ -CTCGGGACAGGTTTCA-1,◯ -CTCGGGACATATACGC-1,◯ -CTCGGGACATGGTCTA-1,◯ -CTCGGGAGTGTATGGG-1,◯ -CTCGGGATCCGAATGT-1,◯ -CTCGGGATCCGTCAAA-1,◯ -CTCGGGATCCTGCCAT-1,◯ -CTCGGGATCGCCAAAT-1,◯ -CTCGGGATCTTGACGA-1,◯ -CTCGTACAGGTGATAT-1,◯ -CTCGTACCAAGCGAGT-1,◯ -CTCGTACCACCTATCC-1,◯ -CTCGTACCAGCATGAG-1,◯ -CTCGTACCAGGACGTA-1,◯ -CTCGTACCATGGTCAT-1,◯ -CTCGTACGTAAACCTC-1,◯ -CTCGTACGTCCGTCAG-1,◯ -CTCGTACGTCCGTGAC-1,◯ -CTCGTACGTGCAGGTA-1,◯ -CTCGTACGTTCCCGAG-1,◯ -CTCGTACGTTCGTCTC-1,◯ -CTCGTACTCCATGAAC-1,◯ -CTCGTCAAGCAGCGTA-1,◯ -CTCGTCAAGTACGCGA-1,◯ -CTCGTCAAGTGACTCT-1,◯ -CTCGTCACAACAACCT-1,◯ -CTCGTCACATGAACCT-1,◯ -CTCGTCACATGCTGGC-1,◯ -CTCGTCAGTAGTGAAT-1,◯ -CTCGTCAGTATAGGGC-1,◯ -CTCGTCAGTATCAGTC-1,◯ -CTCGTCAGTTGAGTTC-1,◯ -CTCGTCATCGCGCCAA-1,◯ -CTCGTCATCGTTACGA-1,◯ -CTCGTCATCTGCTGTC-1,◯ -CTCGTCATCTTGAGAC-1,◯ -CTCTAATAGCAGATCG-1,◯ -CTCTAATAGCTAAACA-1,◯ -CTCTAATAGGATCGCA-1,◯ -CTCTAATAGGCCATAG-1,◯ -CTCTAATAGGTCATCT-1,◯ -CTCTAATAGTGACATA-1,◯ -CTCTAATCAATGGAGC-1,◯ -CTCTAATCACTAGTAC-1,◯ -CTCTAATCATGAAGTA-1,◯ -CTCTAATCATGCCCGA-1,◯ -CTCTAATGTAGGACAC-1,◯ -CTCTAATGTGTCAATC-1,◯ -CTCTAATTCACAGGCC-1,◯ -CTCTAATTCGGTCTAA-1,◯ -CTCTAATTCTGTCTCG-1,◯ -CTCTACGAGATGCGAC-1,◯ -CTCTACGAGGGTTTCT-1,◯ -CTCTACGAGGTGTGGT-1,◯ -CTCTACGGTGTTGAGG-1,◯ -CTCTACGTCCTCAATT-1,◯ -CTCTACGTCTTAACCT-1,◯ -CTCTGGTAGTCGTACT-1,◯ -CTCTGGTAGTGAAGTT-1,◯ -CTCTGGTCAATCAGAA-1,◯ -CTCTGGTGTACGCACC-1,◯ -CTCTGGTGTATGGTTC-1,◯ -CTCTGGTGTTGTCTTT-1,◯ -CTCTGGTTCAATCACG-1,◯ -CTCTGGTTCAGGTAAA-1,◯ -CTCTGGTTCCGCGTTT-1,◯ -CTCTGGTTCGATAGAA-1,◯ -CTCTGGTTCGCCGTGA-1,◯ -CTGAAACAGAGTGACC-1,◯ -CTGAAACAGGCAGGTT-1,◯ -CTGAAACAGGTGCACA-1,◯ -CTGAAACAGGTTACCT-1,◯ -CTGAAACAGTGGCACA-1,◯ -CTGAAACCACGCCAGT-1,◯ -CTGAAACCATTAGCCA-1,◯ -CTGAAACCATTGGGCC-1,◯ 
-CTGAAACGTAGGGTAC-1,◯ -CTGAAACGTCTGCAAT-1,◯ -CTGAAACTCCTAGGGC-1,◯ -CTGAAGTAGACGCACA-1,◯ -CTGAAGTAGCCTTGAT-1,◯ -CTGAAGTCACCTGGTG-1,◯ -CTGAAGTCACGAGGTA-1,◯ -CTGAAGTCACTCGACG-1,◯ -CTGAAGTGTGCAACTT-1,◯ -CTGAAGTTCCTAGGGC-1,◯ -CTGAAGTTCTGGCGAC-1,◯ -CTGATAGAGACAGACC-1,◯ -CTGATAGAGCTAGTGG-1,◯ -CTGATAGAGCTCCCAG-1,◯ -CTGATAGAGGCTAGAC-1,◯ -CTGATAGCAGATGAGC-1,◯ -CTGATAGGTCTCACCT-1,◯ -CTGATAGGTGTGAATA-1,◯ -CTGATAGTCACGAAGG-1,◯ -CTGATAGTCAGGATCT-1,◯ -CTGATAGTCATTATCC-1,◯ -CTGATAGTCCAAGTAC-1,◯ -CTGATAGTCGGCGCAT-1,◯ -CTGATAGTCTGGTGTA-1,◯ -CTGATAGTCTTGTATC-1,◯ -CTGATCCAGAGGGATA-1,◯ -CTGATCCAGCCACCTG-1,◯ -CTGATCCCAACGCACC-1,◯ -CTGATCCCAAGGTTCT-1,◯ -CTGATCCCACCCTATC-1,◯ -CTGATCCGTGATGTGG-1,◯ -CTGATCCTCAGAGGTG-1,◯ -CTGATCCTCGGATGTT-1,◯ -CTGCCTAAGCGGCTTC-1,◯ -CTGCCTAAGTGGGCTA-1,◯ -CTGCCTACACAGAGGT-1,◯ -CTGCCTACACGAGAGT-1,◯ -CTGCCTAGTAAACCTC-1,◯ -CTGCCTAGTCCGACGT-1,◯ -CTGCCTATCAACTCTT-1,◯ -CTGCCTATCCTTTCTC-1,◯ -CTGCCTATCTTTAGTC-1,◯ -CTGCGGAAGAGTACCG-1,◯ -CTGCGGAAGATGTCGG-1,◯ -CTGCGGAAGCAGATCG-1,◯ -CTGCGGAAGCGTGAAC-1,◯ -CTGCGGAAGTAGGTGC-1,◯ -CTGCGGAAGTTGTCGT-1,◯ -CTGCGGACAGTGGAGT-1,◯ -CTGCGGACATTCCTGC-1,◯ -CTGCGGAGTGCCTGGT-1,◯ -CTGCGGAGTGGTGTAG-1,◯ -CTGCGGAGTGTGCGTC-1,◯ -CTGCGGAGTTTAGCTG-1,◯ -CTGCGGATCAAACCAC-1,◯ -CTGCGGATCATCTGCC-1,◯ -CTGCGGATCTTTAGGG-1,◯ -CTGCTGTAGACAAAGG-1,◯ -CTGCTGTAGCCCTAAT-1,◯ -CTGCTGTAGGGTCTCC-1,◯ -CTGCTGTAGTGGGATC-1,◯ -CTGCTGTCATGCAACT-1,◯ -CTGCTGTCATGGTAGG-1,◯ -CTGCTGTCATTGGGCC-1,◯ -CTGCTGTGTACGACCC-1,◯ -CTGCTGTGTCGAAAGC-1,◯ -CTGCTGTGTGCCTGGT-1,◯ -CTGCTGTTCGTGGACC-1,◯ -CTGGTCTAGAACAACT-1,◯ -CTGGTCTAGAGACTAT-1,◯ -CTGGTCTAGAGTACCG-1,◯ -CTGGTCTAGCTAAGAT-1,◯ -CTGGTCTAGTTAAGTG-1,◯ -CTGGTCTCACTGTGTA-1,◯ -CTGGTCTCAGACAAGC-1,◯ -CTGGTCTGTACAGTGG-1,◯ -CTGGTCTGTAGCGATG-1,◯ -CTGGTCTGTAGGCATG-1,◯ -CTGGTCTGTCGACTGC-1,◯ -CTGGTCTGTCGCGAAA-1,◯ -CTGGTCTGTGAGCGAT-1,◯ -CTGGTCTGTTCAGCGC-1,◯ -CTGGTCTTCCACGTTC-1,◯ -CTGGTCTTCGCTAGCG-1,◯ -CTGGTCTTCTTGAGAC-1,◯ -CTGGTCTTCTTGCAAG-1,◯ -CTGTGCTAGACTAGAT-1,◯ -CTGTGCTAGCTGGAAC-1,◯ -CTGTGCTCAATCCGAT-1,◯ -CTGTGCTCACGAGAGT-1,◯ 
-CTGTGCTCAGACACTT-1,◯ -CTGTGCTCATACCATG-1,◯ -CTGTGCTGTAAACGCG-1,◯ -CTGTGCTGTCCAACTA-1,◯ -CTGTGCTGTCGCTTCT-1,◯ -CTGTGCTTCACAACGT-1,◯ -CTGTGCTTCAGGTTCA-1,◯ -CTGTGCTTCGCCTGTT-1,◯ -CTGTGCTTCTCTGCTG-1,◯ -CTGTTTAAGCGATCCC-1,◯ -CTGTTTAAGTAATCCC-1,◯ -CTGTTTACAGACGCAA-1,◯ -CTGTTTACAGTTCATG-1,◯ -CTGTTTACATATGGTC-1,◯ -CTGTTTATCCGATATG-1,◯ -CTGTTTATCCGTCATC-1,◯ -CTGTTTATCTGGTGTA-1,◯ -CTTAACTAGACATAAC-1,◯ -CTTAACTAGATCCCGC-1,◯ -CTTAACTAGGACATTA-1,◯ -CTTAACTCAAGCTGAG-1,◯ -CTTAACTCAGACAAGC-1,◯ -CTTAACTCAGACGCCT-1,◯ -CTTAACTCAGTAACGG-1,◯ -CTTAACTCAGTATGCT-1,◯ -CTTAACTCATTATCTC-1,◯ -CTTAACTCATTCCTCG-1,◯ -CTTAACTGTCATACTG-1,◯ -CTTAACTTCACTCTTA-1,◯ -CTTAACTTCAGCTCTC-1,◯ -CTTAACTTCGGTCTAA-1,◯ -CTTAACTTCTCATTCA-1,◯ -CTTAACTTCTCGAGTA-1,◯ -CTTACCGAGACAGACC-1,◯ -CTTACCGAGACTAAGT-1,◯ -CTTACCGAGTGAAGTT-1,◯ -CTTACCGCAAGAGTCG-1,◯ -CTTACCGCAGCGATCC-1,◯ -CTTACCGCAGCTTAAC-1,◯ -CTTACCGCAGGATTGG-1,◯ -CTTACCGCATACGCCG-1,◯ -CTTACCGCATAGACTC-1,◯ -CTTACCGCATCCGCGA-1,◯ -CTTACCGGTACGAAAT-1,◯ -CTTACCGGTAGCCTAT-1,◯ -CTTACCGGTCTGGAGA-1,◯ -CTTACCGGTGCAGTAG-1,◯ -CTTACCGGTGGACGAT-1,◯ -CTTACCGTCCAATGGT-1,◯ -CTTACCGTCCTCATTA-1,◯ -CTTACCGTCTAACTCT-1,◯ -CTTACCGTCTCGGACG-1,◯ -CTTAGGAAGGCAGGTT-1,◯ -CTTAGGACAGTATAAG-1,◯ -CTTAGGACATTACGAC-1,◯ -CTTAGGAGTGGACGAT-1,◯ -CTTAGGAGTGGTCTCG-1,◯ -CTTAGGATCCCAAGAT-1,◯ -CTTAGGATCTACCTGC-1,◯ -CTTCTCTAGAAGCCCA-1,◯ -CTTCTCTAGATGCCAG-1,◯ -CTTCTCTAGCCCTAAT-1,◯ -CTTCTCTAGTGCGTGA-1,◯ -CTTCTCTCACGTTGGC-1,◯ -CTTCTCTCAGCTCGAC-1,◯ -CTTCTCTCAGGTCCAC-1,◯ -CTTCTCTGTGGACGAT-1,◯ -CTTCTCTGTTCGAATC-1,◯ -CTTCTCTTCAATACCG-1,◯ -CTTCTCTTCATATCGG-1,◯ -CTTCTCTTCATGGTCA-1,◯ -CTTCTCTTCATTGCGA-1,◯ -CTTGGCTAGCGGCTTC-1,◯ -CTTGGCTCAGGTTTCA-1,◯ -CTTGGCTGTAAACGCG-1,◯ -CTTGGCTGTCGGGTCT-1,◯ -CTTGGCTGTGTGGTTT-1,◯ -CTTGGCTTCACAACGT-1,◯ -CTTGGCTTCCGCGTTT-1,◯ -CTTTGCGAGAGACTTA-1,◯ -CTTTGCGAGAGGTTAT-1,◯ -CTTTGCGAGATATGCA-1,◯ -CTTTGCGCAACCGCCA-1,◯ -CTTTGCGCAAGCTGGA-1,◯ -CTTTGCGCAGGATTGG-1,◯ -CTTTGCGCATCGATGT-1,◯ -CTTTGCGCATGGATGG-1,◯ -CTTTGCGCATTGAGCT-1,◯ -CTTTGCGTCACCCTCA-1,◯ -CTTTGCGTCCTTTACA-1,◯ 
-CTTTGCGTCTCAACTT-1,◯ -CTTTGCGTCTGGCGAC-1,◯ -GAAACTCCAAGCCATT-1,◯ -GAAACTCCAATGTAAG-1,◯ -GAAACTCCACTGTCGG-1,◯ -GAAACTCCATTGAGCT-1,◯ -GAAACTCGTACCGTAT-1,◯ -GAAACTCGTCTTTCAT-1,◯ -GAAACTCTCCAGAAGG-1,◯ -GAAACTCTCCCGACTT-1,◯ -GAAATGAAGAAACGCC-1,◯ -GAAATGAAGGACATTA-1,◯ -GAAATGACAATGCCAT-1,◯ -GAAATGACAGCTGCAC-1,◯ -GAAATGACATCCCATC-1,◯ -GAAATGACATTAACCG-1,◯ -GAAATGAGTACAGTGG-1,◯ -GAAATGAGTGTAATGA-1,◯ -GAAATGATCATCGGAT-1,◯ -GAAATGATCGCTAGCG-1,◯ -GAAATGATCTGGGCCA-1,◯ -GAAATGATCTGTCTAT-1,◯ -GAACATCAGGATGGTC-1,◯ -GAACATCAGGCGTACA-1,◯ -GAACATCAGTCGTTTG-1,◯ -GAACATCAGTGGTAAT-1,◯ -GAACATCCAAGCGCTC-1,◯ -GAACATCCAGGACCCT-1,◯ -GAACATCCATCGATGT-1,◯ -GAACATCGTAAGGGCT-1,◯ -GAACATCGTGTTCTTT-1,◯ -GAACCTAAGCGATAGC-1,◯ -GAACCTACAAGCGCTC-1,◯ -GAACCTAGTCCGAATT-1,◯ -GAACCTAGTCTCGTTC-1,◯ -GAACCTATCCGGCACA-1,◯ -GAACCTATCGGATGTT-1,◯ -GAACGGACACACAGAG-1,◯ -GAACGGACATTACGAC-1,◯ -GAACGGAGTATATGAG-1,◯ -GAACGGAGTCATACTG-1,◯ -GAACGGAGTTAAGTAG-1,◯ -GAACGGATCAGAGGTG-1,◯ -GAAGCAGAGACGACGT-1,◯ -GAAGCAGAGAGTGAGA-1,◯ -GAAGCAGAGCTACCTA-1,◯ -GAAGCAGAGGGTTTCT-1,◯ -GAAGCAGAGTACGTTC-1,◯ -GAAGCAGAGTGCGTGA-1,◯ -GAAGCAGAGTGTTAGA-1,◯ -GAAGCAGCAATGTAAG-1,◯ -GAAGCAGCACAGATTC-1,◯ -GAAGCAGCAGTCGTGC-1,◯ -GAAGCAGCATTGAGCT-1,◯ -GAAGCAGGTACAGACG-1,◯ -GAAGCAGGTAGCGCAA-1,◯ -GAAGCAGGTGACGCCT-1,◯ -GAAGCAGGTGTGACCC-1,◯ -GAAGCAGTCCATGAAC-1,◯ -GAAGCAGTCCCATTTA-1,◯ -GAAGCAGTCGTTACAG-1,◯ -GAATAAGAGCCCTAAT-1,◯ -GAATAAGAGGACATTA-1,◯ -GAATAAGCAAGCTGTT-1,◯ -GAATAAGCACATAACC-1,◯ -GAATAAGCATCCTAGA-1,◯ -GAATAAGGTACCGTTA-1,◯ -GAATAAGGTCGTGGCT-1,◯ -GAATAAGGTCTAACGT-1,◯ -GAATAAGGTGAGGCTA-1,◯ -GAATAAGGTGCTAGCC-1,◯ -GAATAAGGTGGCAAAC-1,◯ -GAATAAGTCAGTCAGT-1,◯ -GAATAAGTCCGCGTTT-1,◯ -GAATAAGTCTGTACGA-1,◯ -GAATGAAAGGATGGAA-1,◯ -GAATGAAAGTGGAGAA-1,◯ -GAATGAACAAGAAAGG-1,◯ -GAATGAACACAGATTC-1,◯ -GAATGAACATGCGCAC-1,◯ -GAATGAAGTAGCTAAA-1,◯ -GAATGAAGTCCCGACA-1,◯ -GAATGAATCACAATGC-1,◯ -GAATGAATCCTACAGA-1,◯ -GACACGCAGAACAACT-1,◯ -GACACGCAGACACTAA-1,◯ -GACACGCAGAGTCTGG-1,◯ -GACACGCAGCAGACTG-1,◯ -GACACGCAGCCACCTG-1,◯ -GACACGCCACTTGGAT-1,◯ 
-GACACGCCATTAGGCT-1,◯ -GACACGCCATTGTGCA-1,◯ -GACACGCTCATTCACT-1,◯ -GACACGCTCGATCCCT-1,◯ -GACACGCTCGGTGTTA-1,◯ -GACAGAGCAAACGCGA-1,◯ -GACAGAGCAAGCTGGA-1,◯ -GACAGAGCACAGACAG-1,◯ -GACAGAGCAGCCACCA-1,◯ -GACAGAGCATAAGACA-1,◯ -GACAGAGCATCGACGC-1,◯ -GACAGAGCATGGGAAC-1,◯ -GACAGAGCATGTTGAC-1,◯ -GACAGAGGTATGAAAC-1,◯ -GACAGAGGTCCAGTGC-1,◯ -GACAGAGGTTCCCGAG-1,◯ -GACAGAGGTTTGACTG-1,◯ -GACAGAGTCACAACGT-1,◯ -GACAGAGTCTGCGACG-1,◯ -GACAGAGTCTTGTCAT-1,◯ -GACCAATAGGATTCGG-1,◯ -GACCAATAGGGATACC-1,◯ -GACCAATCAACAACCT-1,◯ -GACCAATCAGCGTCCA-1,◯ -GACCAATCAGGGCATA-1,◯ -GACCAATCATCCCATC-1,◯ -GACCAATCATTCTTAC-1,◯ -GACCAATTCTTCGGTC-1,◯ -GACCTGGAGACTGTAA-1,◯ -GACCTGGAGCCACGCT-1,◯ -GACCTGGCAGGAATGC-1,◯ -GACCTGGCAGTAAGAT-1,◯ -GACCTGGTCCGTCATC-1,◯ -GACGCGTAGTAATCCC-1,◯ -GACGCGTCAACTGCGC-1,◯ -GACGCGTCAAGGTTCT-1,◯ -GACGCGTCATAGGATA-1,◯ -GACGCGTGTTGATTGC-1,◯ -GACGCGTTCATGTCTT-1,◯ -GACGCGTTCGGATGGA-1,◯ -GACGCGTTCTCAAACG-1,◯ -GACGGCTAGAAGAAGC-1,◯ -GACGGCTAGATGTTAG-1,◯ -GACGGCTAGCAGCGTA-1,◯ -GACGGCTAGGCCGAAT-1,◯ -GACGGCTCAATTGCTG-1,◯ -GACGGCTCACGAGGTA-1,◯ -GACGGCTCACTCTGTC-1,◯ -GACGGCTCAGGTGGAT-1,◯ -GACGGCTCAGTAAGCG-1,◯ -GACGGCTGTAGCGATG-1,◯ -GACGGCTGTAGTGAAT-1,◯ -GACGGCTGTCGCGGTT-1,◯ -GACGGCTGTGGTACAG-1,◯ -GACGGCTGTGTTGAGG-1,◯ -GACGTGCAGCTCCTCT-1,◯ -GACGTGCAGCTGGAAC-1,◯ -GACGTGCAGCTTATCG-1,◯ -GACGTGCAGGACGAAA-1,◯ -GACGTGCAGTGGTAGC-1,◯ -GACGTGCCAAGCCTAT-1,◯ -GACGTGCCACAAGTAA-1,◯ -GACGTGCCACTGCCAG-1,◯ -GACGTGCCAGTTAACC-1,◯ -GACGTGCCATGACATC-1,◯ -GACGTGCGTAAACACA-1,◯ -GACGTGCGTAATTGGA-1,◯ -GACGTGCGTACCAGTT-1,◯ -GACGTGCGTATAGTAG-1,◯ -GACGTGCGTTAAGATG-1,◯ -GACGTGCGTTTGTTTC-1,◯ -GACGTGCTCATTCACT-1,◯ -GACGTGCTCGGAAATA-1,◯ -GACGTTAAGGGCACTA-1,◯ -GACGTTAAGTGTACTC-1,◯ -GACGTTACACGAAGCA-1,◯ -GACGTTAGTAACGTTC-1,◯ -GACGTTAGTAAGAGAG-1,◯ -GACGTTAGTATATGAG-1,◯ -GACGTTATCGAGAGCA-1,◯ -GACGTTATCGTCCGTT-1,◯ -GACTAACAGCGTTGCC-1,◯ -GACTAACAGCTCCTTC-1,◯ -GACTAACAGGTAAACT-1,◯ -GACTAACAGTGATCGG-1,◯ -GACTAACCAGGTTTCA-1,◯ -GACTAACGTGACGCCT-1,◯ -GACTAACGTTCAGGCC-1,◯ -GACTAACGTTTAAGCC-1,◯ -GACTAACTCATCTGTT-1,◯ 
-GACTAACTCGTCGTTC-1,◯ -GACTACAAGAAACCTA-1,◯ -GACTACAAGCCTATGT-1,◯ -GACTACACACCTCGGA-1,◯ -GACTACACATCGGACC-1,◯ -GACTACAGTCGCCATG-1,◯ -GACTACAGTGATAAAC-1,◯ -GACTACAGTTCGAATC-1,◯ -GACTACATCAGCACAT-1,◯ -GACTACATCAGTGCAT-1,◯ -GACTACATCGAATGCT-1,◯ -GACTACATCTTACCGC-1,◯ -GACTGCGAGAGGGCTT-1,◯ -GACTGCGAGATGTCGG-1,◯ -GACTGCGAGATTACCC-1,◯ -GACTGCGAGCCAACAG-1,◯ -GACTGCGAGCCCAGCT-1,◯ -GACTGCGAGGACATTA-1,◯ -GACTGCGAGTGTACCT-1,◯ -GACTGCGCAACTGCGC-1,◯ -GACTGCGCAATCCGAT-1,◯ -GACTGCGCAATGGATA-1,◯ -GACTGCGCACAAGACG-1,◯ -GACTGCGCACAGAGGT-1,◯ -GACTGCGCACCTTGTC-1,◯ -GACTGCGCAGGGATTG-1,◯ -GACTGCGGTACCGGCT-1,◯ -GACTGCGGTCATATCG-1,◯ -GACTGCGGTCTGATCA-1,◯ -GACTGCGGTGTGACGA-1,◯ -GACTGCGTCCGCTGTT-1,◯ -GACTGCGTCCTGCAGG-1,◯ -GAGCAGAAGAAGGACA-1,◯ -GAGCAGAAGGAATTAC-1,◯ -GAGCAGAAGTCGATAA-1,◯ -GAGCAGACAGCTCCGA-1,◯ -GAGCAGAGTAAACACA-1,◯ -GAGCAGAGTAAAGGAG-1,◯ -GAGCAGAGTGTTGGGA-1,◯ -GAGCAGAGTTAGAACA-1,◯ -GAGCAGAGTTGATTGC-1,◯ -GAGCAGAGTTTGGGCC-1,◯ -GAGCAGATCCCATTAT-1,◯ -GAGCAGATCCTTTACA-1,◯ -GAGCAGATCGTGGTCG-1,◯ -GAGCAGATCTGTCCGT-1,◯ -GAGCAGATCTTTACGT-1,◯ -GAGGTGAAGGGTGTGT-1,◯ -GAGGTGAAGGTGGGTT-1,◯ -GAGGTGAAGTCATCCA-1,◯ -GAGGTGAAGTTTAGGA-1,◯ -GAGGTGACAAAGGTGC-1,◯ -GAGGTGACAATCACAC-1,◯ -GAGGTGAGTAAGTTCC-1,◯ -GAGGTGAGTAGGCTGA-1,◯ -GAGGTGAGTTAAAGAC-1,◯ -GAGGTGATCCAGAGGA-1,◯ -GAGGTGATCCCTTGCA-1,◯ -GAGGTGATCCGTAGGC-1,◯ -GAGGTGATCGTGTAGT-1,◯ -GAGTCCGAGGTTCCTA-1,◯ -GAGTCCGCAAGGTTCT-1,◯ -GAGTCCGCACCGGAAA-1,◯ -GAGTCCGCACTCTGTC-1,◯ -GAGTCCGCAGACAAAT-1,◯ -GAGTCCGGTACCGTTA-1,◯ -GAGTCCGGTAGAGCTG-1,◯ -GAGTCCGGTCCGTCAG-1,◯ -GAGTCCGGTGAGCGAT-1,◯ -GAGTCCGTCAAGAAGT-1,◯ -GAGTCCGTCAATCTCT-1,◯ -GAGTCCGTCCTGCAGG-1,◯ -GAGTCCGTCGAACTGT-1,◯ -GATCAGTAGCGCTTAT-1,◯ -GATCAGTAGTGCGTGA-1,◯ -GATCAGTCAAACAACA-1,◯ -GATCAGTCAGCCACCA-1,◯ -GATCAGTGTACAGTTC-1,◯ -GATCAGTGTACTTCTT-1,◯ -GATCAGTGTGCGGTAA-1,◯ -GATCAGTTCATGTCCC-1,◯ -GATCAGTTCTAACTGG-1,◯ -GATCGATAGACACTAA-1,◯ -GATCGATAGACAGGCT-1,◯ -GATCGATAGACTTGAA-1,◯ -GATCGATAGGATGGAA-1,◯ -GATCGATAGTGTCCCG-1,◯ -GATCGATCAAGTAATG-1,◯ -GATCGATCAATTCCTT-1,◯ -GATCGATCACTCAGGC-1,◯ 
-GATCGATCAGATGGCA-1,◯ -GATCGATCAGGTCCAC-1,◯ -GATCGATGTGTGAATA-1,◯ -GATCGATGTTAAGAAC-1,◯ -GATCGATTCTCGAGTA-1,◯ -GATCGATTCTGGGCCA-1,◯ -GATCGCGAGATCTGCT-1,◯ -GATCGCGAGGACATTA-1,◯ -GATCGCGAGGATATAC-1,◯ -GATCGCGCAAAGCAAT-1,◯ -GATCGCGGTAAGTGTA-1,◯ -GATCGCGGTGCCTGGT-1,◯ -GATCGCGGTTTGACAC-1,◯ -GATCGCGTCAGGTAAA-1,◯ -GATCGCGTCGATGAGG-1,◯ -GATCGCGTCGGTCCGA-1,◯ -GATCGCGTCTACGAGT-1,◯ -GATCGCGTCTGGTATG-1,◯ -GATCGTAAGAGGTAGA-1,◯ -GATCGTAAGCGATTCT-1,◯ -GATCGTAAGTGCCATT-1,◯ -GATCGTACAGCGAACA-1,◯ -GATCGTAGTACAGTGG-1,◯ -GATCGTAGTCCGCTGA-1,◯ -GATCGTAGTTTCGCTC-1,◯ -GATCGTATCGTACGGC-1,◯ -GATCGTATCGTAGGAG-1,◯ -GATCTAGAGACACGAC-1,◯ -GATCTAGAGAGACTTA-1,◯ -GATCTAGAGCGACGTA-1,◯ -GATCTAGAGTACGTAA-1,◯ -GATCTAGAGTGACTCT-1,◯ -GATCTAGCAATACGCT-1,◯ -GATCTAGCACACATGT-1,◯ -GATCTAGCATTGCGGC-1,◯ -GATCTAGGTACGCACC-1,◯ -GATCTAGGTATTACCG-1,◯ -GATCTAGGTTGGGACA-1,◯ -GATCTAGTCAAAGACA-1,◯ -GATCTAGTCGTACGGC-1,◯ -GATCTAGTCGTAGATC-1,◯ -GATCTAGTCTCTTGAT-1,◯ -GATCTAGTCTTCAACT-1,◯ -GATCTAGTCTTGTCAT-1,◯ -GATGAAAAGAAGGACA-1,◯ -GATGAAACACCATCCT-1,◯ -GATGAAACACCGAAAG-1,◯ -GATGAAACACGACGAA-1,◯ -GATGAAACAGTCTTCC-1,◯ -GATGAAACATCACAAC-1,◯ -GATGAAAGTAAGTGGC-1,◯ -GATGAAAGTTGATTGC-1,◯ -GATGAAAGTTGCGCAC-1,◯ -GATGAAATCAGCTGGC-1,◯ -GATGAAATCCGGCACA-1,◯ -GATGAAATCGTTACAG-1,◯ -GATGAAATCTCTGTCG-1,◯ -GATGAAATCTGGCGTG-1,◯ -GATGAGGAGAGGTACC-1,◯ -GATGAGGAGAGGTAGA-1,◯ -GATGAGGAGATCTGCT-1,◯ -GATGAGGAGTCATCCA-1,◯ -GATGAGGAGTCCCACG-1,◯ -GATGAGGAGTTTCCTT-1,◯ -GATGAGGCAAAGGCGT-1,◯ -GATGAGGCAACTTGAC-1,◯ -GATGAGGCAGCGTCCA-1,◯ -GATGAGGGTACAAGTA-1,◯ -GATGAGGGTCTTTCAT-1,◯ -GATGAGGGTGTTCTTT-1,◯ -GATGAGGGTTGGGACA-1,◯ -GATGAGGTCAACACAC-1,◯ -GATGAGGTCAAGGCTT-1,◯ -GATGAGGTCATCGCTC-1,◯ -GATGAGGTCTAACTTC-1,◯ -GATGCTAAGTGGAGAA-1,◯ -GATGCTACACGGCGTT-1,◯ -GATGCTACAGCCTATA-1,◯ -GATGCTACATATACGC-1,◯ -GATGCTAGTAGGCATG-1,◯ -GATGCTAGTGTCCTCT-1,◯ -GATGCTATCGGTTCGG-1,◯ -GATGCTATCTTTAGTC-1,◯ -GATTCAGAGACATAAC-1,◯ -GATTCAGAGCCCAATT-1,◯ -GATTCAGAGCGACGTA-1,◯ -GATTCAGCAACAACCT-1,◯ -GATTCAGCAGGGATTG-1,◯ -GATTCAGCAGTCGATT-1,◯ -GATTCAGCATGCTGGC-1,◯ 
-GATTCAGGTATCGCAT-1,◯ -GATTCAGTCAATCTCT-1,◯ -GATTCAGTCAGGCCCA-1,◯ -GATTCAGTCAGGTAAA-1,◯ -GATTCAGTCATGCAAC-1,◯ -GATTCAGTCCCTGACT-1,◯ -GATTCAGTCCTCATTA-1,◯ -GCAAACTAGAAGATTC-1,◯ -GCAAACTAGTCACGCC-1,◯ -GCAAACTCAGCTGTGC-1,◯ -GCAAACTGTTAAAGTG-1,◯ -GCAAACTGTTTACTCT-1,◯ -GCAAACTTCGCTGATA-1,◯ -GCAATCAAGTGCTGCC-1,◯ -GCAATCAAGTTAACGA-1,◯ -GCAATCACAAAGCAAT-1,◯ -GCAATCACAATGTTGC-1,◯ -GCAATCACATGCATGT-1,◯ -GCAATCATCTTGAGGT-1,◯ -GCACATAAGCGCTCCA-1,◯ -GCACATAAGTTACCCA-1,◯ -GCACATAAGTTAGGTA-1,◯ -GCACATAGTAAGTTCC-1,◯ -GCACATAGTATGCTTG-1,◯ -GCACATATCCCAAGAT-1,◯ -GCACATATCGCGTAGC-1,◯ -GCACATATCGTGGTCG-1,◯ -GCACATATCTACCTGC-1,◯ -GCACATATCTAGAGTC-1,◯ -GCACATATCTAGCACA-1,◯ -GCACATATCTGCCAGG-1,◯ -GCACTCTAGAACAACT-1,◯ -GCACTCTAGGTAGCCA-1,◯ -GCACTCTAGTAGATGT-1,◯ -GCACTCTCACGCTTTC-1,◯ -GCACTCTCATCCCATC-1,◯ -GCACTCTGTACTTAGC-1,◯ -GCACTCTGTCGAATCT-1,◯ -GCACTCTGTCGTTGTA-1,◯ -GCACTCTGTGACAAAT-1,◯ -GCACTCTTCACAACGT-1,◯ -GCAGCCAAGACACGAC-1,◯ -GCAGCCAAGCGCTTAT-1,◯ -GCAGCCAAGGACACCA-1,◯ -GCAGCCAAGGGTCGAT-1,◯ -GCAGCCAAGTTCGATC-1,◯ -GCAGCCACACACCGCA-1,◯ -GCAGCCACACAGAGGT-1,◯ -GCAGCCACAGGATCGA-1,◯ -GCAGCCACATCGTCGG-1,◯ -GCAGCCAGTACCATCA-1,◯ -GCAGCCAGTGCAGTAG-1,◯ -GCAGCCAGTGTGCCTG-1,◯ -GCAGCCAGTTCCGTCT-1,◯ -GCAGCCATCGGACAAG-1,◯ -GCAGCCATCTCTGTCG-1,◯ -GCAGTTAAGAAGGACA-1,◯ -GCAGTTAAGACGCACA-1,◯ -GCAGTTAAGGTGCTTT-1,◯ -GCAGTTAAGTCTTGCA-1,◯ -GCAGTTAAGTGATCGG-1,◯ -GCAGTTACACCAGGCT-1,◯ -GCAGTTACAGGGAGAG-1,◯ -GCAGTTAGTCGAATCT-1,◯ -GCAGTTAGTTAGTGGG-1,◯ -GCAGTTAGTTCCACGG-1,◯ -GCAGTTAGTTTAAGCC-1,◯ -GCAGTTATCAGCATGT-1,◯ -GCAGTTATCGGATGTT-1,◯ -GCAGTTATCTTCAACT-1,◯ -GCATACAAGCCCAATT-1,◯ -GCATACAAGCGATAGC-1,◯ -GCATACACAAATACAG-1,◯ -GCATACACAATCGGTT-1,◯ -GCATACACATCCAACA-1,◯ -GCATACAGTAGCTGCC-1,◯ -GCATACAGTCTCTCGT-1,◯ -GCATACAGTGAAGGCT-1,◯ -GCATGATAGAATGTGT-1,◯ -GCATGATAGTATGACA-1,◯ -GCATGATAGTTAGCGG-1,◯ -GCATGATCACACATGT-1,◯ -GCATGATCACGGCCAT-1,◯ -GCATGATCAGTTTACG-1,◯ -GCATGATCATAGAAAC-1,◯ -GCATGATCATTGGGCC-1,◯ -GCATGATGTCGCTTCT-1,◯ -GCATGATGTCTAGCGC-1,◯ -GCATGATGTGGTGTAG-1,◯ -GCATGATTCAGAGGTG-1,◯ 
-GCATGATTCATGTCTT-1,◯ -GCATGATTCCACTGGG-1,◯ -GCATGATTCCCATTTA-1,◯ -GCATGATTCGATAGAA-1,◯ -GCATGATTCGCAAGCC-1,◯ -GCATGCGCAAGCTGTT-1,◯ -GCATGCGCAGCTGTAT-1,◯ -GCATGCGCATCTACGA-1,◯ -GCATGCGGTCGGCACT-1,◯ -GCATGCGGTCGGCTCA-1,◯ -GCATGCGGTCTAAACC-1,◯ -GCATGCGTCCAACCAA-1,◯ -GCATGCGTCCCAACGG-1,◯ -GCATGTAAGAAGATTC-1,◯ -GCATGTAAGCTGAAAT-1,◯ -GCATGTAAGTGGCACA-1,◯ -GCATGTACAGTAGAGC-1,◯ -GCATGTACATAAGACA-1,◯ -GCATGTACATGAACCT-1,◯ -GCATGTAGTACTTGAC-1,◯ -GCATGTAGTGCGAAAC-1,◯ -GCATGTAGTTACAGAA-1,◯ -GCATGTATCCCATTAT-1,◯ -GCCAAATAGCACCGTC-1,◯ -GCCAAATAGTGCAAGC-1,◯ -GCCAAATAGTGTGAAT-1,◯ -GCCAAATCATATGCTG-1,◯ -GCCAAATGTCCCTTGT-1,◯ -GCCAAATGTCTTGCGG-1,◯ -GCCAAATTCACAGTAC-1,◯ -GCCAAATTCACGCATA-1,◯ -GCCAAATTCGAGAACG-1,◯ -GCCAAATTCGTCCAGG-1,◯ -GCCTCTAAGCCCTAAT-1,◯ -GCCTCTAAGGCGTACA-1,◯ -GCCTCTAAGGCTACGA-1,◯ -GCCTCTACACCTCGGA-1,◯ -GCCTCTACACGAAATA-1,◯ -GCCTCTACACGTCAGC-1,◯ -GCCTCTACATCCGCGA-1,◯ -GCCTCTAGTCGCCATG-1,◯ -GCCTCTAGTGCAGACA-1,◯ -GCCTCTATCCAAACAC-1,◯ -GCCTCTATCGGACAAG-1,◯ -GCCTCTATCTCGATGA-1,◯ -GCCTCTATCTTGCAAG-1,◯ -GCGACCAAGCGACGTA-1,◯ -GCGACCAAGTGGCACA-1,◯ -GCGACCACATGTCGAT-1,◯ -GCGACCAGTCTAGCGC-1,◯ -GCGACCAGTTTGTGTG-1,◯ -GCGACCATCAAAGTAG-1,◯ -GCGACCATCCACGAAT-1,◯ -GCGACCATCCGCAAGC-1,◯ -GCGAGAAAGACAAGCC-1,◯ -GCGAGAAAGAGATGAG-1,◯ -GCGAGAAAGCGTGAGT-1,◯ -GCGAGAAAGGCCCGTT-1,◯ -GCGAGAACACAGCCCA-1,◯ -GCGAGAACACGAGGTA-1,◯ -GCGAGAAGTAGGCTGA-1,◯ -GCGAGAAGTCAAAGCG-1,◯ -GCGAGAAGTCAGAAGC-1,◯ -GCGAGAAGTGGTGTAG-1,◯ -GCGAGAATCACAACGT-1,◯ -GCGAGAATCCGAACGC-1,◯ -GCGAGAATCCGGCACA-1,◯ -GCGAGAATCCTTGCCA-1,◯ -GCGAGAATCGAATGGG-1,◯ -GCGAGAATCGGCCGAT-1,◯ -GCGAGAATCTGATACG-1,◯ -GCGAGAATCTTGACGA-1,◯ -GCGCAACAGTAGCGGT-1,◯ -GCGCAACCAACAACCT-1,◯ -GCGCAACCACGTTGGC-1,◯ -GCGCAACCAGCTATTG-1,◯ -GCGCAACCAGGGTTAG-1,◯ -GCGCAACCAGTCACTA-1,◯ -GCGCAACGTAGCAAAT-1,◯ -GCGCAACGTTGAGTTC-1,◯ -GCGCAACTCACATGCA-1,◯ -GCGCAACTCTTCGGTC-1,◯ -GCGCAGTAGCCACTAT-1,◯ -GCGCAGTAGTGTCCCG-1,◯ -GCGCAGTCAATCCAAC-1,◯ -GCGCAGTCACCCATGG-1,◯ -GCGCAGTCATATGGTC-1,◯ -GCGCAGTGTAATAGCA-1,◯ -GCGCAGTGTAGCGTCC-1,◯ -GCGCAGTGTGTAATGA-1,◯ 
-GCGCAGTTCACTTCAT-1,◯ -GCGCAGTTCATAGCAC-1,◯ -GCGCCAAAGATCACGG-1,◯ -GCGCCAAAGGAGTTTA-1,◯ -GCGCCAAAGGATGGTC-1,◯ -GCGCCAAAGGCTAGAC-1,◯ -GCGCCAACACAACGTT-1,◯ -GCGCCAAGTACCGTAT-1,◯ -GCGCCAAGTAGTACCT-1,◯ -GCGCCAAGTCTAACGT-1,◯ -GCGCCAAGTCTCTCGT-1,◯ -GCGCCAAGTGTAATGA-1,◯ -GCGCCAATCCACTGGG-1,◯ -GCGCCAATCCGGGTGT-1,◯ -GCGCGATAGAAGGCCT-1,◯ -GCGCGATAGATCTGAA-1,◯ -GCGCGATAGGCACATG-1,◯ -GCGCGATAGTCGATAA-1,◯ -GCGCGATAGTTAACGA-1,◯ -GCGCGATAGTTACGGG-1,◯ -GCGCGATCAAGTCTGT-1,◯ -GCGCGATCACTATCTT-1,◯ -GCGCGATCATCCTAGA-1,◯ -GCGCGATGTACCTACA-1,◯ -GCGCGATGTCCAACTA-1,◯ -GCGCGATGTTCTGAAC-1,◯ -GCGCGATTCCACGTTC-1,◯ -GCGCGATTCCACTCCA-1,◯ -GCGCGATTCGACCAGC-1,◯ -GCGGGTTAGACATAAC-1,◯ -GCGGGTTAGATCGATA-1,◯ -GCGGGTTAGATTACCC-1,◯ -GCGGGTTAGCTAGGCA-1,◯ -GCGGGTTCAGGCGATA-1,◯ -GCGGGTTGTGAACCTT-1,◯ -GCGGGTTGTTCCACTC-1,◯ -GCGGGTTTCAAACCGT-1,◯ -GCGGGTTTCGGCGCTA-1,◯ -GCGGGTTTCTATCCCG-1,◯ -GCGGGTTTCTGGTGTA-1,◯ -GCTCCTAAGCAGCCTC-1,◯ -GCTCCTAAGCTCCTTC-1,◯ -GCTCCTACAGACAAGC-1,◯ -GCTCCTACAGGGAGAG-1,◯ -GCTCCTAGTACGCACC-1,◯ -GCTCCTAGTCTCTTAT-1,◯ -GCTCCTATCAGCTGGC-1,◯ -GCTCCTATCGCTAGCG-1,◯ -GCTCTGTAGAATAGGG-1,◯ -GCTCTGTAGCGACGTA-1,◯ -GCTCTGTCAGCTCGAC-1,◯ -GCTCTGTCATACAGCT-1,◯ -GCTCTGTCATCTGGTA-1,◯ -GCTCTGTCATGGTTGT-1,◯ -GCTCTGTGTAGCGTCC-1,◯ -GCTCTGTGTCTAGGTT-1,◯ -GCTCTGTTCGTCCGTT-1,◯ -GCTGCAGAGAGATGAG-1,◯ -GCTGCAGAGATATACG-1,◯ -GCTGCAGAGATCCGAG-1,◯ -GCTGCAGAGGCACATG-1,◯ -GCTGCAGAGTCGTTTG-1,◯ -GCTGCAGCAGGTTTCA-1,◯ -GCTGCAGGTACAGCAG-1,◯ -GCTGCAGGTATCACCA-1,◯ -GCTGCAGGTCACACGC-1,◯ -GCTGCAGGTCGGGTCT-1,◯ -GCTGCAGGTTCCGTCT-1,◯ -GCTGCAGTCAGAGACG-1,◯ -GCTGCGAAGAACAACT-1,◯ -GCTGCGAAGACATAAC-1,◯ -GCTGCGAAGAGACTTA-1,◯ -GCTGCGAAGTAGGTGC-1,◯ -GCTGCGAAGTTCGATC-1,◯ -GCTGCGACACGCTTTC-1,◯ -GCTGCGACAGTACACT-1,◯ -GCTGCGACATCACAAC-1,◯ -GCTGCGAGTACTCTCC-1,◯ -GCTGCGAGTGGCAAAC-1,◯ -GCTGCGATCATTCACT-1,◯ -GCTGCGATCCCTGACT-1,◯ -GCTGCGATCGAATGCT-1,◯ -GCTGCGATCGTGGGAA-1,◯ -GCTGCGATCGTTACAG-1,◯ -GCTGCGATCTCTAAGG-1,◯ -GCTGCTTAGAAACCAT-1,◯ -GCTGCTTAGGTGCAAC-1,◯ -GCTGCTTCAGCCAGAA-1,◯ -GCTGCTTCAGGAATCG-1,◯ -GCTGCTTCATACTACG-1,◯ 
-GCTGCTTTCATTGCCC-1,◯ -GCTGCTTTCCCAACGG-1,◯ -GCTGCTTTCTAACTGG-1,◯ -GCTGCTTTCTCGCATC-1,◯ -GCTGGGTAGGAGTTGC-1,◯ -GCTGGGTAGTTTCCTT-1,◯ -GCTGGGTCAACACGCC-1,◯ -GCTGGGTCAAGGACTG-1,◯ -GCTGGGTCAGCCTGTG-1,◯ -GCTGGGTCAGGAATCG-1,◯ -GCTGGGTGTCTCCCTA-1,◯ -GCTGGGTGTGCCTGGT-1,◯ -GCTGGGTGTGGGTATG-1,◯ -GCTGGGTTCATACGGT-1,◯ -GCTTCCAAGATATACG-1,◯ -GCTTCCAAGGGAACGG-1,◯ -GCTTCCACAGGATTGG-1,◯ -GCTTCCATCCTTTACA-1,◯ -GCTTCCATCTCCTATA-1,◯ -GCTTGAAAGACTGGGT-1,◯ -GCTTGAAAGGAGCGTT-1,◯ -GCTTGAACACGGTAAG-1,◯ -GCTTGAACAGTAACGG-1,◯ -GCTTGAACAGTTCATG-1,◯ -GCTTGAACAGTTTACG-1,◯ -GCTTGAACATGCAATC-1,◯ -GCTTGAACATTCCTCG-1,◯ -GCTTGAAGTAGAGCTG-1,◯ -GCTTGAAGTCGCGGTT-1,◯ -GCTTGAAGTGGCTCCA-1,◯ -GCTTGAATCTTGTACT-1,◯ -GGAAAGCAGCAATATG-1,◯ -GGAAAGCAGGAATTAC-1,◯ -GGAAAGCAGGAGTAGA-1,◯ -GGAAAGCGTAGAAAGG-1,◯ -GGAAAGCGTCGCGGTT-1,◯ -GGAACTTAGCGTGAGT-1,◯ -GGAACTTAGCTTCGCG-1,◯ -GGAACTTCAATGTTGC-1,◯ -GGAACTTCAGGGATTG-1,◯ -GGAACTTGTAGTACCT-1,◯ -GGAACTTGTCACACGC-1,◯ -GGAACTTGTCTTGATG-1,◯ -GGAACTTTCATATCGG-1,◯ -GGAACTTTCGAACGGA-1,◯ -GGAACTTTCTTATCTG-1,◯ -GGAATAAAGAACTGTA-1,◯ -GGAATAAAGGGATACC-1,◯ -GGAATAAAGTGTTGAA-1,◯ -GGAATAACAGCGTAAG-1,◯ -GGAATAAGTAAGGGCT-1,◯ -GGAATAAGTACAGTGG-1,◯ -GGAATAAGTGAGTATA-1,◯ -GGAATAATCAGCGATT-1,◯ -GGAATAATCATGCAAC-1,◯ -GGAATAATCCCTTGTG-1,◯ -GGACAAGAGCAAATCA-1,◯ -GGACAAGAGCCGGTAA-1,◯ -GGACAAGCAGTGAGTG-1,◯ -GGACAAGCATCCAACA-1,◯ -GGACAAGCATCCGGGT-1,◯ -GGACAAGCATCCTAGA-1,◯ -GGACAAGGTAAGGGAA-1,◯ -GGACAAGGTATATGAG-1,◯ -GGACAAGGTGTAACGG-1,◯ -GGACAAGGTGTGACCC-1,◯ -GGACAAGTCAGCACAT-1,◯ -GGACAAGTCCGCAAGC-1,◯ -GGACAAGTCCTAGAAC-1,◯ -GGACAAGTCGCCTGAG-1,◯ -GGACAAGTCTAACCGA-1,◯ -GGACAAGTCTAACGGT-1,◯ -GGACAGAAGTCAAGCG-1,◯ -GGACAGACAAAGTCAA-1,◯ -GGACAGACATCGGGTC-1,◯ -GGACAGACATCGGTTA-1,◯ -GGACAGAGTACCGTTA-1,◯ -GGACAGAGTCGTTGTA-1,◯ -GGACAGAGTGCCTTGG-1,◯ -GGACAGATCACGAAGG-1,◯ -GGACAGATCACGACTA-1,◯ -GGACAGATCACGGTTA-1,◯ -GGACAGATCTGCGGCA-1,◯ -GGACAGATCTTCGAGA-1,◯ -GGACATTAGGGTTCCC-1,◯ -GGACATTCAACACCCG-1,◯ -GGACATTCACACCGCA-1,◯ -GGACATTCACCCATGG-1,◯ -GGACATTCAGTACACT-1,◯ -GGACATTCATGTTGAC-1,◯ 
-GGACATTGTATAAACG-1,◯ -GGACATTGTGCTAGCC-1,◯ -GGACATTGTTCCACAA-1,◯ -GGACATTGTTTCCACC-1,◯ -GGACATTTCGCTGATA-1,◯ -GGACGTCAGAAGGTGA-1,◯ -GGACGTCAGAATCTCC-1,◯ -GGACGTCAGACGCTTT-1,◯ -GGACGTCAGTACATGA-1,◯ -GGACGTCAGTGAAGAG-1,◯ -GGACGTCCAAGCTGTT-1,◯ -GGACGTCCACAAGCCC-1,◯ -GGACGTCCAGCTGTGC-1,◯ -GGACGTCCATAGGATA-1,◯ -GGACGTCCATGTTCCC-1,◯ -GGACGTCCATTGCGGC-1,◯ -GGACGTCGTAAGGGAA-1,◯ -GGACGTCTCAACCATG-1,◯ -GGACGTCTCACCACCT-1,◯ -GGACGTCTCATTGCGA-1,◯ -GGAGCAAAGCGGATCA-1,◯ -GGAGCAAAGGCTAGGT-1,◯ -GGAGCAAAGGTGTTAA-1,◯ -GGAGCAACAGTAACGG-1,◯ -GGAGCAAGTACCTACA-1,◯ -GGAGCAAGTAGCAAAT-1,◯ -GGAGCAAGTCCCGACA-1,◯ -GGAGCAAGTGGACGAT-1,◯ -GGAGCAAGTTAAGAAC-1,◯ -GGAGCAAGTTAGATGA-1,◯ -GGAGCAAGTTGTACAC-1,◯ -GGAGCAATCATTATCC-1,◯ -GGAGCAATCGGAAACG-1,◯ -GGATGTTAGAAACGCC-1,◯ -GGATGTTAGACTTGAA-1,◯ -GGATGTTAGCTAGTCT-1,◯ -GGATGTTAGCTGCCCA-1,◯ -GGATGTTAGGGCATGT-1,◯ -GGATGTTCAATGTTGC-1,◯ -GGATGTTCACTGAAGG-1,◯ -GGATGTTCACTTAACG-1,◯ -GGATGTTCAGCTGCAC-1,◯ -GGATGTTCAGTGGAGT-1,◯ -GGATGTTGTCTCTTAT-1,◯ -GGATGTTGTTCAGTAC-1,◯ -GGATGTTTCTCTTATG-1,◯ -GGATGTTTCTGGTGTA-1,◯ -GGATTACAGCCTTGAT-1,◯ -GGATTACCAGAGCCAA-1,◯ -GGATTACCAGCTATTG-1,◯ -GGATTACCATGGTCAT-1,◯ -GGATTACGTACTCAAC-1,◯ -GGATTACGTATATGGA-1,◯ -GGATTACGTGAGTATA-1,◯ -GGATTACGTTCCACTC-1,◯ -GGATTACTCACAGTAC-1,◯ -GGATTACTCGCATGGC-1,◯ -GGATTACTCTTCTGGC-1,◯ -GGCAATTAGCTAGTTC-1,◯ -GGCAATTAGTGTGAAT-1,◯ -GGCAATTCACCACCAG-1,◯ -GGCAATTGTAGTGAAT-1,◯ -GGCAATTTCAGTGCAT-1,◯ -GGCAATTTCGAATGGG-1,◯ -GGCAATTTCTGATTCT-1,◯ -GGCAATTTCTTGTATC-1,◯ -GGCCGATAGAGAGCTC-1,◯ -GGCCGATAGAGTGACC-1,◯ -GGCCGATAGGTTACCT-1,◯ -GGCCGATCACAACGTT-1,◯ -GGCCGATCAGGGTACA-1,◯ -GGCCGATCATGGTCTA-1,◯ -GGCCGATTCCTCTAGC-1,◯ -GGCGACTAGATATACG-1,◯ -GGCGACTAGGACAGCT-1,◯ -GGCGACTAGGAGCGTT-1,◯ -GGCGACTAGGCAAAGA-1,◯ -GGCGACTAGTCGTTTG-1,◯ -GGCGACTCACATTAGC-1,◯ -GGCGACTCACGACTCG-1,◯ -GGCGACTCACGTTGGC-1,◯ -GGCGACTCACTGCCAG-1,◯ -GGCGACTCATGAAGTA-1,◯ -GGCGACTGTGGTTTCA-1,◯ -GGCGACTGTTGTCTTT-1,◯ -GGCGACTTCCAAAGTC-1,◯ -GGCGACTTCCCTAACC-1,◯ -GGCGTGTAGTCCTCCT-1,◯ -GGCGTGTAGTTTGCGT-1,◯ -GGCGTGTCAGATTGCT-1,◯ 
-GGCGTGTCATTAGCCA-1,◯ -GGCGTGTTCACCACCT-1,◯ -GGCGTGTTCCATGAGT-1,◯ -GGCGTGTTCTACTTAC-1,◯ -GGCGTGTTCTTCTGGC-1,◯ -GGCTCGAAGGGTATCG-1,◯ -GGCTCGAAGGTGACCA-1,◯ -GGCTCGACAGCTGTAT-1,◯ -GGCTCGACATACTCTT-1,◯ -GGCTCGACATGTAGTC-1,◯ -GGCTCGAGTACCGTAT-1,◯ -GGCTCGAGTATATGAG-1,◯ -GGCTCGAGTCAGATAA-1,◯ -GGCTCGAGTGATAAAC-1,◯ -GGCTCGATCAGGTTCA-1,◯ -GGCTCGATCGATAGAA-1,◯ -GGCTCGATCGCCTGTT-1,◯ -GGCTGGTAGGGAGTAA-1,◯ -GGCTGGTCAAGACGTG-1,◯ -GGCTGGTCAGACACTT-1,◯ -GGCTGGTCAGTGGGAT-1,◯ -GGCTGGTGTACTCTCC-1,◯ -GGCTGGTGTACTTCTT-1,◯ -GGCTGGTGTAGAGCTG-1,◯ -GGCTGGTGTCGCGGTT-1,◯ -GGCTGGTGTCTTGCGG-1,◯ -GGCTGGTGTGCGAAAC-1,◯ -GGCTGGTTCAACGGCC-1,◯ -GGCTGGTTCTACTTAC-1,◯ -GGCTGGTTCTGTGCAA-1,◯ -GGGAATGAGGCTAGAC-1,◯ -GGGAATGAGTCTTGCA-1,◯ -GGGAATGAGTGCGATG-1,◯ -GGGAATGCAAGTACCT-1,◯ -GGGAATGCAATAAGCA-1,◯ -GGGAATGCAATCCAAC-1,◯ -GGGAATGCAGATTGCT-1,◯ -GGGAATGCATATGAGA-1,◯ -GGGAATGGTAGGCATG-1,◯ -GGGAATGGTGAGTGAC-1,◯ -GGGAATGGTGTATGGG-1,◯ -GGGAATGGTTGTACAC-1,◯ -GGGAATGGTTGTCTTT-1,◯ -GGGAATGTCCAAAGTC-1,◯ -GGGAATGTCTGTCTCG-1,◯ -GGGAATGTCTTACCTA-1,◯ -GGGAATGTCTTGAGAC-1,◯ -GGGAATGTCTTGTATC-1,◯ -GGGACCTAGAGTACCG-1,◯ -GGGACCTAGATATACG-1,◯ -GGGACCTAGCTAAGAT-1,◯ -GGGACCTAGGATGGTC-1,◯ -GGGACCTAGTGGGTTG-1,◯ -GGGACCTCAGCGTCCA-1,◯ -GGGACCTCATTCACTT-1,◯ -GGGACCTGTGTTTGTG-1,◯ -GGGACCTTCACTTACT-1,◯ -GGGACCTTCGTTGACA-1,◯ -GGGAGATAGGATGGTC-1,◯ -GGGAGATAGTTCGCGC-1,◯ -GGGAGATCAAGAGGCT-1,◯ -GGGAGATCACGCGAAA-1,◯ -GGGAGATCAGGATTGG-1,◯ -GGGAGATCATTCCTGC-1,◯ -GGGAGATGTTACGCGC-1,◯ -GGGAGATTCAGCTGGC-1,◯ -GGGAGATTCAGTTGAC-1,◯ -GGGAGATTCATGGTCA-1,◯ -GGGAGATTCCCATTAT-1,◯ -GGGAGATTCCGCATAA-1,◯ -GGGAGATTCGTTTAGG-1,◯ -GGGAGATTCTGGCGTG-1,◯ -GGGATGAAGGTCATCT-1,◯ -GGGATGAAGTATGACA-1,◯ -GGGATGACAAAGGTGC-1,◯ -GGGATGACACTCGACG-1,◯ -GGGATGACATCGTCGG-1,◯ -GGGATGACATCTACGA-1,◯ -GGGATGAGTGAGTATA-1,◯ -GGGATGAGTTAGAACA-1,◯ -GGGCACTAGACTAGAT-1,◯ -GGGCACTAGGGCTTCC-1,◯ -GGGCACTAGTGTCTCA-1,◯ -GGGCACTCACAGGCCT-1,◯ -GGGCACTCACCGTTGG-1,◯ -GGGCACTGTATAGGGC-1,◯ -GGGCACTGTATGAATG-1,◯ -GGGCACTGTTAGGGTG-1,◯ -GGGCACTTCCCAGGTG-1,◯ -GGGCACTTCGACCAGC-1,◯ 
-GGGCACTTCTCAAGTG-1,◯ -GGGCACTTCTGCTTGC-1,◯ -GGGCACTTCTTTACGT-1,◯ -GGGCATCAGCACCGTC-1,◯ -GGGCATCAGCTCCTCT-1,◯ -GGGCATCAGGGTTCCC-1,◯ -GGGCATCAGTGATCGG-1,◯ -GGGCATCAGTTCGCAT-1,◯ -GGGCATCCAATAGAGT-1,◯ -GGGCATCCAATGTTGC-1,◯ -GGGCATCCACAAGACG-1,◯ -GGGCATCCAGCTTCGG-1,◯ -GGGCATCCATGAAGTA-1,◯ -GGGCATCGTAGGCTGA-1,◯ -GGGCATCGTGTTGGGA-1,◯ -GGGCATCGTTTAAGCC-1,◯ -GGGCATCTCAGCGACC-1,◯ -GGGCATCTCATCGCTC-1,◯ -GGGCATCTCGCTTGTC-1,◯ -GGGTCTGCAAACAACA-1,◯ -GGGTCTGCACAGACTT-1,◯ -GGGTCTGCACCCATTC-1,◯ -GGGTCTGCACGACGAA-1,◯ -GGGTCTGCATACCATG-1,◯ -GGGTCTGCATGGTCTA-1,◯ -GGGTCTGGTAGCGATG-1,◯ -GGGTCTGGTGTAACGG-1,◯ -GGGTCTGTCAGTACGT-1,◯ -GGGTTGCAGAGGTACC-1,◯ -GGGTTGCAGGACTGGT-1,◯ -GGGTTGCAGTGTTTGC-1,◯ -GGGTTGCCACTAAGTC-1,◯ -GGGTTGCCATCACCCT-1,◯ -GGGTTGCCATTTCAGG-1,◯ -GGGTTGCGTACAGTTC-1,◯ -GGGTTGCTCCCAAGTA-1,◯ -GGGTTGCTCCTTTACA-1,◯ -GGTATTGAGAAGAAGC-1,◯ -GGTATTGAGCTCCCAG-1,◯ -GGTATTGAGTTTCCTT-1,◯ -GGTATTGGTGATAAAC-1,◯ -GGTATTGGTGGTGTAG-1,◯ -GGTATTGTCAGGCGAA-1,◯ -GGTATTGTCATGCATG-1,◯ -GGTATTGTCCAGTATG-1,◯ -GGTATTGTCGATCCCT-1,◯ -GGTATTGTCGGATGTT-1,◯ -GGTATTGTCTAACTCT-1,◯ -GGTATTGTCTTGAGAC-1,◯ -GGTATTGTCTTGTATC-1,◯ -GGTGAAGAGATATGCA-1,◯ -GGTGAAGAGCTACCGC-1,◯ -GGTGAAGAGCTCCTCT-1,◯ -GGTGAAGAGTTAGCGG-1,◯ -GGTGAAGCACACATGT-1,◯ -GGTGAAGCACCCATTC-1,◯ -GGTGAAGCACCGATAT-1,◯ -GGTGAAGCACGGTAAG-1,◯ -GGTGAAGCACTTAACG-1,◯ -GGTGAAGCAGACGCTC-1,◯ -GGTGAAGCATATGCTG-1,◯ -GGTGAAGGTAGGCATG-1,◯ -GGTGAAGGTCCTAGCG-1,◯ -GGTGAAGGTCTCCCTA-1,◯ -GGTGAAGGTGCCTGTG-1,◯ -GGTGAAGGTTAAAGAC-1,◯ -GGTGAAGTCATTTGGG-1,◯ -GGTGAAGTCGCGATCG-1,◯ -GGTGAAGTCTAACCGA-1,◯ -GGTGCGTAGAATAGGG-1,◯ -GGTGCGTAGTTCGCGC-1,◯ -GGTGCGTGTGACGCCT-1,◯ -GGTGCGTGTTTAGGAA-1,◯ -GGTGCGTTCATTGCGA-1,◯ -GGTGCGTTCTCCTATA-1,◯ -GGTGTTAAGACTAGAT-1,◯ -GGTGTTAAGTGGGTTG-1,◯ -GGTGTTACAAGGACTG-1,◯ -GGTGTTACAATCCGAT-1,◯ -GGTGTTACATTGAGCT-1,◯ -GGTGTTAGTCCGAGTC-1,◯ -GGTGTTATCACGCATA-1,◯ -GGTGTTATCCGCAAGC-1,◯ -GGTGTTATCCGCTGTT-1,◯ -GGTGTTATCGCGATCG-1,◯ -GGTGTTATCGTTTAGG-1,◯ -GGTGTTATCTCGGACG-1,◯ -GTAACGTAGACTAAGT-1,◯ -GTAACGTAGGCTAGGT-1,◯ -GTAACGTAGTACGTTC-1,◯ 
-GTAACGTAGTGCAAGC-1,◯ -GTAACGTCAAGGTTCT-1,◯ -GTAACGTCAAGTAGTA-1,◯ -GTAACGTGTTTGACAC-1,◯ -GTAACGTTCATCTGCC-1,◯ -GTAACGTTCCCGGATG-1,◯ -GTAACGTTCCCTAATT-1,◯ -GTAACGTTCGTCCGTT-1,◯ -GTAACTGAGAATTCCC-1,◯ -GTAACTGAGCCCAGCT-1,◯ -GTAACTGCAGCTGCTG-1,◯ -GTAACTGCATGCCTAA-1,◯ -GTAACTGGTAACGTTC-1,◯ -GTAACTGGTAAGTTCC-1,◯ -GTAACTGGTAGGGACT-1,◯ -GTAACTGGTCGAAAGC-1,◯ -GTAACTGGTGCATCTA-1,◯ -GTAACTGGTTAAGACA-1,◯ -GTAACTGTCATCATTC-1,◯ -GTAACTGTCGGAATCT-1,◯ -GTAACTGTCTAACGGT-1,◯ -GTACGTAAGAAACCTA-1,◯ -GTACGTAAGACCCACC-1,◯ -GTACGTACAACGCACC-1,◯ -GTACGTACAATGTAAG-1,◯ -GTACGTAGTAGGACAC-1,◯ -GTACGTAGTGGCCCTA-1,◯ -GTACGTAGTTATGTGC-1,◯ -GTACGTATCAAACCGT-1,◯ -GTACGTATCAACGGCC-1,◯ -GTACGTATCATAAAGG-1,◯ -GTACGTATCCCGGATG-1,◯ -GTACGTATCTAACTTC-1,◯ -GTACGTATCTATCCTA-1,◯ -GTACTCCAGGTGTGGT-1,◯ -GTACTCCAGTTGCAGG-1,◯ -GTACTCCCAAGCGATG-1,◯ -GTACTCCGTGTTTGGT-1,◯ -GTACTCCGTTCTCATT-1,◯ -GTACTCCTCCGAGCCA-1,◯ -GTACTCCTCGGTTAAC-1,◯ -GTACTCCTCTACTTAC-1,◯ -GTACTTTAGCGTCTAT-1,◯ -GTACTTTAGGCGCTCT-1,◯ -GTACTTTAGTGATCGG-1,◯ -GTACTTTCAAACGTGG-1,◯ -GTACTTTCAGCAGTTT-1,◯ -GTACTTTGTGTGTGCC-1,◯ -GTACTTTGTTATCCGA-1,◯ -GTACTTTTCACCAGGC-1,◯ -GTACTTTTCAGCATGT-1,◯ -GTACTTTTCTGCGGCA-1,◯ -GTAGGCCAGAAGGGTA-1,◯ -GTAGGCCAGCAGCCTC-1,◯ -GTAGGCCAGGCTAGAC-1,◯ -GTAGGCCAGTATCTCG-1,◯ -GTAGGCCCAGTCGATT-1,◯ -GTAGGCCCATGAGCGA-1,◯ -GTAGGCCCATGCCCGA-1,◯ -GTAGGCCGTCTACCTC-1,◯ -GTAGGCCGTCTCTCGT-1,◯ -GTAGGCCTCTACGAGT-1,◯ -GTAGGCCTCTTCAACT-1,◯ -GTAGTCAAGACGCAAC-1,◯ -GTAGTCAAGTAGCGGT-1,◯ -GTAGTCACAGGTCTCG-1,◯ -GTAGTCACAGTCGTGC-1,◯ -GTAGTCAGTATGAAAC-1,◯ -GTAGTCAGTCTTTCAT-1,◯ -GTAGTCATCAACCATG-1,◯ -GTAGTCATCATCTGTT-1,◯ -GTAGTCATCCACGAAT-1,◯ -GTAGTCATCTAACGGT-1,◯ -GTATCTTAGAGGTTGC-1,◯ -GTATCTTAGCTAAACA-1,◯ -GTATCTTAGGTGACCA-1,◯ -GTATCTTAGTTGCAGG-1,◯ -GTATCTTCAAGTCTAC-1,◯ -GTATCTTCAGCCACCA-1,◯ -GTATCTTCATAACCTG-1,◯ -GTATCTTCATTGGTAC-1,◯ -GTATCTTGTGGTGTAG-1,◯ -GTATCTTGTTACGACT-1,◯ -GTATCTTGTTGTACAC-1,◯ -GTATCTTTCAGAAATG-1,◯ -GTATCTTTCCTTTACA-1,◯ -GTATCTTTCTTATCTG-1,◯ -GTATCTTTCTTGAGGT-1,◯ -GTATTCTAGGAATCGC-1,◯ -GTATTCTAGGCCCTTG-1,◯ 
-GTATTCTAGGCTCAGA-1,◯ -GTATTCTCAATGGTCT-1,◯ -GTATTCTCACATGGGA-1,◯ -GTATTCTCACATGTGT-1,◯ -GTATTCTCACGGCGTT-1,◯ -GTATTCTCAGAGTGTG-1,◯ -GTATTCTCAGGATTGG-1,◯ -GTATTCTGTGTTTGGT-1,◯ -GTATTCTGTTTGTGTG-1,◯ -GTATTCTTCAATACCG-1,◯ -GTATTCTTCAGAGACG-1,◯ -GTATTCTTCCTACAGA-1,◯ -GTATTCTTCCTGTAGA-1,◯ -GTATTCTTCGAGGTAG-1,◯ -GTCAAGTCAAAGCAAT-1,◯ -GTCAAGTCAAGCTGAG-1,◯ -GTCAAGTCAGTTCCCT-1,◯ -GTCAAGTGTCATATCG-1,◯ -GTCAAGTGTGACAAAT-1,◯ -GTCAAGTTCACTCCTG-1,◯ -GTCAAGTTCCTCCTAG-1,◯ -GTCAAGTTCCTTGGTC-1,◯ -GTCAAGTTCGCCTGTT-1,◯ -GTCAAGTTCGGAGCAA-1,◯ -GTCACAAAGCCCTAAT-1,◯ -GTCACAAAGTGTTAGA-1,◯ -GTCACAACATCACCCT-1,◯ -GTCACAACATTGTGCA-1,◯ -GTCACAAGTCAGCTAT-1,◯ -GTCACAAGTGTGACGA-1,◯ -GTCACAATCGCCTGAG-1,◯ -GTCACAATCGTTACAG-1,◯ -GTCACAATCTAGAGTC-1,◯ -GTCACGGAGATCCGAG-1,◯ -GTCACGGAGCTCAACT-1,◯ -GTCACGGCAGCCAGAA-1,◯ -GTCACGGCAGGAACGT-1,◯ -GTCACGGCATTGAGCT-1,◯ -GTCACGGGTACGCACC-1,◯ -GTCACGGGTAGCAAAT-1,◯ -GTCACGGGTATATCCG-1,◯ -GTCACGGGTCAGCTAT-1,◯ -GTCACGGGTCTTCGTC-1,◯ -GTCACGGGTCTTTCAT-1,◯ -GTCACGGGTTCTGGTA-1,◯ -GTCACGGTCTACCAGA-1,◯ -GTCACGGTCTCAAACG-1,◯ -GTCATTTAGAGCAATT-1,◯ -GTCATTTAGGGAGTAA-1,◯ -GTCATTTCAGGAACGT-1,◯ -GTCATTTGTAAACCTC-1,◯ -GTCATTTGTACAAGTA-1,◯ -GTCATTTGTCATGCCG-1,◯ -GTCATTTGTCTCTTTA-1,◯ -GTCATTTGTTCAACCA-1,◯ -GTCATTTTCATGGTCA-1,◯ -GTCATTTTCTGATTCT-1,◯ -GTCCTCAAGTAAGTAC-1,◯ -GTCCTCACAGGGATTG-1,◯ -GTCCTCAGTCAACTGT-1,◯ -GTCCTCAGTCCATGAT-1,◯ -GTCCTCAGTTAGTGGG-1,◯ -GTCCTCATCCTTTACA-1,◯ -GTCCTCATCGGCGCTA-1,◯ -GTCCTCATCTCATTCA-1,◯ -GTCGGGTAGATGCCAG-1,◯ -GTCGGGTAGCAATATG-1,◯ -GTCGGGTAGTCTCCTC-1,◯ -GTCGGGTAGTGTGAAT-1,◯ -GTCGGGTCAACAACCT-1,◯ -GTCGGGTCACCACCAG-1,◯ -GTCGGGTCACTAAGTC-1,◯ -GTCGGGTCAGACGTAG-1,◯ -GTCGGGTGTAAGTGGC-1,◯ -GTCGGGTGTAGGGTAC-1,◯ -GTCGGGTGTGAAATCA-1,◯ -GTCGGGTTCCCTCAGT-1,◯ -GTCGGGTTCCTTGACC-1,◯ -GTCGGGTTCTTGTATC-1,◯ -GTCGTAAAGCCGATTT-1,◯ -GTCGTAAAGGCTATCT-1,◯ -GTCGTAAAGTACGCCC-1,◯ -GTCGTAAAGTGCCAGA-1,◯ -GTCGTAACAATAGAGT-1,◯ -GTCGTAACACACCGAC-1,◯ -GTCGTAACACGTTGGC-1,◯ -GTCGTAAGTCCTCTTG-1,◯ -GTCGTAAGTTCCACTC-1,◯ -GTCTCGTAGATATGCA-1,◯ -GTCTCGTAGCGATGAC-1,◯ 
-GTCTCGTAGCGTCTAT-1,◯ -GTCTCGTAGGCTAGAC-1,◯ -GTCTCGTAGGGCTTGA-1,◯ -GTCTCGTCAAATCCGT-1,◯ -GTCTCGTCAAGCCGTC-1,◯ -GTCTCGTCAGCTGCAC-1,◯ -GTCTCGTGTAACGACG-1,◯ -GTCTCGTTCCCGACTT-1,◯ -GTCTCGTTCGGGAGTA-1,◯ -GTCTCGTTCGGTCTAA-1,◯ -GTCTCGTTCTTGGGTA-1,◯ -GTCTTCGAGAGCCTAG-1,◯ -GTCTTCGAGCGTCTAT-1,◯ -GTCTTCGAGCTAGTTC-1,◯ -GTCTTCGAGTTAGGTA-1,◯ -GTCTTCGCAGTGAGTG-1,◯ -GTCTTCGCATTGTGCA-1,◯ -GTCTTCGGTAGCGTGA-1,◯ -GTCTTCGGTAGGCTGA-1,◯ -GTCTTCGGTCTTTCAT-1,◯ -GTCTTCGGTGATGCCC-1,◯ -GTCTTCGGTTTAGGAA-1,◯ -GTCTTCGGTTTCCACC-1,◯ -GTCTTCGTCCAACCAA-1,◯ -GTCTTCGTCGCAAGCC-1,◯ -GTCTTCGTCTAAGCCA-1,◯ -GTCTTCGTCTCCAGGG-1,◯ -GTGAAGGAGACATAAC-1,◯ -GTGAAGGAGAGATGAG-1,◯ -GTGAAGGAGCTGTCTA-1,◯ -GTGAAGGCAACAACCT-1,◯ -GTGAAGGCAATCGGTT-1,◯ -GTGAAGGCAGGGCATA-1,◯ -GTGAAGGGTCAATGTC-1,◯ -GTGAAGGGTCTCATCC-1,◯ -GTGAAGGTCAGTCAGT-1,◯ -GTGAAGGTCCTAGAAC-1,◯ -GTGCAGCAGAAGGCCT-1,◯ -GTGCAGCCAAAGAATC-1,◯ -GTGCAGCGTACCGCTG-1,◯ -GTGCAGCTCAGGTTCA-1,◯ -GTGCATAAGAACAATC-1,◯ -GTGCATAAGAATAGGG-1,◯ -GTGCATAAGAGACTAT-1,◯ -GTGCATAAGGTGACCA-1,◯ -GTGCATAAGTAGATGT-1,◯ -GTGCATAAGTTGAGAT-1,◯ -GTGCATACAAGAGTCG-1,◯ -GTGCATACATGCCTAA-1,◯ -GTGCATACATGCTGGC-1,◯ -GTGCATACATTTGCCC-1,◯ -GTGCATAGTAAACGCG-1,◯ -GTGCATAGTAATAGCA-1,◯ -GTGCATAGTCATGCAT-1,◯ -GTGCATAGTTCAGGCC-1,◯ -GTGCATATCAAAGTAG-1,◯ -GTGCATATCAAGATCC-1,◯ -GTGCATATCATCTGTT-1,◯ -GTGCATATCGCAGGCT-1,◯ -GTGCATATCTGAAAGA-1,◯ -GTGCATATCTTTACAC-1,◯ -GTGCGGTAGCAGGTCA-1,◯ -GTGCGGTAGGCTCATT-1,◯ -GTGCGGTAGGGTGTGT-1,◯ -GTGCGGTCAGGATTGG-1,◯ -GTGCGGTCATACGCCG-1,◯ -GTGCGGTCATGGAATA-1,◯ -GTGCGGTCATTCCTGC-1,◯ -GTGCGGTGTAGTACCT-1,◯ -GTGCGGTGTCACACGC-1,◯ -GTGCGGTGTGTGCCTG-1,◯ -GTGCGGTTCATGTCCC-1,◯ -GTGCGGTTCATTCACT-1,◯ -GTGCGGTTCTCTGCTG-1,◯ -GTGCGGTTCTTCTGGC-1,◯ -GTGCTTCCATAGTAAG-1,◯ -GTGCTTCGTGAGGGTT-1,◯ -GTGCTTCGTGCACTTA-1,◯ -GTGCTTCTCGCATGGC-1,◯ -GTGGGTCAGAGCTTCT-1,◯ -GTGGGTCCAGCATACT-1,◯ -GTGGGTCCAGGAACGT-1,◯ -GTGGGTCGTAGATTAG-1,◯ -GTGGGTCGTGGGTATG-1,◯ -GTGTGCGAGAACTGTA-1,◯ -GTGTGCGAGAGTACAT-1,◯ -GTGTGCGAGCTAACAA-1,◯ -GTGTGCGCAACGCACC-1,◯ -GTGTGCGGTCGACTGC-1,◯ -GTGTGCGTCACCAGGC-1,◯ 
-GTGTGCGTCCGCATCT-1,◯ -GTGTGCGTCGGCCGAT-1,◯ -GTGTGCGTCTAAGCCA-1,◯ -GTGTTAGAGCTCCCAG-1,◯ -GTGTTAGAGGCGTACA-1,◯ -GTGTTAGCACGACGAA-1,◯ -GTGTTAGCACTGTGTA-1,◯ -GTGTTAGCAGACAAAT-1,◯ -GTGTTAGCAGCTCCGA-1,◯ -GTGTTAGGTCATGCCG-1,◯ -GTGTTAGGTGGCCCTA-1,◯ -GTGTTAGTCAACGGCC-1,◯ -GTGTTAGTCATACGGT-1,◯ -GTGTTAGTCATGTCTT-1,◯ -GTGTTAGTCCGCAGTG-1,◯ -GTGTTAGTCCGGCACA-1,◯ -GTGTTAGTCCTGCTTG-1,◯ -GTTAAGCAGGCAGTCA-1,◯ -GTTAAGCAGGGTGTTG-1,◯ -GTTAAGCAGTGACATA-1,◯ -GTTAAGCAGTTTCCTT-1,◯ -GTTAAGCAGTTTGCGT-1,◯ -GTTAAGCCAAAGTCAA-1,◯ -GTTAAGCCAATTGCTG-1,◯ -GTTAAGCCACATGGGA-1,◯ -GTTAAGCCACATTAGC-1,◯ -GTTAAGCCATGGTTGT-1,◯ -GTTAAGCGTATGAATG-1,◯ -GTTAAGCGTTGCCTCT-1,◯ -GTTACAGAGAGTGACC-1,◯ -GTTACAGAGATGCCTT-1,◯ -GTTACAGAGCACGCCT-1,◯ -GTTACAGAGGCATTGG-1,◯ -GTTACAGAGTGGCACA-1,◯ -GTTACAGCACCCAGTG-1,◯ -GTTACAGGTACGCACC-1,◯ -GTTACAGGTCACCTAA-1,◯ -GTTACAGGTCCAGTTA-1,◯ -GTTACAGGTCTTCAAG-1,◯ -GTTACAGGTTGACGTT-1,◯ -GTTACAGTCAGCATGT-1,◯ -GTTACAGTCCTAGGGC-1,◯ -GTTACAGTCTTATCTG-1,◯ -GTTCATTAGACTAGGC-1,◯ -GTTCATTAGGAGTCTG-1,◯ -GTTCATTAGGCTCATT-1,◯ -GTTCATTAGGTGCAAC-1,◯ -GTTCATTCATATACGC-1,◯ -GTTCATTCATCGGAAG-1,◯ -GTTCATTGTGCGAAAC-1,◯ -GTTCATTGTTCAGTAC-1,◯ -GTTCATTGTTCGAATC-1,◯ -GTTCATTTCAGAGCTT-1,◯ -GTTCATTTCATGTAGC-1,◯ -GTTCGGGAGCTATGCT-1,◯ -GTTCGGGCAAGTAGTA-1,◯ -GTTCGGGCAATGACCT-1,◯ -GTTCGGGCATGGAATA-1,◯ -GTTCGGGTCAAACAAG-1,◯ -GTTCGGGTCGGCGCTA-1,◯ -GTTCTCGAGCTCCCAG-1,◯ -GTTCTCGCAAGAAAGG-1,◯ -GTTCTCGCACACGCTG-1,◯ -GTTCTCGCACCGTTGG-1,◯ -GTTCTCGCACGTTGGC-1,◯ -GTTCTCGCAGACGCAA-1,◯ -GTTCTCGCAGCATGAG-1,◯ -GTTCTCGCAGCTGCAC-1,◯ -GTTCTCGTCCCTTGCA-1,◯ -GTTCTCGTCCTCCTAG-1,◯ -GTTTCTAAGCCTCGTG-1,◯ -GTTTCTAAGCTAAGAT-1,◯ -GTTTCTAAGGGATCTG-1,◯ -GTTTCTACAAGTAATG-1,◯ -GTTTCTACATGCCACG-1,◯ -GTTTCTACATGCCCGA-1,◯ -GTTTCTACATGTCTCC-1,◯ -GTTTCTAGTAAATGTG-1,◯ -GTTTCTAGTAAGAGGA-1,◯ -GTTTCTAGTAGCGATG-1,◯ -GTTTCTAGTAGGAGTC-1,◯ -GTTTCTAGTGCTGTAT-1,◯ -GTTTCTAGTTATGTGC-1,◯ -GTTTCTATCATTGCCC-1,◯ -GTTTCTATCCCGGATG-1,◯ -GTTTCTATCCGCATCT-1,◯ -TAAACCGAGAATAGGG-1,◯ -TAAACCGAGTGGGATC-1,◯ -TAAACCGCAGCGTTCG-1,◯ -TAAACCGGTAGAGCTG-1,◯ 
-TAAACCGGTCATTAGC-1,◯ -TAAACCGGTTACAGAA-1,◯ -TAAACCGTCAGTTAGC-1,◯ -TAAACCGTCCACGTGG-1,◯ -TAAGAGAAGAACTCGG-1,◯ -TAAGAGAAGAGTAAGG-1,◯ -TAAGAGAAGATGAGAG-1,◯ -TAAGAGAAGGCGACAT-1,◯ -TAAGAGAAGGGTGTGT-1,◯ -TAAGAGAAGGTGTGGT-1,◯ -TAAGAGACAATGGATA-1,◯ -TAAGAGACAGCCAGAA-1,◯ -TAAGAGATCACTATTC-1,◯ -TAAGAGATCAGCCTAA-1,◯ -TAAGAGATCTCTGCTG-1,◯ -TAAGCGTAGAAACCTA-1,◯ -TAAGCGTAGAAGGCCT-1,◯ -TAAGCGTAGGACGAAA-1,◯ -TAAGCGTAGGATCGCA-1,◯ -TAAGCGTCACAGGCCT-1,◯ -TAAGCGTGTACCGAGA-1,◯ -TAAGCGTGTTAAGATG-1,◯ -TAAGCGTTCACTATTC-1,◯ -TAAGCGTTCAGGCGAA-1,◯ -TAAGCGTTCGTGGTCG-1,◯ -TAAGCGTTCTTGTACT-1,◯ -TAAGTGCAGAAACCTA-1,◯ -TAAGTGCAGACCTTTG-1,◯ -TAAGTGCAGCATGGCA-1,◯ -TAAGTGCAGCGCTCCA-1,◯ -TAAGTGCAGTCCATAC-1,◯ -TAAGTGCCAGGTTTCA-1,◯ -TAAGTGCGTCGACTAT-1,◯ -TAAGTGCGTCGCGAAA-1,◯ -TAAGTGCGTCTCCATC-1,◯ -TAAGTGCGTTCGCGAC-1,◯ -TAAGTGCTCAACACGT-1,◯ -TAAGTGCTCAATCACG-1,◯ -TAAGTGCTCCAGTAGT-1,◯ -TAAGTGCTCGCGATCG-1,◯ -TAAGTGCTCTCTAAGG-1,◯ -TAAGTGCTCTGCTGCT-1,◯ -TACACGAAGATCGATA-1,◯ -TACACGAAGATCTGAA-1,◯ -TACACGAAGGTAGCCA-1,◯ -TACACGACAAGGCTCC-1,◯ -TACACGAGTTGGTAAA-1,◯ -TACACGAGTTGTACAC-1,◯ -TACACGATCAGCTCGG-1,◯ -TACACGATCGCGCCAA-1,◯ -TACACGATCTCTGCTG-1,◯ -TACAGTGAGATACACA-1,◯ -TACAGTGAGCAGATCG-1,◯ -TACAGTGAGCTGTCTA-1,◯ -TACAGTGAGTGCCAGA-1,◯ -TACAGTGCAAGCGTAG-1,◯ -TACAGTGCAATCGGTT-1,◯ -TACAGTGCAGTAACGG-1,◯ -TACAGTGCATGGTCAT-1,◯ -TACAGTGGTAGCGTAG-1,◯ -TACAGTGGTTGAACTC-1,◯ -TACCTATAGAAACCAT-1,◯ -TACCTATAGATCTGAA-1,◯ -TACCTATAGTGAATTG-1,◯ -TACCTATCAATGAATG-1,◯ -TACCTATCAGACTCGC-1,◯ -TACCTATCATAGAAAC-1,◯ -TACCTATCATGAAGTA-1,◯ -TACCTATCATGAGCGA-1,◯ -TACCTATGTCCAAGTT-1,◯ -TACCTATGTGCCTTGG-1,◯ -TACCTATTCCAAACAC-1,◯ -TACCTATTCCAGGGCT-1,◯ -TACCTTAAGATCGGGT-1,◯ -TACCTTAAGCCACTAT-1,◯ -TACCTTACACTAAGTC-1,◯ -TACCTTACATGCCCGA-1,◯ -TACCTTAGTCAAAGAT-1,◯ -TACCTTAGTCTTCAAG-1,◯ -TACCTTAGTTACGACT-1,◯ -TACCTTAGTTTCCACC-1,◯ -TACCTTATCCAAAGTC-1,◯ -TACCTTATCGGTCTAA-1,◯ -TACCTTATCGTGACAT-1,◯ -TACGGATAGATGGGTC-1,◯ -TACGGATAGCTAGTGG-1,◯ -TACGGATAGGTTCCTA-1,◯ -TACGGATCAAGCTGTT-1,◯ -TACGGATCAATCGAAA-1,◯ -TACGGATGTACCGTAT-1,◯ 
-TACGGATGTAGGACAC-1,◯ -TACGGATGTCGTCTTC-1,◯ -TACGGATGTGATGATA-1,◯ -TACGGATGTGCAACTT-1,◯ -TACGGATGTGCCTGTG-1,◯ -TACGGATTCCGTACAA-1,◯ -TACGGATTCGTAGATC-1,◯ -TACGGGCAGAGCTGGT-1,◯ -TACGGGCAGAGGGATA-1,◯ -TACGGGCAGCAGGCTA-1,◯ -TACGGGCAGTAGCCGA-1,◯ -TACGGGCCACACTGCG-1,◯ -TACGGGCCACATGGGA-1,◯ -TACGGGCCACGGTGTC-1,◯ -TACGGGCCACTCGACG-1,◯ -TACGGGCCATCGATGT-1,◯ -TACGGGCGTATGAATG-1,◯ -TACGGGCGTTCTGAAC-1,◯ -TACGGGCTCAGTTCGA-1,◯ -TACGGGCTCAGTTTGG-1,◯ -TACGGGCTCATCACCC-1,◯ -TACGGGCTCCCTCAGT-1,◯ -TACGGTAAGCTAAACA-1,◯ -TACGGTACAAACCTAC-1,◯ -TACGGTACAAACTGTC-1,◯ -TACGGTACAAGCCATT-1,◯ -TACGGTAGTACCGAGA-1,◯ -TACGGTAGTCGCATCG-1,◯ -TACGGTATCACCCTCA-1,◯ -TACGGTATCAGCATGT-1,◯ -TACGGTATCATTATCC-1,◯ -TACGGTATCCAATGGT-1,◯ -TACGGTATCCTAGTGA-1,◯ -TACGGTATCTGTCCGT-1,◯ -TACTCATAGCAGCCTC-1,◯ -TACTCATAGCGACGTA-1,◯ -TACTCATAGCGATATA-1,◯ -TACTCATAGCTTCGCG-1,◯ -TACTCATAGTCATGCT-1,◯ -TACTCATCAAAGCAAT-1,◯ -TACTCATCAGGACCCT-1,◯ -TACTCATGTACTTCTT-1,◯ -TACTCATGTCATGCAT-1,◯ -TACTCATGTCTGGAGA-1,◯ -TACTCATGTTGCCTCT-1,◯ -TACTCATGTTGTGGCC-1,◯ -TACTCGCCACAGAGGT-1,◯ -TACTCGCCACGGCCAT-1,◯ -TACTCGCCACTTAAGC-1,◯ -TACTCGCCAGGGTATG-1,◯ -TACTCGCCAGTCCTTC-1,◯ -TACTCGCCATTCTTAC-1,◯ -TACTCGCGTCTAAAGA-1,◯ -TACTCGCTCAGAAATG-1,◯ -TACTCGCTCGAGCCCA-1,◯ -TACTCGCTCGCAAGCC-1,◯ -TACTTACAGCCACCTG-1,◯ -TACTTACCAAGCTGTT-1,◯ -TACTTACCAATCAGAA-1,◯ -TACTTACCACCTTGTC-1,◯ -TACTTACCACGGTAGA-1,◯ -TACTTACGTCATCGGC-1,◯ -TACTTACGTCTGGTCG-1,◯ -TACTTACGTTTAGCTG-1,◯ -TACTTACTCGCAAACT-1,◯ -TACTTACTCGTCTGCT-1,◯ -TACTTACTCTGGCGTG-1,◯ -TACTTGTAGCGATTCT-1,◯ -TACTTGTAGTGTTGAA-1,◯ -TACTTGTCAAGCTGGA-1,◯ -TACTTGTCATCACCCT-1,◯ -TACTTGTGTAGGCATG-1,◯ -TACTTGTGTTGCGCAC-1,◯ -TACTTGTGTTTGGGCC-1,◯ -TACTTGTTCAGCATGT-1,◯ -TACTTGTTCGCAAGCC-1,◯ -TACTTGTTCTACCAGA-1,◯ -TACTTGTTCTTTACGT-1,◯ -TACTTGTTCTTTAGTC-1,◯ -TAGACCAAGCCCGAAA-1,◯ -TAGACCAAGGTTACCT-1,◯ -TAGACCACAACACGCC-1,◯ -TAGACCACACGACGAA-1,◯ -TAGACCACAGTTCATG-1,◯ -TAGACCACATGCAATC-1,◯ -TAGACCAGTCTAGTCA-1,◯ -TAGACCAGTGGAAAGA-1,◯ -TAGACCAGTTAAGGGC-1,◯ -TAGACCAGTTCGCGAC-1,◯ -TAGACCATCACTATTC-1,◯ 
-TAGACCATCGGAAACG-1,◯ -TAGACCATCTAACCGA-1,◯ -TAGAGCTAGCCTCGTG-1,◯ -TAGAGCTAGGAATCGC-1,◯ -TAGAGCTCAAGACACG-1,◯ -TAGAGCTCAATCGAAA-1,◯ -TAGAGCTCACATCTTT-1,◯ -TAGAGCTCATGAAGTA-1,◯ -TAGAGCTGTCTTCTCG-1,◯ -TAGAGCTGTGTCGCTG-1,◯ -TAGAGCTGTGTGCCTG-1,◯ -TAGAGCTTCTTTAGGG-1,◯ -TAGCCGGAGCCATCGC-1,◯ -TAGCCGGAGCGATATA-1,◯ -TAGCCGGAGCTAGCCC-1,◯ -TAGCCGGAGCTGCGAA-1,◯ -TAGCCGGAGGGATACC-1,◯ -TAGCCGGAGGTAGCTG-1,◯ -TAGCCGGAGGTGACCA-1,◯ -TAGCCGGCAATAGCGG-1,◯ -TAGCCGGCATTGGTAC-1,◯ -TAGCCGGCATTTGCTT-1,◯ -TAGCCGGGTTACCAGT-1,◯ -TAGGCATAGCCTATGT-1,◯ -TAGGCATAGCGTGAAC-1,◯ -TAGGCATCAATGGATA-1,◯ -TAGGCATCAGCGTAAG-1,◯ -TAGGCATCAGCTCGCA-1,◯ -TAGGCATCATGGGACA-1,◯ -TAGGCATCATTGGGCC-1,◯ -TAGGCATGTAACGACG-1,◯ -TAGGCATTCAGGTAAA-1,◯ -TAGGCATTCCTTAATC-1,◯ -TAGGCATTCGGCTACG-1,◯ -TAGGCATTCTACCAGA-1,◯ -TAGTGGTAGCTACCTA-1,◯ -TAGTGGTAGTCAAGGC-1,◯ -TAGTGGTAGTTCGCAT-1,◯ -TAGTGGTCAAGCCTAT-1,◯ -TAGTGGTCACACAGAG-1,◯ -TAGTGGTCATACGCTA-1,◯ -TAGTGGTGTAGCTCCG-1,◯ -TAGTGGTGTGAAATCA-1,◯ -TAGTGGTTCCTAAGTG-1,◯ -TAGTGGTTCGGCGCTA-1,◯ -TAGTTGGAGCGTAGTG-1,◯ -TAGTTGGAGTGTCCAT-1,◯ -TAGTTGGCAAAGGAAG-1,◯ -TAGTTGGCACATCCGG-1,◯ -TAGTTGGCAGGACGTA-1,◯ -TAGTTGGCAGTCGATT-1,◯ -TAGTTGGGTAGCCTAT-1,◯ -TAGTTGGGTAGTAGTA-1,◯ -TAGTTGGGTCAGCTAT-1,◯ -TAGTTGGGTCCGAATT-1,◯ -TAGTTGGTCTGCTGTC-1,◯ -TAGTTGGTCTTAACCT-1,◯ -TAGTTGGTCTTCCTTC-1,◯ -TATCAGGAGGAATTAC-1,◯ -TATCAGGAGTTTCCTT-1,◯ -TATCAGGCAAGGTGTG-1,◯ -TATCAGGCAAGTACCT-1,◯ -TATCAGGCACGAAGCA-1,◯ -TATCAGGCAGCTGTAT-1,◯ -TATCAGGGTTTAGGAA-1,◯ -TATCAGGTCACAGTAC-1,◯ -TATCAGGTCACCCTCA-1,◯ -TATCTCAAGGGATCTG-1,◯ -TATCTCACACAACGCC-1,◯ -TATCTCACAGAAGCAC-1,◯ -TATCTCACAGGACCCT-1,◯ -TATCTCACATATGCTG-1,◯ -TATCTCACATGAACCT-1,◯ -TATCTCACATTTCAGG-1,◯ -TATCTCAGTAAACCTC-1,◯ -TATCTCAGTCCTAGCG-1,◯ -TATCTCAGTCGAAAGC-1,◯ -TATCTCAGTGAGGCTA-1,◯ -TATCTCAGTGCACCAC-1,◯ -TATCTCAGTTAGGGTG-1,◯ -TATCTCATCCAAGTAC-1,◯ -TATCTCATCTCCGGTT-1,◯ -TATGCCCAGAACAATC-1,◯ -TATGCCCAGACCACGA-1,◯ -TATGCCCAGCCCAACC-1,◯ -TATGCCCAGGCGACAT-1,◯ -TATGCCCCATGCCTAA-1,◯ -TATGCCCGTCAAGCGA-1,◯ -TATGCCCGTCACACGC-1,◯ -TATGCCCGTGATGATA-1,◯ 
-TATGCCCGTGTGCGTC-1,◯ -TATGCCCGTTGCTCCT-1,◯ -TATGCCCTCGTACGGC-1,◯ -TATTACCAGACCTTTG-1,◯ -TATTACCAGAGTTGGC-1,◯ -TATTACCAGCAGATCG-1,◯ -TATTACCCAACACCCG-1,◯ -TATTACCCACTTAAGC-1,◯ -TATTACCCAGGCTCAC-1,◯ -TATTACCCATGCAATC-1,◯ -TATTACCGTTGCCTCT-1,◯ -TATTACCTCTGTGCAA-1,◯ -TATTACCTCTTCGGTC-1,◯ -TCAACGAAGCCACGCT-1,◯ -TCAACGAAGGCTCTTA-1,◯ -TCAACGAAGGGCTTGA-1,◯ -TCAACGAAGTAGCCGA-1,◯ -TCAACGAAGTTGTCGT-1,◯ -TCAACGACAGCTGCTG-1,◯ -TCAACGAGTAATCACC-1,◯ -TCAACGATCCTTTCTC-1,◯ -TCAACGATCGATCCCT-1,◯ -TCAACGATCGTCTGAA-1,◯ -TCAATCTAGAAACGAG-1,◯ -TCAATCTAGGTGTTAA-1,◯ -TCAATCTAGTGTCCAT-1,◯ -TCAATCTCAGCTCCGA-1,◯ -TCAATCTCATGAAGTA-1,◯ -TCAATCTGTTTAGCTG-1,◯ -TCAATCTTCCACTGGG-1,◯ -TCAATCTTCCCAACGG-1,◯ -TCACAAGAGAACTGTA-1,◯ -TCACAAGAGATGGCGT-1,◯ -TCACAAGAGGGAAACA-1,◯ -TCACAAGCACGGCGTT-1,◯ -TCACAAGCACGGCTAC-1,◯ -TCACAAGCACTACAGT-1,◯ -TCACAAGCATGTCCTC-1,◯ -TCACAAGGTACTTAGC-1,◯ -TCACAAGGTATCAGTC-1,◯ -TCACAAGGTCGCATCG-1,◯ -TCACAAGTCAGGTTCA-1,◯ -TCACAAGTCAGTGCAT-1,◯ -TCACAAGTCTGAGTGT-1,◯ -TCACAAGTCTGGCGTG-1,◯ -TCACGAAAGGGCTTCC-1,◯ -TCACGAACAATAGCGG-1,◯ -TCACGAACACCTCGGA-1,◯ -TCACGAACAGTCTTCC-1,◯ -TCACGAAGTATCACCA-1,◯ -TCACGAAGTCGCATAT-1,◯ -TCACGAAGTCTACCTC-1,◯ -TCACGAAGTGATGATA-1,◯ -TCACGAATCCACTGGG-1,◯ -TCACGAATCGCGTTTC-1,◯ -TCACGAATCGGAAACG-1,◯ -TCACGAATCGGAGGTA-1,◯ -TCACGAATCTGTTGAG-1,◯ -TCACGAATCTTCGAGA-1,◯ -TCACGAATCTTTACGT-1,◯ -TCAGATGAGACCACGA-1,◯ -TCAGATGAGCCACGCT-1,◯ -TCAGATGAGGGTGTTG-1,◯ -TCAGATGAGTCTTGCA-1,◯ -TCAGATGCACAGACTT-1,◯ -TCAGATGGTCAATACC-1,◯ -TCAGATGGTCTTTCAT-1,◯ -TCAGATGGTGTTTGGT-1,◯ -TCAGATGTCACTTCAT-1,◯ -TCAGATGTCCGCATAA-1,◯ -TCAGATGTCCGCGGTA-1,◯ -TCAGATGTCCGTCAAA-1,◯ -TCAGATGTCCTGCTTG-1,◯ -TCAGATGTCGATAGAA-1,◯ -TCAGATGTCTAACTTC-1,◯ -TCAGCAAAGCCCGAAA-1,◯ -TCAGCAAAGCTGAACG-1,◯ -TCAGCAAAGTATTGGA-1,◯ -TCAGCAACAAATCCGT-1,◯ -TCAGCAACAAATTGCC-1,◯ -TCAGCAACACATGTGT-1,◯ -TCAGCAAGTCAAAGCG-1,◯ -TCAGCAAGTCGCGGTT-1,◯ -TCAGCAAGTGAGGGAG-1,◯ -TCAGCAATCACAGTAC-1,◯ -TCAGCAATCATCTGCC-1,◯ -TCAGCAATCCCTAACC-1,◯ -TCAGCAATCTGCAGTA-1,◯ -TCAGCTCCACAAGCCC-1,◯ -TCAGCTCGTACGCACC-1,◯ 
-TCAGCTCGTCTTCGTC-1,◯ -TCAGCTCGTTGATTGC-1,◯ -TCAGCTCTCGTGGACC-1,◯ -TCAGCTCTCTAACGGT-1,◯ -TCAGGATAGTACGATA-1,◯ -TCAGGATAGTGCAAGC-1,◯ -TCAGGATAGTTGTAGA-1,◯ -TCAGGATCACCAGATT-1,◯ -TCAGGATCACGGCGTT-1,◯ -TCAGGATCAGTTTACG-1,◯ -TCAGGATGTCCGAAGA-1,◯ -TCAGGATGTCGCGTGT-1,◯ -TCAGGATGTTGCCTCT-1,◯ -TCAGGATTCCACGTGG-1,◯ -TCAGGATTCCCAAGTA-1,◯ -TCAGGATTCCGAGCCA-1,◯ -TCAGGTAAGAGTCGGT-1,◯ -TCAGGTAAGCCCGAAA-1,◯ -TCAGGTAAGCTGTTCA-1,◯ -TCAGGTACAGATAATG-1,◯ -TCAGGTACAGATCCAT-1,◯ -TCAGGTACAGGACCCT-1,◯ -TCAGGTACATTGGCGC-1,◯ -TCAGGTACATTTGCTT-1,◯ -TCAGGTAGTCTAGGTT-1,◯ -TCAGGTAGTGCTAGCC-1,◯ -TCAGGTATCAGGTTCA-1,◯ -TCAGGTATCAGTTCGA-1,◯ -TCAGGTATCTCTGCTG-1,◯ -TCATTACAGACTAGAT-1,◯ -TCATTACAGCTAGCCC-1,◯ -TCATTACAGGAATCGC-1,◯ -TCATTACAGGAGTACC-1,◯ -TCATTACAGGGCTCTC-1,◯ -TCATTACAGTGGACGT-1,◯ -TCATTACAGTTAGGTA-1,◯ -TCATTACCAGGCGATA-1,◯ -TCATTACCATATGGTC-1,◯ -TCATTACCATGCCTTC-1,◯ -TCATTACCATTTCAGG-1,◯ -TCATTACGTAGAAAGG-1,◯ -TCATTACGTAGCTCCG-1,◯ -TCATTACGTGTAAGTA-1,◯ -TCATTACTCGAATCCA-1,◯ -TCATTACTCGCAAGCC-1,◯ -TCATTACTCGGCTACG-1,◯ -TCATTTGAGAGTACAT-1,◯ -TCATTTGAGCAGGTCA-1,◯ -TCATTTGAGGCTAGCA-1,◯ -TCATTTGAGTGGGATC-1,◯ -TCATTTGCAAGCTGAG-1,◯ -TCATTTGCAAGGGTCA-1,◯ -TCATTTGCACCGAAAG-1,◯ -TCATTTGCATAACCTG-1,◯ -TCATTTGCATCACGAT-1,◯ -TCATTTGGTCCTGCTT-1,◯ -TCATTTGGTTACCGAT-1,◯ -TCATTTGGTTGTTTGG-1,◯ -TCATTTGTCATCGCTC-1,◯ -TCCACACAGGGATACC-1,◯ -TCCACACAGGTAAACT-1,◯ -TCCACACAGTTACCCA-1,◯ -TCCACACAGTTATCGC-1,◯ -TCCACACCAAACTGCT-1,◯ -TCCACACCAAGCCCAC-1,◯ -TCCACACCACTTCTGC-1,◯ -TCCACACCAGTGGGAT-1,◯ -TCCACACGTATTACCG-1,◯ -TCCACACGTCTTGCGG-1,◯ -TCCACACTCATTGCGA-1,◯ -TCCACACTCCACTGGG-1,◯ -TCCACACTCGCCAGCA-1,◯ -TCCACACTCGCTTAGA-1,◯ -TCCACACTCTACTATC-1,◯ -TCCCGATAGACCTTTG-1,◯ -TCCCGATAGAGGTAGA-1,◯ -TCCCGATAGATCGGGT-1,◯ -TCCCGATAGGCGTACA-1,◯ -TCCCGATCAGAAGCAC-1,◯ -TCCCGATCATGCAATC-1,◯ -TCCCGATGTCTCTCTG-1,◯ -TCCCGATGTGTATGGG-1,◯ -TCCCGATGTTGAACTC-1,◯ -TCCCGATTCTATCCCG-1,◯ -TCCCGATTCTTGTTTG-1,◯ -TCGAGGCAGAGTCTGG-1,◯ -TCGAGGCAGCTTCGCG-1,◯ -TCGAGGCAGGATGTAT-1,◯ -TCGAGGCAGGCCATAG-1,◯ -TCGAGGCAGGGTTCCC-1,◯ 
-TCGAGGCGTACAGTTC-1,◯ -TCGAGGCGTGAACCTT-1,◯ -TCGAGGCGTTATCACG-1,◯ -TCGAGGCGTTCGCTAA-1,◯ -TCGAGGCTCCTATGTT-1,◯ -TCGAGGCTCTTCGGTC-1,◯ -TCGCGAGAGAAACGAG-1,◯ -TCGCGAGAGACAAGCC-1,◯ -TCGCGAGAGAGCTGGT-1,◯ -TCGCGAGAGGTGATAT-1,◯ -TCGCGAGCAGATCCAT-1,◯ -TCGCGAGCATCACGAT-1,◯ -TCGCGAGGTCGGCTCA-1,◯ -TCGCGAGTCAATCTCT-1,◯ -TCGCGAGTCACTTATC-1,◯ -TCGCGAGTCGTCTGCT-1,◯ -TCGCGTTAGAATTGTG-1,◯ -TCGCGTTAGGATGTAT-1,◯ -TCGCGTTAGTACGCGA-1,◯ -TCGCGTTAGTCCAGGA-1,◯ -TCGCGTTCACGGCTAC-1,◯ -TCGCGTTCACGTCTCT-1,◯ -TCGCGTTCATCGATTG-1,◯ -TCGCGTTGTAAACACA-1,◯ -TCGCGTTGTAGCTAAA-1,◯ -TCGCGTTGTCTAAACC-1,◯ -TCGCGTTGTGACAAAT-1,◯ -TCGCGTTGTTGATTGC-1,◯ -TCGCGTTTCCAAACAC-1,◯ -TCGCGTTTCGCCCTTA-1,◯ -TCGGGACAGAATAGGG-1,◯ -TCGGGACAGCCAGAAC-1,◯ -TCGGGACAGTCATCCA-1,◯ -TCGGGACCAATAGAGT-1,◯ -TCGGGACCATATACGC-1,◯ -TCGGGACCATTATCTC-1,◯ -TCGGGACGTAAACGCG-1,◯ -TCGGGACGTAGCGTAG-1,◯ -TCGGGACGTCGAGTTT-1,◯ -TCGGGACGTCTAGCCG-1,◯ -TCGGGACGTGACAAAT-1,◯ -TCGGGACGTTCATGGT-1,◯ -TCGGGACGTTGAGTTC-1,◯ -TCGGGACTCCACTGGG-1,◯ -TCGGGACTCTCTTGAT-1,◯ -TCGGGACTCTTGTATC-1,◯ -TCGGTAAAGCCCAATT-1,◯ -TCGGTAAAGTTTCCTT-1,◯ -TCGGTAACAATGTAAG-1,◯ -TCGGTAACACATGGGA-1,◯ -TCGGTAACATCCGGGT-1,◯ -TCGGTAAGTATGAAAC-1,◯ -TCGGTAAGTCGCATCG-1,◯ -TCGGTAATCGACAGCC-1,◯ -TCGGTAATCGCATGAT-1,◯ -TCGTACCGTAGCCTAT-1,◯ -TCGTAGAAGACTGGGT-1,◯ -TCGTAGAAGAGACTTA-1,◯ -TCGTAGAAGCCCTAAT-1,◯ -TCGTAGACACTAGTAC-1,◯ -TCGTAGACACTTGGAT-1,◯ -TCGTAGACATCAGTAC-1,◯ -TCGTAGAGTAACGACG-1,◯ -TCGTAGAGTCGAACAG-1,◯ -TCGTAGAGTTATGTGC-1,◯ -TCGTAGAGTTGTGGAG-1,◯ -TCGTAGATCACCGGGT-1,◯ -TCGTAGATCAGCAACT-1,◯ -TCGTAGATCGAATCCA-1,◯ -TCGTAGATCGCAAGCC-1,◯ -TCGTAGATCGGAGCAA-1,◯ -TCTATTGAGCTCTCGG-1,◯ -TCTATTGAGGCAAAGA-1,◯ -TCTATTGAGTGTCCCG-1,◯ -TCTATTGAGTTACGGG-1,◯ -TCTATTGAGTTGAGAT-1,◯ -TCTATTGCAATCAGAA-1,◯ -TCTATTGCACAGGTTT-1,◯ -TCTATTGGTAGCGTGA-1,◯ -TCTATTGGTCGAGTTT-1,◯ -TCTATTGGTCTAAACC-1,◯ -TCTATTGGTGAGGGTT-1,◯ -TCTATTGGTGTGAATA-1,◯ -TCTATTGTCAAAGTAG-1,◯ -TCTATTGTCGCAGGCT-1,◯ -TCTCATACAGCGTAAG-1,◯ -TCTCATAGTATAAACG-1,◯ -TCTCATAGTCTTGATG-1,◯ -TCTCATAGTGCCTGGT-1,◯ -TCTCATATCACTATTC-1,◯ 
-TCTCATATCAGGCCCA-1,◯ -TCTCATATCTATCGCC-1,◯ -TCTCTAAAGAATTCCC-1,◯ -TCTCTAAAGGAGTAGA-1,◯ -TCTCTAACAATGTAAG-1,◯ -TCTCTAACAGGAATGC-1,◯ -TCTCTAACATACCATG-1,◯ -TCTCTAACATGGTTGT-1,◯ -TCTCTAAGTGACTCAT-1,◯ -TCTCTAATCACGCGGT-1,◯ -TCTGAGAAGCGGATCA-1,◯ -TCTGAGAAGGACACCA-1,◯ -TCTGAGAAGGCAGGTT-1,◯ -TCTGAGAAGTTGAGTA-1,◯ -TCTGAGACATGCAATC-1,◯ -TCTGAGACATTACCTT-1,◯ -TCTGAGAGTAGATTAG-1,◯ -TCTGAGAGTGGCGAAT-1,◯ -TCTGAGAGTTAGTGGG-1,◯ -TCTGAGAGTTCAGGCC-1,◯ -TCTGAGATCCCAAGAT-1,◯ -TCTGGAAAGCCCAATT-1,◯ -TCTGGAAGTCATCGGC-1,◯ -TCTGGAAGTCGCATCG-1,◯ -TCTGGAAGTGTAACGG-1,◯ -TCTGGAAGTGTTCGAT-1,◯ -TCTGGAAGTTCCCTTG-1,◯ -TCTGGAATCCCTCTTT-1,◯ -TCTGGAATCGACAGCC-1,◯ -TCTGGAATCGTGTAGT-1,◯ -TCTGGAATCTATCCCG-1,◯ -TCTTCGGAGATGTGGC-1,◯ -TCTTCGGAGTCGTTTG-1,◯ -TCTTCGGCACAGGTTT-1,◯ -TCTTCGGCACGGTAGA-1,◯ -TCTTCGGTCTTAGAGC-1,◯ -TCTTTCCAGGTGCAAC-1,◯ -TCTTTCCAGGTGGGTT-1,◯ -TCTTTCCCAGACGCCT-1,◯ -TCTTTCCGTAAGAGGA-1,◯ -TCTTTCCGTAGCGATG-1,◯ -TCTTTCCTCAGGCGAA-1,◯ -TCTTTCCTCCTAAGTG-1,◯ -TCTTTCCTCGGACAAG-1,◯ -TGAAAGAAGATATGCA-1,◯ -TGAAAGAAGTCAATAG-1,◯ -TGAAAGAAGTCCGTAT-1,◯ -TGAAAGACACACGCTG-1,◯ -TGAAAGAGTTTACTCT-1,◯ -TGAAAGATCAAGGTAA-1,◯ -TGAAAGATCACTGGGC-1,◯ -TGAAAGATCCCGACTT-1,◯ -TGAAAGATCGACAGCC-1,◯ -TGAAAGATCGCCAAAT-1,◯ -TGAAAGATCTAACGGT-1,◯ -TGAAAGATCTCTGCTG-1,◯ -TGAAAGATCTTCGAGA-1,◯ -TGACAACAGAGACGAA-1,◯ -TGACAACAGCCCGAAA-1,◯ -TGACAACAGCCGGTAA-1,◯ -TGACAACAGGCCCTCA-1,◯ -TGACAACAGGCGACAT-1,◯ -TGACAACCAAGAGTCG-1,◯ -TGACAACCACACCGAC-1,◯ -TGACAACCACGTAAGG-1,◯ -TGACAACCATTCTCAT-1,◯ -TGACAACGTAATCGTC-1,◯ -TGACAACGTATGAAAC-1,◯ -TGACAACGTCTGATCA-1,◯ -TGACAACTCGTGACAT-1,◯ -TGACAACTCGTGTAGT-1,◯ -TGACAACTCTATCCCG-1,◯ -TGACGGCCACCATCCT-1,◯ -TGACGGCGTAAGAGGA-1,◯ -TGACGGCGTCGCGGTT-1,◯ -TGACGGCGTGTCCTCT-1,◯ -TGACGGCTCAACACAC-1,◯ -TGACGGCTCAGCTCGG-1,◯ -TGACGGCTCATGTGGT-1,◯ -TGACGGCTCGCACTCT-1,◯ -TGACGGCTCGGAAATA-1,◯ -TGACGGCTCTGGCGTG-1,◯ -TGACTAGAGCAATATG-1,◯ -TGACTAGAGGATGCGT-1,◯ -TGACTAGAGTAGGCCA-1,◯ -TGACTAGCACCAGTTA-1,◯ -TGACTAGGTTAAGGGC-1,◯ -TGACTAGTCAACCATG-1,◯ -TGACTAGTCCTCATTA-1,◯ -TGACTAGTCGGTCTAA-1,◯ 
-TGACTAGTCTTACCGC-1,◯ -TGACTTTAGGAATCGC-1,◯ -TGACTTTCAATTGCTG-1,◯ -TGACTTTCACCACCAG-1,◯ -TGACTTTCAGACAAGC-1,◯ -TGACTTTCATGCATGT-1,◯ -TGACTTTGTCCGAAGA-1,◯ -TGACTTTGTGTGACGA-1,◯ -TGACTTTTCAAACAAG-1,◯ -TGACTTTTCATGCAAC-1,◯ -TGACTTTTCTCAAACG-1,◯ -TGACTTTTCTGTCTCG-1,◯ -TGAGAGGAGCGTGAAC-1,◯ -TGAGAGGAGTGGAGAA-1,◯ -TGAGAGGCAAGAAAGG-1,◯ -TGAGAGGCAAGTTCTG-1,◯ -TGAGAGGCACCGAAAG-1,◯ -TGAGAGGCAGACGCAA-1,◯ -TGAGAGGCAGTCGTGC-1,◯ -TGAGAGGCATGGGAAC-1,◯ -TGAGAGGGTAGCGTAG-1,◯ -TGAGAGGGTCAGTGGA-1,◯ -TGAGAGGGTCCATCCT-1,◯ -TGAGAGGGTCTCCACT-1,◯ -TGAGAGGTCAGGATCT-1,◯ -TGAGAGGTCCCTGACT-1,◯ -TGAGAGGTCGCATGAT-1,◯ -TGAGCATAGAGACGAA-1,◯ -TGAGCATAGCGAAGGG-1,◯ -TGAGCATAGCGATGAC-1,◯ -TGAGCATAGCTAAGAT-1,◯ -TGAGCATAGCTCTCGG-1,◯ -TGAGCATAGGACCACA-1,◯ -TGAGCATAGTCCATAC-1,◯ -TGAGCATAGTTAACGA-1,◯ -TGAGCATAGTTGAGAT-1,◯ -TGAGCATCAAGTACCT-1,◯ -TGAGCATCACCAACCG-1,◯ -TGAGCATGTACCGGCT-1,◯ -TGAGCATGTACTCGCG-1,◯ -TGAGCATGTATGAAAC-1,◯ -TGAGCATGTGTAACGG-1,◯ -TGAGCATGTTCCAACA-1,◯ -TGAGCATGTTGCTCCT-1,◯ -TGAGCATTCCGAGCCA-1,◯ -TGAGCCGAGAAACCGC-1,◯ -TGAGCCGAGCGTGAGT-1,◯ -TGAGCCGAGGATGGTC-1,◯ -TGAGCCGAGTTCGCAT-1,◯ -TGAGCCGCAAACTGTC-1,◯ -TGAGCCGCAGACAAGC-1,◯ -TGAGCCGGTGTGGCTC-1,◯ -TGAGCCGTCACCCGAG-1,◯ -TGAGCCGTCATGTAGC-1,◯ -TGAGCCGTCCTGTACC-1,◯ -TGAGCCGTCGCCTGTT-1,◯ -TGAGGGAAGTAGCGGT-1,◯ -TGAGGGACAATAGCAA-1,◯ -TGAGGGACACGTAAGG-1,◯ -TGAGGGACAGACACTT-1,◯ -TGAGGGACATGGATGG-1,◯ -TGAGGGATCCCTAATT-1,◯ -TGAGGGATCGCAAGCC-1,◯ -TGATTTCAGCGGATCA-1,◯ -TGATTTCGTAGGAGTC-1,◯ -TGATTTCGTCTTCGTC-1,◯ -TGATTTCGTTCACCTC-1,◯ -TGATTTCGTTCCACTC-1,◯ -TGATTTCTCAGCAACT-1,◯ -TGATTTCTCGCCGTGA-1,◯ -TGATTTCTCGGATGGA-1,◯ -TGATTTCTCTGTGCAA-1,◯ -TGCACCTAGATCCCGC-1,◯ -TGCACCTAGCCGCCTA-1,◯ -TGCACCTAGGAATCGC-1,◯ -TGCACCTCAAGCGTAG-1,◯ -TGCACCTCAAGTAATG-1,◯ -TGCACCTCACATGTGT-1,◯ -TGCACCTGTGTCAATC-1,◯ -TGCACCTGTTCCACAA-1,◯ -TGCACCTGTTCGTCTC-1,◯ -TGCACCTGTTTGCATG-1,◯ -TGCACCTTCATTGCCC-1,◯ -TGCACCTTCCAATGGT-1,◯ -TGCACCTTCGCCAGCA-1,◯ -TGCCAAAAGCTGCCCA-1,◯ -TGCCAAAAGGTGCACA-1,◯ -TGCCAAAAGTACGTAA-1,◯ -TGCCAAACACATCCAA-1,◯ -TGCCAAACACTTCGAA-1,◯ 
-TGCCAAAGTAGAGTGC-1,◯ -TGCCAAAGTCTAGCGC-1,◯ -TGCCAAAGTGCTTCTC-1,◯ -TGCCAAAGTTCGCGAC-1,◯ -TGCCAAAGTTGATTCG-1,◯ -TGCCAAATCGGCGCAT-1,◯ -TGCCCATAGCGCTCCA-1,◯ -TGCCCATAGGCCGAAT-1,◯ -TGCCCATAGTCGTTTG-1,◯ -TGCCCATAGTGACATA-1,◯ -TGCCCATCACGGTAAG-1,◯ -TGCCCATCATTTCACT-1,◯ -TGCCCATGTAAGGGAA-1,◯ -TGCCCATGTAAGTAGT-1,◯ -TGCCCATGTCGGATCC-1,◯ -TGCCCATGTCTAGTCA-1,◯ -TGCCCATGTTATCGGT-1,◯ -TGCCCATGTTGGACCC-1,◯ -TGCCCATTCCGAACGC-1,◯ -TGCCCATTCCTGTACC-1,◯ -TGCCCATTCTACTCAT-1,◯ -TGCCCTAAGGCGACAT-1,◯ -TGCCCTAAGTAGTGCG-1,◯ -TGCCCTACAAGCCGCT-1,◯ -TGCCCTACATTACCTT-1,◯ -TGCCCTAGTATGCTTG-1,◯ -TGCCCTAGTGTCTGAT-1,◯ -TGCCCTATCAGAAATG-1,◯ -TGCCCTATCGTCACGG-1,◯ -TGCCCTATCGTTACGA-1,◯ -TGCCCTATCTCCGGTT-1,◯ -TGCCCTATCTCGCTTG-1,◯ -TGCGCAGAGACAATAC-1,◯ -TGCGCAGAGATCTGCT-1,◯ -TGCGCAGCAAATCCGT-1,◯ -TGCGCAGCAAGTCTAC-1,◯ -TGCGCAGCACATCCAA-1,◯ -TGCGCAGCACTTCGAA-1,◯ -TGCGCAGCATCTCCCA-1,◯ -TGCGCAGCATGCCTTC-1,◯ -TGCGCAGCATGGTTGT-1,◯ -TGCGCAGTCGCCAAAT-1,◯ -TGCGCAGTCGTACCGG-1,◯ -TGCGCAGTCTGCTGCT-1,◯ -TGCGGGTAGATAGGAG-1,◯ -TGCGGGTGTAATCGTC-1,◯ -TGCGGGTGTGAGCGAT-1,◯ -TGCGGGTGTTACCGAT-1,◯ -TGCGTGGAGAGTAAGG-1,◯ -TGCGTGGAGGCTACGA-1,◯ -TGCGTGGGTAAGTAGT-1,◯ -TGCGTGGGTAATAGCA-1,◯ -TGCGTGGGTCATGCAT-1,◯ -TGCGTGGGTTATCGGT-1,◯ -TGCGTGGTCATAACCG-1,◯ -TGCGTGGTCCACTGGG-1,◯ -TGCGTGGTCTCCTATA-1,◯ -TGCTACCAGACTTTCG-1,◯ -TGCTACCAGAGTCTGG-1,◯ -TGCTACCAGCGATTCT-1,◯ -TGCTACCAGGACTGGT-1,◯ -TGCTACCAGTACGATA-1,◯ -TGCTACCAGTCCATAC-1,◯ -TGCTACCAGTGCCATT-1,◯ -TGCTACCCACATCTTT-1,◯ -TGCTACCGTATTCTCT-1,◯ -TGCTACCGTGCGCTTG-1,◯ -TGCTACCGTTGTCGCG-1,◯ -TGCTACCTCGACGGAA-1,◯ -TGCTGCTAGAATGTTG-1,◯ -TGCTGCTAGACCGGAT-1,◯ -TGCTGCTCAACGCACC-1,◯ -TGCTGCTCAAGTAGTA-1,◯ -TGCTGCTCACCAGGTC-1,◯ -TGCTGCTCACCGTTGG-1,◯ -TGCTGCTCACGGCCAT-1,◯ -TGCTGCTCATAGGATA-1,◯ -TGCTGCTGTTGGAGGT-1,◯ -TGCTGCTGTTGTTTGG-1,◯ -TGCTGCTTCCCGGATG-1,◯ -TGCTGCTTCTTGTATC-1,◯ -TGGACGCAGACTTTCG-1,◯ -TGGACGCAGTCAAGCG-1,◯ -TGGACGCAGTGTACCT-1,◯ -TGGACGCAGTGTGAAT-1,◯ -TGGACGCCAAGGACAC-1,◯ -TGGACGCTCTAAGCCA-1,◯ -TGGACGCTCTCAACTT-1,◯ -TGGCCAGAGGGATGGG-1,◯ -TGGCCAGAGTTGAGTA-1,◯ 
-TGGCCAGCAAGGGTCA-1,◯ -TGGCCAGGTACTTAGC-1,◯ -TGGCCAGGTATAGTAG-1,◯ -TGGCCAGGTCATGCAT-1,◯ -TGGCCAGGTTGTTTGG-1,◯ -TGGCCAGTCAGTCCCT-1,◯ -TGGCCAGTCTATCCTA-1,◯ -TGGCCAGTCTTGTACT-1,◯ -TGGCGCAAGCACCGTC-1,◯ -TGGCGCAAGGAGTAGA-1,◯ -TGGCGCAAGTGGGTTG-1,◯ -TGGCGCACAAGGCTCC-1,◯ -TGGCGCAGTGTGGCTC-1,◯ -TGGCGCAGTTACAGAA-1,◯ -TGGCGCATCCAACCAA-1,◯ -TGGCGCATCTTATCTG-1,◯ -TGGCTGGAGACGCACA-1,◯ -TGGCTGGAGCAATCTC-1,◯ -TGGCTGGAGGATCGCA-1,◯ -TGGCTGGCAAAGTCAA-1,◯ -TGGCTGGCAATAGAGT-1,◯ -TGGCTGGCAGAGTGTG-1,◯ -TGGCTGGCATAGTAAG-1,◯ -TGGCTGGGTCCTCCAT-1,◯ -TGGCTGGGTTATCCGA-1,◯ -TGGCTGGGTTTACTCT-1,◯ -TGGCTGGTCGACAGCC-1,◯ -TGGCTGGTCGGACAAG-1,◯ -TGGCTGGTCTTGAGGT-1,◯ -TGGGAAGAGCCAGGAT-1,◯ -TGGGAAGAGCTATGCT-1,◯ -TGGGAAGAGGACATTA-1,◯ -TGGGAAGAGGCAGTCA-1,◯ -TGGGAAGCAATGGTCT-1,◯ -TGGGAAGCACAGCGTC-1,◯ -TGGGAAGCACTCAGGC-1,◯ -TGGGAAGCATATGAGA-1,◯ -TGGGAAGCATGAGCGA-1,◯ -TGGGAAGCATTTCACT-1,◯ -TGGGAAGGTAGAGGAA-1,◯ -TGGGAAGGTGAAGGCT-1,◯ -TGGGAAGGTGCGGTAA-1,◯ -TGGGAAGGTGGTACAG-1,◯ -TGGGAAGTCCAGAGGA-1,◯ -TGGGAAGTCCTCATTA-1,◯ -TGGGAAGTCGAACTGT-1,◯ -TGGGAAGTCGCGTAGC-1,◯ -TGGGCGTAGAGAGCTC-1,◯ -TGGGCGTAGGACGAAA-1,◯ -TGGGCGTCAATCCAAC-1,◯ -TGGGCGTCATGACGGA-1,◯ -TGGGCGTGTCCAGTTA-1,◯ -TGGGCGTGTCCTAGCG-1,◯ -TGGGCGTGTCTCCACT-1,◯ -TGGGCGTGTGCTGTAT-1,◯ -TGGGCGTTCATTCACT-1,◯ -TGGGCGTTCCATGCTC-1,◯ -TGGGCGTTCGATGAGG-1,◯ -TGGTTAGAGACAATAC-1,◯ -TGGTTAGAGTTGAGAT-1,◯ -TGGTTAGAGTTGTAGA-1,◯ -TGGTTAGCAAGTAGTA-1,◯ -TGGTTAGCAGCGTAAG-1,◯ -TGGTTAGCAGCGTCCA-1,◯ -TGGTTAGCATGTCCTC-1,◯ -TGGTTAGGTACGACCC-1,◯ -TGGTTAGGTGTTGAGG-1,◯ -TGGTTAGTCGGAGGTA-1,◯ -TGGTTCCAGACAATAC-1,◯ -TGGTTCCAGACACGAC-1,◯ -TGGTTCCAGTACCGGA-1,◯ -TGGTTCCAGTATCTCG-1,◯ -TGGTTCCCAAAGCGGT-1,◯ -TGGTTCCCACCCATGG-1,◯ -TGGTTCCCACGACTCG-1,◯ -TGGTTCCGTGCCTGGT-1,◯ -TGTATTCAGATCTGAA-1,◯ -TGTATTCAGCCTTGAT-1,◯ -TGTATTCCAACACGCC-1,◯ -TGTATTCCATCGGGTC-1,◯ -TGTATTCGTCTAGCGC-1,◯ -TGTATTCGTGTGACCC-1,◯ -TGTATTCTCGGAGGTA-1,◯ -TGTCCCAAGGCTATCT-1,◯ -TGTCCCAAGTAATCCC-1,◯ -TGTCCCACAAATCCGT-1,◯ -TGTCCCACACCAGGCT-1,◯ -TGTCCCACAGATGAGC-1,◯ -TGTCCCACATCACGAT-1,◯ -TGTCCCACATGCAATC-1,◯ 
-TGTCCCAGTCCAAGTT-1,◯ -TGTCCCAGTGCAGGTA-1,◯ -TGTGGTAAGACTCGGA-1,◯ -TGTGGTAAGATAGCAT-1,◯ -TGTGGTAAGCTAGTTC-1,◯ -TGTGGTACACCGTTGG-1,◯ -TGTGGTACAGACAAAT-1,◯ -TGTGGTACAGCATGAG-1,◯ -TGTGGTAGTACCTACA-1,◯ -TGTGGTAGTCTCTCTG-1,◯ -TGTGGTAGTTCTGTTT-1,◯ -TGTGGTATCAAACCAC-1,◯ -TGTGGTATCGTTTGCC-1,◯ -TGTGTTTAGGGAGTAA-1,◯ -TGTGTTTAGTGGTAGC-1,◯ -TGTGTTTAGTTACCCA-1,◯ -TGTGTTTCAATTCCTT-1,◯ -TGTGTTTCACCGATAT-1,◯ -TGTGTTTCAGGTCTCG-1,◯ -TGTGTTTCATCCAACA-1,◯ -TGTGTTTGTAGATTAG-1,◯ -TGTGTTTGTCATGCCG-1,◯ -TGTGTTTGTGCAACTT-1,◯ -TGTGTTTGTTAAGAAC-1,◯ -TGTGTTTTCACCTTAT-1,◯ -TGTGTTTTCAGTACGT-1,◯ -TGTGTTTTCCATGAAC-1,◯ -TGTTCCGAGATGCCAG-1,◯ -TGTTCCGAGGGCTTGA-1,◯ -TGTTCCGCAACACCTA-1,◯ -TGTTCCGCACGAGGTA-1,◯ -TGTTCCGCAGGATTGG-1,◯ -TGTTCCGCATGTTCCC-1,◯ -TGTTCCGGTACAGTTC-1,◯ -TGTTCCGGTCCATGAT-1,◯ -TGTTCCGGTTCCACTC-1,◯ -TGTTCCGTCACCGTAA-1,◯ -TGTTCCGTCCGCAGTG-1,◯ -TGTTCCGTCGTGGACC-1,◯ -TTAACTCAGCCCTAAT-1,◯ -TTAACTCCACGGATAG-1,◯ -TTAACTCCAGGGAGAG-1,◯ -TTAACTCCATGCAACT-1,◯ -TTAACTCGTATCGCAT-1,◯ -TTAACTCGTGATGTCT-1,◯ -TTAACTCTCGGTGTTA-1,◯ -TTAGGACAGAAACCGC-1,◯ -TTAGGACAGATATACG-1,◯ -TTAGGACAGCGCCTCA-1,◯ -TTAGGACAGCGGCTTC-1,◯ -TTAGGACAGTACGACG-1,◯ -TTAGGACCAAAGGAAG-1,◯ -TTAGGACCAGCCAGAA-1,◯ -TTAGGACGTAAGTTCC-1,◯ -TTAGGACGTACTTAGC-1,◯ -TTAGGACGTGCCTGCA-1,◯ -TTAGGACGTTATCACG-1,◯ -TTAGGACTCTGTTTGT-1,◯ -TTAGGCAAGGGTGTGT-1,◯ -TTAGGCAAGTCAAGGC-1,◯ -TTAGGCACACTCGACG-1,◯ -TTAGGCACAGTGAGTG-1,◯ -TTAGGCACATGAAGTA-1,◯ -TTAGGCAGTAGAGTGC-1,◯ -TTAGGCAGTCCTAGCG-1,◯ -TTAGGCAGTTCGGGCT-1,◯ -TTAGGCATCAACACTG-1,◯ -TTAGGCATCCTTTCGG-1,◯ -TTAGGCATCGCTTGTC-1,◯ -TTAGGCATCGTCTGCT-1,◯ -TTAGGCATCTCAAACG-1,◯ -TTAGTTCAGACGCACA-1,◯ -TTAGTTCAGCTACCTA-1,◯ -TTAGTTCCACAAGTAA-1,◯ -TTAGTTCGTAAGTGTA-1,◯ -TTAGTTCGTATGGTTC-1,◯ -TTAGTTCGTGAGCGAT-1,◯ -TTAGTTCGTTTGGCGC-1,◯ -TTAGTTCTCGACGGAA-1,◯ -TTATGCTAGCTAGTTC-1,◯ -TTATGCTAGCTGAAAT-1,◯ -TTATGCTAGTCAAGGC-1,◯ -TTATGCTCAAAGCAAT-1,◯ -TTATGCTCAACACGCC-1,◯ -TTATGCTCAATCCGAT-1,◯ -TTATGCTCAGCTTAAC-1,◯ -TTATGCTCAGGCAGTA-1,◯ -TTATGCTGTAAGGGCT-1,◯ -TTATGCTGTATTCGTG-1,◯ -TTATGCTGTCATATGC-1,◯ 
-TTATGCTGTCCGCTGA-1,◯ -TTATGCTGTGCAACTT-1,◯ -TTATGCTTCACGCATA-1,◯ -TTATGCTTCAGAGACG-1,◯ -TTCCCAGAGAGGTTGC-1,◯ -TTCCCAGAGGACAGAA-1,◯ -TTCCCAGAGGGCTTCC-1,◯ -TTCCCAGAGGTGCTTT-1,◯ -TTCCCAGAGTGGAGTC-1,◯ -TTCCCAGCAAGTCTGT-1,◯ -TTCCCAGCAAGTTAAG-1,◯ -TTCCCAGCAGTCGTGC-1,◯ -TTCCCAGCATGGATGG-1,◯ -TTCCCAGCATTTGCCC-1,◯ -TTCCCAGGTAGCCTCG-1,◯ -TTCCCAGGTCCTCCAT-1,◯ -TTCCCAGTCAAAGTAG-1,◯ -TTCCCAGTCAGGCGAA-1,◯ -TTCCCAGTCCACGCAG-1,◯ -TTCCCAGTCCGTAGTA-1,◯ -TTCGAAGAGAGACTAT-1,◯ -TTCGAAGCAACGATCT-1,◯ -TTCGAAGCACGGTTTA-1,◯ -TTCGAAGCATGGATGG-1,◯ -TTCGAAGGTGTGGCTC-1,◯ -TTCGAAGGTGTGGTTT-1,◯ -TTCGAAGGTTAGATGA-1,◯ -TTCGAAGTCACTATTC-1,◯ -TTCGAAGTCCTGTAGA-1,◯ -TTCGAAGTCGGCGCTA-1,◯ -TTCGAAGTCTGTCCGT-1,◯ -TTCGGTCAGACTTGAA-1,◯ -TTCGGTCAGGAGCGTT-1,◯ -TTCGGTCAGTACGCGA-1,◯ -TTCGGTCAGTGTACGG-1,◯ -TTCGGTCCAAGCCTAT-1,◯ -TTCGGTCCAAGCGTAG-1,◯ -TTCGGTCCAGCCTATA-1,◯ -TTCGGTCCAGCGTAAG-1,◯ -TTCGGTCGTATAAACG-1,◯ -TTCGGTCGTCACTGGC-1,◯ -TTCGGTCGTCAGAATA-1,◯ -TTCGGTCGTGGGTATG-1,◯ -TTCGGTCGTTCAGTAC-1,◯ -TTCGGTCTCATATCGG-1,◯ -TTCGGTCTCCAGAAGG-1,◯ -TTCGGTCTCCGCTGTT-1,◯ -TTCGGTCTCCTTAATC-1,◯ -TTCGGTCTCTATCGCC-1,◯ -TTCTACAAGAGCTGCA-1,◯ -TTCTACAAGCAATCTC-1,◯ -TTCTACAAGTGCGTGA-1,◯ -TTCTACACAAACAACA-1,◯ -TTCTACACAGCTGGCT-1,◯ -TTCTACACATCACAAC-1,◯ -TTCTACACATCCCACT-1,◯ -TTCTACACATCCCATC-1,◯ -TTCTACAGTATAGGGC-1,◯ -TTCTACAGTCAATACC-1,◯ -TTCTACATCAGCTCTC-1,◯ -TTCTACATCAGGCCCA-1,◯ -TTCTACATCCGTAGGC-1,◯ -TTCTACATCTATCCCG-1,◯ -TTCTCAACAAGACACG-1,◯ -TTCTCAACAGCCTTTC-1,◯ -TTCTCAACATAGACTC-1,◯ -TTCTCAAGTAGCAAAT-1,◯ -TTCTCAAGTATAGGGC-1,◯ -TTCTCAAGTGCAACGA-1,◯ -TTCTCAAGTTGAGTTC-1,◯ -TTCTCAATCCGAAGAG-1,◯ -TTCTCAATCCTCAACC-1,◯ -TTCTCCTAGAGATGAG-1,◯ -TTCTCCTAGCCACGTC-1,◯ -TTCTCCTAGGAGTACC-1,◯ -TTCTCCTCAGACGCCT-1,◯ -TTCTCCTTCTGTCTCG-1,◯ -TTCTTAGAGAAGAAGC-1,◯ -TTCTTAGAGATCCGAG-1,◯ -TTCTTAGAGCGAGAAA-1,◯ -TTCTTAGAGCGCCTCA-1,◯ -TTCTTAGCAACCGCCA-1,◯ -TTCTTAGCATCCCACT-1,◯ -TTCTTAGCATCGACGC-1,◯ -TTCTTAGGTACTCTCC-1,◯ -TTCTTAGTCCCTTGCA-1,◯ -TTCTTAGTCCTTGACC-1,◯ -TTGAACGAGAGCTGCA-1,◯ -TTGAACGAGGCTCAGA-1,◯ -TTGAACGAGTCATGCT-1,◯ 
-TTGAACGCAATACGCT-1,◯ -TTGAACGCATCATCCC-1,◯ -TTGAACGGTCCGAACC-1,◯ -TTGAACGTCACTATTC-1,◯ -TTGAACGTCCATTCTA-1,◯ -TTGAACGTCCGCGTTT-1,◯ -TTGACTTAGGTGGGTT-1,◯ -TTGACTTAGTAGGTGC-1,◯ -TTGACTTAGTGTCCCG-1,◯ -TTGACTTAGTTGAGTA-1,◯ -TTGACTTCAACACGCC-1,◯ -TTGACTTCACGGTGTC-1,◯ -TTGACTTCATCGGGTC-1,◯ -TTGACTTGTATTCGTG-1,◯ -TTGACTTGTGCCTTGG-1,◯ -TTGACTTGTGCTAGCC-1,◯ -TTGACTTGTGGCGAAT-1,◯ -TTGACTTGTTCGGGCT-1,◯ -TTGACTTTCCACGACG-1,◯ -TTGACTTTCCACGTTC-1,◯ -TTGACTTTCCAGAAGG-1,◯ -TTGACTTTCGAATGGG-1,◯ -TTGACTTTCGGAGGTA-1,◯ -TTGCCGTAGAGAGCTC-1,◯ -TTGCCGTAGGAGCGAG-1,◯ -TTGCCGTCACAAGACG-1,◯ -TTGCCGTCATTGCGGC-1,◯ -TTGCCGTGTCCAGTTA-1,◯ -TTGCCGTTCAAACGGG-1,◯ -TTGCCGTTCAATAAGG-1,◯ -TTGCCGTTCGACCAGC-1,◯ -TTGCCGTTCGCCGTGA-1,◯ -TTGCCGTTCTTCGGTC-1,◯ -TTGCGTCAGAACTCGG-1,◯ -TTGCGTCAGCGTCTAT-1,◯ -TTGCGTCAGGACCACA-1,◯ -TTGCGTCAGGGTCGAT-1,◯ -TTGCGTCAGTGTGGCA-1,◯ -TTGCGTCAGTTAGCGG-1,◯ -TTGCGTCCAAACTGTC-1,◯ -TTGCGTCCAACAACCT-1,◯ -TTGCGTCCAAGGACTG-1,◯ -TTGCGTCCACCCAGTG-1,◯ -TTGCGTCCACGTCAGC-1,◯ -TTGCGTCCAGACAAGC-1,◯ -TTGCGTCGTCGGGTCT-1,◯ -TTGCGTCGTGCCTGCA-1,◯ -TTGCGTCTCACCCGAG-1,◯ -TTGCGTCTCCATGCTC-1,◯ -TTGCGTCTCCGCTGTT-1,◯ -TTGCGTCTCGGCTTGG-1,◯ -TTGCGTCTCGTCCGTT-1,◯ -TTGGAACAGCAAATCA-1,◯ -TTGGAACAGGCAGTCA-1,◯ -TTGGAACCATTGGTAC-1,◯ -TTGGAACGTCCGAGTC-1,◯ -TTGGAACGTGTGGCTC-1,◯ -TTGGAACGTTATCGGT-1,◯ -TTGGAACTCATTTGGG-1,◯ -TTGGAACTCCTATGTT-1,◯ -TTGGAACTCGTTTAGG-1,◯ -TTGGAACTCTTAGCCC-1,◯ -TTGGCAAAGACAAAGG-1,◯ -TTGGCAAAGCAGGCTA-1,◯ -TTGGCAAAGTACCGGA-1,◯ -TTGGCAACACCAGCAC-1,◯ -TTGGCAAGTAAATACG-1,◯ -TTGGCAAGTAAGTGGC-1,◯ -TTGGCAAGTATCACCA-1,◯ -TTGGCAAGTGAGGCTA-1,◯ -TTGGCAAGTGGACGAT-1,◯ -TTGGCAAGTTATCGGT-1,◯ -TTGGCAATCAAACCAC-1,◯ -TTGGCAATCGTGACAT-1,◯ -TTGTAGGAGTCGCCGT-1,◯ -TTGTAGGCACACATGT-1,◯ -TTGTAGGCATCCTAGA-1,◯ -TTGTAGGCATGGTTGT-1,◯ -TTGTAGGGTTCTCATT-1,◯ -TTGTAGGTCGCCATAA-1,◯ -TTTACTGAGAGCCTAG-1,◯ -TTTACTGAGATGCCTT-1,◯ -TTTACTGAGCTAAGAT-1,◯ -TTTACTGAGGGCTTGA-1,◯ -TTTACTGCAAAGGAAG-1,◯ -TTTACTGCAATTCCTT-1,◯ -TTTACTGCAGGCGATA-1,◯ -TTTACTGCAGGTCTCG-1,◯ -TTTACTGTCAGCTCGG-1,◯ -TTTACTGTCCAAGTAC-1,◯ 
-TTTACTGTCGGCATCG-1,◯ -TTTATGCAGAGAGCTC-1,◯ -TTTATGCAGCCCAACC-1,◯ -TTTATGCCAAAGGTGC-1,◯ -TTTATGCCAAGCGTAG-1,◯ -TTTATGCCACTTGGAT-1,◯ -TTTATGCCAGTTAACC-1,◯ -TTTATGCCATACGCCG-1,◯ -TTTATGCTCCCTCAGT-1,◯ -TTTATGCTCCTGTACC-1,◯ -TTTATGCTCGCAGGCT-1,◯ -TTTCCTCAGAAGCCCA-1,◯ -TTTCCTCAGAGAACAG-1,◯ -TTTCCTCAGTTTCCTT-1,◯ -TTTCCTCCAATGCCAT-1,◯ -TTTCCTCCACTCTGTC-1,◯ -TTTCCTCCAGAGTGTG-1,◯ -TTTCCTCCAGCGATCC-1,◯ -TTTCCTCGTAACGTTC-1,◯ -TTTCCTCGTTCCGTCT-1,◯ -TTTGCGCAGCTCAACT-1,◯ -TTTGCGCAGTGTACGG-1,◯ -TTTGCGCCAAGAAGAG-1,◯ -TTTGCGCCACACAGAG-1,◯ -TTTGCGCCAGGCAGTA-1,◯ -TTTGCGCCATTCTTAC-1,◯ -TTTGCGCGTAAAGTCA-1,◯ -TTTGCGCTCCACGAAT-1,◯ -TTTGCGCTCCCTCAGT-1,◯ -TTTGCGCTCGCGGATC-1,◯ -TTTGGTTAGCTAGTGG-1,◯ -TTTGGTTAGGAGCGTT-1,◯ -TTTGGTTCAATGGTCT-1,◯ -TTTGGTTCAGGATTGG-1,◯ -TTTGGTTCATGCCCGA-1,◯ -TTTGGTTGTAAGTGGC-1,◯ -TTTGGTTGTAGCTTGT-1,◯ -TTTGGTTGTGTGGTTT-1,◯ -TTTGGTTTCATATCGG-1,◯ -TTTGGTTTCTGCTGCT-1,◯ -TTTGTCAAGACTGTAA-1,◯ -TTTGTCACACAGGCCT-1,◯ -TTTGTCACAGCAGTTT-1,◯ -TTTGTCACAGCTATTG-1,◯ -TTTGTCACATACCATG-1,◯ -TTTGTCAGTCCATCCT-1,◯ -TTTGTCAGTCTCACCT-1,◯ -TTTGTCAGTCTCTTTA-1,◯ -TTTGTCATCAAACCGT-1,◯ -TTTGTCATCAGTTTGG-1,◯ diff --git a/enclone_main/testx/inputs/outputs/enclone_ext_test1_output b/enclone_main/testx/inputs/outputs/enclone_ext_test1_output deleted file mode 100644 index c6738dfd4..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_ext_test1_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬────────────────────────────────────────┬──────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 200|IGHV5-10-1 ◆ 58|IGHJ6 │ 362|IGLV3-21 ◆ 319|IGLJ6 │ -│ ├────────────────────────────────────────┼──────────────────────────────────────────────┤ -│ │ 1111111111111111111111111 │ 1111111111111 │ -│ │ 1111112222222222333333333 │ 0000011111111 │ -│ │ 4567890123456789012345678 │ 5678901234567 │ -│ │ ═══════════CDR3══════════ │ ═════CDR3════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CQVWDSSS◦CAGL │ -│donor ref │ 
◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CQVWDSSS◦CAGL │ -├───────────┼────────────────────────────────────────┼──────────────────────────────────────────────┤ -│# n │ ......................... u const │ ............. u const notes │ -│1 1 │ CARNWRYCTSVSCQHREYFYYMDVW 69 IGHA1 │ CQVLDTTTHQIIF 58 IGLC2 ins = MAWTVL at 14│ -└───────────┴────────────────────────────────────────┴──────────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_ext_test2_output b/enclone_main/testx/inputs/outputs/enclone_ext_test2_output deleted file mode 100644 index c53aad5b8..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_ext_test2_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 7 CELLS - -[1.1] CLONOTYPE = 7 CELLS -┌──────────────────────────────┬───────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 67.1.1|IGHV1-2 ◆ 55|IGHJ4 │ 362|IGLV3-21 ◆ 314|IGLJ2 │ -│ ├───────────────────────────────┼─────────────────────────────┤ -│ │ 1111111111111111 │ 111111111111111 │ -│ │ 1111112222222222 │ 000001111111111 │ -│ │ 4567890123456789 │ 567890123456789 │ -│ │ ══════CDR3══════ │ ══════CDR3═════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQVWDSS◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQVWDSS◦◦◦◦◦◦◦◦ │ -├──────────────────────────────┼───────────────────────────────┼─────────────────────────────┤ -│# n_47212 n_47200 n_other │ ................ u const │ ............... 
u const│ -│1 3 2 2 │ CVKGKSGSFWYYFENW 55 IGHA1 │ CQVWDPPTHDHPGIF 270 IGLC2│ -└──────────────────────────────┴───────────────────────────────┴─────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_ext_test3_output b/enclone_main/testx/inputs/outputs/enclone_ext_test3_output deleted file mode 100644 index 96b685666..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_ext_test3_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 59 CELLS - -[1.1] CLONOTYPE = 59 CELLS -┌─────────────────┬─────────────────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 151.1.1|IGHV3-64D ◆ 55|IGHJ4 │ 352.1.1|IGLV3-1 ◆ 312|IGLJ1│ -│ ├─────────────────────────────────────────┼─────────────────────────────┤ -│ │ 11 1111111111111111 │ 11111111111 │ -│ │ 46778900 1111112222222222 │ 6 00000111111 │ -│ │ 98151856 4567890123456789 │ 9 56789012345 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ S CQAWD◦◦◦◦◦◦ │ -│donor ref │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ T CQAWD◦◦◦◦◦◦ │ -├─────────────────┼─────────────────────────────────────────┼─────────────────────────────┤ -│# n sec mem │ ........ ................ u const │ . ........... 
u const │ -│1 54 15 0 │ DSTNSSGT CVKDRVTGTITELDYW 321 IGHG1 │ T CQAWDSSAGVF 3925 IGLC1 │ -│2 5 1 0 │ │ T CQAWDSSAGVF 262 IGLC1 │ -└─────────────────┴─────────────────────────────────────────┴─────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_internal_test1_output b/enclone_main/testx/inputs/outputs/enclone_internal_test1_output deleted file mode 100644 index 94e25e003..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_internal_test1_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 12 CELLS - -[1.1] CLONOTYPE = 12 CELLS -┌───────────┬───────────────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 106|IGHV3-11 ◆ 743|IGHJ6 │ 334|IGLV1-51 ◆ 312|IGLJ1 │ -│ ├───────────────────────────────────────┼─────────────────────────────┤ -│ │ 11111111111111111 │ 1111111111111 │ -│ │ 24677 11111122222222223 │ 5 0001111111111 │ -│ │ 26557 45678901234567890 │ 8 7890123456789 │ -│ │ ═══════CDR3══════ │ ═════CDR3════ │ -│reference │ LTWYN ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ L CGTWD◦◦◦◦◦◦◦◦ │ -│donor ref │ LTWYN ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ L CGTWD◦◦◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────────┼─────────────────────────────┤ -│# n │ ..x.. ................. u const │ . ............. 
u const│ -│1 9 │ LIWYN CARDRIAGRFGYGMDVW 175 IGHG1 │ V CGTWDGSLSAYVL 990 IGLC1│ -│2 1 │ LICYN CARDRIAGRFGYGMDVW 8 IGHG1 │ V CGTWDGSLSAYVL 7 IGLC1│ -│3 2 │ │ V CGTWDGSLSAYVL 16 IGLC1│ -└───────────┴───────────────────────────────────────┴─────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test100_output b/enclone_main/testx/inputs/outputs/enclone_test100_output deleted file mode 100644 index 30921b3f3..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test100_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 2 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌────────────┬──────────────────────────────┬───────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ CHAIN 3 │ -│ │ 561|TRBV10-2 ◆ 547|TRBJ2-1 │ 615|TRBV25-1 ◆ 541|TRBJ1-2 │ 485.1.1|TRAV21 ◆ 410|TRAJ21│ -│ ├──────────────────────────────┼───────────────────────────────┼─────────────────────────────┤ -│ │ 1111111111111 │ 11111111111111111 │ 11111111111111 │ -│ │ 0111111111122 │ 01111111111222222 │ 2 00111111111122 │ -│ │ 9012345678901 │ 90123456789012345 │ 3 89012345678901 │ -│ │ ═════CDR3════ │ ═══════CDR3══════ │ ═════CDR3═════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦FF │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦F │ T ◦◦◦◦◦◦◦◦◦◦◦FYF │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦FF │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦F │ T ◦◦◦◦◦◦◦◦◦◦◦FYF │ -├────────────┼──────────────────────────────┼───────────────────────────────┼─────────────────────────────┤ -│# n inkt │ ............. u const │ ................. u const │ . .............. 
u const│ -│1 1 𝝱g │ CASGDRGYNEQFF 3 TRBC2 │ CASSGRDRVWNNYGYTF 6 TRBC1 │ T CAVRPGYNFNKFYF 4 TRAC │ -│2 1 │ CASGDRGYNEQFF 14 TRBC2 │ │ T CAVRPGYNFNKFYF 13 TRAC │ -└────────────┴──────────────────────────────┴───────────────────────────────┴─────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test101_output b/enclone_main/testx/inputs/outputs/enclone_test101_output deleted file mode 100644 index b77d89211..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test101_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 70 CELLS - -[1.1] CLONOTYPE = 70 CELLS -┌─────────────┬──────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 600|TRBV20-1 ◆ 554|TRBJ2-7 │ 525|TRAV8-2 ◆ 411|TRAJ22│ -│ ├──────────────────────────────┼──────────────────────────┤ -│ │ 111111111111111 │ 1111111111111 │ -│ │ 000111111111122 │ 0111111111122 │ -│ │ 789012345678901 │ 9012345678901 │ -│ │ ══════CDR3═════ │ ═════CDR3════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦F │ ◦◦◦◦◦◦◦◦RQLTF │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦F │ ◦◦◦◦◦◦◦◦RQLTF │ -├─────────────┼──────────────────────────────┼──────────────────────────┤ -│# n mait │ ............... u const │ ............. 
u const │ -│1 64 𝝱g │ CSARDLEVLSYEQYF 7 TRBC2 │ CVGRKGSARQLTF 4 TRAC │ -│2 5 𝝱g │ CSARDLEVLSYEQYF 3 TRBC2 │ │ -│3 1 │ │ CVGRKGSARQLTF 3 TRAC │ -└─────────────┴──────────────────────────────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test103_output b/enclone_main/testx/inputs/outputs/enclone_test103_output deleted file mode 100644 index 9336895c2..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test103_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬──────────────────────────┐ -│ │ CHAIN 1 │ -│ │ 366|IGLV3-25 ◆ 316|IGLJ3│ -│ ├──────────────────────────┤ -│ │ 1111111111111 │ -│ │ 0000011111111 │ -│ │ 5678901234567 │ -│ │ ═════CDR3════ │ -│reference │ CQ◦◦◦◦◦◦◦◦◦◦◦ │ -│donor ref │ CQ◦◦◦◦◦◦◦◦◦◦◦ │ -├───────────┼──────────────────────────┤ -│# n │ ............. u const│ -│1 1 │ CQSADSSGTYKVF 21 IGLC2│ -└───────────┴──────────────────────────┘ -n,datasets,origins,donors,entropy_cell,near,far,dref,ext,mark,inkt,mait,var1,u1,u_min1,u_max1,u_Σ1,u_μ1,comp1,edit1,r1,r_min1,r_max1,r_Σ1,r_μ1,const1,white1,cdr3_dna1,cdr3_len1,ulen1,vjlen1,clen1,cdiff1,udiff1,notes1,d_univ1,d_donor1,v_name1,d_name1,j_name1,v_id1,d_id1,j_id1,var_indices_dna1,var_indices_aa1,share_indices_dna1,share_indices_aa1,v_start1,const_id1,utr_id1,utr_name1,cdr3_start1,cdr3_aa1,seq1,vj_seq1,var_aa1,var2,u2,u_min2,u_max2,u_Σ2,u_μ2,comp2,edit2,r2,r_min2,r_max2,r_Σ2,r_μ2,const2,white2,cdr3_dna2,cdr3_len2,ulen2,vjlen2,clen2,cdiff2,udiff2,notes2,d_univ2,d_donor2,v_name2,d_name2,j_name2,v_id2,d_id2,j_id2,var_indices_dna2,var_indices_aa2,share_indices_dna2,share_indices_aa2,v_start2,const_id2,utr_id2,utr_name2,cdr3_start2,cdr3_aa2,seq2,vj_seq2,var_aa2,var3,u3,u_min3,u_max3,u_Σ3,u_μ3,comp3,edit3,r3,r_min3,r_max3,r_Σ3,r_μ3,const3,white3,cdr3_dna3,cdr3_len3,ulen3,vjlen3,clen3,cdiff3,udiff3,notes3,d_univ3,d_donor3,v_name3,d_name3,j_name3,v_id3,d_id3,j_id3,var_indices_dna3,var_indices_aa3,share_indic
es_dna3,share_indices_aa3,v_start3,const_id3,utr_id3,utr_name3,cdr3_start3,cdr3_aa3,seq3,vj_seq3,var_aa3,var4,u4,u_min4,u_max4,u_Σ4,u_μ4,comp4,edit4,r4,r_min4,r_max4,r_Σ4,r_μ4,const4,white4,cdr3_dna4,cdr3_len4,ulen4,vjlen4,clen4,cdiff4,udiff4,notes4,d_univ4,d_donor4,v_name4,d_name4,j_name4,v_id4,d_id4,j_id4,var_indices_dna4,var_indices_aa4,share_indices_dna4,share_indices_aa4,v_start4,const_id4,utr_id4,utr_name4,cdr3_start4,cdr3_aa4,seq4,vj_seq4,var_aa4,group_id,group_ncells,clonotype_id,clonotype_ncells,nchains,exact_subclonotype_id,barcodes,85333_barcodes -1,85333,s1,d1,0.00,,,0,,0,,,,21,21,21,,,3,S0•I2:7•S9,2230,2230,2230,,,IGLC2,,TGTCAATCAGCAGACAGCAGTGGTACTTACAAGGTGTTC,13,43,382,211,,,,0,0,IGLV3-25,,IGLJ3,366,,316,,,,,43,309,365,IGLV3-25,315,CQSADSSGTYKVF,GCTGTGCTGTGGGTCCAGGAGGCAGAACTCTGGGTGTCTCACCATGGCCTGGATCCCTCTACTTCTCCCCCTCCTCACTCTCTGCACAGGCTCTGAGGCCTCCTATGAGCTGACACAGCCACCCTCGGTGTCAGTGTCCCCAGGACAGACGGCCAGGATCACCTGCTCTGGAGATGCATTGCCAAAGCAATATGCTTATTGGTACCAGCAGAAGCCAGGCCAGGCCCCTGTGCTGGTGATATATAAAGACAGTGAGAGGCCCTCAGGGATCCCTGAGCGATTCTCTGGCTCCAGCTCAGGGACAACAGTCACGTTGACCATCAGTGGAGTCCAGGCAGAAGACGAGGCTGACTATTACTGTCAATCAGCAGACAGCAGTGGTACTTACAAGGTGTTCGGCGGAGGGACCAAGCTGACCGTCCTAGGTCAGCCCAAGGCTGCCCCCTCGGTCACTCTGTTCCCGCCCTCCTCTGAGGAGCTTCAAGCCAACAAGGCCACACTGGTGTGTCTCATAAGTGACTTCTACCCGGGAGCCGTGACAGTGGCCTGGAAGGCAGATAGCAGCCCCGTCAAGGCGGGAGTGGAGACCACCACACCCTCCAAACAAAGCAACAACAAGTACGCGGCCAGCAGCTA,ATGGCCTGGATCCCTCTACTTCTCCCCCTCCTCACTCTCTGCACAGGCTCTGAGGCCTCCTATGAGCTGACACAGCCACCCTCGGTGTCAGTGTCCCCAGGACAGACGGCCAGGATCACCTGCTCTGGAGATGCATTGCCAAAGCAATATGCTTATTGGTACCAGCAGAAGCCAGGCCAGGCCCCTGTGCTGGTGATATATAAAGACAGTGAGAGGCCCTCAGGGATCCCTGAGCGATTCTCTGGCTCCAGCTCAGGGACAACAGTCACGTTGACCATCAGTGGAGTCCAGGCAGAAGACGAGGCTGACTATTACTGTCAATCAGCAGACAGCAGTGGTACTTACAAGGTGTTCGGCGGAGGGACCAAGCTGACCGTCCTAG,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,1,1,1,1,1,GGGATGACATCGATGT-1,GGGATGACATCGATGT-1 - diff --git 
a/enclone_main/testx/inputs/outputs/enclone_test104_output b/enclone_main/testx/inputs/outputs/enclone_test104_output deleted file mode 100644 index c43ec0760..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test104_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 2 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌───────────┬───────────────────────────────────────────────────┬────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 120|IGHV3-21 ◆ 55|IGHJ4 │ 352.1.1|IGLV3-1 ◆ 312|IGLJ1 │ -│ ├───────────────────────────────────────────────────┼────────────────────────────────────────┤ -│ │ 11 1111111111111111111111111 │ 1111111111111 1 │ -│ │ 47789900 1111112222222222333333333 │ 24444668 0000011111111 2 │ -│ │ 82487837 4567890123456789012345678 │ 33678396 5678901234567 6 │ -│ │ ═══════════CDR3══════════ │ ═════CDR3════ │ -│reference │ SSSILYSE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ TDGDKLSN CQAWD◦◦◦◦◦◦◦◦ L │ -│donor ref │ SSSILYSE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ TDGDKLTN CQAWD◦◦◦◦◦◦◦◦ L │ -├───────────┼───────────────────────────────────────────────────┼────────────────────────────────────────┤ -│# n │ xxxxxxxx ........x.x...........xx. u const │ xxxxxxxx ......x...... 
x u const│ -│1 1 │ SSSFLYSD CARVIVGPKKLEGRLYSSSLHFDCW 2151 IGHG1 │ TDGDKVTN CQAWDSSTASYVF L 16974 IGLC1│ -│2 1 │ SHNILSNE CARVIVGPEKQEGRLYSSSLHFDYW 364 IGHG2 │ INGNRLND CQAWDSRTASYVF L 2673 IGLC1│ -└───────────┴───────────────────────────────────────────────────┴────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test105_output b/enclone_main/testx/inputs/outputs/enclone_test105_output deleted file mode 100644 index 11a374242..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test105_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌────────────────┬────────────────────────────────────────────────┬──────────────────────────────────────────────────┬──────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ CHAIN 3 │ -│ │ 96|IGHV2-26 ◆ 743|IGHJ6 │ 96|IGHV2-26 ◆ 743|IGHJ6 │ 234|IGKV1-5 ◆ 217|IGKJ4 │ -│ ├────────────────────────────────────────────────┼──────────────────────────────────────────────────┼──────────────────────────────────────────────┤ -│ │ 1111111111111111111111 1 │ 11 1111111111111111111111 1 │ 11111111111 │ -│ │ 23445558 1111122222222223333333 3 │ 44578800 1111122222222223333333 4 │ 245556667788888999 01111111111 │ -│ │ 95580150 5678901234567890123456 8 │ 59080401 5678901234567890123456 6 │ 010280161401469138 90123456789 │ -│ │ ═════════CDR3═════════ │ ═════════CDR3═════════ │ ════CDR3═══ │ -│reference │ LTFSARSS ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW Q │ FNASSKLT ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW S │ KTISQKPKKSPSSSGETS CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ LTFSARSS ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW Q │ FNASSKLT ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW S │ KTISQKPKKSPSSSGETS CQQ◦◦◦◦◦◦◦◦ │ -├────────────────┼────────────────────────────────────────────────┼──────────────────────────────────────────────────┼──────────────────────────────────────────────┤ -│# datasets n │ ........ ...................... . u const │ ........ ...................... . u const │ xxxxxxxxxxxxxxxxxx ..x.x.x.... 
u const│ -│1 123085 1 │ VPITGTSS CARILGRAGTVIVYYFYGMDVW Q 25 IGHG1 │ │ NSVSQKPNQRPSGSGETS CQQYKSSPLTF 157 IGKC │ -│2 123089 2 │ │ VDPFNRLS CVRILGRALTVRVYFYYGIDVW S 7080 IGHG1 │ KTITQRAKKTPSSSGESD CQQYNSLPLTF 13915 IGKC │ -└────────────────┴────────────────────────────────────────────────┴──────────────────────────────────────────────────┴──────────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test10_output b/enclone_main/testx/inputs/outputs/enclone_test10_output deleted file mode 100644 index 4994002d9..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test10_output +++ /dev/null @@ -1,25 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 132 CELLS - -[1.1] CLONOTYPE = 132 CELLS -██ WARNING: This clonotype contains cells from multiple donors. -donors = d1,d2 -datasets in which these donors appear: -donor 1: 123085 -donor 2: 123089 -┌────────────────────────────────────────┬────────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 151.2.1|IGHV3-64D ◆ 55|IGHJ4 │ 352.2.1|IGLV3-1 ◆ 312|IGLJ1│ -│ ├────────────────────────────────┼─────────────────────────────┤ -│ │ 11 │ │ -│ │ 4677900 │ │ -│ │ 9815856 │ │ -│ │ │ │ -│reference │ SASSYRA │ │ -│donor ref │ SASSYRA │ │ -├────────────────────────────────────────┼────────────────────────────────┼─────────────────────────────┤ -│# n origins donors datasets │ ....... 
u const │ u const │ -│1 122 s1 d1,d2 123085,123089 │ DSTNSGT 992 IGHG1 │ 5329 IGLC1 │ -│2 10 s1 d1,d2 123085,123089 │ │ 572 IGLC1 │ -└────────────────────────────────────────┴────────────────────────────────┴─────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test11_output b/enclone_main/testx/inputs/outputs/enclone_test11_output deleted file mode 100644 index 003e04965..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test11_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌──────────────────────────────┬──────────────────────────────────┬─────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2│ -│ ├──────────────────────────────────┼─────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 11111112222222222333 │ 00000111111 │ -│ │ 34567890123456789012 │ 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -├──────────────────────────────┼──────────────────────────────────┼─────────────────────────┤ -│# datasets n donors gex │ .................... u const │ ........... 
u const │ -│1 toast 1 d 2743 │ CARSFFGDTAMVMFQAFDPW 2 IGHD │ CQAWDSSTVVF 8 IGLC2 │ -│2 toast 2 d 2769 │ │ CQAWDSSTVVF 6 IGLC2 │ -└──────────────────────────────┴──────────────────────────────────┴─────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test12_output b/enclone_main/testx/inputs/outputs/enclone_test12_output deleted file mode 100644 index aa3df4a4b..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test12_output +++ /dev/null @@ -1,24 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 606 CELLS - -[1.1] CLONOTYPE = 606 CELLS -┌───────────┬──────────────────────────────────────────────────────────────────────┬─────────────────────────────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 645|TRBV5-5 ◆ 550|TRBJ2-3 │ 458.1.1|TRAV1-2 ◆ 400|TRAJ11 │ -│ ├──────────────────────────────────────────────────────────────────────┼─────────────────────────────────────────────────────────────────────┤ -│ │ 111111111111111 │ 11111111111111 │ -│ │ 011111111112222 │ 5 00000011111111 │ -│ │ 901234567890123 │ 9 45678901234567 │ -│ │ ══════CDR3═════ │ ═════CDR3═════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦YF │ P ◦◦◦◦◦◦◦◦◦◦TLTF │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦YF │ P ◦◦◦◦◦◦◦◦◦◦TLTF │ -├───────────┼──────────────────────────────────────────────────────────────────────┼─────────────────────────────────────────────────────────────────────┤ -│# n │ ............... u const notes u_max │ . .............. 
u const notes u_max│ -│1 243 │ CASSLVQPSTDTQYF 9 TRBC2 63 │ P CALWGDSGYSTLTF 3 TRAC 18│ -│2 1 │ CASSLVQPSTDTQYF 7 TRBC2 7 │ P CALWGDSGYSTLTF 2 TRAC gap from J stop to C start = 186 2│ -│3 1 │ CASSLVQPSTDTQYF 3 TRBC2 3 │ P CALWGDSGYSTLTF 2 TRAC gap from J stop to C start = 185 2│ -│4 1 │ CASSLVQPSTDTQYF 3 TRBC2 gap from J stop to C start = 152 3 │ P CALWGDSGYSTLTF 3 TRAC 3│ -│5 357 │ CASSLVQPSTDTQYF 7 TRBC2 31 │ │ -│6 3 │ │ P CALWGDSGYSTLTF 4 TRAC 4│ -└───────────┴──────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test13_output b/enclone_main/testx/inputs/outputs/enclone_test13_output deleted file mode 100644 index 99fc5fd56..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test13_output +++ /dev/null @@ -1,23 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 42 CELLS - -[1.1] CLONOTYPE = 42 CELLS -┌───────────┬─────────────────────────────┬────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 625|TRBV30 ◆ 554|TRBJ2-7 │ 490|TRAV24 ◆ 450|TRAJ6│ -│ ├─────────────────────────────┼────────────────────────┤ -│ │ 11111111111111 │ 111111111111 │ -│ │ 00011111111112 │ 111111111222 │ -│ │ 78901234567890 │ 123456789012 │ -│ │ ═════CDR3═════ │ ════CDR3════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦F │ ◦◦◦◦◦◦◦YIPTF │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦F │ ◦◦◦◦◦◦◦YIPTF │ -├───────────┼─────────────────────────────┼────────────────────────┤ -│# n │ .............. u const │ ............ 
u const│ -│1 42 │ CAWTPGQEPYEQYF 11 TRBC2 │ CAFRGGSYIPTF 3 TRAC │ -└───────────┴─────────────────────────────┴────────────────────────┘ ->group1.clonotype1.exact1.chain1 -ATGCTCTGCTCTCTCCTTGCCCTTCTCCTGGGCACTTTCTTTGGGGTCAGATCTCAGACTATTCATCAATGGCCAGCGACCCTGGTGCAGCCTGTGGGCAGCCCGCTCTCTCTGGAGTGCACTGTGGAGGGAACATCAAACCCCAACCTATACTGGTACCGACAGGCTGCAGGCAGGGGCCTCCAGCTGCTCTTCTACTCCGTTGGTATTGGCCAGATCAGCTCTGAGGTGCCCCAGAATCTCTCAGCCTCCAGACCCCAGGACCGGCAGTTCATCCTGAGTTCTAAGAAGCTCCTTCTCAGTGACTCTGGCTTCTATCTCTGTGCCTGGACCCCTGGACAGGAGCCCTACGAGCAGTACTTCGGGCCGGGCACCAGGCTCACGGTCACAGAGGACCTGAAAAACGTGTTCCCACCCGAGGTCGCTGTGTTTGAGCCATCAGAAGCAGAGATCTCCCACACCCAAAAGGCCACACTGGTGTGCCTGGCCACAGGCTTCTACCCCGACCACGTGGAGCTGAGCTGGTGGGTGAATGGGAAGGAGGTGCACAGTGGGGTCAGCACAGACCCGCAGCCCCTCAAGGAGCAGCCCGCCCTCAATGACTCCAGATACTGCCTGAGCAGCCGCCTGAGGGTCTCGGCCACCTTCTGGCAGAACCCCCGCAACCACTTCCGCTGTCAAGTCCAGTTCTACGGGCTCTCGGAGAATGACGAGTGGACCCAGGATAGGGCCAAACCTGTCACCCAGATCGTCAGCGCCGAGGCCTGGGGTAGAGCAGACTGTGGCTTCACCTCCGAGTCTTACCAGCAAGGGGTCCTGTCTGCCACCATCCTCTATGAGATCTTGCTAGGGAAGGCCACCTTGTATGCCGTGCTGGTCAGTGCCCTCGTGCTGATGGCCATGGTCAAGAGAAAGGATTCCAGAGGC ->group1.clonotype1.exact1.chain2 -ATGGAGAAGAATCCTTTGGCAGCCCCATTACTAATCCTCTGGTTTCATCTTGACTGCGTGAGCAGCATACTGAACGTGGAACAAAGTCCTCAGTCACTGCATGTTCAGGAGGGAGACAGCACCAATTTCACCTGCAGCTTCCCTTCCAGCAATTTTTATGCCTTACACTGGTACAGATGGGAAACTGCAAAAAGCCCCGAGGCCTTGTTTGTAATGACTTTAAATGGGGATGAAAAGAAGAAAGGACGAATAAGTGCCACTCTTAATACCAAGGAGGGTTACAGCTATTTGTACATCAAAGGATCCCAGCCTGAAGACTCAGCCACATACCTCTGTGCCTTTAGGGGAGGAAGCTACATACCTACATTTGGAAGAGGAACCAGCCTTATTGTTCATCCGTATATCCAGAACCCTGACCCTGCCGTGTACCAGCTGAGAGACTCTAAATCCAGTGACAAGTCTGTCTGCCTATTCACCGATTTTGATTCTCAAACAAATGTGTCACAAAGTAAGGATTCTGATGTGTATATCACAGACAAAACTGTGCTAGACATGAGGTCTATGGACTTCAAGAGCAACAGTGCTGTGGCCTGGAGCAACAAATCTGACTTTGCATGTGCAAACGCCTTCAACAACAGCATTATTCCAGAAGACACCTTCTTCCCCAGCCCAGAAAGTTCCTGTGATGTCAAGCTGGTCGAGAAAAGCTTTGAAACAGATACGAACCTAAACTTTCAAAACCTGTCAGTGATTGGGTTCCGAATCCTCCTCCTGAAAGTGGCCGGGTTTAATCTGCTCATGACGCTGCGGCTGTGGTCCAGC - diff --git 
a/enclone_main/testx/inputs/outputs/enclone_test14_output b/enclone_main/testx/inputs/outputs/enclone_test14_output deleted file mode 100644 index 89be973cf..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test14_output +++ /dev/null @@ -1,22 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 102 CELLS - -[1.1] CLONOTYPE = 102 CELLS -┌───────────┬────────────────────────────┬──────────────────────────┬────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ CHAIN 3 │ -│ │ 617|TRBV27 ◆ 552|TRBJ2-5 │ 518|TRAV5 ◆ 434|TRAJ43 │ 481|TRAV2 ◆ 432|TRAJ41│ -│ ├────────────────────────────┼──────────────────────────┼────────────────────────┤ -│ │ 1111111111111 │ 111111111111 │ 111111111111 │ -│ │ 0111111111122 │ 011111111112 │ 001111111111 │ -│ │ 9012345678901 │ 901234567890 │ 890123456789 │ -│ │ ═════CDR3════ │ ════CDR3════ │ ════CDR3════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦F │ ◦◦◦◦◦◦◦◦◦◦RF │ ◦◦◦◦◦◦◦YALNF │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦F │ ◦◦◦◦◦◦◦◦◦◦RF │ ◦◦◦◦◦◦◦YALNF │ -├───────────┼────────────────────────────┼──────────────────────────┼────────────────────────┤ -│# n │ ............. u const │ ............ u const │ ............ 
u const│ -│1 23 │ CASRLGGEETQYF 8 TRBC2 │ CAESIPLGGMRF 3 TRAC │ CAGVWSGYALNF 2 TRAC │ -│2 38 │ CASRLGGEETQYF 8 TRBC2 │ CAESIPLGGMRF 4 TRAC │ │ -│3 15 │ CASRLGGEETQYF 6 TRBC2 │ │ CAGVWSGYALNF 3 TRAC │ -│4 26 │ CASRLGGEETQYF 5 TRBC2 │ │ │ -└───────────┴────────────────────────────┴──────────────────────────┴────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test15_output b/enclone_main/testx/inputs/outputs/enclone_test15_output deleted file mode 100644 index 37deaa032..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test15_output +++ /dev/null @@ -1,24 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 10 CELLS - -[1.1] CLONOTYPE = 10 CELLS -┌───────────┬───────────────────────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 187|IGHV4-34 ◆ 55|IGHJ4 │ 266|IGKV2-28 ◆ 215|IGKJ2│ -│ ├───────────────────────────────────────────────┼──────────────────────────┤ -│ │ 111111111 111111111111111 │ 1111111 11111111111 │ -│ │ 000001111 222222222233333 │ 0000011 11111111222 │ -│ │ 567890123 012345678901234 │ 5678901 23456789012 │ -│ │ ══════CDR3═════ │ ════CDR3═══ │ -│reference │ LKLSSVTAA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ EDVGVYY CMQ◦◦◦◦◦◦◦◦ │ -│donor ref │ LKLSSVTAA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ EDVGVYY CMQ◦◦◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────────────────┼──────────────────────────┤ -│# n │ ......... .....x......x.. notes │ ....... ........... 
│ -│1 4 │ LKLSSVTAA CARGLVVVYAIFDYW │ EDVGVYY CMQALQTPRTF │ -│2 2 │ LKLSSVTAA CARGLVVVYAIFDYW │ EDVGVYY CMQALQTPRTF │ -│3 1 │ LKLSSVTAA CARGLLVVYAIFDYW ins = MDLL at 73 │ EDVGVYY CMQALQTPRTF │ -│4 1 │ LKLSSVTAA CARGLLVVYAIFDYW │ EDVGVYY CMQALQTPRTF │ -│5 1 │ LKLSSVTAA CARGLLVVYAIFDYW │ EDVGVYY CMQALQTPRTF │ -│6 1 │ LKLSSVTAA CARGLLVVYAIFDYW │ EDVGVYY CMQALQTPRTF │ -└───────────┴───────────────────────────────────────────────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test16_output b/enclone_main/testx/inputs/outputs/enclone_test16_output deleted file mode 100644 index 5bbf0bc2f..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test16_output +++ /dev/null @@ -1,24 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 114 CELLS - -[1.1] CLONOTYPE = 114 CELLS -┌────────────────────────────┬───────────────────────────────────────┬───────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 144.1.2|IGHV3-49 ◆ 53|IGHJ3 │ 282|IGKV3-11 ◆ 218|IGKJ5 │ -│ ├───────────────────────────────────────┼───────────────────────────────┤ -│ │ 1 11111111111111111 1 │ 1111111111111 │ -│ │ 51 11112222222222333 4 │ 6 0001111111111 │ -│ │ 53 67890123456789012 1 │ 4 7890123456789 │ -│ │ ═══════CDR3══════ │ ═════CDR3════ │ -│reference │ VV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W S │ R CQQ◦◦◦◦◦◦◦◦◦◦ │ -│donor ref │ FV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W S │ R CQQ◦◦◦◦◦◦◦◦◦◦ │ -├────────────────────────────┼───────────────────────────────────────┼───────────────────────────────┤ -│# n n_123085 n_123089 │ .x ................. x u const │ x .x........... u const│ -│1 104 48 56 │ FV CTRDRDLRGATDAFDIW S 424 IGHG1 │ R CQQRSNWPPSITF 10197 IGKC │ -│2 5 3 2 │ FM CTRDRDLRGATDAFDIW S 145 IGHG1 │ R CHQRSNWPPSITF 7690 IGKC │ -│3 2 1 1 │ FV CTRDRDLRGATDAFDIW S 2195 IGHG1 │ S CQQRSNWPPSITF 19219 IGKC │ -│4 1 0 1 │ FM CTRDRDLRGATDAFDIW S 3033 IGHG1 │ R CQQRSNWPPSITF 15557 IGKC │ -│5 1 0 1 │ FV CTRDRDLRGATDAFDIW S 1888 ? 
│ R CQQRSNWPPSITF 16958 IGKC │ -│6 1 1 0 │ FV CTRDRDLRGATDAFDIW S 33 IGHG1 │ R CQQRSNWPPSITF 116 IGKC │ -└────────────────────────────┴───────────────────────────────────────┴───────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test17_output b/enclone_main/testx/inputs/outputs/enclone_test17_output deleted file mode 100644 index 2f56c1851..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test17_output +++ /dev/null @@ -1,23 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌───────────────────────────────────────────────────────────┬──────────────────────────────────┬─────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2│ -│ ├──────────────────────────────────┼─────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 11111112222222222333 │ 00000111111 │ -│ │ 34567890123456789012 │ 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -├───────────────────────────────────────────────────────────┼──────────────────────────────────┼─────────────────────────┤ -│# barcode n gex_max gex n_gex CD19_ab_μ │ .................... u const │ ........... 
u const │ -│1 1 2743 2743 1 1210 │ CARSFFGDTAMVMFQAFDPW 2 IGHD │ CQAWDSSTVVF 8 IGLC2 │ -│ GCTGGGTTCAACCAAC-1 2743 1 │ 2 │ 8 │ -│2 2 2769 2769 2 952 │ │ CQAWDSSTVVF 6 IGLC2 │ -│ ACGTCAAAGTGGTAGC-1 2484 1 │ │ 6 │ -│ ATGTGTGAGAGTACCG-1 2769 1 │ │ 2 │ -└───────────────────────────────────────────────────────────┴──────────────────────────────────┴─────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test18_output b/enclone_main/testx/inputs/outputs/enclone_test18_output deleted file mode 100644 index aa2045c94..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test18_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 8 CELLS - -[1.1] CLONOTYPE = 8 CELLS -┌────────────────┬─────────────────────────────────────────────┬─────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 120|IGHV3-21 ◆ 53|IGHJ3 │ 226|IGKV1-27 ◆ 217|IGKJ4 │ -│ ├─────────────────────────────────────────────┼─────────────────────────────────┤ -│ │ 1 1111111111111111111111 │ 11111111111 │ -│ │ 2477790 1111112222222222333333 │ 23334679 01111111111 │ -│ │ 0901367 4567890123456789012345 │ 00160628 90123456789 │ -│ │ ═════════CDR3═════════ │ ════CDR3═══ │ -│reference │ VSSSSSE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ RSSVVKAS CQK◦◦◦◦◦◦◦◦ │ -│donor ref │ VSSSSSE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ RSSVVKAS CQK◦◦◦◦◦◦◦◦ │ -├────────────────┼─────────────────────────────────────────────┼─────────────────────────────────┤ -│# datasets n │ ....... ...................... u const │ ........ ........... 
u const│ -│1 123085 8 │ VISGGAG CVRDEGGARPNKWNYEGAFDIW 45 IGHG1 │ SFFIIKAG CQKYDSAPLTF 60 IGKC │ -└────────────────┴─────────────────────────────────────────────┴─────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test19_output b/enclone_main/testx/inputs/outputs/enclone_test19_output deleted file mode 100644 index 48c1d7164..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test19_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 34 CELLS - -[1.1] CLONOTYPE = 34 CELLS -┌───────────┬────────────────────────────────────────────┬───────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 181.1.1|IGHV4-30-2 ◆ 53|IGHJ3 │ 254|IGKV1D-39 ◆ 218|IGKJ5│ -│ ├────────────────────────────────────────────┼───────────────────────────┤ -│ │ 1 1111111111111111 │ 111111111111 │ -│ │ 222455678990 1111122222222223 │ 011111111112 │ -│ │ 057138037346 5678901234567890 │ 901234567890 │ -│ │ ══════CDR3══════ │ ════CDR3════ │ -│reference │ LSSASRPHVRST ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQQ◦◦◦◦◦◦◦◦◦ │ -│donor ref │ VSPTYRHYVTST ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQQ◦◦◦◦◦◦◦◦◦ │ -├───────────┼────────────────────────────────────────────┼───────────────────────────┤ -│# n │ ............ ................ u const │ ............ 
u const │ -│1 34 │ VSPTYRHYVTST CARRYFGVVADAFDIW 58 IGHM │ CQQSYSTPPITF 207 IGKC │ -└───────────┴────────────────────────────────────────────┴───────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test1_output b/enclone_main/testx/inputs/outputs/enclone_test1_output deleted file mode 100644 index 725b91d5d..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test1_output +++ /dev/null @@ -1,29 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 5 CELLS - -[1.1] CLONOTYPE = 5 CELLS -┌───────────┬───────────────────────────────────────────────┬───────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 88|IGHV1-69D ◆ 55|IGHJ4 │ 286|IGKV3-20 ◆ 216|IGKJ3 │ -│ ├───────────────────────────────────────────────┼───────────────────────────────┤ -│ │ 11 111111111111 1 │ 111111111111 │ -│ │ 144457777899901 111111222222 2 │ 38 001111111111 │ -│ │ 836822467734503 456789012345 8 │ 66 890123456789 │ -│ │ ════CDR3════ │ ════CDR3════ │ -│reference │ SSTSIIGANTSTSE◦ ◦◦◦◦◦◦◦◦◦◦◦◦ G │ EG CQQ◦◦◦◦◦◦◦◦◦ │ -│donor ref │ SSTSIIGANTSTSE◦ ◦◦◦◦◦◦◦◦◦◦◦◦ G │ EG CQQ◦◦◦◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────────────────┼───────────────────────────────┤ -│# n │ xxxxxxxxxxxxxxx .....x...... x u const │ xx .....x...... 
u const│ -│1 1 │ ASTTVMGTSTSMSDY CVRDREYYFDYW G 5719 IGHG1 │ EG CQQYGRSPLFTF 17319 IGKC │ -│2 1 │ SSTSIIGANTSTSDY CVRDREYYFDYW G 4574 IGHG1 │ EG CQQYGSSPLFTF 11308 IGKC │ -│3 1 │ SSTTIIGANTSTSDY CVRDREYYFDYW G 3007 IGHG1 │ EG CQQYGSSPLFTF 2937 IGKC │ -│4 1 │ SSPNIIGANTSTSEY CVRDRQYYFDYW G 196 IGHG1 │ EG CQQYGSSPLFTF 329 IGKC │ -│5 1 │ │ EG CQQYGSSPLFTF 363 IGKC │ -└───────────┴───────────────────────────────────────────────┴───────────────────────────────┘ -exact_subclonotype_id,n,v_name1,v_name2,nchains,var_indices_aa1,barcodes -1,1,IGHV1-69D,IGKV3-20,2,"18,43,46,48,52,72,74,76,77,87,93,94,95,100,113,119,128",AAGGAGCTCGTAGATC-1 -2,1,IGHV1-69D,IGKV3-20,2,"18,43,46,48,52,72,74,76,77,87,93,94,95,100,113,119,128",CCCAGTTAGCCCAACC-1 -3,1,IGHV1-69D,IGKV3-20,2,"18,43,46,48,52,72,74,76,77,87,93,94,95,100,113,119,128",CACAAACTCGAGAGCA-1 -4,1,IGHV1-69D,IGKV3-20,2,"18,43,46,48,52,72,74,76,77,87,93,94,95,100,113,119,128",GTGCATACAGATCCAT-1 -5,1,IGHV1-69D,IGKV3-20,2,"18,43,46,48,52,72,74,76,77,87,93,94,95,100,113,119,128",TGAAAGATCCACTGGG-1 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test20_output b/enclone_main/testx/inputs/outputs/enclone_test20_output deleted file mode 100644 index d08409863..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test20_output +++ /dev/null @@ -1,22 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 2 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌──────────────────────────┬────────────────────────────────────────────┬──────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 142|IGHV3-48 ◆ 59|IGHJ6 │ 330.1.1|IGLV1-47 ◆ 316|IGLJ3 │ -│ ├────────────────────────────────────────────┼──────────────────────────────────────┤ -│ │ 111111111111111111111 │ 111111111111111 │ -│ │ 111111222222222233333 │ 000111111111122 │ -│ │ 456789012345678901234 │ 789012345678901 │ -│ │ ═════════CDR3════════ │ ══════CDR3═════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CAAWD◦◦◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CAAWD◦◦◦◦◦◦◦◦◦◦ │ 
-├──────────────────────────┼────────────────────────────────────────────┼──────────────────────────────────────┤ -│# barcode n │ ..........xx..xx..... u const u_Σ │ ..x............ u const u_Σ│ -│1 1 │ CARAGSGWPSNFYRYYYMDVW 9771 IGHG1 9771 │ CATWDDSLSGPNWVF 15916 IGLC3 15916│ -│ AGCTCCTAGGGATGGG-1 │ 9771 │ 15916 │ -│2 1 │ CARAGSGWPSNLYRYSYMDVW 6177 IGHG2 6177 │ CATWDDSLSGPNWVF 11447 IGLC3 11447│ -│ CAGATCATCACGACTA-1 │ 6177 │ 11447 │ -└──────────────────────────┴────────────────────────────────────────────┴──────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test21_output b/enclone_main/testx/inputs/outputs/enclone_test21_output deleted file mode 100644 index c06510a3f..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test21_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 5 CELLS - -[1.1] CLONOTYPE = 5 CELLS -┌───────────┬────────────────────────────────────────────────┬───────────────────────────────┬───────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ CHAIN 3 │ -│ │ 195|IGHV4-59 ◆ 14|IGHD2-2 ◆ 743|IGHJ6 │ 387|IGLV5-52 ◆ 316|IGLJ3 │ 324|IGLV1-40 ◆ 314|IGLJ2 │ -│ ├────────────────────────────────────────────────┼───────────────────────────────┼───────────────────────────────┤ -│ │ 1111111111111111111111111 │ 11111111111111 │ 11111111111111 │ -│ │ 44777 1111111222222222233333333 │ 11111122222222 │ 5 00111111111122 │ -│ │ 14268 3456789012345678901234567 │ 45678901234567 │ 8 89012345678901 │ -│ │ ═══════════CDR3══════════ │ ═════CDR3═════ │ ═════CDR3═════ │ -│reference │ TGSNN ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CGTWH◦◦◦◦◦◦◦◦◦ │ Q CQSYD◦◦◦◦◦◦◦◦◦ │ -│donor ref │ TGSNN ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CGTWH◦◦◦◦◦◦◦◦◦ │ Q CQSYD◦◦◦◦◦◦◦◦◦ │ -├───────────┼────────────────────────────────────────────────┼───────────────────────────────┼───────────────────────────────┤ -│# n │ ..... ......................... u const │ .............. u const │ . .............. 
u const│ -│1 4 │ SNTKN CAREDIVVVPAAIQSRYYHYGMDVW 3286 IGHG1 │ CGTWHSNSKPNWVF 2725 IGLC3 │ H CQSYDSSLSGSRVF 9561 IGLC2│ -│2 1 │ SNTKN CAREDIVVVPAAIQSRYYHYGMDVW 99 IGHG1 │ CGTWHSNSKPNWVF 357 IGLC2 │ H CQSYDSSLSGSRVF 2253 IGLC2│ -└───────────┴────────────────────────────────────────────────┴───────────────────────────────┴───────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test22_output b/enclone_main/testx/inputs/outputs/enclone_test22_output deleted file mode 100644 index 8b1378917..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test22_output +++ /dev/null @@ -1 +0,0 @@ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test23_output b/enclone_main/testx/inputs/outputs/enclone_test23_output deleted file mode 100644 index 98bae6f5d..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test23_output +++ /dev/null @@ -1,22 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 344 CELLS - -[1.1] CLONOTYPE = 344 CELLS -┌───────────┬─────────────────────────────────────────────────────┬────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 88|IGHV1-69D ◆ 55|IGHJ4 │ 286|IGKV3-20 ◆ 215|IGKJ2 │ -│ ├─────────────────────────────────────────────────────┼────────────────────────────────────────────┤ -│ │ 11111 11111111111111111 1 │ 1 11111111111 11 │ -│ │ 12445566778800011 11111122222222223 3 │ 1233445567777890 00111111111 22 │ -│ │ 1645692913241201513 45678901234567890 1 │ 6846890102367955 89012345678 57 │ -│ │ ═══════CDR3══════ │ ════CDR3═══ │ -│reference │ DFTSTSIPQLIGKFELRV◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ G │ DGPEVSSSPSSTGTIV CQQ◦◦◦◦◦◦◦◦ EK │ -│donor ref │ DFTSTSIPQLIGKFELRV◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ G │ DGPEVSSSPSSTGTIV CQQ◦◦◦◦◦◦◦◦ EK │ -├───────────┼─────────────────────────────────────────────────────┼────────────────────────────────────────────┤ -│# n │ x.x....xxx.xx...xxx ................. x u const │ ..xxxxx.x..x.xx. ..x........ 
xx u const│ -│1 172 │ RITSATFPQLLGRFDLRVY CAREGRGMVTTNPFDYW G 8 IGHM │ EDPEVSTTPSNTATIV CQQYGSSPYTF EK 34 IGKC │ -│2 107 │ HIISATFPQLLGKFDLSMY CAREGRGMVTTNPFDYW G 6 IGHM │ EDPEISSTASNTASVV CQQYGSSPYTF EQ 33 IGKC │ -│3 33 │ HIISATFPQLLGKFDLSMY CAREGRGMVTTNPFDYW G 6 IGHM │ EDPEISSTASNTASIV CQQYGSSPYTF EQ 35 IGKC │ -│4 32 │ RITSATFPQLLGRFDLKVY CAREGRGMVTTNPFDYW G 9 IGHM │ EDPEVSTTPSNTATIV CQQYGSSPYTF AK 45 IGKC │ -└───────────┴─────────────────────────────────────────────────────┴────────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test24_output b/enclone_main/testx/inputs/outputs/enclone_test24_output deleted file mode 100644 index 30597da2e..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test24_output +++ /dev/null @@ -1,17 +0,0 @@ - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬─────────────────────────────────────────────────┬───────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 112|IGHV3-15 ◆ 21|IGHD3-3 ◆ 54|IGHJ4 │ 352|IGLV3-1 ◆ 314|IGLJ2 │ -│ ├─────────────────────────────────────────────────┼───────────────────────────────────────┤ -│ │ 1 1111111111111111111111 │ 1 1111111111 1 │ -│ │ 134457888990 1111222222222233333333 │ 1145555667790 0000011111 2 │ -│ │ 625838035791 6789012345678901234567 │ 2690136291732 5678901234 3 │ -│ │ ═════════CDR3═════════ │ ═══CDR3═══ │ -│reference │ VPFSSTYPKNLL ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ YSYACYKVSRESD CQAWD◦◦◦◦◦ L │ -│donor ref │ VPFSSTYPKNLL ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ YSYACYKVSRESD CQAWD◦◦◦◦◦ L │ -├───────────┼─────────────────────────────────────────────────┼───────────────────────────────────────┤ -│# n │ ............ ...................... u const │ ............. .......... . 
u const│ -│1 1 │ ASLTNASSRALL CSTGWGLDFDFWSGYYTAGYHW 4 IGHE │ HFHSYFRVNRATV CQTWASAVVF L 36 IGLC2│ -└───────────┴─────────────────────────────────────────────────┴───────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test25_output b/enclone_main/testx/inputs/outputs/enclone_test25_output deleted file mode 100644 index 41ff1cf66..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test25_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 5 CELLS - -[1.1] CLONOTYPE = 5 CELLS -┌───────────┬─────────────────────────────────┬────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 586|TRBV13-1 ◆ 575|TRBJ2-5 │ 489|TRAV4-3 ◆ 394|TRAJ49 │ -│ ├─────────────────────────────────┼────────────────────────────────┤ -│ │ 111111111111 │ 1111111111111 │ -│ │ 000111111111 │ 0000111111111 │ -│ │ 789012345678 │ 6789012345678 │ -│ │ ════CDR3════ │ ═════CDR3════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦YF │ ◦◦◦◦◦◦◦◦◦NFYF │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦YF │ ◦◦◦◦◦◦◦◦◦NFYF │ -├───────────┼─────────────────────────────────┼────────────────────────────────┤ -│# n │ ............ u const cdiff │ ............. 
u const cdiff│ -│1 4 │ CASSDAGDTQYF 8 TRBC2 │ CAAEDTGYQNFYF 3 TRAC │ -│2 1 │ CASSDAGDTQYF 4 TRBC2 │ │ -└───────────┴─────────────────────────────────┴────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test26_output b/enclone_main/testx/inputs/outputs/enclone_test26_output deleted file mode 100644 index 6c010fb3c..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test26_output +++ /dev/null @@ -1,59 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 68 CELLS - -[1.1] CLONOTYPE = 68 CELLS -┌─────────────────┬──────────────────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 151.2.1|IGHV3-64D ◆ 55|IGHJ4 │ 352.2.1|IGLV3-1 ◆ 312|IGLJ1│ -│ ├──────────────────────────────────────────┼─────────────────────────────┤ -│ │ 11 1111111111111111 │ 11111111111 │ -│ │ 46778900 1111112222222222 │ 6 00000111111 │ -│ │ 98151856 4567890123456789 │ 9 56789012345 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ S CQAWD◦◦◦◦◦◦ │ -│donor ref │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ T CQAWD◦◦◦◦◦◦ │ -├─────────────────┼──────────────────────────────────────────┼─────────────────────────────┤ -│# datasets n │ ........ ................ u const │ . ........... 
u const │ -│1 123089 68 │ DSTNSSGT CVKDRVTGTITELDYW 1260 IGHG1 │ T CQAWDSSAGVF 5976 IGLC1 │ -└─────────────────┴──────────────────────────────────────────┴─────────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[2] GROUP = 1 CLONOTYPES = 54 CELLS - -[2.1] CLONOTYPE = 54 CELLS -┌─────────────────┬─────────────────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 151.1.1|IGHV3-64D ◆ 55|IGHJ4 │ 352.1.1|IGLV3-1 ◆ 312|IGLJ1│ -│ ├─────────────────────────────────────────┼─────────────────────────────┤ -│ │ 11 1111111111111111 │ 11111111111 │ -│ │ 46778900 1111112222222222 │ 6 00000111111 │ -│ │ 98151856 4567890123456789 │ 9 56789012345 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ S CQAWD◦◦◦◦◦◦ │ -│donor ref │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ T CQAWD◦◦◦◦◦◦ │ -├─────────────────┼─────────────────────────────────────────┼─────────────────────────────┤ -│# datasets n │ ........ ................ u const │ . ........... u const │ -│1 123085 54 │ DSTNSSGT CVKDRVTGTITELDYW 321 IGHG1 │ T CQAWDSSAGVF 3925 IGLC1 │ -└─────────────────┴─────────────────────────────────────────┴─────────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[3] GROUP = 1 CLONOTYPES = 1 CELLS - -[3.1] CLONOTYPE = 1 CELLS -┌────────────────┬─────────────────────────────────────┐ -│ │ CHAIN 1 │ -│ │ 151.1.1|IGHV3-64D ◆ 55|IGHJ4 │ -│ ├─────────────────────────────────────┤ -│ │ 11 1111111111111111 │ -│ │ 46778900 1111112222222222 │ -│ │ 98151856 4567890123456789 │ -│ │ ══════CDR3══════ │ -│reference │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ -│donor ref │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ -├────────────────┼─────────────────────────────────────┤ -│# datasets n │ ........ ................ 
u const│ -│1 123085 1 │ DSTNSSGT CVKDRVTGTITELDYW 3 IGHG1│ -└────────────────┴─────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test27_output b/enclone_main/testx/inputs/outputs/enclone_test27_output deleted file mode 100644 index 821b0a4c9..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test27_output +++ /dev/null @@ -1,27 +0,0 @@ - -SUMMARY STATISTICS -1. overall - • number of datasets = 1 - • number of donors = 1 -2. for the selected clonotypes - ┌────────┬────────────────────────┬──────────────────┬───────┐ - │chains │ clonotypes with this │ cells in these │ %│ - │ │ number of chains │ clonotypes │ │ - ├────────┼────────────────────────┼──────────────────┼───────┤ - │1 │ 151 │ 151 │ 11.4│ - │2 │ 304 │ 923 │ 69.9│ - │3 │ 11 │ 243 │ 18.4│ - │4 │ 2 │ 4 │ 0.3│ - │total │ 468 │ 1321 │ 100.0│ - └────────┴────────────────────────┴──────────────────┴───────┘ - • number of clonotypes having at least two cells = 126 - • number of cells having 1 chain = 185 - • number of cells having 2 or 3 chains = 1135 - • mean over middle third of contig UMI counts (heavy chain / TRB) = 354.20 - • mean over middle third of contig UMI counts (light chain / TRA) = 2015.84 - ┌────────────────────┐ - │origin donor cells│ - ├────────────────────┤ - │s1 d1 1321│ - └────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test28_output b/enclone_main/testx/inputs/outputs/enclone_test28_output deleted file mode 100644 index 6be670e14..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test28_output +++ /dev/null @@ -1,39 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 2 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌───────────┬──────────────────────────────────────────────┬───────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 79.1.1|IGHV1-46 ◆ 49|IGHJ1 │ 251|IGKV1D-33 ◆ 217|IGKJ4 │ -│ ├──────────────────────────────────────────────┼───────────────────────────────┤ -│ │ 111111111111111111111 │ 11111111111 │ -│ │ 
23445778 111111222222222233333 │ 345578 01111111111 │ -│ │ 528895342 456789012345678901234 │ 791266 90123456789 │ -│ │ ═════════CDR3════════ │ ════CDR3═══ │ -│reference │ RLVTSVGGF ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦HW │ GDSNES CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ RLVTSVGGL ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦HW │ GDSNES CQQ◦◦◦◦◦◦◦◦ │ -├───────────┼──────────────────────────────────────────────┼───────────────────────────────┤ -│# n │ ......... ..................... u const │ ...... ........... u const│ -│1 2 │ RVFNTLDGL CARDFAPTRKTIFNAGGFQDW 18 IGHA1 │ GHNIDG CQQYDNLPLTF 60 IGKC │ -└───────────┴──────────────────────────────────────────────┴───────────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[2] GROUP = 1 CLONOTYPES = 2 CELLS - -[2.1] CLONOTYPE = 2 CELLS -┌───────────┬─────────────────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 142.1.2|IGHV3-48 ◆ 743|IGHJ6 │ 286|IGKV3-20 ◆ 214|IGKJ1│ -│ ├─────────────────────────────────────────┼──────────────────────────┤ -│ │ 1 1111111111111111111 │ 111111111111 │ -│ │ 345791 1111112222222222333 │ 001111111111 │ -│ │ 391321 4567890123456789012 │ 890123456789 │ -│ │ ════════CDR3═══════ │ ════CDR3════ │ -│reference │ GSEGNV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CQQ◦◦◦◦◦◦◦◦◦ │ -│donor ref │ GSSSNV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CQQ◦◦◦◦◦◦◦◦◦ │ -├───────────┼─────────────────────────────────────────┼──────────────────────────┤ -│# n │ ...... ................... u const │ ............ 
u const │ -│1 2 │ GSSSNV CARSGWPSALYYYYGMDVW 29 IGHM │ CQQYGSSPPWTF 17 IGKC │ -└───────────┴─────────────────────────────────────────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test29_output b/enclone_main/testx/inputs/outputs/enclone_test29_output deleted file mode 100644 index e4cfadfdd..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test29_output +++ /dev/null @@ -1,131 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌─────────────────────┬───────────────────────────────────────────────────────┬────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159|IGHV3-7 ◆ 54|IGHJ4 │ 277|IGKV2D-29 ◆ 217|IGKJ4 │ -│ ├───────────────────────────────────────────────────────┼────────────────────────────────────┤ -│ │ 1 111111111111111111 1 │ 1 11111111111 │ -│ │ 24455667777888889990 111111222222222233 3 │ 22445556770 11111111222 │ -│ │ 83938381345126784892 456789012345678901 3 │ 01790140684 23456789012 │ -│ │ ═══════CDR3═══════ │ ════CDR3═══ │ -│reference │ GSSSALNQGSESVFTIKYLN ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ Q │ DISLHSKYSRA CMQ◦◦◦◦◦◦◦◦ │ -│donor ref │ GSSSALNQGSESVFTIKYLN ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ Q │ DISLHSKYSRA CMQ◦◦◦◦◦◦◦◦ │ -├─────────────────────┼───────────────────────────────────────────────────────┼────────────────────────────────────┤ -│# n IGHV3-7_g_μ │ u const │ u const│ -│1 1 7 │ DSNNAPSNGTQAMISVRFLN CARHFDSTGYYEPRLDYW R 9 IGHM │ EVSLFGRFSHA CMQSIQFPLTF 21 IGKC │ -│Σ 1 7 │ │ │ -│μ 1.0 7.0 │ │ │ -└─────────────────────┴───────────────────────────────────────────────────────┴────────────────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[2] GROUP = 1 CLONOTYPES = 1 CELLS - -[2.1] CLONOTYPE = 1 CELLS -┌─────────────────────┬────────────────────────────────────────────────────────┬───────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159|IGHV3-7 ◆ 54|IGHJ4 │ 220|IGKV1-12 ◆ 217|IGKJ4 │ -│ 
├────────────────────────────────────────────────────────┼───────────────────────────┤ -│ │ 1 1111111111111111 │ 11111111111 │ -│ │ 444577890 1111112222222222 │ 46 01111111111 │ -│ │ 279317572 4567890123456789 │ 13 90123456789 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ AFSSQYRLN ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ TK CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ AFSSQYRLN ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ TK CQQ◦◦◦◦◦◦◦◦ │ -├─────────────────────┼────────────────────────────────────────────────────────┼───────────────────────────┤ -│# n IGHV3-7_g_μ │ u const notes │ u const│ -│1 1 5 │ GLRGENRLT CARDIPFSGSYEFSDW 6 IGHM ins = ME at 42 │ AR CQQANSFPLTF 15 IGKC │ -│Σ 1 5 │ │ │ -│μ 1.0 5.0 │ │ │ -└─────────────────────┴────────────────────────────────────────────────────────┴───────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[3] GROUP = 1 CLONOTYPES = 1 CELLS - -[3.1] CLONOTYPE = 1 CELLS -┌─────────────────────┬───────────────────────────────────────────┬───────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159|IGHV3-7 ◆ 49|IGHJ1 │ 377|IGLV4-69 ◆ 316|IGLJ3 │ -│ ├───────────────────────────────────────────┼───────────────────────────────────┤ -│ │ 111 111111111111111 │ 11111111111 │ -│ │ 4457777789000 111111222222222 │ 2445566779 11111111122 │ -│ │ 7931568978237 456789012345678 │ 3490948062 12345678901 │ -│ │ ══════CDR3═════ │ ════CDR3═══ │ -│reference │ FSSQEKYVTYNSE ◦◦◦◦◦◦◦◦◦◦◦◦◦HW │ LSSYPRKNSA CQT◦◦◦◦◦◦◦◦ │ -│donor ref │ FSSQEKYVTYNSE ◦◦◦◦◦◦◦◦◦◦◦◦◦HW │ LSSYPRKNSA CQT◦◦◦◦◦◦◦◦ │ -├─────────────────────┼───────────────────────────────────────────┼───────────────────────────────────┤ -│# n IGHV3-7_g_μ │ u const │ u const│ -│1 1 5 │ FNDPATCVAFDTD CSTRIEVPGRLIQLW 7 IGHA2 │ VGNFPRNNTT CQLWGNGPPVF 12 IGLC2│ -│Σ 1 5 │ │ │ -│μ 1.0 5.0 │ │ │ -└─────────────────────┴───────────────────────────────────────────┴───────────────────────────────────┘ - 
-╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[4] GROUP = 1 CLONOTYPES = 1 CELLS - -[4.1] CLONOTYPE = 1 CELLS -┌─────────────────────┬─────────────────────────────────────────┬──────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159|IGHV3-7 ◆ 55|IGHJ4 │ 299|IGKV4-1 ◆ 215|IGKJ2 │ -│ ├─────────────────────────────────────────┼──────────────────────────────────┤ -│ │ 1 11111111111111 │ 11111111111 1 │ -│ │ 224467777990 11111122222222 │ 12445556 11111112222 2 │ -│ │ 158980256786 45678901234567 │ 91585691 34567890123 8 │ -│ │ ═════CDR3═════ │ ════CDR3═══ │ -│reference │ QSSSNKDEKLYA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ GISVKNAY CQQ◦◦◦◦◦◦◦◦ K │ -│donor ref │ QSSSNKDEKLYA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ GISVKNAY CQQ◦◦◦◦◦◦◦◦ K │ -├─────────────────────┼─────────────────────────────────────────┼──────────────────────────────────┤ -│# n IGHV3-7_g_μ │ u const │ u const│ -│1 1 5 │ QSTTSREGRVFV CTREGRKGGEIDYW 5 IGHA2 │ GTSLENGY CQQYLRAPNTF R 9 IGKC │ -│Σ 1 5 │ │ │ -│μ 1.0 5.0 │ │ │ -└─────────────────────┴─────────────────────────────────────────┴──────────────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[5] GROUP = 1 CLONOTYPES = 1 CELLS - -[5.1] CLONOTYPE = 1 CELLS -┌─────────────────────┬────────────────────────────────────────────────────┬─────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159|IGHV3-7 ◆ 55|IGHJ4 │ 324|IGLV1-40 ◆ 312|IGLJ1 │ -│ ├────────────────────────────────────────────────────┼─────────────────────────────────────────┤ -│ │ 1 1111111111111 11 │ 111 1111111111111 │ -│ │ 234455666777779999991 1111112222222 33 │ 22335557789000 0011111111112 │ -│ │ 126903278015673456791 4567890123456 06 │ 01353680242157 8901234567890 │ -│ │ ═════CDR3════ │ ═════CDR3════ │ -│reference │ QPTSYSGANKQEKYAKNSLLV ◦◦◦◦◦◦◦◦◦◦◦◦◦ TS │ SVGRVYQGSGSEDY CQSYD◦◦◦◦◦◦◦◦ │ -│donor ref │ QPTSYSGANKQEKYAKNSLLV ◦◦◦◦◦◦◦◦◦◦◦◦◦ TS │ SVGRVYQGSGSEDY 
CQSYD◦◦◦◦◦◦◦◦ │ -├─────────────────────┼────────────────────────────────────────────────────┼─────────────────────────────────────────┤ -│# n IGHV3-7_g_μ │ u const │ u const│ -│1 1 6 │ HPSANSGGNKEVVKFRNYLLV CVRALLHDAYDYW AS 7 IGHM │ SVGGVYQAFGFDEY CQSYDSILIAYVF 36 IGLC1│ -│Σ 1 6 │ │ │ -│μ 1.0 6.0 │ │ │ -└─────────────────────┴────────────────────────────────────────────────────┴─────────────────────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[6] GROUP = 1 CLONOTYPES = 1 CELLS - -[6.1] CLONOTYPE = 1 CELLS -┌─────────────────────┬────────────────────────────────────────────────┬────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159|IGHV3-7 ◆ 54|IGHJ4 │ 342|IGLV2-14 ◆ 316|IGLJ3 │ -│ ├────────────────────────────────────────────────┼────────────────────────────────────────┤ -│ │ 11 111111111111 │ 1 1111111111111 111 │ -│ │ 222244557777777999901 111111222222 │ 445556890 0011111111112 222 │ -│ │ 023819350145679357861 456789012345 │ 450687070 8901234567890 256 │ -│ │ ════CDR3════ │ ═════CDR3════ │ -│reference │ VLVGASSVKQSEKYVANLYAV ◦◦◦◦◦◦◦◦◦◦◦◦ │ SSYYQMNGA CSSYTSS◦◦◦◦◦◦ ◦KL │ -│donor ref │ VLVGASSVKQSEKYVANLYAV ◦◦◦◦◦◦◦◦◦◦◦◦ │ SSYYQMNGA CSSYTSS◦◦◦◦◦◦ ◦KL │ -├─────────────────────┼────────────────────────────────────────────────┼────────────────────────────────────────┤ -│# n IGHV3-7_g_μ │ u const │ u const│ -│1 1 5 │ MQADTNSLKHTAEFLANLHVV CARDDGGGWWDW 4 IGHM │ NKHCQIDGT CSSYTTSSTPWVF GRV 35 IGLC2│ -│Σ 1 5 │ │ │ -│μ 1.0 5.0 │ │ │ -└─────────────────────┴────────────────────────────────────────────────┴────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test2_output b/enclone_main/testx/inputs/outputs/enclone_test2_output deleted file mode 100644 index caa167d60..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test2_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 12 CELLS - 
-[1.1] CLONOTYPE = 12 CELLS -┌───────────┬───────────────────────────────────────────────────────────────────────────────────────────────┬───────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 181.1.1|IGHV4-30-2 ◆ 53|IGHJ3 │ 254|IGKV1D-39 ◆ 218|IGKJ5 │ -│ ├───────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────┤ -│ │ 1111111111111111 23 │ 1 111111111111 33 │ -│ │ 22456789 1111122222222223 48 │ 0 011111111112 14 │ -│ │ 07130313 5678901234567890 39 │ 6 901234567890 85 │ -│ │ ══════CDR3══════ │ ════CDR3════ │ -│reference │ LSASPHPR ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ T CQQ◦◦◦◦◦◦◦◦◦ │ -│donor ref │ VPTYHYPT ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ T CQQ◦◦◦◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────┤ -│# n │ ......x. ..............x. u const comp edit var │ x ......x..... u const comp edit var│ -│1 10 │ VPTYHYPT CARRYFGVVADAFDIW 4285 IGHM 9 S-6•D-2:1•S4•S5•S7•I10:2•S13•S14•S16 CC │ T CQQSYSTPPITF 11793 IGKC 0 AA │ -│2 2 │ VPTYHYST CARRYFGVVADAFDIW 4383 IGHM 10 S-6•D-2:1•S4•S5•S7•I10:2•S13•S14•S16•S33 TT │ A CQQSYSPPPITF 13922 IGKC 1 S-8 GC │ -└───────────┴───────────────────────────────────────────────────────────────────────────────────────────────┴───────────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test30_output b/enclone_main/testx/inputs/outputs/enclone_test30_output deleted file mode 100644 index d48cf9df5..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test30_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 59 CELLS - -[1.1] CLONOTYPE = 59 CELLS -┌───────────┬──────────────────────────────────────────────────────────┬─────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 151.1.1|IGHV3-64D ◆ 55|IGHJ4 │ 352.1.1|IGLV3-1 ◆ 312|IGLJ1 │ -│ 
├──────────────────────────────────────────────────────────┼─────────────────────────────────────────────┤ -│ │ 11 1111111111111111 │ 11111111111 │ -│ │ 46778900 1111112222222222 │ 6 00000111111 │ -│ │ 98151856 4567890123456789 │ 9 56789012345 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ S CQAWD◦◦◦◦◦◦ │ -│donor ref │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ T CQAWD◦◦◦◦◦◦ │ -├───────────┼──────────────────────────────────────────────────────────┼─────────────────────────────────────────────┤ -│# n │ ........ ................ u const d_univ d_donor │ . ........... u const d_univ d_donor│ -│1 54 │ DSTNSSGT CVKDRVTGTITELDYW 321 IGHG1 9 8 │ T CQAWDSSAGVF 3925 IGLC1 1 0│ -│2 5 │ │ T CQAWDSSAGVF 262 IGLC1 1 0│ -└───────────┴──────────────────────────────────────────────────────────┴─────────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test31_output b/enclone_main/testx/inputs/outputs/enclone_test31_output deleted file mode 100644 index bc422375d..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test31_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 60 CELLS - -[1.1] CLONOTYPE = 60 CELLS -┌───────────┬─────────────────────────────────────────┬────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 151.1.1|IGHV3-64D ◆ 55|IGHJ4 │ 352|IGLV3-1 ◆ 312|IGLJ1 │ -│ ├─────────────────────────────────────────┼────────────────────────────┤ -│ │ 11 1111111111111111 │ 11111111111 │ -│ │ 46778900 1111112222222222 │ 6 00000111111 │ -│ │ 98151856 4567890123456789 │ 9 56789012345 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ S CQAWD◦◦◦◦◦◦ │ -│donor ref │ SASSSYRA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ S CQAWD◦◦◦◦◦◦ │ -├───────────┼─────────────────────────────────────────┼────────────────────────────┤ -│# n │ ........ ................ u const │ . ........... 
u const│ -│1 54 │ DSTNSSGT CVKDRVTGTITELDYW 336 IGHG1 │ T CQAWDSSAGVF 3945 IGLC1│ -│2 6 │ │ T CQAWDSSAGVF 368 IGLC1│ -└───────────┴─────────────────────────────────────────┴────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test32_output b/enclone_main/testx/inputs/outputs/enclone_test32_output deleted file mode 100644 index 80be5fbeb..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test32_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 38 CELLS - -[1.1] CLONOTYPE = 38 CELLS -┌───────────┬───────────────────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159.1.1|IGHV3-7 ◆ 21|IGHD3-3 ◆ 55|IGHJ4 │ 377|IGLV4-69 ◆ 316|IGLJ3│ -│ ├───────────────────────────────────────────┼──────────────────────────┤ -│ │ 1 111111111111111111 │ 11111111111 │ -│ │ 471 111111222222222233 │ 11111111122 │ -│ │ 1970 456789012345678901 │ 12345678901 │ -│ │ ═══════CDR3═══════ │ ════CDR3═══ │ -│reference │ ESYA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQT◦◦◦◦◦◦◦◦ │ -│donor ref │ ESYA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQT◦◦◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────────────┼──────────────────────────┤ -│# n │ ..x. .................. u const │ ........... 
u const│ -│1 27 │ ESFA CAREPLYYDFWSAYFDYW 876 IGHG1 │ CQTWGTGIRVF 2568 IGLC3│ -│2 2 │ ESYA CAREPLYYDFWSAYFDYW 2096 IGHG1 │ CQTWGTGIRVF 7064 IGLC3│ -│3 9 │ │ CQTWGTGIRVF 561 IGLC3│ -└───────────┴───────────────────────────────────────────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test33_output b/enclone_main/testx/inputs/outputs/enclone_test33_output deleted file mode 100644 index 9ef53be21..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test33_output +++ /dev/null @@ -1,20 +0,0 @@ - -FEATURE SCAN - -3 clonotypes containing 238 cells in test set -15 clonotypes containing 372 cells in control set - -enriched features - -id name library_type test control enrichment -ENSG00000243466 IGKV1-5 Gene Expression 253.94 0.29 874.67 -ENSG00000239951 IGKV3-20 Gene Expression 639.82 0.45 1408.37 -ENSG00000243264 IGKV2D-29 Gene Expression 1.08 0.01 80.34 -ENSG00000102055 PPP1R2C Gene Expression 0.76 0.02 46.89 -ENSG00000280411 IGHV1-69D Gene Expression 599.58 0.31 1906.36 -ENSG00000274576 IGHV2-70 Gene Expression 1.29 0.02 59.79 -ENSG00000259337 IGHV1OR15-2 Gene Expression 0.15 0.00 inf -ENSG00000167641 PPP1R14A Gene Expression 0.31 0.02 16.30 -ENSG00000211662 IGLV3-21 Gene Expression 3256.12 0.77 4205.82 -ENSG00000211669 IGLV3-10 Gene Expression 4079.23 0.69 5881.68 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test34_output b/enclone_main/testx/inputs/outputs/enclone_test34_output deleted file mode 100644 index 3fb08b0c5..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test34_output +++ /dev/null @@ -1,1562 +0,0 @@ -<svg version="1.1" -baseProfile="full" -width="400" height="459.24" -xmlns="http://www.w3.org/2000/svg"> -<circle cx="137.12" cy="201.03" r="3.08" fill="blue" /> -<circle cx="140.20" cy="206.37" r="3.08" fill="blue" /> -<circle cx="140.20" cy="195.70" r="3.08" fill="blue" /> -<circle cx="143.28" cy="201.03" r="3.08" fill="blue" /> -<circle cx="143.28" cy="211.71" r="3.08" fill="blue" /> 
-<circle cx="143.28" cy="190.36" r="3.08" fill="blue" /> -<circle cx="146.36" cy="206.37" r="3.08" fill="blue" /> -<circle cx="146.36" cy="195.70" r="3.08" fill="blue" /> -<circle cx="146.36" cy="217.04" r="3.08" fill="blue" /> -<circle cx="146.36" cy="185.02" r="3.08" fill="blue" /> -<circle cx="149.44" cy="211.71" r="3.08" fill="blue" /> -<circle cx="149.44" cy="201.03" r="3.08" fill="blue" /> -<circle cx="149.44" cy="190.36" r="3.08" fill="blue" /> -<circle cx="149.44" cy="222.38" r="3.08" fill="blue" /> -<circle cx="149.44" cy="179.69" r="3.08" fill="blue" /> -<circle cx="152.52" cy="217.04" r="3.08" fill="blue" /> -<circle cx="152.52" cy="206.37" r="3.08" fill="blue" /> -<circle cx="152.52" cy="195.70" r="3.08" fill="blue" /> -<circle cx="152.52" cy="185.02" r="3.08" fill="blue" /> -<circle cx="152.52" cy="227.72" r="3.08" fill="blue" /> -<circle cx="152.52" cy="174.35" r="3.08" fill="blue" /> -<circle cx="155.61" cy="222.38" r="3.08" fill="blue" /> -<circle cx="155.61" cy="211.71" r="3.08" fill="blue" /> -<circle cx="155.61" cy="201.03" r="3.08" fill="blue" /> -<circle cx="155.61" cy="190.36" r="3.08" fill="blue" /> -<circle cx="155.61" cy="179.69" r="3.08" fill="blue" /> -<circle cx="155.61" cy="233.05" r="3.08" fill="blue" /> -<circle cx="155.61" cy="169.01" r="3.08" fill="blue" /> -<circle cx="158.69" cy="227.72" r="3.08" fill="blue" /> -<circle cx="158.69" cy="217.04" r="3.08" fill="blue" /> -<circle cx="158.69" cy="185.02" r="3.08" fill="blue" /> -<circle cx="158.69" cy="174.35" r="3.08" fill="blue" /> -<circle cx="158.69" cy="206.37" r="3.08" fill="blue" /> -<circle cx="158.69" cy="195.70" r="3.08" fill="blue" /> -<circle cx="158.69" cy="238.39" r="3.08" fill="blue" /> -<circle cx="158.69" cy="163.67" r="3.08" fill="blue" /> -<circle cx="161.77" cy="233.05" r="3.08" fill="blue" /> -<circle cx="161.77" cy="222.38" r="3.08" fill="blue" /> -<circle cx="161.77" cy="179.69" r="3.08" fill="blue" /> -<circle cx="161.77" cy="169.01" r="3.08" fill="blue" /> 
-<circle cx="161.77" cy="211.71" r="3.08" fill="blue" /> -<circle cx="161.77" cy="190.36" r="3.08" fill="blue" /> -<circle cx="161.77" cy="201.03" r="3.08" fill="blue" /> -<circle cx="161.77" cy="243.73" r="3.08" fill="blue" /> -<circle cx="161.77" cy="158.34" r="3.08" fill="blue" /> -<circle cx="164.85" cy="238.39" r="3.08" fill="blue" /> -<circle cx="164.85" cy="227.72" r="3.08" fill="blue" /> -<circle cx="164.85" cy="174.35" r="3.08" fill="blue" /> -<circle cx="164.85" cy="163.67" r="3.08" fill="blue" /> -<circle cx="164.85" cy="217.04" r="3.08" fill="blue" /> -<circle cx="164.85" cy="185.02" r="3.08" fill="blue" /> -<circle cx="164.85" cy="206.37" r="3.08" fill="blue" /> -<circle cx="164.85" cy="195.70" r="3.08" fill="blue" /> -<circle cx="164.85" cy="249.06" r="3.08" fill="blue" /> -<circle cx="164.85" cy="153.00" r="3.08" fill="blue" /> -<circle cx="167.93" cy="243.73" r="3.08" fill="blue" /> -<circle cx="167.93" cy="233.05" r="3.08" fill="blue" /> -<circle cx="167.93" cy="169.01" r="3.08" fill="blue" /> -<circle cx="167.93" cy="158.34" r="3.08" fill="blue" /> -<circle cx="167.93" cy="222.38" r="3.08" fill="blue" /> -<circle cx="167.93" cy="179.69" r="3.08" fill="blue" /> -<circle cx="167.93" cy="211.71" r="3.08" fill="blue" /> -<circle cx="167.93" cy="190.36" r="3.08" fill="blue" /> -<circle cx="167.93" cy="201.03" r="3.08" fill="blue" /> -<circle cx="167.93" cy="254.40" r="3.08" fill="blue" /> -<circle cx="167.93" cy="147.66" r="3.08" fill="blue" /> -<circle cx="171.01" cy="249.06" r="3.08" fill="blue" /> -<circle cx="171.01" cy="238.39" r="3.08" fill="blue" /> -<circle cx="171.01" cy="163.67" r="3.08" fill="blue" /> -<circle cx="171.01" cy="153.00" r="3.08" fill="blue" /> -<circle cx="171.01" cy="227.72" r="3.08" fill="blue" /> -<circle cx="171.01" cy="174.35" r="3.08" fill="blue" /> -<circle cx="171.01" cy="217.04" r="3.08" fill="blue" /> -<circle cx="171.01" cy="185.02" r="3.08" fill="blue" /> -<circle cx="171.01" cy="206.37" r="3.08" fill="blue" /> 
-<circle cx="171.01" cy="195.70" r="3.08" fill="blue" /> -<circle cx="171.01" cy="259.74" r="3.08" fill="blue" /> -<circle cx="171.01" cy="142.33" r="3.08" fill="blue" /> -<circle cx="174.09" cy="254.40" r="3.08" fill="blue" /> -<circle cx="174.09" cy="243.73" r="3.08" fill="blue" /> -<circle cx="174.09" cy="158.34" r="3.08" fill="blue" /> -<circle cx="174.09" cy="147.66" r="3.08" fill="blue" /> -<circle cx="174.09" cy="233.05" r="3.08" fill="blue" /> -<circle cx="174.09" cy="169.01" r="3.08" fill="blue" /> -<circle cx="174.09" cy="222.38" r="3.08" fill="blue" /> -<circle cx="174.09" cy="179.69" r="3.08" fill="blue" /> -<circle cx="174.09" cy="211.71" r="3.08" fill="blue" /> -<circle cx="174.09" cy="201.03" r="3.08" fill="blue" /> -<circle cx="174.09" cy="190.36" r="3.08" fill="blue" /> -<circle cx="177.17" cy="249.06" r="3.08" fill="blue" /> -<circle cx="177.17" cy="238.39" r="3.08" fill="blue" /> -<circle cx="177.17" cy="163.67" r="3.08" fill="blue" /> -<circle cx="177.17" cy="153.00" r="3.08" fill="blue" /> -<circle cx="177.17" cy="227.72" r="3.08" fill="blue" /> -<circle cx="177.17" cy="174.35" r="3.08" fill="blue" /> -<circle cx="177.17" cy="259.74" r="3.08" fill="blue" /> -<circle cx="177.17" cy="217.04" r="3.08" fill="blue" /> -<circle cx="177.17" cy="206.37" r="3.08" fill="blue" /> -<circle cx="177.17" cy="195.70" r="3.08" fill="blue" /> -<circle cx="177.17" cy="185.02" r="3.08" fill="blue" /> -<circle cx="177.17" cy="142.33" r="3.08" fill="blue" /> -<circle cx="180.26" cy="254.40" r="3.08" fill="blue" /> -<circle cx="180.26" cy="147.66" r="3.08" fill="blue" /> -<circle cx="180.26" cy="243.73" r="3.08" fill="blue" /> -<circle cx="180.26" cy="158.34" r="3.08" fill="blue" /> -<circle cx="180.26" cy="233.05" r="3.08" fill="blue" /> -<circle cx="180.26" cy="169.01" r="3.08" fill="blue" /> -<circle cx="180.26" cy="222.38" r="3.08" fill="blue" /> -<circle cx="180.26" cy="211.71" r="3.08" fill="blue" /> -<circle cx="180.26" cy="190.36" r="3.08" fill="blue" /> 
-<circle cx="180.26" cy="179.69" r="3.08" fill="blue" /> -<circle cx="180.26" cy="201.03" r="3.08" fill="blue" /> -<circle cx="183.34" cy="249.06" r="3.08" fill="blue" /> -<circle cx="183.34" cy="153.00" r="3.08" fill="blue" /> -<circle cx="183.34" cy="238.39" r="3.08" fill="blue" /> -<circle cx="183.34" cy="163.67" r="3.08" fill="blue" /> -<circle cx="183.34" cy="227.72" r="3.08" fill="blue" /> -<circle cx="183.34" cy="217.04" r="3.08" fill="blue" /> -<circle cx="183.34" cy="259.74" r="3.08" fill="blue" /> -<circle cx="183.34" cy="185.02" r="3.08" fill="blue" /> -<circle cx="183.34" cy="174.35" r="3.08" fill="blue" /> -<circle cx="183.34" cy="142.33" r="3.08" fill="blue" /> -<circle cx="183.34" cy="206.37" r="3.08" fill="blue" /> -<circle cx="183.34" cy="195.70" r="3.08" fill="blue" /> -<circle cx="186.42" cy="254.40" r="3.08" fill="blue" /> -<circle cx="186.42" cy="147.66" r="3.08" fill="blue" /> -<circle cx="186.42" cy="243.73" r="3.08" fill="blue" /> -<circle cx="186.42" cy="158.34" r="3.08" fill="blue" /> -<circle cx="186.42" cy="233.05" r="3.08" fill="blue" /> -<circle cx="186.42" cy="222.38" r="3.08" fill="blue" /> -<circle cx="186.42" cy="179.69" r="3.08" fill="blue" /> -<circle cx="186.42" cy="169.01" r="3.08" fill="blue" /> -<circle cx="186.42" cy="211.71" r="3.08" fill="blue" /> -<circle cx="186.42" cy="201.03" r="3.08" fill="blue" /> -<circle cx="186.42" cy="190.36" r="3.08" fill="blue" /> -<circle cx="189.50" cy="249.06" r="3.08" fill="blue" /> -<circle cx="189.50" cy="153.00" r="3.08" fill="blue" /> -<circle cx="189.50" cy="238.39" r="3.08" fill="blue" /> -<circle cx="189.50" cy="163.67" r="3.08" fill="blue" /> -<circle cx="189.50" cy="227.72" r="3.08" fill="blue" /> -<circle cx="189.50" cy="259.74" r="3.08" fill="blue" /> -<circle cx="189.50" cy="174.35" r="3.08" fill="blue" /> -<circle cx="189.50" cy="142.33" r="3.08" fill="blue" /> -<circle cx="189.50" cy="217.04" r="3.08" fill="blue" /> -<circle cx="189.50" cy="206.37" r="3.08" fill="blue" /> 
-<circle cx="189.50" cy="195.70" r="3.08" fill="blue" /> -<circle cx="189.50" cy="185.02" r="3.08" fill="blue" /> -<circle cx="192.58" cy="254.40" r="3.08" fill="blue" /> -<circle cx="192.58" cy="147.66" r="3.08" fill="blue" /> -<circle cx="192.58" cy="243.73" r="3.08" fill="blue" /> -<circle cx="192.58" cy="158.34" r="3.08" fill="blue" /> -<circle cx="192.58" cy="233.05" r="3.08" fill="blue" /> -<circle cx="192.58" cy="201.03" r="3.08" fill="blue" /> -<circle cx="192.58" cy="169.01" r="3.08" fill="blue" /> -<circle cx="192.58" cy="222.38" r="3.08" fill="blue" /> -<circle cx="192.58" cy="211.71" r="3.08" fill="blue" /> -<circle cx="192.58" cy="190.36" r="3.08" fill="blue" /> -<circle cx="192.58" cy="179.69" r="3.08" fill="blue" /> -<circle cx="195.66" cy="153.00" r="3.08" fill="blue" /> -<circle cx="195.66" cy="249.06" r="3.08" fill="blue" /> -<circle cx="195.66" cy="163.67" r="3.08" fill="blue" /> -<circle cx="195.66" cy="238.39" r="3.08" fill="blue" /> -<circle cx="195.66" cy="227.72" r="3.08" fill="blue" /> -<circle cx="195.66" cy="206.37" r="3.08" fill="blue" /> -<circle cx="195.66" cy="142.33" r="3.08" fill="blue" /> -<circle cx="195.66" cy="259.74" r="3.08" fill="blue" /> -<circle cx="195.66" cy="195.70" r="3.08" fill="blue" /> -<circle cx="195.66" cy="217.04" r="3.08" fill="blue" /> -<circle cx="195.66" cy="174.35" r="3.08" fill="blue" /> -<circle cx="195.66" cy="185.02" r="3.08" fill="blue" /> -<circle cx="198.74" cy="254.40" r="3.08" fill="blue" /> -<circle cx="198.74" cy="147.66" r="3.08" fill="blue" /> -<circle cx="198.74" cy="243.73" r="3.08" fill="blue" /> -<circle cx="198.74" cy="158.34" r="3.08" fill="blue" /> -<circle cx="198.74" cy="233.05" r="3.08" fill="blue" /> -<circle cx="198.74" cy="211.71" r="3.08" fill="blue" /> -<circle cx="198.74" cy="201.03" r="3.08" fill="blue" /> -<circle cx="198.74" cy="190.36" r="3.08" fill="blue" /> -<circle cx="198.74" cy="169.01" r="3.08" fill="blue" /> -<circle cx="198.74" cy="222.38" r="3.08" fill="blue" /> 
-<circle cx="198.74" cy="179.69" r="3.08" fill="blue" /> -<circle cx="201.82" cy="153.00" r="3.08" fill="blue" /> -<circle cx="201.82" cy="249.06" r="3.08" fill="blue" /> -<circle cx="201.82" cy="163.67" r="3.08" fill="blue" /> -<circle cx="201.82" cy="238.39" r="3.08" fill="blue" /> -<circle cx="201.82" cy="227.72" r="3.08" fill="blue" /> -<circle cx="201.82" cy="142.33" r="3.08" fill="blue" /> -<circle cx="201.82" cy="206.37" r="3.08" fill="blue" /> -<circle cx="201.82" cy="195.70" r="3.08" fill="blue" /> -<circle cx="201.82" cy="259.74" r="3.08" fill="blue" /> -<circle cx="201.82" cy="217.04" r="3.08" fill="blue" /> -<circle cx="201.82" cy="174.35" r="3.08" fill="blue" /> -<circle cx="201.82" cy="185.02" r="3.08" fill="blue" /> -<circle cx="204.91" cy="254.40" r="3.08" fill="blue" /> -<circle cx="204.91" cy="147.66" r="3.08" fill="blue" /> -<circle cx="204.91" cy="243.73" r="3.08" fill="blue" /> -<circle cx="204.91" cy="158.34" r="3.08" fill="blue" /> -<circle cx="204.91" cy="233.05" r="3.08" fill="blue" /> -<circle cx="204.91" cy="211.71" r="3.08" fill="blue" /> -<circle cx="204.91" cy="201.03" r="3.08" fill="blue" /> -<circle cx="204.91" cy="190.36" r="3.08" fill="blue" /> -<circle cx="204.91" cy="169.01" r="3.08" fill="blue" /> -<circle cx="204.91" cy="222.38" r="3.08" fill="blue" /> -<circle cx="204.91" cy="179.69" r="3.08" fill="blue" /> -<circle cx="207.99" cy="153.00" r="3.08" fill="blue" /> -<circle cx="207.99" cy="249.06" r="3.08" fill="blue" /> -<circle cx="207.99" cy="163.67" r="3.08" fill="blue" /> -<circle cx="207.99" cy="238.39" r="3.08" fill="blue" /> -<circle cx="207.99" cy="227.72" r="3.08" fill="blue" /> -<circle cx="207.99" cy="142.33" r="3.08" fill="blue" /> -<circle cx="207.99" cy="206.37" r="3.08" fill="blue" /> -<circle cx="207.99" cy="195.70" r="3.08" fill="blue" /> -<circle cx="207.99" cy="259.74" r="3.08" fill="blue" /> -<circle cx="207.99" cy="217.04" r="3.08" fill="blue" /> -<circle cx="207.99" cy="174.35" r="3.08" fill="red" /> 
-<circle cx="207.99" cy="185.02" r="3.08" fill="red" /> -<circle cx="211.07" cy="254.40" r="3.08" fill="red" /> -<circle cx="211.07" cy="147.66" r="3.08" fill="red" /> -<circle cx="211.07" cy="243.73" r="3.08" fill="red" /> -<circle cx="211.07" cy="158.34" r="3.08" fill="red" /> -<circle cx="211.07" cy="233.05" r="3.08" fill="red" /> -<circle cx="211.07" cy="211.71" r="3.08" fill="red" /> -<circle cx="211.07" cy="201.03" r="3.08" fill="red" /> -<circle cx="211.07" cy="190.36" r="3.08" fill="red" /> -<circle cx="211.07" cy="169.01" r="3.08" fill="red" /> -<circle cx="211.07" cy="222.38" r="3.08" fill="red" /> -<circle cx="211.07" cy="179.69" r="3.08" fill="red" /> -<circle cx="214.15" cy="153.00" r="3.08" fill="red" /> -<circle cx="214.15" cy="249.06" r="3.08" fill="red" /> -<circle cx="214.15" cy="163.67" r="3.08" fill="red" /> -<circle cx="214.15" cy="238.39" r="3.08" fill="red" /> -<circle cx="214.15" cy="227.72" r="3.08" fill="red" /> -<circle cx="214.15" cy="206.37" r="3.08" fill="red" /> -<circle cx="214.15" cy="142.33" r="3.08" fill="red" /> -<circle cx="214.15" cy="259.74" r="3.08" fill="red" /> -<circle cx="214.15" cy="195.70" r="3.08" fill="red" /> -<circle cx="214.15" cy="217.04" r="3.08" fill="red" /> -<circle cx="214.15" cy="174.35" r="3.08" fill="red" /> -<circle cx="214.15" cy="185.02" r="3.08" fill="red" /> -<circle cx="217.23" cy="254.40" r="3.08" fill="red" /> -<circle cx="217.23" cy="147.66" r="3.08" fill="red" /> -<circle cx="217.23" cy="243.73" r="3.08" fill="red" /> -<circle cx="217.23" cy="158.34" r="3.08" fill="red" /> -<circle cx="217.23" cy="233.05" r="3.08" fill="red" /> -<circle cx="217.23" cy="201.03" r="3.08" fill="red" /> -<circle cx="217.23" cy="222.38" r="3.08" fill="red" /> -<circle cx="217.23" cy="211.71" r="3.08" fill="red" /> -<circle cx="217.23" cy="190.36" r="3.08" fill="red" /> -<circle cx="217.23" cy="179.69" r="3.08" fill="red" /> -<circle cx="217.23" cy="169.01" r="3.08" fill="red" /> -<circle cx="220.31" cy="249.06" 
r="3.08" fill="red" /> -<circle cx="220.31" cy="153.00" r="3.08" fill="red" /> -<circle cx="220.31" cy="238.39" r="3.08" fill="red" /> -<circle cx="220.31" cy="163.67" r="3.08" fill="red" /> -<circle cx="220.31" cy="259.74" r="3.08" fill="red" /> -<circle cx="220.31" cy="227.72" r="3.08" fill="red" /> -<circle cx="220.31" cy="142.33" r="3.08" fill="red" /> -<circle cx="220.31" cy="217.04" r="3.08" fill="red" /> -<circle cx="220.31" cy="206.37" r="3.08" fill="red" /> -<circle cx="220.31" cy="195.70" r="3.08" fill="red" /> -<circle cx="220.31" cy="185.02" r="3.08" fill="red" /> -<circle cx="220.31" cy="174.35" r="3.08" fill="red" /> -<circle cx="223.39" cy="254.40" r="3.08" fill="red" /> -<circle cx="223.39" cy="147.66" r="3.08" fill="red" /> -<circle cx="223.39" cy="243.73" r="3.08" fill="red" /> -<circle cx="223.39" cy="158.34" r="3.08" fill="red" /> -<circle cx="223.39" cy="233.05" r="3.08" fill="red" /> -<circle cx="223.39" cy="222.38" r="3.08" fill="red" /> -<circle cx="223.39" cy="211.71" r="3.08" fill="red" /> -<circle cx="223.39" cy="201.03" r="3.08" fill="red" /> -<circle cx="223.39" cy="190.36" r="3.08" fill="red" /> -<circle cx="223.39" cy="179.69" r="3.08" fill="red" /> -<circle cx="223.39" cy="169.01" r="3.08" fill="red" /> -<circle cx="226.47" cy="249.06" r="3.08" fill="red" /> -<circle cx="226.47" cy="153.00" r="3.08" fill="red" /> -<circle cx="226.47" cy="238.39" r="3.08" fill="red" /> -<circle cx="226.47" cy="163.67" r="3.08" fill="red" /> -<circle cx="226.47" cy="259.74" r="3.08" fill="red" /> -<circle cx="226.47" cy="227.72" r="3.08" fill="red" /> -<circle cx="226.47" cy="217.04" r="3.08" fill="red" /> -<circle cx="226.47" cy="142.33" r="3.08" fill="red" /> -<circle cx="226.47" cy="206.37" r="3.08" fill="red" /> -<circle cx="226.47" cy="195.70" r="3.08" fill="red" /> -<circle cx="226.47" cy="185.02" r="3.08" fill="red" /> -<circle cx="226.47" cy="174.35" r="3.08" fill="red" /> -<circle cx="229.55" cy="254.40" r="3.08" fill="red" /> -<circle 
cx="229.55" cy="147.66" r="3.08" fill="red" /> -<circle cx="229.55" cy="243.73" r="3.08" fill="red" /> -<circle cx="229.55" cy="158.34" r="3.08" fill="red" /> -<circle cx="229.55" cy="233.05" r="3.08" fill="red" /> -<circle cx="229.55" cy="169.01" r="3.08" fill="red" /> -<circle cx="229.55" cy="222.38" r="3.08" fill="red" /> -<circle cx="229.55" cy="211.71" r="3.08" fill="red" /> -<circle cx="229.55" cy="201.03" r="3.08" fill="red" /> -<circle cx="229.55" cy="190.36" r="3.08" fill="red" /> -<circle cx="229.55" cy="179.69" r="3.08" fill="red" /> -<circle cx="232.64" cy="249.06" r="3.08" fill="red" /> -<circle cx="232.64" cy="153.00" r="3.08" fill="red" /> -<circle cx="232.64" cy="238.39" r="3.08" fill="red" /> -<circle cx="232.64" cy="163.67" r="3.08" fill="red" /> -<circle cx="232.64" cy="227.72" r="3.08" fill="red" /> -<circle cx="232.64" cy="174.35" r="3.08" fill="red" /> -<circle cx="232.64" cy="259.74" r="3.08" fill="red" /> -<circle cx="232.64" cy="217.04" r="3.08" fill="red" /> -<circle cx="232.64" cy="206.37" r="3.08" fill="red" /> -<circle cx="232.64" cy="195.70" r="3.08" fill="red" /> -<circle cx="232.64" cy="185.02" r="3.08" fill="red" /> -<circle cx="232.64" cy="142.33" r="3.08" fill="red" /> -<circle cx="235.72" cy="254.40" r="3.08" fill="red" /> -<circle cx="235.72" cy="243.73" r="3.08" fill="red" /> -<circle cx="235.72" cy="158.34" r="3.08" fill="red" /> -<circle cx="235.72" cy="147.66" r="3.08" fill="red" /> -<circle cx="235.72" cy="233.05" r="3.08" fill="red" /> -<circle cx="235.72" cy="169.01" r="3.08" fill="red" /> -<circle cx="235.72" cy="222.38" r="3.08" fill="red" /> -<circle cx="235.72" cy="179.69" r="3.08" fill="red" /> -<circle cx="235.72" cy="211.71" r="3.08" fill="red" /> -<circle cx="235.72" cy="201.03" r="3.08" fill="red" /> -<circle cx="235.72" cy="190.36" r="3.08" fill="red" /> -<circle cx="238.80" cy="249.06" r="3.08" fill="red" /> -<circle cx="238.80" cy="238.39" r="3.08" fill="red" /> -<circle cx="238.80" cy="163.67" r="3.08" 
fill="red" /> -<circle cx="238.80" cy="153.00" r="3.08" fill="red" /> -<circle cx="238.80" cy="227.72" r="3.08" fill="red" /> -<circle cx="238.80" cy="174.35" r="3.08" fill="red" /> -<circle cx="238.80" cy="259.74" r="3.08" fill="red" /> -<circle cx="238.80" cy="217.04" r="3.08" fill="red" /> -<circle cx="238.80" cy="185.02" r="3.08" fill="red" /> -<circle cx="238.80" cy="142.33" r="3.08" fill="red" /> -<circle cx="238.80" cy="206.37" r="3.08" fill="red" /> -<circle cx="238.80" cy="195.70" r="3.08" fill="red" /> -<circle cx="241.88" cy="243.73" r="3.08" fill="red" /> -<circle cx="241.88" cy="233.05" r="3.08" fill="red" /> -<circle cx="241.88" cy="169.01" r="3.08" fill="red" /> -<circle cx="241.88" cy="158.34" r="3.08" fill="red" /> -<circle cx="241.88" cy="222.38" r="3.08" fill="red" /> -<circle cx="241.88" cy="179.69" r="3.08" fill="red" /> -<circle cx="241.88" cy="254.40" r="3.08" fill="red" /> -<circle cx="241.88" cy="211.71" r="3.08" fill="red" /> -<circle cx="241.88" cy="190.36" r="3.08" fill="red" /> -<circle cx="241.88" cy="147.66" r="3.08" fill="red" /> -<circle cx="241.88" cy="201.03" r="3.08" fill="red" /> -<circle cx="244.96" cy="238.39" r="3.08" fill="red" /> -<circle cx="244.96" cy="227.72" r="3.08" fill="red" /> -<circle cx="244.96" cy="174.35" r="3.08" fill="red" /> -<circle cx="244.96" cy="163.67" r="3.08" fill="red" /> -<circle cx="244.96" cy="217.04" r="3.08" fill="red" /> -<circle cx="244.96" cy="185.02" r="3.08" fill="red" /> -<circle cx="244.96" cy="249.06" r="3.08" fill="red" /> -<circle cx="244.96" cy="206.37" r="3.08" fill="red" /> -<circle cx="244.96" cy="195.70" r="3.08" fill="red" /> -<circle cx="244.96" cy="153.00" r="3.08" fill="red" /> -<circle cx="248.04" cy="233.05" r="3.08" fill="red" /> -<circle cx="248.04" cy="222.38" r="3.08" fill="red" /> -<circle cx="248.04" cy="179.69" r="3.08" fill="red" /> -<circle cx="248.04" cy="169.01" r="3.08" fill="red" /> -<circle cx="248.04" cy="211.71" r="3.08" fill="red" /> -<circle cx="248.04" 
cy="190.36" r="3.08" fill="red" /> -<circle cx="248.04" cy="243.73" r="3.08" fill="red" /> -<circle cx="248.04" cy="201.03" r="3.08" fill="red" /> -<circle cx="248.04" cy="158.34" r="3.08" fill="red" /> -<circle cx="251.12" cy="227.72" r="3.08" fill="red" /> -<circle cx="251.12" cy="217.04" r="3.08" fill="red" /> -<circle cx="251.12" cy="185.02" r="3.08" fill="red" /> -<circle cx="251.12" cy="174.35" r="3.08" fill="red" /> -<circle cx="251.12" cy="206.37" r="3.08" fill="red" /> -<circle cx="251.12" cy="195.70" r="3.08" fill="red" /> -<circle cx="251.12" cy="238.39" r="3.08" fill="red" /> -<circle cx="251.12" cy="163.67" r="3.08" fill="red" /> -<circle cx="254.20" cy="222.38" r="3.08" fill="red" /> -<circle cx="254.20" cy="211.71" r="3.08" fill="red" /> -<circle cx="254.20" cy="201.03" r="3.08" fill="red" /> -<circle cx="254.20" cy="190.36" r="3.08" fill="red" /> -<circle cx="254.20" cy="179.69" r="3.08" fill="red" /> -<circle cx="254.20" cy="233.05" r="3.08" fill="red" /> -<circle cx="254.20" cy="169.01" r="3.08" fill="red" /> -<circle cx="257.29" cy="217.04" r="3.08" fill="red" /> -<circle cx="257.29" cy="206.37" r="3.08" fill="red" /> -<circle cx="257.29" cy="195.70" r="3.08" fill="red" /> -<circle cx="257.29" cy="185.02" r="3.08" fill="red" /> -<circle cx="257.29" cy="227.72" r="3.08" fill="red" /> -<circle cx="257.29" cy="174.35" r="3.08" fill="red" /> -<circle cx="260.37" cy="211.71" r="3.08" fill="red" /> -<circle cx="260.37" cy="201.03" r="3.08" fill="red" /> -<circle cx="260.37" cy="190.36" r="3.08" fill="red" /> -<circle cx="260.37" cy="222.38" r="3.08" fill="red" /> -<circle cx="260.37" cy="179.69" r="3.08" fill="red" /> -<circle cx="263.45" cy="206.37" r="3.08" fill="red" /> -<circle cx="263.45" cy="195.70" r="3.08" fill="red" /> -<circle cx="263.45" cy="217.04" r="3.08" fill="red" /> -<circle cx="263.45" cy="185.02" r="3.08" fill="red" /> -<circle cx="266.53" cy="201.03" r="3.08" fill="red" /> -<circle cx="266.53" cy="211.71" r="3.08" fill="red" /> 
-<circle cx="266.53" cy="190.36" r="3.08" fill="red" /> -<circle cx="269.61" cy="206.37" r="3.08" fill="red" /> -<circle cx="269.61" cy="195.70" r="3.08" fill="red" /> -<circle cx="272.69" cy="201.03" r="3.08" fill="red" /> -<circle cx="278.85" cy="201.03" r="3.08" fill="red" /> -<circle cx="58.87" cy="131.36" r="3.08" fill="blue" /> -<circle cx="61.95" cy="136.69" r="3.08" fill="blue" /> -<circle cx="61.95" cy="126.02" r="3.08" fill="blue" /> -<circle cx="65.03" cy="142.03" r="3.08" fill="blue" /> -<circle cx="65.03" cy="131.36" r="3.08" fill="blue" /> -<circle cx="65.03" cy="120.68" r="3.08" fill="blue" /> -<circle cx="68.11" cy="147.37" r="3.08" fill="blue" /> -<circle cx="68.11" cy="136.69" r="3.08" fill="blue" /> -<circle cx="68.11" cy="126.02" r="3.08" fill="blue" /> -<circle cx="68.11" cy="115.35" r="3.08" fill="blue" /> -<circle cx="71.19" cy="152.70" r="3.08" fill="blue" /> -<circle cx="71.19" cy="142.03" r="3.08" fill="blue" /> -<circle cx="71.19" cy="120.68" r="3.08" fill="blue" /> -<circle cx="71.19" cy="110.01" r="3.08" fill="blue" /> -<circle cx="71.19" cy="131.36" r="3.08" fill="blue" /> -<circle cx="74.27" cy="158.04" r="3.08" fill="blue" /> -<circle cx="74.27" cy="147.37" r="3.08" fill="blue" /> -<circle cx="74.27" cy="115.35" r="3.08" fill="blue" /> -<circle cx="74.27" cy="104.67" r="3.08" fill="blue" /> -<circle cx="74.27" cy="136.69" r="3.08" fill="blue" /> -<circle cx="74.27" cy="126.02" r="3.08" fill="blue" /> -<circle cx="77.36" cy="163.38" r="3.08" fill="blue" /> -<circle cx="77.36" cy="152.70" r="3.08" fill="blue" /> -<circle cx="77.36" cy="110.01" r="3.08" fill="blue" /> -<circle cx="77.36" cy="99.33" r="3.08" fill="blue" /> -<circle cx="77.36" cy="142.03" r="3.08" fill="blue" /> -<circle cx="77.36" cy="131.36" r="3.08" fill="blue" /> -<circle cx="77.36" cy="120.68" r="3.08" fill="blue" /> -<circle cx="80.44" cy="158.04" r="3.08" fill="blue" /> -<circle cx="80.44" cy="104.67" r="3.08" fill="blue" /> -<circle cx="80.44" cy="147.37" r="3.08" 
fill="blue" /> -<circle cx="80.44" cy="136.69" r="3.08" fill="blue" /> -<circle cx="80.44" cy="126.02" r="3.08" fill="blue" /> -<circle cx="80.44" cy="115.35" r="3.08" fill="blue" /> -<circle cx="83.52" cy="163.38" r="3.08" fill="blue" /> -<circle cx="83.52" cy="131.36" r="3.08" fill="blue" /> -<circle cx="83.52" cy="99.33" r="3.08" fill="blue" /> -<circle cx="83.52" cy="152.70" r="3.08" fill="blue" /> -<circle cx="83.52" cy="142.03" r="3.08" fill="blue" /> -<circle cx="83.52" cy="120.68" r="3.08" fill="blue" /> -<circle cx="83.52" cy="110.01" r="3.08" fill="blue" /> -<circle cx="86.60" cy="158.04" r="3.08" fill="blue" /> -<circle cx="86.60" cy="136.69" r="3.08" fill="blue" /> -<circle cx="86.60" cy="126.02" r="3.08" fill="blue" /> -<circle cx="86.60" cy="147.37" r="3.08" fill="blue" /> -<circle cx="86.60" cy="104.67" r="3.08" fill="blue" /> -<circle cx="86.60" cy="115.35" r="3.08" fill="blue" /> -<circle cx="89.68" cy="163.38" r="3.08" fill="blue" /> -<circle cx="89.68" cy="142.03" r="3.08" fill="blue" /> -<circle cx="89.68" cy="131.36" r="3.08" fill="blue" /> -<circle cx="89.68" cy="120.68" r="3.08" fill="blue" /> -<circle cx="89.68" cy="99.33" r="3.08" fill="blue" /> -<circle cx="89.68" cy="152.70" r="3.08" fill="blue" /> -<circle cx="89.68" cy="110.01" r="3.08" fill="blue" /> -<circle cx="92.76" cy="158.04" r="3.08" fill="blue" /> -<circle cx="92.76" cy="136.69" r="3.08" fill="blue" /> -<circle cx="92.76" cy="126.02" r="3.08" fill="blue" /> -<circle cx="92.76" cy="147.37" r="3.08" fill="blue" /> -<circle cx="92.76" cy="104.67" r="3.08" fill="blue" /> -<circle cx="92.76" cy="115.35" r="3.08" fill="blue" /> -<circle cx="95.84" cy="163.38" r="3.08" fill="blue" /> -<circle cx="95.84" cy="142.03" r="3.08" fill="blue" /> -<circle cx="95.84" cy="131.36" r="3.08" fill="blue" /> -<circle cx="95.84" cy="120.68" r="3.08" fill="blue" /> -<circle cx="95.84" cy="99.33" r="3.08" fill="blue" /> -<circle cx="95.84" cy="152.70" r="3.08" fill="blue" /> -<circle cx="95.84" 
cy="110.01" r="3.08" fill="blue" /> -<circle cx="98.92" cy="158.04" r="3.08" fill="blue" /> -<circle cx="98.92" cy="136.69" r="3.08" fill="blue" /> -<circle cx="98.92" cy="126.02" r="3.08" fill="blue" /> -<circle cx="98.92" cy="147.37" r="3.08" fill="blue" /> -<circle cx="98.92" cy="104.67" r="3.08" fill="blue" /> -<circle cx="98.92" cy="115.35" r="3.08" fill="blue" /> -<circle cx="102.00" cy="163.38" r="3.08" fill="red" /> -<circle cx="102.00" cy="142.03" r="3.08" fill="red" /> -<circle cx="102.00" cy="131.36" r="3.08" fill="red" /> -<circle cx="102.00" cy="120.68" r="3.08" fill="red" /> -<circle cx="102.00" cy="99.33" r="3.08" fill="red" /> -<circle cx="102.00" cy="152.70" r="3.08" fill="red" /> -<circle cx="102.00" cy="110.01" r="3.08" fill="red" /> -<circle cx="105.09" cy="158.04" r="3.08" fill="red" /> -<circle cx="105.09" cy="136.69" r="3.08" fill="red" /> -<circle cx="105.09" cy="126.02" r="3.08" fill="red" /> -<circle cx="105.09" cy="147.37" r="3.08" fill="red" /> -<circle cx="105.09" cy="104.67" r="3.08" fill="red" /> -<circle cx="105.09" cy="115.35" r="3.08" fill="red" /> -<circle cx="108.17" cy="163.38" r="3.08" fill="red" /> -<circle cx="108.17" cy="131.36" r="3.08" fill="red" /> -<circle cx="108.17" cy="152.70" r="3.08" fill="red" /> -<circle cx="108.17" cy="142.03" r="3.08" fill="red" /> -<circle cx="108.17" cy="120.68" r="3.08" fill="red" /> -<circle cx="108.17" cy="110.01" r="3.08" fill="red" /> -<circle cx="108.17" cy="99.33" r="3.08" fill="red" /> -<circle cx="111.25" cy="158.04" r="3.08" fill="red" /> -<circle cx="111.25" cy="147.37" r="3.08" fill="red" /> -<circle cx="111.25" cy="136.69" r="3.08" fill="red" /> -<circle cx="111.25" cy="126.02" r="3.08" fill="red" /> -<circle cx="111.25" cy="115.35" r="3.08" fill="red" /> -<circle cx="111.25" cy="104.67" r="3.08" fill="red" /> -<circle cx="114.33" cy="163.38" r="3.08" fill="red" /> -<circle cx="114.33" cy="152.70" r="3.08" fill="red" /> -<circle cx="114.33" cy="142.03" r="3.08" fill="red" /> 
-<circle cx="114.33" cy="131.36" r="3.08" fill="red" /> -<circle cx="114.33" cy="120.68" r="3.08" fill="red" /> -<circle cx="114.33" cy="110.01" r="3.08" fill="red" /> -<circle cx="114.33" cy="99.33" r="3.08" fill="red" /> -<circle cx="117.41" cy="158.04" r="3.08" fill="red" /> -<circle cx="117.41" cy="147.37" r="3.08" fill="red" /> -<circle cx="117.41" cy="136.69" r="3.08" fill="red" /> -<circle cx="117.41" cy="126.02" r="3.08" fill="red" /> -<circle cx="117.41" cy="115.35" r="3.08" fill="red" /> -<circle cx="117.41" cy="104.67" r="3.08" fill="red" /> -<circle cx="120.49" cy="152.70" r="3.08" fill="red" /> -<circle cx="120.49" cy="142.03" r="3.08" fill="red" /> -<circle cx="120.49" cy="131.36" r="3.08" fill="red" /> -<circle cx="120.49" cy="120.68" r="3.08" fill="red" /> -<circle cx="120.49" cy="110.01" r="3.08" fill="red" /> -<circle cx="123.57" cy="147.37" r="3.08" fill="red" /> -<circle cx="123.57" cy="136.69" r="3.08" fill="red" /> -<circle cx="123.57" cy="126.02" r="3.08" fill="red" /> -<circle cx="123.57" cy="115.35" r="3.08" fill="red" /> -<circle cx="126.65" cy="110.01" r="3.08" fill="red" /> -<circle cx="126.65" cy="142.03" r="3.08" fill="red" /> -<circle cx="126.65" cy="131.36" r="3.08" fill="red" /> -<circle cx="126.65" cy="120.68" r="3.08" fill="red" /> -<circle cx="129.74" cy="115.35" r="3.08" fill="red" /> -<circle cx="129.74" cy="136.69" r="3.08" fill="red" /> -<circle cx="129.74" cy="126.02" r="3.08" fill="red" /> -<circle cx="132.82" cy="120.68" r="3.08" fill="red" /> -<circle cx="132.82" cy="131.36" r="3.08" fill="red" /> -<circle cx="135.90" cy="126.02" r="3.08" fill="red" /> -<circle cx="138.98" cy="131.36" r="3.08" fill="red" /> -<circle cx="289.42" cy="221.80" r="3.08" fill="blue" /> -<circle cx="292.50" cy="227.14" r="3.08" fill="blue" /> -<circle cx="292.50" cy="216.47" r="3.08" fill="blue" /> -<circle cx="295.58" cy="232.48" r="3.08" fill="blue" /> -<circle cx="295.58" cy="221.80" r="3.08" fill="blue" /> -<circle cx="295.58" cy="211.13" 
r="3.08" fill="blue" /> -<circle cx="298.66" cy="237.82" r="3.08" fill="blue" /> -<circle cx="298.66" cy="227.14" r="3.08" fill="blue" /> -<circle cx="298.66" cy="216.47" r="3.08" fill="blue" /> -<circle cx="298.66" cy="205.79" r="3.08" fill="blue" /> -<circle cx="301.74" cy="243.15" r="3.08" fill="blue" /> -<circle cx="301.74" cy="232.48" r="3.08" fill="blue" /> -<circle cx="301.74" cy="211.13" r="3.08" fill="blue" /> -<circle cx="301.74" cy="200.46" r="3.08" fill="blue" /> -<circle cx="301.74" cy="221.80" r="3.08" fill="blue" /> -<circle cx="304.82" cy="237.82" r="3.08" fill="blue" /> -<circle cx="304.82" cy="205.79" r="3.08" fill="blue" /> -<circle cx="304.82" cy="195.12" r="3.08" fill="blue" /> -<circle cx="304.82" cy="227.14" r="3.08" fill="blue" /> -<circle cx="304.82" cy="216.47" r="3.08" fill="blue" /> -<circle cx="307.90" cy="243.15" r="3.08" fill="blue" /> -<circle cx="307.90" cy="200.46" r="3.08" fill="blue" /> -<circle cx="307.90" cy="189.78" r="3.08" fill="blue" /> -<circle cx="307.90" cy="232.48" r="3.08" fill="blue" /> -<circle cx="307.90" cy="221.80" r="3.08" fill="blue" /> -<circle cx="307.90" cy="211.13" r="3.08" fill="blue" /> -<circle cx="310.99" cy="248.49" r="3.08" fill="blue" /> -<circle cx="310.99" cy="195.12" r="3.08" fill="blue" /> -<circle cx="310.99" cy="237.82" r="3.08" fill="blue" /> -<circle cx="310.99" cy="227.14" r="3.08" fill="blue" /> -<circle cx="310.99" cy="216.47" r="3.08" fill="blue" /> -<circle cx="310.99" cy="205.79" r="3.08" fill="blue" /> -<circle cx="314.07" cy="221.80" r="3.08" fill="blue" /> -<circle cx="314.07" cy="189.78" r="3.08" fill="blue" /> -<circle cx="314.07" cy="243.15" r="3.08" fill="blue" /> -<circle cx="314.07" cy="232.48" r="3.08" fill="blue" /> -<circle cx="314.07" cy="211.13" r="3.08" fill="blue" /> -<circle cx="314.07" cy="200.46" r="3.08" fill="blue" /> -<circle cx="317.15" cy="248.49" r="3.08" fill="blue" /> -<circle cx="317.15" cy="227.14" r="3.08" fill="blue" /> -<circle cx="317.15" cy="216.47" 
r="3.08" fill="blue" /> -<circle cx="317.15" cy="237.82" r="3.08" fill="blue" /> -<circle cx="317.15" cy="195.12" r="3.08" fill="blue" /> -<circle cx="317.15" cy="205.79" r="3.08" fill="blue" /> -<circle cx="320.23" cy="232.48" r="3.08" fill="blue" /> -<circle cx="320.23" cy="221.80" r="3.08" fill="blue" /> -<circle cx="320.23" cy="211.13" r="3.08" fill="blue" /> -<circle cx="320.23" cy="189.78" r="3.08" fill="blue" /> -<circle cx="320.23" cy="243.15" r="3.08" fill="blue" /> -<circle cx="320.23" cy="200.46" r="3.08" fill="blue" /> -<circle cx="323.31" cy="248.49" r="3.08" fill="blue" /> -<circle cx="323.31" cy="227.14" r="3.08" fill="blue" /> -<circle cx="323.31" cy="216.47" r="3.08" fill="blue" /> -<circle cx="323.31" cy="237.82" r="3.08" fill="blue" /> -<circle cx="323.31" cy="195.12" r="3.08" fill="blue" /> -<circle cx="323.31" cy="205.79" r="3.08" fill="blue" /> -<circle cx="326.39" cy="232.48" r="3.08" fill="blue" /> -<circle cx="326.39" cy="221.80" r="3.08" fill="blue" /> -<circle cx="326.39" cy="211.13" r="3.08" fill="blue" /> -<circle cx="326.39" cy="189.78" r="3.08" fill="blue" /> -<circle cx="326.39" cy="243.15" r="3.08" fill="blue" /> -<circle cx="326.39" cy="200.46" r="3.08" fill="blue" /> -<circle cx="329.47" cy="248.49" r="3.08" fill="blue" /> -<circle cx="329.47" cy="227.14" r="3.08" fill="blue" /> -<circle cx="329.47" cy="216.47" r="3.08" fill="blue" /> -<circle cx="329.47" cy="237.82" r="3.08" fill="red" /> -<circle cx="329.47" cy="195.12" r="3.08" fill="red" /> -<circle cx="329.47" cy="205.79" r="3.08" fill="red" /> -<circle cx="332.55" cy="232.48" r="3.08" fill="red" /> -<circle cx="332.55" cy="221.80" r="3.08" fill="red" /> -<circle cx="332.55" cy="211.13" r="3.08" fill="red" /> -<circle cx="332.55" cy="189.78" r="3.08" fill="red" /> -<circle cx="332.55" cy="243.15" r="3.08" fill="red" /> -<circle cx="332.55" cy="200.46" r="3.08" fill="red" /> -<circle cx="335.64" cy="248.49" r="3.08" fill="red" /> -<circle cx="335.64" cy="227.14" r="3.08" 
fill="red" /> -<circle cx="335.64" cy="216.47" r="3.08" fill="red" /> -<circle cx="335.64" cy="237.82" r="3.08" fill="red" /> -<circle cx="335.64" cy="195.12" r="3.08" fill="red" /> -<circle cx="335.64" cy="205.79" r="3.08" fill="red" /> -<circle cx="338.72" cy="221.80" r="3.08" fill="red" /> -<circle cx="338.72" cy="243.15" r="3.08" fill="red" /> -<circle cx="338.72" cy="232.48" r="3.08" fill="red" /> -<circle cx="338.72" cy="211.13" r="3.08" fill="red" /> -<circle cx="338.72" cy="200.46" r="3.08" fill="red" /> -<circle cx="338.72" cy="189.78" r="3.08" fill="red" /> -<circle cx="341.80" cy="248.49" r="3.08" fill="red" /> -<circle cx="341.80" cy="237.82" r="3.08" fill="red" /> -<circle cx="341.80" cy="227.14" r="3.08" fill="red" /> -<circle cx="341.80" cy="216.47" r="3.08" fill="red" /> -<circle cx="341.80" cy="205.79" r="3.08" fill="red" /> -<circle cx="341.80" cy="195.12" r="3.08" fill="red" /> -<circle cx="344.88" cy="243.15" r="3.08" fill="red" /> -<circle cx="344.88" cy="232.48" r="3.08" fill="red" /> -<circle cx="344.88" cy="221.80" r="3.08" fill="red" /> -<circle cx="344.88" cy="211.13" r="3.08" fill="red" /> -<circle cx="344.88" cy="200.46" r="3.08" fill="red" /> -<circle cx="344.88" cy="189.78" r="3.08" fill="red" /> -<circle cx="347.96" cy="237.82" r="3.08" fill="red" /> -<circle cx="347.96" cy="227.14" r="3.08" fill="red" /> -<circle cx="347.96" cy="216.47" r="3.08" fill="red" /> -<circle cx="347.96" cy="205.79" r="3.08" fill="red" /> -<circle cx="347.96" cy="195.12" r="3.08" fill="red" /> -<circle cx="351.04" cy="232.48" r="3.08" fill="red" /> -<circle cx="351.04" cy="221.80" r="3.08" fill="red" /> -<circle cx="351.04" cy="211.13" r="3.08" fill="red" /> -<circle cx="351.04" cy="200.46" r="3.08" fill="red" /> -<circle cx="354.12" cy="227.14" r="3.08" fill="red" /> -<circle cx="354.12" cy="216.47" r="3.08" fill="red" /> -<circle cx="354.12" cy="205.79" r="3.08" fill="red" /> -<circle cx="357.20" cy="221.80" r="3.08" fill="red" /> -<circle cx="357.20" 
cy="211.13" r="3.08" fill="red" /> -<circle cx="360.29" cy="216.47" r="3.08" fill="red" /> -<circle cx="363.37" cy="221.80" r="3.08" fill="red" /> -<circle cx="227.11" cy="309.15" r="3.08" fill="blue" /> -<circle cx="230.19" cy="314.49" r="3.08" fill="blue" /> -<circle cx="230.19" cy="303.81" r="3.08" fill="blue" /> -<circle cx="233.27" cy="319.82" r="3.08" fill="blue" /> -<circle cx="233.27" cy="309.15" r="3.08" fill="blue" /> -<circle cx="233.27" cy="298.48" r="3.08" fill="blue" /> -<circle cx="236.35" cy="325.16" r="3.08" fill="blue" /> -<circle cx="236.35" cy="314.49" r="3.08" fill="blue" /> -<circle cx="236.35" cy="303.81" r="3.08" fill="blue" /> -<circle cx="236.35" cy="293.14" r="3.08" fill="blue" /> -<circle cx="239.43" cy="330.50" r="3.08" fill="blue" /> -<circle cx="239.43" cy="319.82" r="3.08" fill="blue" /> -<circle cx="239.43" cy="298.48" r="3.08" fill="blue" /> -<circle cx="239.43" cy="287.80" r="3.08" fill="blue" /> -<circle cx="239.43" cy="309.15" r="3.08" fill="blue" /> -<circle cx="242.51" cy="325.16" r="3.08" fill="blue" /> -<circle cx="242.51" cy="293.14" r="3.08" fill="blue" /> -<circle cx="242.51" cy="282.47" r="3.08" fill="blue" /> -<circle cx="242.51" cy="314.49" r="3.08" fill="blue" /> -<circle cx="242.51" cy="303.81" r="3.08" fill="blue" /> -<circle cx="245.59" cy="330.50" r="3.08" fill="blue" /> -<circle cx="245.59" cy="287.80" r="3.08" fill="blue" /> -<circle cx="245.59" cy="277.13" r="3.08" fill="blue" /> -<circle cx="245.59" cy="319.82" r="3.08" fill="blue" /> -<circle cx="245.59" cy="309.15" r="3.08" fill="blue" /> -<circle cx="245.59" cy="298.48" r="3.08" fill="blue" /> -<circle cx="248.68" cy="335.84" r="3.08" fill="blue" /> -<circle cx="248.68" cy="282.47" r="3.08" fill="blue" /> -<circle cx="248.68" cy="325.16" r="3.08" fill="blue" /> -<circle cx="248.68" cy="314.49" r="3.08" fill="blue" /> -<circle cx="248.68" cy="303.81" r="3.08" fill="blue" /> -<circle cx="248.68" cy="293.14" r="3.08" fill="blue" /> -<circle cx="251.76" 
cy="309.15" r="3.08" fill="blue" /> -<circle cx="251.76" cy="277.13" r="3.08" fill="blue" /> -<circle cx="251.76" cy="330.50" r="3.08" fill="blue" /> -<circle cx="251.76" cy="319.82" r="3.08" fill="blue" /> -<circle cx="251.76" cy="298.48" r="3.08" fill="blue" /> -<circle cx="251.76" cy="287.80" r="3.08" fill="blue" /> -<circle cx="254.84" cy="335.84" r="3.08" fill="blue" /> -<circle cx="254.84" cy="314.49" r="3.08" fill="blue" /> -<circle cx="254.84" cy="303.81" r="3.08" fill="blue" /> -<circle cx="254.84" cy="325.16" r="3.08" fill="blue" /> -<circle cx="254.84" cy="282.47" r="3.08" fill="blue" /> -<circle cx="254.84" cy="293.14" r="3.08" fill="blue" /> -<circle cx="257.92" cy="319.82" r="3.08" fill="blue" /> -<circle cx="257.92" cy="309.15" r="3.08" fill="blue" /> -<circle cx="257.92" cy="298.48" r="3.08" fill="blue" /> -<circle cx="257.92" cy="277.13" r="3.08" fill="blue" /> -<circle cx="257.92" cy="330.50" r="3.08" fill="blue" /> -<circle cx="257.92" cy="287.80" r="3.08" fill="blue" /> -<circle cx="261.00" cy="335.84" r="3.08" fill="blue" /> -<circle cx="261.00" cy="314.49" r="3.08" fill="blue" /> -<circle cx="261.00" cy="303.81" r="3.08" fill="blue" /> -<circle cx="261.00" cy="325.16" r="3.08" fill="blue" /> -<circle cx="261.00" cy="282.47" r="3.08" fill="blue" /> -<circle cx="261.00" cy="293.14" r="3.08" fill="blue" /> -<circle cx="264.08" cy="319.82" r="3.08" fill="blue" /> -<circle cx="264.08" cy="309.15" r="3.08" fill="blue" /> -<circle cx="264.08" cy="298.48" r="3.08" fill="blue" /> -<circle cx="264.08" cy="277.13" r="3.08" fill="blue" /> -<circle cx="264.08" cy="330.50" r="3.08" fill="blue" /> -<circle cx="264.08" cy="287.80" r="3.08" fill="red" /> -<circle cx="267.16" cy="335.84" r="3.08" fill="red" /> -<circle cx="267.16" cy="314.49" r="3.08" fill="red" /> -<circle cx="267.16" cy="303.81" r="3.08" fill="red" /> -<circle cx="267.16" cy="325.16" r="3.08" fill="red" /> -<circle cx="267.16" cy="282.47" r="3.08" fill="red" /> -<circle cx="267.16" 
cy="293.14" r="3.08" fill="red" /> -<circle cx="270.24" cy="319.82" r="3.08" fill="red" /> -<circle cx="270.24" cy="309.15" r="3.08" fill="red" /> -<circle cx="270.24" cy="298.48" r="3.08" fill="red" /> -<circle cx="270.24" cy="277.13" r="3.08" fill="red" /> -<circle cx="270.24" cy="330.50" r="3.08" fill="red" /> -<circle cx="270.24" cy="287.80" r="3.08" fill="red" /> -<circle cx="273.33" cy="335.84" r="3.08" fill="red" /> -<circle cx="273.33" cy="314.49" r="3.08" fill="red" /> -<circle cx="273.33" cy="303.81" r="3.08" fill="red" /> -<circle cx="273.33" cy="325.16" r="3.08" fill="red" /> -<circle cx="273.33" cy="282.47" r="3.08" fill="red" /> -<circle cx="273.33" cy="293.14" r="3.08" fill="red" /> -<circle cx="276.41" cy="309.15" r="3.08" fill="red" /> -<circle cx="276.41" cy="330.50" r="3.08" fill="red" /> -<circle cx="276.41" cy="319.82" r="3.08" fill="red" /> -<circle cx="276.41" cy="298.48" r="3.08" fill="red" /> -<circle cx="276.41" cy="287.80" r="3.08" fill="red" /> -<circle cx="276.41" cy="277.13" r="3.08" fill="red" /> -<circle cx="279.49" cy="335.84" r="3.08" fill="red" /> -<circle cx="279.49" cy="325.16" r="3.08" fill="red" /> -<circle cx="279.49" cy="314.49" r="3.08" fill="red" /> -<circle cx="279.49" cy="303.81" r="3.08" fill="red" /> -<circle cx="279.49" cy="293.14" r="3.08" fill="red" /> -<circle cx="279.49" cy="282.47" r="3.08" fill="red" /> -<circle cx="282.57" cy="330.50" r="3.08" fill="red" /> -<circle cx="282.57" cy="319.82" r="3.08" fill="red" /> -<circle cx="282.57" cy="309.15" r="3.08" fill="red" /> -<circle cx="282.57" cy="298.48" r="3.08" fill="red" /> -<circle cx="282.57" cy="287.80" r="3.08" fill="red" /> -<circle cx="282.57" cy="277.13" r="3.08" fill="red" /> -<circle cx="285.65" cy="325.16" r="3.08" fill="red" /> -<circle cx="285.65" cy="314.49" r="3.08" fill="red" /> -<circle cx="285.65" cy="303.81" r="3.08" fill="red" /> -<circle cx="285.65" cy="293.14" r="3.08" fill="red" /> -<circle cx="285.65" cy="282.47" r="3.08" fill="red" /> 
-<circle cx="288.73" cy="319.82" r="3.08" fill="red" /> -<circle cx="288.73" cy="309.15" r="3.08" fill="red" /> -<circle cx="288.73" cy="298.48" r="3.08" fill="red" /> -<circle cx="288.73" cy="287.80" r="3.08" fill="red" /> -<circle cx="291.81" cy="314.49" r="3.08" fill="red" /> -<circle cx="291.81" cy="303.81" r="3.08" fill="red" /> -<circle cx="291.81" cy="293.14" r="3.08" fill="red" /> -<circle cx="294.89" cy="309.15" r="3.08" fill="red" /> -<circle cx="294.89" cy="298.48" r="3.08" fill="red" /> -<circle cx="297.97" cy="303.81" r="3.08" fill="red" /> -<circle cx="301.06" cy="309.15" r="3.08" fill="red" /> -<circle cx="245.40" cy="100.42" r="3.08" fill="blue" /> -<circle cx="248.48" cy="105.76" r="3.08" fill="blue" /> -<circle cx="248.48" cy="95.09" r="3.08" fill="blue" /> -<circle cx="251.56" cy="111.10" r="3.08" fill="blue" /> -<circle cx="251.56" cy="89.75" r="3.08" fill="blue" /> -<circle cx="251.56" cy="100.42" r="3.08" fill="blue" /> -<circle cx="254.64" cy="116.43" r="3.08" fill="blue" /> -<circle cx="254.64" cy="84.41" r="3.08" fill="blue" /> -<circle cx="254.64" cy="105.76" r="3.08" fill="blue" /> -<circle cx="254.64" cy="95.09" r="3.08" fill="blue" /> -<circle cx="257.72" cy="121.77" r="3.08" fill="blue" /> -<circle cx="257.72" cy="79.08" r="3.08" fill="blue" /> -<circle cx="257.72" cy="111.10" r="3.08" fill="blue" /> -<circle cx="257.72" cy="100.42" r="3.08" fill="blue" /> -<circle cx="257.72" cy="89.75" r="3.08" fill="blue" /> -<circle cx="260.81" cy="127.11" r="3.08" fill="blue" /> -<circle cx="260.81" cy="73.74" r="3.08" fill="blue" /> -<circle cx="260.81" cy="116.43" r="3.08" fill="blue" /> -<circle cx="260.81" cy="105.76" r="3.08" fill="blue" /> -<circle cx="260.81" cy="95.09" r="3.08" fill="blue" /> -<circle cx="260.81" cy="84.41" r="3.08" fill="blue" /> -<circle cx="263.89" cy="100.42" r="3.08" fill="blue" /> -<circle cx="263.89" cy="121.77" r="3.08" fill="blue" /> -<circle cx="263.89" cy="111.10" r="3.08" fill="blue" /> -<circle cx="263.89" 
cy="89.75" r="3.08" fill="blue" /> -<circle cx="263.89" cy="79.08" r="3.08" fill="blue" /> -<circle cx="266.97" cy="127.11" r="3.08" fill="blue" /> -<circle cx="266.97" cy="105.76" r="3.08" fill="blue" /> -<circle cx="266.97" cy="95.09" r="3.08" fill="blue" /> -<circle cx="266.97" cy="116.43" r="3.08" fill="blue" /> -<circle cx="266.97" cy="73.74" r="3.08" fill="blue" /> -<circle cx="266.97" cy="84.41" r="3.08" fill="blue" /> -<circle cx="270.05" cy="111.10" r="3.08" fill="blue" /> -<circle cx="270.05" cy="100.42" r="3.08" fill="blue" /> -<circle cx="270.05" cy="89.75" r="3.08" fill="blue" /> -<circle cx="270.05" cy="121.77" r="3.08" fill="blue" /> -<circle cx="270.05" cy="79.08" r="3.08" fill="blue" /> -<circle cx="273.13" cy="127.11" r="3.08" fill="blue" /> -<circle cx="273.13" cy="105.76" r="3.08" fill="blue" /> -<circle cx="273.13" cy="95.09" r="3.08" fill="blue" /> -<circle cx="273.13" cy="116.43" r="3.08" fill="blue" /> -<circle cx="273.13" cy="73.74" r="3.08" fill="blue" /> -<circle cx="273.13" cy="84.41" r="3.08" fill="blue" /> -<circle cx="276.21" cy="111.10" r="3.08" fill="blue" /> -<circle cx="276.21" cy="100.42" r="3.08" fill="blue" /> -<circle cx="276.21" cy="89.75" r="3.08" fill="blue" /> -<circle cx="276.21" cy="121.77" r="3.08" fill="blue" /> -<circle cx="276.21" cy="79.08" r="3.08" fill="blue" /> -<circle cx="279.29" cy="127.11" r="3.08" fill="blue" /> -<circle cx="279.29" cy="105.76" r="3.08" fill="blue" /> -<circle cx="279.29" cy="95.09" r="3.08" fill="blue" /> -<circle cx="279.29" cy="116.43" r="3.08" fill="blue" /> -<circle cx="279.29" cy="73.74" r="3.08" fill="blue" /> -<circle cx="279.29" cy="84.41" r="3.08" fill="red" /> -<circle cx="282.37" cy="111.10" r="3.08" fill="red" /> -<circle cx="282.37" cy="100.42" r="3.08" fill="red" /> -<circle cx="282.37" cy="89.75" r="3.08" fill="red" /> -<circle cx="282.37" cy="121.77" r="3.08" fill="red" /> -<circle cx="282.37" cy="79.08" r="3.08" fill="red" /> -<circle cx="285.46" cy="127.11" r="3.08" 
fill="red" /> -<circle cx="285.46" cy="105.76" r="3.08" fill="red" /> -<circle cx="285.46" cy="95.09" r="3.08" fill="red" /> -<circle cx="285.46" cy="116.43" r="3.08" fill="red" /> -<circle cx="285.46" cy="73.74" r="3.08" fill="red" /> -<circle cx="285.46" cy="84.41" r="3.08" fill="red" /> -<circle cx="288.54" cy="100.42" r="3.08" fill="red" /> -<circle cx="288.54" cy="121.77" r="3.08" fill="red" /> -<circle cx="288.54" cy="111.10" r="3.08" fill="red" /> -<circle cx="288.54" cy="89.75" r="3.08" fill="red" /> -<circle cx="288.54" cy="79.08" r="3.08" fill="red" /> -<circle cx="291.62" cy="127.11" r="3.08" fill="red" /> -<circle cx="291.62" cy="116.43" r="3.08" fill="red" /> -<circle cx="291.62" cy="105.76" r="3.08" fill="red" /> -<circle cx="291.62" cy="95.09" r="3.08" fill="red" /> -<circle cx="291.62" cy="84.41" r="3.08" fill="red" /> -<circle cx="291.62" cy="73.74" r="3.08" fill="red" /> -<circle cx="294.70" cy="121.77" r="3.08" fill="red" /> -<circle cx="294.70" cy="111.10" r="3.08" fill="red" /> -<circle cx="294.70" cy="100.42" r="3.08" fill="red" /> -<circle cx="294.70" cy="89.75" r="3.08" fill="red" /> -<circle cx="294.70" cy="79.08" r="3.08" fill="red" /> -<circle cx="297.78" cy="116.43" r="3.08" fill="red" /> -<circle cx="297.78" cy="105.76" r="3.08" fill="red" /> -<circle cx="297.78" cy="95.09" r="3.08" fill="red" /> -<circle cx="297.78" cy="84.41" r="3.08" fill="red" /> -<circle cx="300.86" cy="111.10" r="3.08" fill="red" /> -<circle cx="300.86" cy="100.42" r="3.08" fill="red" /> -<circle cx="300.86" cy="89.75" r="3.08" fill="red" /> -<circle cx="303.94" cy="105.76" r="3.08" fill="red" /> -<circle cx="303.94" cy="95.09" r="3.08" fill="red" /> -<circle cx="307.02" cy="100.42" r="3.08" fill="red" /> -<circle cx="307.02" cy="89.75" r="3.08" fill="red" /> -<circle cx="310.11" cy="95.09" r="3.08" fill="red" /> -<circle cx="313.19" cy="100.42" r="3.08" fill="red" /> -<circle cx="155.07" cy="75.91" r="3.08" fill="blue" /> -<circle cx="155.07" cy="86.59" r="3.08" 
fill="blue" /> -<circle cx="158.15" cy="70.58" r="3.08" fill="blue" /> -<circle cx="158.15" cy="91.92" r="3.08" fill="blue" /> -<circle cx="158.15" cy="81.25" r="3.08" fill="blue" /> -<circle cx="161.23" cy="65.24" r="3.08" fill="blue" /> -<circle cx="161.23" cy="97.26" r="3.08" fill="blue" /> -<circle cx="161.23" cy="86.59" r="3.08" fill="blue" /> -<circle cx="161.23" cy="75.91" r="3.08" fill="blue" /> -<circle cx="164.31" cy="59.90" r="3.08" fill="blue" /> -<circle cx="164.31" cy="102.60" r="3.08" fill="blue" /> -<circle cx="164.31" cy="91.92" r="3.08" fill="blue" /> -<circle cx="164.31" cy="81.25" r="3.08" fill="blue" /> -<circle cx="164.31" cy="70.58" r="3.08" fill="blue" /> -<circle cx="167.39" cy="86.59" r="3.08" fill="blue" /> -<circle cx="167.39" cy="107.93" r="3.08" fill="blue" /> -<circle cx="167.39" cy="97.26" r="3.08" fill="blue" /> -<circle cx="167.39" cy="75.91" r="3.08" fill="blue" /> -<circle cx="167.39" cy="65.24" r="3.08" fill="blue" /> -<circle cx="170.48" cy="91.92" r="3.08" fill="blue" /> -<circle cx="170.48" cy="81.25" r="3.08" fill="blue" /> -<circle cx="170.48" cy="102.60" r="3.08" fill="blue" /> -<circle cx="170.48" cy="59.90" r="3.08" fill="blue" /> -<circle cx="170.48" cy="70.58" r="3.08" fill="blue" /> -<circle cx="173.56" cy="97.26" r="3.08" fill="blue" /> -<circle cx="173.56" cy="86.59" r="3.08" fill="blue" /> -<circle cx="173.56" cy="75.91" r="3.08" fill="blue" /> -<circle cx="173.56" cy="107.93" r="3.08" fill="blue" /> -<circle cx="173.56" cy="65.24" r="3.08" fill="blue" /> -<circle cx="176.64" cy="91.92" r="3.08" fill="blue" /> -<circle cx="176.64" cy="81.25" r="3.08" fill="blue" /> -<circle cx="176.64" cy="102.60" r="3.08" fill="red" /> -<circle cx="176.64" cy="59.90" r="3.08" fill="red" /> -<circle cx="176.64" cy="70.58" r="3.08" fill="red" /> -<circle cx="179.72" cy="97.26" r="3.08" fill="red" /> -<circle cx="179.72" cy="86.59" r="3.08" fill="red" /> -<circle cx="179.72" cy="75.91" r="3.08" fill="red" /> -<circle cx="179.72" 
cy="107.93" r="3.08" fill="red" /> -<circle cx="179.72" cy="65.24" r="3.08" fill="red" /> -<circle cx="182.80" cy="91.92" r="3.08" fill="red" /> -<circle cx="182.80" cy="81.25" r="3.08" fill="red" /> -<circle cx="182.80" cy="102.60" r="3.08" fill="red" /> -<circle cx="182.80" cy="59.90" r="3.08" fill="red" /> -<circle cx="182.80" cy="70.58" r="3.08" fill="red" /> -<circle cx="185.88" cy="97.26" r="3.08" fill="red" /> -<circle cx="185.88" cy="86.59" r="3.08" fill="red" /> -<circle cx="185.88" cy="75.91" r="3.08" fill="red" /> -<circle cx="185.88" cy="107.93" r="3.08" fill="red" /> -<circle cx="185.88" cy="65.24" r="3.08" fill="red" /> -<circle cx="188.96" cy="91.92" r="3.08" fill="red" /> -<circle cx="188.96" cy="81.25" r="3.08" fill="red" /> -<circle cx="188.96" cy="102.60" r="3.08" fill="red" /> -<circle cx="188.96" cy="59.90" r="3.08" fill="red" /> -<circle cx="188.96" cy="70.58" r="3.08" fill="red" /> -<circle cx="192.04" cy="86.59" r="3.08" fill="red" /> -<circle cx="192.04" cy="107.93" r="3.08" fill="red" /> -<circle cx="192.04" cy="97.26" r="3.08" fill="red" /> -<circle cx="192.04" cy="75.91" r="3.08" fill="red" /> -<circle cx="192.04" cy="65.24" r="3.08" fill="red" /> -<circle cx="195.12" cy="102.60" r="3.08" fill="red" /> -<circle cx="195.12" cy="91.92" r="3.08" fill="red" /> -<circle cx="195.12" cy="81.25" r="3.08" fill="red" /> -<circle cx="195.12" cy="70.58" r="3.08" fill="red" /> -<circle cx="195.12" cy="59.90" r="3.08" fill="red" /> -<circle cx="198.21" cy="97.26" r="3.08" fill="red" /> -<circle cx="198.21" cy="86.59" r="3.08" fill="red" /> -<circle cx="198.21" cy="75.91" r="3.08" fill="red" /> -<circle cx="198.21" cy="65.24" r="3.08" fill="red" /> -<circle cx="201.29" cy="91.92" r="3.08" fill="red" /> -<circle cx="201.29" cy="81.25" r="3.08" fill="red" /> -<circle cx="201.29" cy="70.58" r="3.08" fill="red" /> -<circle cx="204.37" cy="86.59" r="3.08" fill="red" /> -<circle cx="204.37" cy="75.91" r="3.08" fill="red" /> -<circle cx="207.45" cy="81.25" 
r="3.08" fill="red" /> -<circle cx="210.53" cy="86.59" r="3.08" fill="red" /> -<circle cx="92.21" cy="259.60" r="3.08" fill="blue" /> -<circle cx="95.29" cy="264.94" r="3.08" fill="blue" /> -<circle cx="95.29" cy="254.26" r="3.08" fill="blue" /> -<circle cx="98.37" cy="259.60" r="3.08" fill="blue" /> -<circle cx="98.37" cy="270.27" r="3.08" fill="blue" /> -<circle cx="98.37" cy="248.93" r="3.08" fill="blue" /> -<circle cx="98.37" cy="238.25" r="3.08" fill="blue" /> -<circle cx="101.45" cy="264.94" r="3.08" fill="blue" /> -<circle cx="101.45" cy="254.26" r="3.08" fill="blue" /> -<circle cx="101.45" cy="275.61" r="3.08" fill="blue" /> -<circle cx="101.45" cy="243.59" r="3.08" fill="blue" /> -<circle cx="104.53" cy="270.27" r="3.08" fill="blue" /> -<circle cx="104.53" cy="259.60" r="3.08" fill="red" /> -<circle cx="104.53" cy="248.93" r="3.08" fill="red" /> -<circle cx="104.53" cy="238.25" r="3.08" fill="red" /> -<circle cx="107.62" cy="264.94" r="3.08" fill="red" /> -<circle cx="107.62" cy="254.26" r="3.08" fill="red" /> -<circle cx="107.62" cy="275.61" r="3.08" fill="red" /> -<circle cx="107.62" cy="243.59" r="3.08" fill="red" /> -<circle cx="110.70" cy="270.27" r="3.08" fill="red" /> -<circle cx="110.70" cy="259.60" r="3.08" fill="red" /> -<circle cx="110.70" cy="248.93" r="3.08" fill="red" /> -<circle cx="110.70" cy="238.25" r="3.08" fill="red" /> -<circle cx="113.78" cy="264.94" r="3.08" fill="red" /> -<circle cx="113.78" cy="254.26" r="3.08" fill="red" /> -<circle cx="113.78" cy="275.61" r="3.08" fill="red" /> -<circle cx="113.78" cy="243.59" r="3.08" fill="red" /> -<circle cx="116.86" cy="270.27" r="3.08" fill="red" /> -<circle cx="116.86" cy="259.60" r="3.08" fill="red" /> -<circle cx="116.86" cy="248.93" r="3.08" fill="red" /> -<circle cx="116.86" cy="238.25" r="3.08" fill="red" /> -<circle cx="119.94" cy="264.94" r="3.08" fill="red" /> -<circle cx="119.94" cy="254.26" r="3.08" fill="red" /> -<circle cx="119.94" cy="275.61" r="3.08" fill="red" /> -<circle 
cx="119.94" cy="243.59" r="3.08" fill="red" /> -<circle cx="123.02" cy="259.60" r="3.08" fill="red" /> -<circle cx="123.02" cy="270.27" r="3.08" fill="red" /> -<circle cx="123.02" cy="248.93" r="3.08" fill="red" /> -<circle cx="123.02" cy="238.25" r="3.08" fill="red" /> -<circle cx="126.10" cy="264.94" r="3.08" fill="red" /> -<circle cx="126.10" cy="254.26" r="3.08" fill="red" /> -<circle cx="126.10" cy="243.59" r="3.08" fill="red" /> -<circle cx="129.18" cy="259.60" r="3.08" fill="red" /> -<circle cx="129.18" cy="248.93" r="3.08" fill="red" /> -<circle cx="132.27" cy="254.26" r="3.08" fill="red" /> -<circle cx="135.35" cy="259.60" r="3.08" fill="red" /> -<circle cx="140.68" cy="295.29" r="3.08" fill="blue" /> -<circle cx="143.76" cy="300.63" r="3.08" fill="blue" /> -<circle cx="143.76" cy="289.95" r="3.08" fill="blue" /> -<circle cx="146.84" cy="295.29" r="3.08" fill="blue" /> -<circle cx="146.84" cy="305.96" r="3.08" fill="blue" /> -<circle cx="146.84" cy="284.62" r="3.08" fill="blue" /> -<circle cx="149.92" cy="300.63" r="3.08" fill="blue" /> -<circle cx="149.92" cy="289.95" r="3.08" fill="blue" /> -<circle cx="149.92" cy="311.30" r="3.08" fill="blue" /> -<circle cx="149.92" cy="279.28" r="3.08" fill="blue" /> -<circle cx="153.00" cy="305.96" r="3.08" fill="blue" /> -<circle cx="153.00" cy="295.29" r="3.08" fill="blue" /> -<circle cx="153.00" cy="284.62" r="3.08" fill="blue" /> -<circle cx="156.09" cy="300.63" r="3.08" fill="blue" /> -<circle cx="156.09" cy="289.95" r="3.08" fill="blue" /> -<circle cx="156.09" cy="311.30" r="3.08" fill="blue" /> -<circle cx="156.09" cy="279.28" r="3.08" fill="blue" /> -<circle cx="159.17" cy="305.96" r="3.08" fill="blue" /> -<circle cx="159.17" cy="295.29" r="3.08" fill="blue" /> -<circle cx="159.17" cy="284.62" r="3.08" fill="blue" /> -<circle cx="162.25" cy="300.63" r="3.08" fill="blue" /> -<circle cx="162.25" cy="289.95" r="3.08" fill="blue" /> -<circle cx="162.25" cy="311.30" r="3.08" fill="blue" /> -<circle cx="162.25" 
cy="279.28" r="3.08" fill="blue" /> -<circle cx="165.33" cy="305.96" r="3.08" fill="blue" /> -<circle cx="165.33" cy="295.29" r="3.08" fill="blue" /> -<circle cx="165.33" cy="284.62" r="3.08" fill="blue" /> -<circle cx="168.41" cy="300.63" r="3.08" fill="red" /> -<circle cx="168.41" cy="289.95" r="3.08" fill="red" /> -<circle cx="168.41" cy="311.30" r="3.08" fill="red" /> -<circle cx="168.41" cy="279.28" r="3.08" fill="red" /> -<circle cx="171.49" cy="295.29" r="3.08" fill="red" /> -<circle cx="171.49" cy="305.96" r="3.08" fill="red" /> -<circle cx="171.49" cy="284.62" r="3.08" fill="red" /> -<circle cx="174.57" cy="300.63" r="3.08" fill="red" /> -<circle cx="174.57" cy="289.95" r="3.08" fill="red" /> -<circle cx="177.65" cy="295.29" r="3.08" fill="red" /> -<circle cx="68.03" cy="208.55" r="3.08" fill="blue" /> -<circle cx="71.11" cy="213.89" r="3.08" fill="blue" /> -<circle cx="71.11" cy="203.22" r="3.08" fill="blue" /> -<circle cx="74.19" cy="208.55" r="3.08" fill="blue" /> -<circle cx="74.19" cy="219.23" r="3.08" fill="blue" /> -<circle cx="74.19" cy="197.88" r="3.08" fill="blue" /> -<circle cx="77.27" cy="213.89" r="3.08" fill="blue" /> -<circle cx="77.27" cy="203.22" r="3.08" fill="blue" /> -<circle cx="77.27" cy="224.57" r="3.08" fill="blue" /> -<circle cx="77.27" cy="192.54" r="3.08" fill="blue" /> -<circle cx="80.35" cy="219.23" r="3.08" fill="blue" /> -<circle cx="80.35" cy="208.55" r="3.08" fill="blue" /> -<circle cx="80.35" cy="197.88" r="3.08" fill="blue" /> -<circle cx="83.44" cy="213.89" r="3.08" fill="blue" /> -<circle cx="83.44" cy="203.22" r="3.08" fill="blue" /> -<circle cx="83.44" cy="224.57" r="3.08" fill="blue" /> -<circle cx="83.44" cy="192.54" r="3.08" fill="blue" /> -<circle cx="86.52" cy="219.23" r="3.08" fill="blue" /> -<circle cx="86.52" cy="208.55" r="3.08" fill="blue" /> -<circle cx="86.52" cy="197.88" r="3.08" fill="blue" /> -<circle cx="89.60" cy="213.89" r="3.08" fill="blue" /> -<circle cx="89.60" cy="203.22" r="3.08" fill="red" /> 
-<circle cx="89.60" cy="224.57" r="3.08" fill="red" /> -<circle cx="89.60" cy="192.54" r="3.08" fill="red" /> -<circle cx="92.68" cy="219.23" r="3.08" fill="red" /> -<circle cx="92.68" cy="208.55" r="3.08" fill="red" /> -<circle cx="92.68" cy="197.88" r="3.08" fill="red" /> -<circle cx="95.76" cy="213.89" r="3.08" fill="red" /> -<circle cx="95.76" cy="203.22" r="3.08" fill="red" /> -<circle cx="95.76" cy="224.57" r="3.08" fill="red" /> -<circle cx="95.76" cy="192.54" r="3.08" fill="red" /> -<circle cx="98.84" cy="208.55" r="3.08" fill="red" /> -<circle cx="98.84" cy="219.23" r="3.08" fill="red" /> -<circle cx="98.84" cy="197.88" r="3.08" fill="red" /> -<circle cx="101.92" cy="203.22" r="3.08" fill="red" /> -<circle cx="105.00" cy="208.55" r="3.08" fill="red" /> -<circle cx="301.06" cy="153.52" r="3.08" fill="blue" /> -<circle cx="304.14" cy="158.86" r="3.08" fill="blue" /> -<circle cx="304.14" cy="148.19" r="3.08" fill="blue" /> -<circle cx="307.22" cy="153.52" r="3.08" fill="blue" /> -<circle cx="307.22" cy="164.20" r="3.08" fill="blue" /> -<circle cx="307.22" cy="142.85" r="3.08" fill="blue" /> -<circle cx="310.31" cy="158.86" r="3.08" fill="blue" /> -<circle cx="310.31" cy="148.19" r="3.08" fill="blue" /> -<circle cx="310.31" cy="169.53" r="3.08" fill="blue" /> -<circle cx="310.31" cy="137.51" r="3.08" fill="blue" /> -<circle cx="313.39" cy="164.20" r="3.08" fill="blue" /> -<circle cx="313.39" cy="153.52" r="3.08" fill="blue" /> -<circle cx="313.39" cy="142.85" r="3.08" fill="blue" /> -<circle cx="316.47" cy="158.86" r="3.08" fill="blue" /> -<circle cx="316.47" cy="148.19" r="3.08" fill="blue" /> -<circle cx="316.47" cy="169.53" r="3.08" fill="blue" /> -<circle cx="316.47" cy="137.51" r="3.08" fill="blue" /> -<circle cx="319.55" cy="164.20" r="3.08" fill="blue" /> -<circle cx="319.55" cy="153.52" r="3.08" fill="blue" /> -<circle cx="319.55" cy="142.85" r="3.08" fill="red" /> -<circle cx="322.63" cy="158.86" r="3.08" fill="red" /> -<circle cx="322.63" cy="148.19" 
r="3.08" fill="red" /> -<circle cx="322.63" cy="169.53" r="3.08" fill="red" /> -<circle cx="322.63" cy="137.51" r="3.08" fill="red" /> -<circle cx="325.71" cy="164.20" r="3.08" fill="red" /> -<circle cx="325.71" cy="153.52" r="3.08" fill="red" /> -<circle cx="325.71" cy="142.85" r="3.08" fill="red" /> -<circle cx="328.79" cy="158.86" r="3.08" fill="red" /> -<circle cx="328.79" cy="148.19" r="3.08" fill="red" /> -<circle cx="328.79" cy="169.53" r="3.08" fill="red" /> -<circle cx="328.79" cy="137.51" r="3.08" fill="red" /> -<circle cx="331.87" cy="153.52" r="3.08" fill="red" /> -<circle cx="331.87" cy="142.85" r="3.08" fill="red" /> -<circle cx="334.96" cy="148.19" r="3.08" fill="red" /> -<circle cx="338.04" cy="153.52" r="3.08" fill="red" /> -<circle cx="178.60" cy="330.31" r="3.08" fill="blue" /> -<circle cx="181.68" cy="335.64" r="3.08" fill="blue" /> -<circle cx="181.68" cy="324.97" r="3.08" fill="blue" /> -<circle cx="184.76" cy="330.31" r="3.08" fill="blue" /> -<circle cx="184.76" cy="319.63" r="3.08" fill="blue" /> -<circle cx="187.84" cy="335.64" r="3.08" fill="blue" /> -<circle cx="187.84" cy="324.97" r="3.08" fill="blue" /> -<circle cx="187.84" cy="314.29" r="3.08" fill="blue" /> -<circle cx="190.92" cy="340.98" r="3.08" fill="blue" /> -<circle cx="190.92" cy="330.31" r="3.08" fill="blue" /> -<circle cx="190.92" cy="319.63" r="3.08" fill="blue" /> -<circle cx="194.00" cy="335.64" r="3.08" fill="red" /> -<circle cx="194.00" cy="324.97" r="3.08" fill="red" /> -<circle cx="194.00" cy="314.29" r="3.08" fill="red" /> -<circle cx="197.09" cy="340.98" r="3.08" fill="red" /> -<circle cx="197.09" cy="330.31" r="3.08" fill="red" /> -<circle cx="197.09" cy="319.63" r="3.08" fill="red" /> -<circle cx="200.17" cy="335.64" r="3.08" fill="red" /> -<circle cx="200.17" cy="324.97" r="3.08" fill="red" /> -<circle cx="200.17" cy="314.29" r="3.08" fill="red" /> -<circle cx="203.25" cy="340.98" r="3.08" fill="red" /> -<circle cx="203.25" cy="330.31" r="3.08" fill="red" /> 
-<circle cx="203.25" cy="319.63" r="3.08" fill="red" /> -<circle cx="206.33" cy="335.64" r="3.08" fill="red" /> -<circle cx="206.33" cy="324.97" r="3.08" fill="red" /> -<circle cx="206.33" cy="314.29" r="3.08" fill="red" /> -<circle cx="209.41" cy="330.31" r="3.08" fill="red" /> -<circle cx="209.41" cy="319.63" r="3.08" fill="red" /> -<circle cx="212.49" cy="324.97" r="3.08" fill="red" /> -<circle cx="215.57" cy="330.31" r="3.08" fill="red" /> -<circle cx="93.82" cy="315.86" r="3.08" fill="blue" /> -<circle cx="96.90" cy="321.20" r="3.08" fill="blue" /> -<circle cx="96.90" cy="310.52" r="3.08" fill="blue" /> -<circle cx="99.98" cy="315.86" r="3.08" fill="blue" /> -<circle cx="99.98" cy="305.19" r="3.08" fill="blue" /> -<circle cx="103.07" cy="321.20" r="3.08" fill="blue" /> -<circle cx="103.07" cy="310.52" r="3.08" fill="blue" /> -<circle cx="103.07" cy="299.85" r="3.08" fill="blue" /> -<circle cx="106.15" cy="326.53" r="3.08" fill="blue" /> -<circle cx="106.15" cy="315.86" r="3.08" fill="blue" /> -<circle cx="106.15" cy="305.19" r="3.08" fill="blue" /> -<circle cx="109.23" cy="321.20" r="3.08" fill="blue" /> -<circle cx="109.23" cy="310.52" r="3.08" fill="blue" /> -<circle cx="109.23" cy="299.85" r="3.08" fill="blue" /> -<circle cx="112.31" cy="326.53" r="3.08" fill="blue" /> -<circle cx="112.31" cy="315.86" r="3.08" fill="blue" /> -<circle cx="112.31" cy="305.19" r="3.08" fill="red" /> -<circle cx="115.39" cy="321.20" r="3.08" fill="red" /> -<circle cx="115.39" cy="310.52" r="3.08" fill="red" /> -<circle cx="115.39" cy="299.85" r="3.08" fill="red" /> -<circle cx="118.47" cy="326.53" r="3.08" fill="red" /> -<circle cx="118.47" cy="315.86" r="3.08" fill="red" /> -<circle cx="118.47" cy="305.19" r="3.08" fill="red" /> -<circle cx="121.55" cy="321.20" r="3.08" fill="red" /> -<circle cx="121.55" cy="310.52" r="3.08" fill="red" /> -<circle cx="121.55" cy="299.85" r="3.08" fill="red" /> -<circle cx="124.63" cy="315.86" r="3.08" fill="red" /> -<circle cx="124.63" 
cy="305.19" r="3.08" fill="red" /> -<circle cx="127.71" cy="310.52" r="3.08" fill="red" /> -<circle cx="130.80" cy="315.86" r="3.08" fill="red" /> -<circle cx="310.59" cy="289.84" r="3.08" fill="blue" /> -<circle cx="313.67" cy="284.51" r="3.08" fill="blue" /> -<circle cx="316.75" cy="289.84" r="3.08" fill="blue" /> -<circle cx="316.75" cy="279.17" r="3.08" fill="blue" /> -<circle cx="319.83" cy="295.18" r="3.08" fill="blue" /> -<circle cx="319.83" cy="284.51" r="3.08" fill="blue" /> -<circle cx="319.83" cy="273.83" r="3.08" fill="blue" /> -<circle cx="322.91" cy="300.52" r="3.08" fill="blue" /> -<circle cx="322.91" cy="289.84" r="3.08" fill="blue" /> -<circle cx="322.91" cy="279.17" r="3.08" fill="blue" /> -<circle cx="325.99" cy="295.18" r="3.08" fill="blue" /> -<circle cx="325.99" cy="284.51" r="3.08" fill="blue" /> -<circle cx="325.99" cy="273.83" r="3.08" fill="red" /> -<circle cx="329.07" cy="300.52" r="3.08" fill="red" /> -<circle cx="329.07" cy="289.84" r="3.08" fill="red" /> -<circle cx="329.07" cy="279.17" r="3.08" fill="red" /> -<circle cx="332.15" cy="295.18" r="3.08" fill="red" /> -<circle cx="332.15" cy="284.51" r="3.08" fill="red" /> -<circle cx="332.15" cy="273.83" r="3.08" fill="red" /> -<circle cx="335.24" cy="300.52" r="3.08" fill="red" /> -<circle cx="335.24" cy="289.84" r="3.08" fill="red" /> -<circle cx="335.24" cy="279.17" r="3.08" fill="red" /> -<circle cx="338.32" cy="295.18" r="3.08" fill="red" /> -<circle cx="338.32" cy="284.51" r="3.08" fill="red" /> -<circle cx="338.32" cy="273.83" r="3.08" fill="red" /> -<circle cx="341.40" cy="289.84" r="3.08" fill="red" /> -<circle cx="341.40" cy="279.17" r="3.08" fill="red" /> -<circle cx="344.48" cy="284.51" r="3.08" fill="red" /> -<circle cx="347.56" cy="289.84" r="3.08" fill="red" /> -<circle cx="216.85" cy="47.05" r="3.08" fill="blue" /> -<circle cx="219.93" cy="52.38" r="3.08" fill="blue" /> -<circle cx="219.93" cy="41.71" r="3.08" fill="blue" /> -<circle cx="223.01" cy="57.72" r="3.08" 
fill="blue" /> -<circle cx="223.01" cy="47.05" r="3.08" fill="blue" /> -<circle cx="223.01" cy="36.37" r="3.08" fill="blue" /> -<circle cx="226.09" cy="52.38" r="3.08" fill="blue" /> -<circle cx="226.09" cy="41.71" r="3.08" fill="blue" /> -<circle cx="226.09" cy="31.04" r="3.08" fill="blue" /> -<circle cx="229.17" cy="57.72" r="3.08" fill="blue" /> -<circle cx="229.17" cy="47.05" r="3.08" fill="blue" /> -<circle cx="229.17" cy="36.37" r="3.08" fill="blue" /> -<circle cx="232.25" cy="52.38" r="3.08" fill="blue" /> -<circle cx="232.25" cy="41.71" r="3.08" fill="blue" /> -<circle cx="232.25" cy="31.04" r="3.08" fill="blue" /> -<circle cx="235.33" cy="57.72" r="3.08" fill="red" /> -<circle cx="235.33" cy="47.05" r="3.08" fill="red" /> -<circle cx="235.33" cy="36.37" r="3.08" fill="red" /> -<circle cx="238.41" cy="52.38" r="3.08" fill="red" /> -<circle cx="238.41" cy="41.71" r="3.08" fill="red" /> -<circle cx="238.41" cy="31.04" r="3.08" fill="red" /> -<circle cx="241.50" cy="47.05" r="3.08" fill="red" /> -<circle cx="241.50" cy="36.37" r="3.08" fill="red" /> -<circle cx="244.58" cy="41.71" r="3.08" fill="red" /> -<circle cx="247.66" cy="47.05" r="3.08" fill="red" /> -<circle cx="42.30" cy="250.39" r="3.08" fill="blue" /> -<circle cx="45.38" cy="255.73" r="3.08" fill="blue" /> -<circle cx="45.38" cy="245.06" r="3.08" fill="blue" /> -<circle cx="48.46" cy="261.07" r="3.08" fill="blue" /> -<circle cx="48.46" cy="250.39" r="3.08" fill="blue" /> -<circle cx="48.46" cy="239.72" r="3.08" fill="blue" /> -<circle cx="51.54" cy="255.73" r="3.08" fill="blue" /> -<circle cx="51.54" cy="245.06" r="3.08" fill="blue" /> -<circle cx="54.62" cy="261.07" r="3.08" fill="blue" /> -<circle cx="54.62" cy="250.39" r="3.08" fill="blue" /> -<circle cx="54.62" cy="239.72" r="3.08" fill="blue" /> -<circle cx="57.71" cy="255.73" r="3.08" fill="blue" /> -<circle cx="57.71" cy="245.06" r="3.08" fill="blue" /> -<circle cx="57.71" cy="234.38" r="3.08" fill="red" /> -<circle cx="60.79" cy="261.07" 
r="3.08" fill="red" /> -<circle cx="60.79" cy="250.39" r="3.08" fill="red" /> -<circle cx="60.79" cy="239.72" r="3.08" fill="red" /> -<circle cx="63.87" cy="255.73" r="3.08" fill="red" /> -<circle cx="63.87" cy="245.06" r="3.08" fill="red" /> -<circle cx="63.87" cy="234.38" r="3.08" fill="red" /> -<circle cx="66.95" cy="250.39" r="3.08" fill="red" /> -<circle cx="66.95" cy="239.72" r="3.08" fill="red" /> -<circle cx="70.03" cy="245.06" r="3.08" fill="red" /> -<circle cx="73.11" cy="250.39" r="3.08" fill="red" /> -<circle cx="137.84" cy="348.99" r="3.08" fill="blue" /> -<circle cx="140.92" cy="354.32" r="3.08" fill="blue" /> -<circle cx="140.92" cy="343.65" r="3.08" fill="blue" /> -<circle cx="144.00" cy="359.66" r="3.08" fill="blue" /> -<circle cx="144.00" cy="348.99" r="3.08" fill="blue" /> -<circle cx="144.00" cy="338.31" r="3.08" fill="blue" /> -<circle cx="147.08" cy="354.32" r="3.08" fill="blue" /> -<circle cx="147.08" cy="343.65" r="3.08" fill="blue" /> -<circle cx="150.16" cy="359.66" r="3.08" fill="blue" /> -<circle cx="150.16" cy="348.99" r="3.08" fill="blue" /> -<circle cx="150.16" cy="338.31" r="3.08" fill="blue" /> -<circle cx="153.25" cy="354.32" r="3.08" fill="blue" /> -<circle cx="153.25" cy="343.65" r="3.08" fill="red" /> -<circle cx="156.33" cy="359.66" r="3.08" fill="red" /> -<circle cx="156.33" cy="348.99" r="3.08" fill="red" /> -<circle cx="156.33" cy="338.31" r="3.08" fill="red" /> -<circle cx="159.41" cy="354.32" r="3.08" fill="red" /> -<circle cx="159.41" cy="343.65" r="3.08" fill="red" /> -<circle cx="159.41" cy="332.98" r="3.08" fill="red" /> -<circle cx="162.49" cy="348.99" r="3.08" fill="red" /> -<circle cx="162.49" cy="338.31" r="3.08" fill="red" /> -<circle cx="165.57" cy="343.65" r="3.08" fill="red" /> -<circle cx="168.65" cy="348.99" r="3.08" fill="red" /> -<circle cx="109.57" cy="60.48" r="3.08" fill="blue" /> -<circle cx="112.65" cy="65.82" r="3.08" fill="blue" /> -<circle cx="112.65" cy="55.15" r="3.08" fill="blue" /> -<circle 
cx="115.73" cy="71.16" r="3.08" fill="blue" /> -<circle cx="115.73" cy="60.48" r="3.08" fill="blue" /> -<circle cx="115.73" cy="49.81" r="3.08" fill="blue" /> -<circle cx="118.82" cy="65.82" r="3.08" fill="blue" /> -<circle cx="118.82" cy="55.15" r="3.08" fill="blue" /> -<circle cx="121.90" cy="71.16" r="3.08" fill="blue" /> -<circle cx="121.90" cy="60.48" r="3.08" fill="blue" /> -<circle cx="121.90" cy="49.81" r="3.08" fill="blue" /> -<circle cx="124.98" cy="65.82" r="3.08" fill="blue" /> -<circle cx="124.98" cy="55.15" r="3.08" fill="blue" /> -<circle cx="128.06" cy="71.16" r="3.08" fill="blue" /> -<circle cx="128.06" cy="60.48" r="3.08" fill="red" /> -<circle cx="128.06" cy="49.81" r="3.08" fill="red" /> -<circle cx="131.14" cy="65.82" r="3.08" fill="red" /> -<circle cx="131.14" cy="55.15" r="3.08" fill="red" /> -<circle cx="134.22" cy="60.48" r="3.08" fill="red" /> -<circle cx="140.38" cy="60.48" r="3.08" fill="red" /> -<circle cx="29.85" cy="186.99" r="3.08" fill="blue" /> -<circle cx="32.93" cy="192.33" r="3.08" fill="blue" /> -<circle cx="32.93" cy="181.65" r="3.08" fill="blue" /> -<circle cx="36.01" cy="197.66" r="3.08" fill="blue" /> -<circle cx="36.01" cy="186.99" r="3.08" fill="blue" /> -<circle cx="36.01" cy="176.32" r="3.08" fill="blue" /> -<circle cx="39.09" cy="192.33" r="3.08" fill="blue" /> -<circle cx="39.09" cy="181.65" r="3.08" fill="blue" /> -<circle cx="42.18" cy="197.66" r="3.08" fill="blue" /> -<circle cx="42.18" cy="186.99" r="3.08" fill="blue" /> -<circle cx="42.18" cy="176.32" r="3.08" fill="red" /> -<circle cx="45.26" cy="192.33" r="3.08" fill="red" /> -<circle cx="45.26" cy="181.65" r="3.08" fill="red" /> -<circle cx="48.34" cy="197.66" r="3.08" fill="red" /> -<circle cx="48.34" cy="186.99" r="3.08" fill="red" /> -<circle cx="48.34" cy="176.32" r="3.08" fill="red" /> -<circle cx="51.42" cy="192.33" r="3.08" fill="red" /> -<circle cx="51.42" cy="181.65" r="3.08" fill="red" /> -<circle cx="54.50" cy="186.99" r="3.08" fill="red" /> 
-<circle cx="60.66" cy="186.99" r="3.08" fill="red" /> -<circle cx="325.23" cy="113.02" r="3.08" fill="blue" /> -<circle cx="328.31" cy="118.36" r="3.08" fill="blue" /> -<circle cx="328.31" cy="107.68" r="3.08" fill="blue" /> -<circle cx="331.39" cy="123.69" r="3.08" fill="blue" /> -<circle cx="331.39" cy="113.02" r="3.08" fill="blue" /> -<circle cx="331.39" cy="102.35" r="3.08" fill="blue" /> -<circle cx="334.47" cy="118.36" r="3.08" fill="blue" /> -<circle cx="334.47" cy="107.68" r="3.08" fill="blue" /> -<circle cx="337.55" cy="123.69" r="3.08" fill="blue" /> -<circle cx="337.55" cy="113.02" r="3.08" fill="blue" /> -<circle cx="337.55" cy="102.35" r="3.08" fill="blue" /> -<circle cx="340.63" cy="118.36" r="3.08" fill="blue" /> -<circle cx="340.63" cy="107.68" r="3.08" fill="blue" /> -<circle cx="343.72" cy="123.69" r="3.08" fill="blue" /> -<circle cx="343.72" cy="113.02" r="3.08" fill="blue" /> -<circle cx="343.72" cy="102.35" r="3.08" fill="red" /> -<circle cx="346.80" cy="107.68" r="3.08" fill="red" /> -<circle cx="349.88" cy="113.02" r="3.08" fill="red" /> -<circle cx="347.23" cy="169.70" r="3.08" fill="blue" /> -<circle cx="350.31" cy="175.04" r="3.08" fill="blue" /> -<circle cx="350.31" cy="164.36" r="3.08" fill="blue" /> -<circle cx="353.40" cy="180.37" r="3.08" fill="blue" /> -<circle cx="353.40" cy="169.70" r="3.08" fill="blue" /> -<circle cx="353.40" cy="159.03" r="3.08" fill="blue" /> -<circle cx="356.48" cy="175.04" r="3.08" fill="blue" /> -<circle cx="356.48" cy="164.36" r="3.08" fill="blue" /> -<circle cx="359.56" cy="180.37" r="3.08" fill="blue" /> -<circle cx="359.56" cy="169.70" r="3.08" fill="blue" /> -<circle cx="359.56" cy="159.03" r="3.08" fill="blue" /> -<circle cx="362.64" cy="175.04" r="3.08" fill="red" /> -<circle cx="362.64" cy="164.36" r="3.08" fill="red" /> -<circle cx="365.72" cy="180.37" r="3.08" fill="red" /> -<circle cx="365.72" cy="169.70" r="3.08" fill="red" /> -<circle cx="365.72" cy="159.03" r="3.08" fill="red" /> -<circle 
cx="368.80" cy="164.36" r="3.08" fill="red" /> -<circle cx="371.88" cy="169.70" r="3.08" fill="red" /> -<circle cx="59.55" cy="294.17" r="3.08" fill="blue" /> -<circle cx="62.63" cy="299.51" r="3.08" fill="blue" /> -<circle cx="62.63" cy="288.83" r="3.08" fill="blue" /> -<circle cx="65.71" cy="304.84" r="3.08" fill="blue" /> -<circle cx="65.71" cy="294.17" r="3.08" fill="red" /> -<circle cx="65.71" cy="283.49" r="3.08" fill="red" /> -<circle cx="68.80" cy="299.51" r="3.08" fill="red" /> -<circle cx="68.80" cy="288.83" r="3.08" fill="red" /> -<circle cx="71.88" cy="304.84" r="3.08" fill="red" /> -<circle cx="71.88" cy="294.17" r="3.08" fill="red" /> -<circle cx="71.88" cy="283.49" r="3.08" fill="red" /> -<circle cx="74.96" cy="299.51" r="3.08" fill="red" /> -<circle cx="74.96" cy="288.83" r="3.08" fill="red" /> -<circle cx="78.04" cy="294.17" r="3.08" fill="red" /> -<circle cx="78.04" cy="283.49" r="3.08" fill="red" /> -<circle cx="81.12" cy="288.83" r="3.08" fill="red" /> -<circle cx="84.20" cy="294.17" r="3.08" fill="red" /> -<circle cx="220.98" cy="362.65" r="3.08" fill="blue" /> -<circle cx="224.06" cy="367.99" r="3.08" fill="blue" /> -<circle cx="224.06" cy="357.32" r="3.08" fill="blue" /> -<circle cx="227.14" cy="373.33" r="3.08" fill="blue" /> -<circle cx="227.14" cy="362.65" r="3.08" fill="blue" /> -<circle cx="227.14" cy="351.98" r="3.08" fill="blue" /> -<circle cx="230.22" cy="367.99" r="3.08" fill="blue" /> -<circle cx="230.22" cy="357.32" r="3.08" fill="blue" /> -<circle cx="233.30" cy="373.33" r="3.08" fill="blue" /> -<circle cx="233.30" cy="362.65" r="3.08" fill="blue" /> -<circle cx="233.30" cy="351.98" r="3.08" fill="blue" /> -<circle cx="236.38" cy="367.99" r="3.08" fill="red" /> -<circle cx="236.38" cy="357.32" r="3.08" fill="red" /> -<circle cx="239.46" cy="362.65" r="3.08" fill="red" /> -<circle cx="239.46" cy="351.98" r="3.08" fill="red" /> -<circle cx="242.55" cy="357.32" r="3.08" fill="red" /> -<circle cx="245.63" cy="362.65" r="3.08" 
fill="red" /> -<circle cx="174.02" cy="29.04" r="3.08" fill="blue" /> -<circle cx="177.10" cy="34.38" r="3.08" fill="blue" /> -<circle cx="177.10" cy="23.71" r="3.08" fill="blue" /> -<circle cx="180.18" cy="29.04" r="3.08" fill="blue" /> -<circle cx="180.18" cy="18.37" r="3.08" fill="blue" /> -<circle cx="183.26" cy="34.38" r="3.08" fill="blue" /> -<circle cx="183.26" cy="23.71" r="3.08" fill="blue" /> -<circle cx="186.35" cy="29.04" r="3.08" fill="red" /> -<circle cx="186.35" cy="18.37" r="3.08" fill="red" /> -<circle cx="189.43" cy="34.38" r="3.08" fill="red" /> -<circle cx="189.43" cy="23.71" r="3.08" fill="red" /> -<circle cx="192.51" cy="29.04" r="3.08" fill="red" /> -<circle cx="192.51" cy="18.37" r="3.08" fill="red" /> -<circle cx="195.59" cy="23.71" r="3.08" fill="red" /> -<circle cx="198.67" cy="29.04" r="3.08" fill="red" /> -<circle cx="183.03" cy="373.82" r="3.08" fill="blue" /> -<circle cx="186.11" cy="379.16" r="3.08" fill="blue" /> -<circle cx="186.11" cy="368.49" r="3.08" fill="blue" /> -<circle cx="189.19" cy="373.82" r="3.08" fill="blue" /> -<circle cx="189.19" cy="363.15" r="3.08" fill="blue" /> -<circle cx="192.27" cy="379.16" r="3.08" fill="blue" /> -<circle cx="192.27" cy="368.49" r="3.08" fill="red" /> -<circle cx="195.35" cy="373.82" r="3.08" fill="red" /> -<circle cx="195.35" cy="363.15" r="3.08" fill="red" /> -<circle cx="198.44" cy="379.16" r="3.08" fill="red" /> -<circle cx="198.44" cy="368.49" r="3.08" fill="red" /> -<circle cx="201.52" cy="373.82" r="3.08" fill="red" /> -<circle cx="201.52" cy="363.15" r="3.08" fill="red" /> -<circle cx="204.60" cy="368.49" r="3.08" fill="red" /> -<circle cx="207.68" cy="373.82" r="3.08" fill="red" /> -<circle cx="359.30" cy="263.92" r="3.08" fill="blue" /> -<circle cx="362.38" cy="269.25" r="3.08" fill="blue" /> -<circle cx="362.38" cy="258.58" r="3.08" fill="blue" /> -<circle cx="365.46" cy="263.92" r="3.08" fill="blue" /> -<circle cx="365.46" cy="253.24" r="3.08" fill="blue" /> -<circle cx="368.54" 
cy="269.25" r="3.08" fill="blue" /> -<circle cx="368.54" cy="258.58" r="3.08" fill="blue" /> -<circle cx="371.62" cy="263.92" r="3.08" fill="blue" /> -<circle cx="371.62" cy="253.24" r="3.08" fill="blue" /> -<circle cx="374.70" cy="269.25" r="3.08" fill="blue" /> -<circle cx="374.70" cy="258.58" r="3.08" fill="red" /> -<circle cx="377.78" cy="263.92" r="3.08" fill="red" /> -<circle cx="377.78" cy="253.24" r="3.08" fill="red" /> -<circle cx="380.87" cy="258.58" r="3.08" fill="red" /> -<circle cx="383.95" cy="263.92" r="3.08" fill="red" /> -<circle cx="310.23" cy="334.55" r="3.08" fill="blue" /> -<circle cx="313.31" cy="329.21" r="3.08" fill="blue" /> -<circle cx="316.39" cy="334.55" r="3.08" fill="blue" /> -<circle cx="316.39" cy="323.88" r="3.08" fill="blue" /> -<circle cx="319.47" cy="339.89" r="3.08" fill="blue" /> -<circle cx="319.47" cy="329.21" r="3.08" fill="blue" /> -<circle cx="322.55" cy="334.55" r="3.08" fill="blue" /> -<circle cx="322.55" cy="323.88" r="3.08" fill="red" /> -<circle cx="325.63" cy="339.89" r="3.08" fill="red" /> -<circle cx="325.63" cy="329.21" r="3.08" fill="red" /> -<circle cx="328.71" cy="334.55" r="3.08" fill="red" /> -<circle cx="328.71" cy="323.88" r="3.08" fill="red" /> -<circle cx="331.79" cy="329.21" r="3.08" fill="red" /> -<circle cx="334.88" cy="334.55" r="3.08" fill="red" /> -<circle cx="361.01" cy="126.37" r="3.08" fill="blue" /> -<circle cx="364.09" cy="131.70" r="3.08" fill="blue" /> -<circle cx="364.09" cy="121.03" r="3.08" fill="blue" /> -<circle cx="367.17" cy="137.04" r="3.08" fill="blue" /> -<circle cx="367.17" cy="126.37" r="3.08" fill="blue" /> -<circle cx="370.25" cy="131.70" r="3.08" fill="blue" /> -<circle cx="370.25" cy="121.03" r="3.08" fill="blue" /> -<circle cx="373.33" cy="137.04" r="3.08" fill="blue" /> -<circle cx="373.33" cy="126.37" r="3.08" fill="blue" /> -<circle cx="376.41" cy="131.70" r="3.08" fill="red" /> -<circle cx="376.41" cy="121.03" r="3.08" fill="red" /> -<circle cx="379.50" cy="126.37" 
r="3.08" fill="red" /> -<circle cx="382.58" cy="131.70" r="3.08" fill="red" /> -<circle cx="277.69" cy="38.74" r="3.08" fill="blue" /> -<circle cx="277.69" cy="28.07" r="3.08" fill="blue" /> -<circle cx="280.77" cy="44.08" r="3.08" fill="blue" /> -<circle cx="280.77" cy="33.41" r="3.08" fill="blue" /> -<circle cx="283.86" cy="38.74" r="3.08" fill="blue" /> -<circle cx="283.86" cy="28.07" r="3.08" fill="blue" /> -<circle cx="286.94" cy="44.08" r="3.08" fill="red" /> -<circle cx="286.94" cy="33.41" r="3.08" fill="red" /> -<circle cx="290.02" cy="38.74" r="3.08" fill="red" /> -<circle cx="290.02" cy="28.07" r="3.08" fill="red" /> -<circle cx="293.10" cy="33.41" r="3.08" fill="red" /> -<circle cx="296.18" cy="38.74" r="3.08" fill="red" /> -<circle cx="378.43" cy="198.43" r="3.08" fill="blue" /> -<circle cx="378.43" cy="187.76" r="3.08" fill="blue" /> -<circle cx="381.51" cy="203.77" r="3.08" fill="blue" /> -<circle cx="381.51" cy="193.09" r="3.08" fill="blue" /> -<circle cx="384.59" cy="198.43" r="3.08" fill="blue" /> -<circle cx="384.59" cy="187.76" r="3.08" fill="blue" /> -<circle cx="387.68" cy="203.77" r="3.08" fill="red" /> -<circle cx="387.68" cy="193.09" r="3.08" fill="red" /> -<circle cx="390.76" cy="198.43" r="3.08" fill="red" /> -<circle cx="390.76" cy="187.76" r="3.08" fill="red" /> -<circle cx="393.84" cy="193.09" r="3.08" fill="red" /> -<circle cx="396.92" cy="198.43" r="3.08" fill="red" /> -<circle cx="269.14" cy="370.27" r="3.08" fill="blue" /> -<circle cx="272.22" cy="375.61" r="3.08" fill="blue" /> -<circle cx="272.22" cy="364.94" r="3.08" fill="blue" /> -<circle cx="275.30" cy="370.27" r="3.08" fill="blue" /> -<circle cx="275.30" cy="359.60" r="3.08" fill="red" /> -<circle cx="278.38" cy="375.61" r="3.08" fill="red" /> -<circle cx="278.38" cy="364.94" r="3.08" fill="red" /> -<circle cx="281.46" cy="370.27" r="3.08" fill="red" /> -<circle cx="281.46" cy="359.60" r="3.08" fill="red" /> -<circle cx="284.54" cy="364.94" r="3.08" fill="red" /> -<circle 
cx="287.62" cy="370.27" r="3.08" fill="red" /> -<circle cx="325.98" cy="72.23" r="3.08" fill="blue" /> -<circle cx="329.06" cy="77.56" r="3.08" fill="blue" /> -<circle cx="329.06" cy="66.89" r="3.08" fill="blue" /> -<circle cx="332.14" cy="72.23" r="3.08" fill="blue" /> -<circle cx="332.14" cy="61.55" r="3.08" fill="blue" /> -<circle cx="335.22" cy="77.56" r="3.08" fill="blue" /> -<circle cx="335.22" cy="66.89" r="3.08" fill="blue" /> -<circle cx="338.30" cy="72.23" r="3.08" fill="blue" /> -<circle cx="338.30" cy="61.55" r="3.08" fill="blue" /> -<circle cx="341.38" cy="66.89" r="3.08" fill="blue" /> -<circle cx="344.46" cy="72.23" r="3.08" fill="red" /> -<circle cx="23.01" cy="144.76" r="3.08" fill="blue" /> -<circle cx="26.09" cy="150.10" r="3.08" fill="blue" /> -<circle cx="26.09" cy="139.43" r="3.08" fill="blue" /> -<circle cx="29.17" cy="144.76" r="3.08" fill="blue" /> -<circle cx="29.17" cy="134.09" r="3.08" fill="blue" /> -<circle cx="32.26" cy="150.10" r="3.08" fill="blue" /> -<circle cx="32.26" cy="139.43" r="3.08" fill="blue" /> -<circle cx="35.34" cy="144.76" r="3.08" fill="red" /> -<circle cx="35.34" cy="134.09" r="3.08" fill="red" /> -<circle cx="38.42" cy="139.43" r="3.08" fill="red" /> -<circle cx="41.50" cy="144.76" r="3.08" fill="red" /> -<circle cx="101.88" cy="360.13" r="3.08" fill="blue" /> -<circle cx="104.96" cy="365.47" r="3.08" fill="blue" /> -<circle cx="104.96" cy="354.80" r="3.08" fill="blue" /> -<circle cx="108.04" cy="360.13" r="3.08" fill="blue" /> -<circle cx="108.04" cy="349.46" r="3.08" fill="blue" /> -<circle cx="111.12" cy="365.47" r="3.08" fill="blue" /> -<circle cx="111.12" cy="354.80" r="3.08" fill="blue" /> -<circle cx="114.20" cy="360.13" r="3.08" fill="red" /> -<circle cx="114.20" cy="349.46" r="3.08" fill="red" /> -<circle cx="117.28" cy="354.80" r="3.08" fill="red" /> -<circle cx="120.36" cy="360.13" r="3.08" fill="red" /> -<circle cx="140.44" cy="23.75" r="3.08" fill="blue" /> -<circle cx="143.52" cy="29.09" r="3.08" 
fill="blue" /> -<circle cx="143.52" cy="18.42" r="3.08" fill="blue" /> -<circle cx="146.61" cy="23.75" r="3.08" fill="blue" /> -<circle cx="149.69" cy="29.09" r="3.08" fill="blue" /> -<circle cx="149.69" cy="18.42" r="3.08" fill="red" /> -<circle cx="152.77" cy="23.75" r="3.08" fill="red" /> -<circle cx="152.77" cy="13.08" r="3.08" fill="red" /> -<circle cx="155.85" cy="18.42" r="3.08" fill="red" /> -<circle cx="158.93" cy="23.75" r="3.08" fill="red" /> -<circle cx="70.61" cy="66.30" r="3.08" fill="blue" /> -<circle cx="73.69" cy="71.64" r="3.08" fill="blue" /> -<circle cx="73.69" cy="60.96" r="3.08" fill="blue" /> -<circle cx="76.78" cy="66.30" r="3.08" fill="blue" /> -<circle cx="79.86" cy="71.64" r="3.08" fill="blue" /> -<circle cx="79.86" cy="60.96" r="3.08" fill="blue" /> -<circle cx="82.94" cy="66.30" r="3.08" fill="red" /> -<circle cx="82.94" cy="55.63" r="3.08" fill="red" /> -<circle cx="86.02" cy="60.96" r="3.08" fill="red" /> -<circle cx="89.10" cy="66.30" r="3.08" fill="red" /> -<circle cx="66.95" cy="334.18" r="3.08" fill="blue" /> -<circle cx="70.03" cy="339.51" r="3.08" fill="blue" /> -<circle cx="70.03" cy="328.84" r="3.08" fill="blue" /> -<circle cx="73.11" cy="334.18" r="3.08" fill="blue" /> -<circle cx="76.19" cy="339.51" r="3.08" fill="blue" /> -<circle cx="76.19" cy="328.84" r="3.08" fill="blue" /> -<circle cx="79.27" cy="334.18" r="3.08" fill="blue" /> -<circle cx="79.27" cy="323.50" r="3.08" fill="blue" /> -<circle cx="82.35" cy="328.84" r="3.08" fill="red" /> -<circle cx="85.44" cy="334.18" r="3.08" fill="red" /> -<circle cx="13.08" cy="225.31" r="3.08" fill="blue" /> -<circle cx="16.16" cy="230.64" r="3.08" fill="blue" /> -<circle cx="16.16" cy="219.97" r="3.08" fill="blue" /> -<circle cx="19.24" cy="225.31" r="3.08" fill="blue" /> -<circle cx="22.32" cy="230.64" r="3.08" fill="blue" /> -<circle cx="22.32" cy="219.97" r="3.08" fill="blue" /> -<circle cx="25.41" cy="225.31" r="3.08" fill="red" /> -<circle cx="25.41" cy="214.63" r="3.08" 
fill="red" /> -<circle cx="28.49" cy="219.97" r="3.08" fill="red" /> -<circle cx="31.57" cy="225.31" r="3.08" fill="red" /> -<circle cx="28.71" cy="292.56" r="3.08" fill="blue" /> -<circle cx="31.79" cy="297.89" r="3.08" fill="blue" /> -<circle cx="31.79" cy="287.22" r="3.08" fill="blue" /> -<circle cx="34.87" cy="292.56" r="3.08" fill="blue" /> -<circle cx="37.95" cy="297.89" r="3.08" fill="blue" /> -<circle cx="37.95" cy="287.22" r="3.08" fill="blue" /> -<circle cx="41.03" cy="292.56" r="3.08" fill="blue" /> -<circle cx="41.03" cy="281.88" r="3.08" fill="red" /> -<circle cx="44.11" cy="287.22" r="3.08" fill="red" /> -<circle cx="47.19" cy="292.56" r="3.08" fill="red" /> -<circle cx="363.78" cy="305.52" r="3.08" fill="blue" /> -<circle cx="366.86" cy="310.85" r="3.08" fill="blue" /> -<circle cx="366.86" cy="300.18" r="3.08" fill="red" /> -<circle cx="369.95" cy="305.52" r="3.08" fill="red" /> -<circle cx="373.03" cy="310.85" r="3.08" fill="red" /> -<circle cx="373.03" cy="300.18" r="3.08" fill="red" /> -<circle cx="376.11" cy="305.52" r="3.08" fill="red" /> -<circle cx="376.11" cy="294.84" r="3.08" fill="red" /> -<circle cx="379.19" cy="300.18" r="3.08" fill="red" /> -<circle cx="382.27" cy="305.52" r="3.08" fill="red" /> -<rect x="10" y="397.24" width="173.08" height="60" style="fill:white;stroke:black;stroke-width:2" /> -<text x="30" y="422.24" font-family="Arial" font-size="20">cell from 123085</text> -<circle cx="20" cy="417.24" r="4" fill="red" /> -<text x="30" y="447.24" font-family="Arial" font-size="20">cell from 123089</text> -<circle cx="20" cy="442.24" r="4" fill="blue" /> -</svg> diff --git a/enclone_main/testx/inputs/outputs/enclone_test35_output b/enclone_main/testx/inputs/outputs/enclone_test35_output deleted file mode 100644 index 2e964ac67..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test35_output +++ /dev/null @@ -1,584 +0,0 @@ -<svg version="1.1" -baseProfile="full" -width="400" height="539.0999999999999" 
-xmlns="http://www.w3.org/2000/svg"> -<circle cx="119.30" cy="219.51" r="5.33" fill="blue" /> -<circle cx="124.63" cy="228.74" r="5.33" fill="red" /> -<circle cx="124.63" cy="210.28" r="5.33" fill="red" /> -<circle cx="129.96" cy="237.97" r="5.33" fill="red" /> -<circle cx="129.96" cy="201.04" r="5.33" fill="red" /> -<circle cx="129.96" cy="219.51" r="5.33" fill="red" /> -<circle cx="135.28" cy="247.20" r="5.33" fill="red" /> -<circle cx="135.28" cy="191.81" r="5.33" fill="red" /> -<circle cx="135.28" cy="228.74" r="5.33" fill="red" /> -<circle cx="135.28" cy="210.28" r="5.33" fill="red" /> -<circle cx="140.61" cy="256.43" r="5.33" fill="red" /> -<circle cx="140.61" cy="182.58" r="5.33" fill="red" /> -<circle cx="140.61" cy="237.97" r="5.33" fill="red" /> -<circle cx="140.61" cy="219.51" r="5.33" fill="red" /> -<circle cx="140.61" cy="201.04" r="5.33" fill="red" /> -<circle cx="145.94" cy="265.66" r="5.33" fill="red" /> -<circle cx="145.94" cy="173.35" r="5.33" fill="red" /> -<circle cx="145.94" cy="247.20" r="5.33" fill="red" /> -<circle cx="145.94" cy="228.74" r="5.33" fill="red" /> -<circle cx="145.94" cy="210.28" r="5.33" fill="red" /> -<circle cx="145.94" cy="191.81" r="5.33" fill="red" /> -<circle cx="151.27" cy="274.89" r="5.33" fill="red" /> -<circle cx="151.27" cy="164.12" r="5.33" fill="red" /> -<circle cx="151.27" cy="256.43" r="5.33" fill="red" /> -<circle cx="151.27" cy="237.97" r="5.33" fill="red" /> -<circle cx="151.27" cy="201.04" r="5.33" fill="red" /> -<circle cx="151.27" cy="182.58" r="5.33" fill="red" /> -<circle cx="151.27" cy="219.51" r="5.33" fill="red" /> -<circle cx="156.60" cy="284.12" r="5.33" fill="red" /> -<circle cx="156.60" cy="154.89" r="5.33" fill="red" /> -<circle cx="156.60" cy="265.66" r="5.33" fill="red" /> -<circle cx="156.60" cy="247.20" r="5.33" fill="red" /> -<circle cx="156.60" cy="191.81" r="5.33" fill="red" /> -<circle cx="156.60" cy="173.35" r="5.33" fill="red" /> -<circle cx="156.60" cy="228.74" r="5.33" fill="red" /> 
-<circle cx="156.60" cy="210.28" r="5.33" fill="red" /> -<circle cx="161.93" cy="274.89" r="5.33" fill="red" /> -<circle cx="161.93" cy="256.43" r="5.33" fill="red" /> -<circle cx="161.93" cy="182.58" r="5.33" fill="red" /> -<circle cx="161.93" cy="164.12" r="5.33" fill="red" /> -<circle cx="161.93" cy="237.97" r="5.33" fill="red" /> -<circle cx="161.93" cy="219.51" r="5.33" fill="red" /> -<circle cx="161.93" cy="201.04" r="5.33" fill="red" /> -<circle cx="167.26" cy="284.12" r="5.33" fill="red" /> -<circle cx="167.26" cy="154.89" r="5.33" fill="red" /> -<circle cx="167.26" cy="265.66" r="5.33" fill="red" /> -<circle cx="167.26" cy="173.35" r="5.33" fill="red" /> -<circle cx="167.26" cy="247.20" r="5.33" fill="red" /> -<circle cx="167.26" cy="228.74" r="5.33" fill="red" /> -<circle cx="167.26" cy="210.28" r="5.33" fill="red" /> -<circle cx="167.26" cy="191.81" r="5.33" fill="red" /> -<circle cx="172.59" cy="274.89" r="5.33" fill="red" /> -<circle cx="172.59" cy="219.51" r="5.33" fill="red" /> -<circle cx="172.59" cy="164.12" r="5.33" fill="red" /> -<circle cx="172.59" cy="256.43" r="5.33" fill="red" /> -<circle cx="172.59" cy="237.97" r="5.33" fill="red" /> -<circle cx="172.59" cy="201.04" r="5.33" fill="red" /> -<circle cx="172.59" cy="182.58" r="5.33" fill="red" /> -<circle cx="177.92" cy="154.89" r="5.33" fill="red" /> -<circle cx="177.92" cy="284.12" r="5.33" fill="red" /> -<circle cx="177.92" cy="265.66" r="5.33" fill="red" /> -<circle cx="177.92" cy="228.74" r="5.33" fill="red" /> -<circle cx="177.92" cy="210.28" r="5.33" fill="red" /> -<circle cx="177.92" cy="247.20" r="5.33" fill="red" /> -<circle cx="177.92" cy="173.35" r="5.33" fill="red" /> -<circle cx="177.92" cy="191.81" r="5.33" fill="red" /> -<circle cx="183.25" cy="274.89" r="5.33" fill="red" /> -<circle cx="183.25" cy="237.97" r="5.33" fill="red" /> -<circle cx="183.25" cy="219.51" r="5.33" fill="red" /> -<circle cx="183.25" cy="201.04" r="5.33" fill="red" /> -<circle cx="183.25" cy="164.12" 
r="5.33" fill="red" /> -<circle cx="183.25" cy="256.43" r="5.33" fill="red" /> -<circle cx="183.25" cy="182.58" r="5.33" fill="red" /> -<circle cx="188.58" cy="154.89" r="5.33" fill="red" /> -<circle cx="188.58" cy="284.12" r="5.33" fill="red" /> -<circle cx="188.58" cy="265.66" r="5.33" fill="red" /> -<circle cx="188.58" cy="228.74" r="5.33" fill="red" /> -<circle cx="188.58" cy="210.28" r="5.33" fill="red" /> -<circle cx="188.58" cy="247.20" r="5.33" fill="red" /> -<circle cx="188.58" cy="173.35" r="5.33" fill="red" /> -<circle cx="188.58" cy="191.81" r="5.33" fill="red" /> -<circle cx="193.91" cy="274.89" r="5.33" fill="red" /> -<circle cx="193.91" cy="237.97" r="5.33" fill="red" /> -<circle cx="193.91" cy="219.51" r="5.33" fill="red" /> -<circle cx="193.91" cy="201.04" r="5.33" fill="red" /> -<circle cx="193.91" cy="164.12" r="5.33" fill="red" /> -<circle cx="193.91" cy="256.43" r="5.33" fill="red" /> -<circle cx="193.91" cy="182.58" r="5.33" fill="red" /> -<circle cx="199.24" cy="154.89" r="5.33" fill="red" /> -<circle cx="199.24" cy="284.12" r="5.33" fill="red" /> -<circle cx="199.24" cy="265.66" r="5.33" fill="red" /> -<circle cx="199.24" cy="228.74" r="5.33" fill="red" /> -<circle cx="199.24" cy="210.28" r="5.33" fill="red" /> -<circle cx="199.24" cy="247.20" r="5.33" fill="red" /> -<circle cx="199.24" cy="173.35" r="5.33" fill="red" /> -<circle cx="199.24" cy="191.81" r="5.33" fill="red" /> -<circle cx="204.57" cy="145.66" r="5.33" fill="red" /> -<circle cx="204.57" cy="274.89" r="5.33" fill="red" /> -<circle cx="204.57" cy="237.97" r="5.33" fill="red" /> -<circle cx="204.57" cy="219.51" r="5.33" fill="red" /> -<circle cx="204.57" cy="201.04" r="5.33" fill="red" /> -<circle cx="204.57" cy="164.12" r="5.33" fill="red" /> -<circle cx="204.57" cy="256.43" r="5.33" fill="red" /> -<circle cx="204.57" cy="182.58" r="5.33" fill="red" /> -<circle cx="209.90" cy="154.89" r="5.33" fill="red" /> -<circle cx="209.90" cy="284.12" r="5.33" fill="red" /> -<circle 
cx="209.90" cy="265.66" r="5.33" fill="red" /> -<circle cx="209.90" cy="228.74" r="5.33" fill="red" /> -<circle cx="209.90" cy="210.28" r="5.33" fill="red" /> -<circle cx="209.90" cy="247.20" r="5.33" fill="red" /> -<circle cx="209.90" cy="173.35" r="5.33" fill="red" /> -<circle cx="209.90" cy="191.81" r="5.33" fill="red" /> -<circle cx="215.23" cy="145.66" r="5.33" fill="red" /> -<circle cx="215.23" cy="274.89" r="5.33" fill="red" /> -<circle cx="215.23" cy="219.51" r="5.33" fill="red" /> -<circle cx="215.23" cy="256.43" r="5.33" fill="red" /> -<circle cx="215.23" cy="237.97" r="5.33" fill="red" /> -<circle cx="215.23" cy="201.04" r="5.33" fill="red" /> -<circle cx="215.23" cy="182.58" r="5.33" fill="red" /> -<circle cx="215.23" cy="164.12" r="5.33" fill="red" /> -<circle cx="220.55" cy="284.12" r="5.33" fill="red" /> -<circle cx="220.55" cy="154.89" r="5.33" fill="red" /> -<circle cx="220.55" cy="265.66" r="5.33" fill="red" /> -<circle cx="220.55" cy="247.20" r="5.33" fill="red" /> -<circle cx="220.55" cy="228.74" r="5.33" fill="red" /> -<circle cx="220.55" cy="210.28" r="5.33" fill="red" /> -<circle cx="220.55" cy="191.81" r="5.33" fill="red" /> -<circle cx="220.55" cy="173.35" r="5.33" fill="red" /> -<circle cx="225.88" cy="145.66" r="5.33" fill="red" /> -<circle cx="225.88" cy="274.89" r="5.33" fill="red" /> -<circle cx="225.88" cy="256.43" r="5.33" fill="red" /> -<circle cx="225.88" cy="237.97" r="5.33" fill="red" /> -<circle cx="225.88" cy="219.51" r="5.33" fill="red" /> -<circle cx="225.88" cy="201.04" r="5.33" fill="red" /> -<circle cx="225.88" cy="182.58" r="5.33" fill="red" /> -<circle cx="225.88" cy="164.12" r="5.33" fill="red" /> -<circle cx="231.21" cy="284.12" r="5.33" fill="red" /> -<circle cx="231.21" cy="154.89" r="5.33" fill="red" /> -<circle cx="231.21" cy="265.66" r="5.33" fill="red" /> -<circle cx="231.21" cy="247.20" r="5.33" fill="red" /> -<circle cx="231.21" cy="228.74" r="5.33" fill="red" /> -<circle cx="231.21" cy="210.28" r="5.33" 
fill="red" /> -<circle cx="231.21" cy="191.81" r="5.33" fill="red" /> -<circle cx="231.21" cy="173.35" r="5.33" fill="red" /> -<circle cx="236.54" cy="145.66" r="5.33" fill="red" /> -<circle cx="236.54" cy="274.89" r="5.33" fill="red" /> -<circle cx="236.54" cy="164.12" r="5.33" fill="red" /> -<circle cx="236.54" cy="256.43" r="5.33" fill="red" /> -<circle cx="236.54" cy="237.97" r="5.33" fill="red" /> -<circle cx="236.54" cy="219.51" r="5.33" fill="red" /> -<circle cx="236.54" cy="201.04" r="5.33" fill="red" /> -<circle cx="236.54" cy="182.58" r="5.33" fill="red" /> -<circle cx="241.87" cy="154.89" r="5.33" fill="red" /> -<circle cx="241.87" cy="265.66" r="5.33" fill="red" /> -<circle cx="241.87" cy="173.35" r="5.33" fill="red" /> -<circle cx="241.87" cy="247.20" r="5.33" fill="red" /> -<circle cx="241.87" cy="228.74" r="5.33" fill="red" /> -<circle cx="241.87" cy="210.28" r="5.33" fill="red" /> -<circle cx="241.87" cy="191.81" r="5.33" fill="red" /> -<circle cx="247.20" cy="164.12" r="5.33" fill="red" /> -<circle cx="247.20" cy="256.43" r="5.33" fill="red" /> -<circle cx="247.20" cy="182.58" r="5.33" fill="red" /> -<circle cx="247.20" cy="237.97" r="5.33" fill="red" /> -<circle cx="247.20" cy="219.51" r="5.33" fill="red" /> -<circle cx="247.20" cy="201.04" r="5.33" fill="red" /> -<circle cx="252.53" cy="173.35" r="5.33" fill="red" /> -<circle cx="252.53" cy="247.20" r="5.33" fill="red" /> -<circle cx="252.53" cy="191.81" r="5.33" fill="red" /> -<circle cx="252.53" cy="228.74" r="5.33" fill="red" /> -<circle cx="252.53" cy="210.28" r="5.33" fill="red" /> -<circle cx="257.86" cy="182.58" r="5.33" fill="red" /> -<circle cx="257.86" cy="237.97" r="5.33" fill="red" /> -<circle cx="257.86" cy="201.04" r="5.33" fill="red" /> -<circle cx="257.86" cy="219.51" r="5.33" fill="red" /> -<circle cx="263.19" cy="191.81" r="5.33" fill="red" /> -<circle cx="263.19" cy="228.74" r="5.33" fill="red" /> -<circle cx="263.19" cy="210.28" r="5.33" fill="red" /> -<circle cx="268.52" 
cy="201.04" r="5.33" fill="red" /> -<circle cx="268.52" cy="219.51" r="5.33" fill="red" /> -<circle cx="273.85" cy="210.28" r="5.33" fill="red" /> -<circle cx="279.18" cy="219.51" r="5.33" fill="red" /> -<circle cx="25.51" cy="139.16" r="5.33" fill="red" /> -<circle cx="30.84" cy="148.39" r="5.33" fill="red" /> -<circle cx="30.84" cy="129.93" r="5.33" fill="red" /> -<circle cx="36.17" cy="157.62" r="5.33" fill="red" /> -<circle cx="36.17" cy="139.16" r="5.33" fill="red" /> -<circle cx="36.17" cy="120.70" r="5.33" fill="red" /> -<circle cx="41.50" cy="166.85" r="5.33" fill="red" /> -<circle cx="41.50" cy="148.39" r="5.33" fill="red" /> -<circle cx="41.50" cy="129.93" r="5.33" fill="red" /> -<circle cx="41.50" cy="111.47" r="5.33" fill="red" /> -<circle cx="46.83" cy="139.16" r="5.33" fill="red" /> -<circle cx="46.83" cy="176.08" r="5.33" fill="red" /> -<circle cx="46.83" cy="157.62" r="5.33" fill="red" /> -<circle cx="46.83" cy="120.70" r="5.33" fill="red" /> -<circle cx="46.83" cy="102.24" r="5.33" fill="red" /> -<circle cx="52.16" cy="148.39" r="5.33" fill="red" /> -<circle cx="52.16" cy="129.93" r="5.33" fill="red" /> -<circle cx="52.16" cy="166.85" r="5.33" fill="red" /> -<circle cx="52.16" cy="111.47" r="5.33" fill="red" /> -<circle cx="57.49" cy="157.62" r="5.33" fill="red" /> -<circle cx="57.49" cy="139.16" r="5.33" fill="red" /> -<circle cx="57.49" cy="120.70" r="5.33" fill="red" /> -<circle cx="57.49" cy="102.24" r="5.33" fill="red" /> -<circle cx="62.82" cy="148.39" r="5.33" fill="red" /> -<circle cx="62.82" cy="129.93" r="5.33" fill="red" /> -<circle cx="62.82" cy="166.85" r="5.33" fill="red" /> -<circle cx="62.82" cy="111.47" r="5.33" fill="red" /> -<circle cx="68.15" cy="157.62" r="5.33" fill="red" /> -<circle cx="68.15" cy="139.16" r="5.33" fill="red" /> -<circle cx="68.15" cy="120.70" r="5.33" fill="red" /> -<circle cx="68.15" cy="102.24" r="5.33" fill="red" /> -<circle cx="73.48" cy="148.39" r="5.33" fill="red" /> -<circle cx="73.48" cy="129.93" 
r="5.33" fill="red" /> -<circle cx="73.48" cy="166.85" r="5.33" fill="red" /> -<circle cx="73.48" cy="111.47" r="5.33" fill="red" /> -<circle cx="78.81" cy="157.62" r="5.33" fill="red" /> -<circle cx="78.81" cy="139.16" r="5.33" fill="red" /> -<circle cx="78.81" cy="120.70" r="5.33" fill="red" /> -<circle cx="78.81" cy="102.24" r="5.33" fill="red" /> -<circle cx="84.14" cy="148.39" r="5.33" fill="red" /> -<circle cx="84.14" cy="129.93" r="5.33" fill="red" /> -<circle cx="84.14" cy="166.85" r="5.33" fill="red" /> -<circle cx="84.14" cy="111.47" r="5.33" fill="red" /> -<circle cx="89.46" cy="139.16" r="5.33" fill="red" /> -<circle cx="89.46" cy="157.62" r="5.33" fill="red" /> -<circle cx="89.46" cy="120.70" r="5.33" fill="red" /> -<circle cx="89.46" cy="102.24" r="5.33" fill="red" /> -<circle cx="94.79" cy="148.39" r="5.33" fill="red" /> -<circle cx="94.79" cy="129.93" r="5.33" fill="red" /> -<circle cx="94.79" cy="111.47" r="5.33" fill="red" /> -<circle cx="100.12" cy="139.16" r="5.33" fill="red" /> -<circle cx="100.12" cy="120.70" r="5.33" fill="red" /> -<circle cx="105.45" cy="129.93" r="5.33" fill="red" /> -<circle cx="110.78" cy="139.16" r="5.33" fill="red" /> -<circle cx="299.96" cy="232.28" r="5.33" fill="red" /> -<circle cx="305.29" cy="241.51" r="5.33" fill="red" /> -<circle cx="305.29" cy="223.05" r="5.33" fill="red" /> -<circle cx="310.62" cy="250.74" r="5.33" fill="red" /> -<circle cx="310.62" cy="232.28" r="5.33" fill="red" /> -<circle cx="310.62" cy="213.82" r="5.33" fill="red" /> -<circle cx="315.95" cy="259.97" r="5.33" fill="red" /> -<circle cx="315.95" cy="241.51" r="5.33" fill="red" /> -<circle cx="315.95" cy="223.05" r="5.33" fill="red" /> -<circle cx="315.95" cy="204.59" r="5.33" fill="red" /> -<circle cx="321.28" cy="232.28" r="5.33" fill="red" /> -<circle cx="321.28" cy="250.74" r="5.33" fill="red" /> -<circle cx="321.28" cy="213.82" r="5.33" fill="red" /> -<circle cx="321.28" cy="195.36" r="5.33" fill="red" /> -<circle cx="326.61" cy="241.51" 
r="5.33" fill="red" /> -<circle cx="326.61" cy="223.05" r="5.33" fill="red" /> -<circle cx="326.61" cy="259.97" r="5.33" fill="red" /> -<circle cx="326.61" cy="204.59" r="5.33" fill="red" /> -<circle cx="331.94" cy="250.74" r="5.33" fill="red" /> -<circle cx="331.94" cy="232.28" r="5.33" fill="red" /> -<circle cx="331.94" cy="213.82" r="5.33" fill="red" /> -<circle cx="331.94" cy="195.36" r="5.33" fill="red" /> -<circle cx="337.27" cy="241.51" r="5.33" fill="red" /> -<circle cx="337.27" cy="223.05" r="5.33" fill="red" /> -<circle cx="337.27" cy="259.97" r="5.33" fill="red" /> -<circle cx="337.27" cy="204.59" r="5.33" fill="red" /> -<circle cx="342.60" cy="250.74" r="5.33" fill="red" /> -<circle cx="342.60" cy="232.28" r="5.33" fill="red" /> -<circle cx="342.60" cy="213.82" r="5.33" fill="red" /> -<circle cx="342.60" cy="195.36" r="5.33" fill="red" /> -<circle cx="347.93" cy="241.51" r="5.33" fill="red" /> -<circle cx="347.93" cy="223.05" r="5.33" fill="red" /> -<circle cx="347.93" cy="259.97" r="5.33" fill="red" /> -<circle cx="347.93" cy="204.59" r="5.33" fill="red" /> -<circle cx="353.26" cy="250.74" r="5.33" fill="red" /> -<circle cx="353.26" cy="232.28" r="5.33" fill="red" /> -<circle cx="353.26" cy="213.82" r="5.33" fill="red" /> -<circle cx="353.26" cy="195.36" r="5.33" fill="red" /> -<circle cx="358.58" cy="241.51" r="5.33" fill="red" /> -<circle cx="358.58" cy="223.05" r="5.33" fill="red" /> -<circle cx="358.58" cy="259.97" r="5.33" fill="red" /> -<circle cx="358.58" cy="204.59" r="5.33" fill="red" /> -<circle cx="363.91" cy="232.28" r="5.33" fill="red" /> -<circle cx="363.91" cy="250.74" r="5.33" fill="red" /> -<circle cx="363.91" cy="213.82" r="5.33" fill="red" /> -<circle cx="363.91" cy="195.36" r="5.33" fill="red" /> -<circle cx="369.24" cy="241.51" r="5.33" fill="red" /> -<circle cx="369.24" cy="223.05" r="5.33" fill="red" /> -<circle cx="369.24" cy="204.59" r="5.33" fill="red" /> -<circle cx="374.57" cy="232.28" r="5.33" fill="red" /> -<circle 
cx="374.57" cy="213.82" r="5.33" fill="red" /> -<circle cx="379.90" cy="223.05" r="5.33" fill="red" /> -<circle cx="385.23" cy="232.28" r="5.33" fill="red" /> -<circle cx="224.66" cy="84.08" r="5.33" fill="blue" /> -<circle cx="229.99" cy="93.31" r="5.33" fill="green" /> -<circle cx="229.99" cy="74.85" r="5.33" fill="red" /> -<circle cx="235.32" cy="84.08" r="5.33" fill="red" /> -<circle cx="235.32" cy="102.54" r="5.33" fill="red" /> -<circle cx="235.32" cy="65.62" r="5.33" fill="red" /> -<circle cx="240.65" cy="93.31" r="5.33" fill="red" /> -<circle cx="240.65" cy="74.85" r="5.33" fill="red" /> -<circle cx="240.65" cy="111.77" r="5.33" fill="red" /> -<circle cx="240.65" cy="56.39" r="5.33" fill="red" /> -<circle cx="245.98" cy="102.54" r="5.33" fill="red" /> -<circle cx="245.98" cy="84.08" r="5.33" fill="red" /> -<circle cx="245.98" cy="65.62" r="5.33" fill="red" /> -<circle cx="251.31" cy="93.31" r="5.33" fill="red" /> -<circle cx="251.31" cy="74.85" r="5.33" fill="red" /> -<circle cx="251.31" cy="111.77" r="5.33" fill="red" /> -<circle cx="251.31" cy="56.39" r="5.33" fill="red" /> -<circle cx="256.64" cy="102.54" r="5.33" fill="red" /> -<circle cx="256.64" cy="84.08" r="5.33" fill="red" /> -<circle cx="256.64" cy="65.62" r="5.33" fill="red" /> -<circle cx="261.97" cy="93.31" r="5.33" fill="red" /> -<circle cx="261.97" cy="74.85" r="5.33" fill="red" /> -<circle cx="261.97" cy="111.77" r="5.33" fill="red" /> -<circle cx="261.97" cy="56.39" r="5.33" fill="red" /> -<circle cx="267.30" cy="102.54" r="5.33" fill="red" /> -<circle cx="267.30" cy="84.08" r="5.33" fill="red" /> -<circle cx="267.30" cy="65.62" r="5.33" fill="red" /> -<circle cx="272.63" cy="93.31" r="5.33" fill="red" /> -<circle cx="272.63" cy="74.85" r="5.33" fill="red" /> -<circle cx="272.63" cy="111.77" r="5.33" fill="red" /> -<circle cx="272.63" cy="56.39" r="5.33" fill="red" /> -<circle cx="277.95" cy="84.08" r="5.33" fill="red" /> -<circle cx="277.95" cy="102.54" r="5.33" fill="red" /> -<circle 
cx="277.95" cy="65.62" r="5.33" fill="red" /> -<circle cx="277.95" cy="47.16" r="5.33" fill="red" /> -<circle cx="283.28" cy="93.31" r="5.33" fill="red" /> -<circle cx="283.28" cy="74.85" r="5.33" fill="red" /> -<circle cx="283.28" cy="56.39" r="5.33" fill="red" /> -<circle cx="288.61" cy="84.08" r="5.33" fill="red" /> -<circle cx="288.61" cy="65.62" r="5.33" fill="red" /> -<circle cx="293.94" cy="74.85" r="5.33" fill="red" /> -<circle cx="299.27" cy="84.08" r="5.33" fill="red" /> -<circle cx="96.86" cy="354.08" r="5.33" fill="black" /> -<circle cx="102.19" cy="363.31" r="5.33" fill="red" /> -<circle cx="102.19" cy="344.85" r="5.33" fill="red" /> -<circle cx="107.52" cy="354.08" r="5.33" fill="red" /> -<circle cx="107.52" cy="372.54" r="5.33" fill="red" /> -<circle cx="107.52" cy="335.62" r="5.33" fill="red" /> -<circle cx="112.84" cy="363.31" r="5.33" fill="red" /> -<circle cx="112.84" cy="344.85" r="5.33" fill="red" /> -<circle cx="112.84" cy="381.77" r="5.33" fill="red" /> -<circle cx="112.84" cy="326.39" r="5.33" fill="red" /> -<circle cx="118.17" cy="372.54" r="5.33" fill="red" /> -<circle cx="118.17" cy="354.08" r="5.33" fill="red" /> -<circle cx="118.17" cy="335.62" r="5.33" fill="red" /> -<circle cx="123.50" cy="363.31" r="5.33" fill="red" /> -<circle cx="123.50" cy="344.85" r="5.33" fill="red" /> -<circle cx="123.50" cy="381.77" r="5.33" fill="red" /> -<circle cx="123.50" cy="326.39" r="5.33" fill="red" /> -<circle cx="128.83" cy="372.54" r="5.33" fill="red" /> -<circle cx="128.83" cy="354.08" r="5.33" fill="red" /> -<circle cx="128.83" cy="335.62" r="5.33" fill="red" /> -<circle cx="134.16" cy="363.31" r="5.33" fill="red" /> -<circle cx="134.16" cy="344.85" r="5.33" fill="red" /> -<circle cx="134.16" cy="381.77" r="5.33" fill="red" /> -<circle cx="134.16" cy="326.39" r="5.33" fill="red" /> -<circle cx="139.49" cy="372.54" r="5.33" fill="red" /> -<circle cx="139.49" cy="354.08" r="5.33" fill="red" /> -<circle cx="139.49" cy="335.62" r="5.33" fill="red" /> 
-<circle cx="144.82" cy="363.31" r="5.33" fill="red" /> -<circle cx="144.82" cy="344.85" r="5.33" fill="red" /> -<circle cx="144.82" cy="381.77" r="5.33" fill="red" /> -<circle cx="144.82" cy="326.39" r="5.33" fill="red" /> -<circle cx="150.15" cy="354.08" r="5.33" fill="red" /> -<circle cx="150.15" cy="372.54" r="5.33" fill="red" /> -<circle cx="150.15" cy="335.62" r="5.33" fill="red" /> -<circle cx="155.48" cy="363.31" r="5.33" fill="red" /> -<circle cx="155.48" cy="344.85" r="5.33" fill="red" /> -<circle cx="155.48" cy="326.39" r="5.33" fill="red" /> -<circle cx="160.81" cy="354.08" r="5.33" fill="red" /> -<circle cx="160.81" cy="335.62" r="5.33" fill="red" /> -<circle cx="166.14" cy="344.85" r="5.33" fill="red" /> -<circle cx="171.47" cy="354.08" r="5.33" fill="red" /> -<circle cx="15.33" cy="248.56" r="5.33" fill="black" /> -<circle cx="20.66" cy="257.79" r="5.33" fill="black" /> -<circle cx="20.66" cy="239.33" r="5.33" fill="black" /> -<circle cx="25.99" cy="248.56" r="5.33" fill="black" /> -<circle cx="25.99" cy="267.03" r="5.33" fill="black" /> -<circle cx="25.99" cy="230.10" r="5.33" fill="black" /> -<circle cx="31.32" cy="257.79" r="5.33" fill="black" /> -<circle cx="31.32" cy="239.33" r="5.33" fill="black" /> -<circle cx="31.32" cy="276.26" r="5.33" fill="black" /> -<circle cx="31.32" cy="220.87" r="5.33" fill="black" /> -<circle cx="36.65" cy="267.03" r="5.33" fill="black" /> -<circle cx="36.65" cy="248.56" r="5.33" fill="black" /> -<circle cx="36.65" cy="230.10" r="5.33" fill="black" /> -<circle cx="41.98" cy="257.79" r="5.33" fill="black" /> -<circle cx="41.98" cy="239.33" r="5.33" fill="black" /> -<circle cx="41.98" cy="276.26" r="5.33" fill="black" /> -<circle cx="41.98" cy="220.87" r="5.33" fill="black" /> -<circle cx="47.31" cy="267.03" r="5.33" fill="black" /> -<circle cx="47.31" cy="248.56" r="5.33" fill="black" /> -<circle cx="47.31" cy="230.10" r="5.33" fill="black" /> -<circle cx="52.64" cy="257.79" r="5.33" fill="black" /> -<circle 
cx="52.64" cy="239.33" r="5.33" fill="black" /> -<circle cx="52.64" cy="276.26" r="5.33" fill="black" /> -<circle cx="52.64" cy="220.87" r="5.33" fill="black" /> -<circle cx="57.96" cy="267.03" r="5.33" fill="black" /> -<circle cx="57.96" cy="248.56" r="5.33" fill="black" /> -<circle cx="57.96" cy="230.10" r="5.33" fill="black" /> -<circle cx="63.29" cy="257.79" r="5.33" fill="black" /> -<circle cx="63.29" cy="239.33" r="5.33" fill="black" /> -<circle cx="63.29" cy="276.26" r="5.33" fill="red" /> -<circle cx="63.29" cy="220.87" r="5.33" fill="red" /> -<circle cx="68.62" cy="248.56" r="5.33" fill="red" /> -<circle cx="68.62" cy="267.03" r="5.33" fill="red" /> -<circle cx="68.62" cy="230.10" r="5.33" fill="red" /> -<circle cx="73.95" cy="257.79" r="5.33" fill="red" /> -<circle cx="73.95" cy="239.33" r="5.33" fill="red" /> -<circle cx="79.28" cy="248.56" r="5.33" fill="red" /> -<circle cx="79.28" cy="230.10" r="5.33" fill="red" /> -<circle cx="84.61" cy="239.33" r="5.33" fill="red" /> -<circle cx="89.94" cy="248.56" r="5.33" fill="red" /> -<circle cx="201.18" cy="352.41" r="5.33" fill="black" /> -<circle cx="206.51" cy="361.64" r="5.33" fill="black" /> -<circle cx="206.51" cy="343.18" r="5.33" fill="orange" /> -<circle cx="211.84" cy="352.41" r="5.33" fill="orange" /> -<circle cx="211.84" cy="370.87" r="5.33" fill="orange" /> -<circle cx="211.84" cy="333.95" r="5.33" fill="orange" /> -<circle cx="217.16" cy="361.64" r="5.33" fill="orange" /> -<circle cx="217.16" cy="343.18" r="5.33" fill="orange" /> -<circle cx="217.16" cy="380.11" r="5.33" fill="orange" /> -<circle cx="217.16" cy="324.72" r="5.33" fill="orange" /> -<circle cx="222.49" cy="370.87" r="5.33" fill="orange" /> -<circle cx="222.49" cy="352.41" r="5.33" fill="orange" /> -<circle cx="222.49" cy="333.95" r="5.33" fill="orange" /> -<circle cx="227.82" cy="361.64" r="5.33" fill="orange" /> -<circle cx="227.82" cy="343.18" r="5.33" fill="orange" /> -<circle cx="227.82" cy="380.11" r="5.33" fill="orange" /> 
-<circle cx="227.82" cy="324.72" r="5.33" fill="orange" /> -<circle cx="233.15" cy="370.87" r="5.33" fill="orange" /> -<circle cx="233.15" cy="352.41" r="5.33" fill="orange" /> -<circle cx="233.15" cy="333.95" r="5.33" fill="orange" /> -<circle cx="238.48" cy="361.64" r="5.33" fill="orange" /> -<circle cx="238.48" cy="343.18" r="5.33" fill="orange" /> -<circle cx="238.48" cy="380.11" r="5.33" fill="orange" /> -<circle cx="238.48" cy="324.72" r="5.33" fill="orange" /> -<circle cx="243.81" cy="370.87" r="5.33" fill="orange" /> -<circle cx="243.81" cy="352.41" r="5.33" fill="orange" /> -<circle cx="243.81" cy="333.95" r="5.33" fill="orange" /> -<circle cx="249.14" cy="361.64" r="5.33" fill="orange" /> -<circle cx="249.14" cy="343.18" r="5.33" fill="orange" /> -<circle cx="249.14" cy="324.72" r="5.33" fill="orange" /> -<circle cx="254.47" cy="352.41" r="5.33" fill="orange" /> -<circle cx="254.47" cy="333.95" r="5.33" fill="orange" /> -<circle cx="259.80" cy="343.18" r="5.33" fill="orange" /> -<circle cx="265.13" cy="352.41" r="5.33" fill="orange" /> -<circle cx="140.16" cy="94.89" r="5.33" fill="blue" /> -<circle cx="145.49" cy="104.12" r="5.33" fill="red" /> -<circle cx="145.49" cy="85.66" r="5.33" fill="red" /> -<circle cx="150.82" cy="113.35" r="5.33" fill="red" /> -<circle cx="150.82" cy="94.89" r="5.33" fill="red" /> -<circle cx="150.82" cy="76.43" r="5.33" fill="red" /> -<circle cx="156.15" cy="104.12" r="5.33" fill="red" /> -<circle cx="156.15" cy="85.66" r="5.33" fill="red" /> -<circle cx="161.48" cy="113.35" r="5.33" fill="red" /> -<circle cx="161.48" cy="94.89" r="5.33" fill="red" /> -<circle cx="161.48" cy="76.43" r="5.33" fill="red" /> -<circle cx="166.81" cy="104.12" r="5.33" fill="red" /> -<circle cx="166.81" cy="85.66" r="5.33" fill="red" /> -<circle cx="172.14" cy="113.35" r="5.33" fill="red" /> -<circle cx="172.14" cy="94.89" r="5.33" fill="red" /> -<circle cx="172.14" cy="76.43" r="5.33" fill="red" /> -<circle cx="177.47" cy="104.12" r="5.33" 
fill="red" /> -<circle cx="177.47" cy="85.66" r="5.33" fill="red" /> -<circle cx="182.80" cy="94.89" r="5.33" fill="red" /> -<circle cx="285.03" cy="153.93" r="5.33" fill="red" /> -<circle cx="290.35" cy="163.16" r="5.33" fill="red" /> -<circle cx="290.35" cy="144.70" r="5.33" fill="red" /> -<circle cx="295.68" cy="172.39" r="5.33" fill="red" /> -<circle cx="295.68" cy="153.93" r="5.33" fill="red" /> -<circle cx="295.68" cy="135.47" r="5.33" fill="red" /> -<circle cx="301.01" cy="163.16" r="5.33" fill="red" /> -<circle cx="301.01" cy="144.70" r="5.33" fill="red" /> -<circle cx="306.34" cy="172.39" r="5.33" fill="red" /> -<circle cx="306.34" cy="153.93" r="5.33" fill="red" /> -<circle cx="306.34" cy="135.47" r="5.33" fill="red" /> -<circle cx="311.67" cy="163.16" r="5.33" fill="red" /> -<circle cx="311.67" cy="144.70" r="5.33" fill="red" /> -<circle cx="317.00" cy="153.93" r="5.33" fill="red" /> -<circle cx="317.00" cy="135.47" r="5.33" fill="red" /> -<circle cx="322.33" cy="144.70" r="5.33" fill="red" /> -<circle cx="327.66" cy="153.93" r="5.33" fill="red" /> -<circle cx="277.03" cy="306.59" r="5.33" fill="red" /> -<circle cx="282.36" cy="315.82" r="5.33" fill="red" /> -<circle cx="282.36" cy="297.36" r="5.33" fill="red" /> -<circle cx="287.69" cy="306.59" r="5.33" fill="red" /> -<circle cx="287.69" cy="288.13" r="5.33" fill="red" /> -<circle cx="293.02" cy="315.82" r="5.33" fill="red" /> -<circle cx="293.02" cy="297.36" r="5.33" fill="red" /> -<circle cx="298.35" cy="306.59" r="5.33" fill="red" /> -<circle cx="298.35" cy="288.13" r="5.33" fill="red" /> -<circle cx="303.68" cy="315.82" r="5.33" fill="red" /> -<circle cx="303.68" cy="297.36" r="5.33" fill="red" /> -<circle cx="309.01" cy="306.59" r="5.33" fill="red" /> -<circle cx="309.01" cy="288.13" r="5.33" fill="red" /> -<circle cx="314.34" cy="297.36" r="5.33" fill="red" /> -<circle cx="319.67" cy="306.59" r="5.33" fill="red" /> -<circle cx="81.46" cy="60.95" r="5.33" fill="red" /> -<circle cx="86.79" 
cy="51.72" r="5.33" fill="red" /> -<circle cx="92.12" cy="60.95" r="5.33" fill="red" /> -<circle cx="92.12" cy="42.49" r="5.33" fill="red" /> -<circle cx="97.45" cy="70.18" r="5.33" fill="red" /> -<circle cx="97.45" cy="51.72" r="5.33" fill="red" /> -<circle cx="102.78" cy="60.95" r="5.33" fill="red" /> -<circle cx="102.78" cy="42.49" r="5.33" fill="red" /> -<circle cx="108.10" cy="70.18" r="5.33" fill="red" /> -<circle cx="108.10" cy="51.72" r="5.33" fill="red" /> -<circle cx="113.43" cy="60.95" r="5.33" fill="red" /> -<circle cx="113.43" cy="42.49" r="5.33" fill="red" /> -<circle cx="118.76" cy="51.72" r="5.33" fill="red" /> -<circle cx="124.09" cy="60.95" r="5.33" fill="red" /> -<circle cx="166.20" cy="33.79" r="5.33" fill="blue" /> -<circle cx="171.53" cy="24.56" r="5.33" fill="red" /> -<circle cx="176.86" cy="33.79" r="5.33" fill="red" /> -<circle cx="176.86" cy="15.33" r="5.33" fill="red" /> -<circle cx="182.18" cy="43.02" r="5.33" fill="red" /> -<circle cx="182.18" cy="24.56" r="5.33" fill="red" /> -<circle cx="187.51" cy="33.79" r="5.33" fill="red" /> -<circle cx="187.51" cy="15.33" r="5.33" fill="red" /> -<circle cx="192.84" cy="43.02" r="5.33" fill="red" /> -<circle cx="192.84" cy="24.56" r="5.33" fill="red" /> -<circle cx="198.17" cy="33.79" r="5.33" fill="red" /> -<circle cx="198.17" cy="15.33" r="5.33" fill="red" /> -<circle cx="203.50" cy="24.56" r="5.33" fill="red" /> -<circle cx="208.83" cy="33.79" r="5.33" fill="red" /> -<circle cx="292.07" cy="360.90" r="5.33" fill="black" /> -<circle cx="297.40" cy="370.13" r="5.33" fill="green" /> -<circle cx="297.40" cy="351.67" r="5.33" fill="green" /> -<circle cx="302.73" cy="379.36" r="5.33" fill="green" /> -<circle cx="302.73" cy="360.90" r="5.33" fill="green" /> -<circle cx="308.06" cy="370.13" r="5.33" fill="green" /> -<circle cx="308.06" cy="351.67" r="5.33" fill="green" /> -<circle cx="313.39" cy="379.36" r="5.33" fill="green" /> -<circle cx="313.39" cy="360.90" r="5.33" fill="green" /> -<circle 
cx="318.72" cy="370.13" r="5.33" fill="green" /> -<circle cx="318.72" cy="351.67" r="5.33" fill="green" /> -<circle cx="324.05" cy="360.90" r="5.33" fill="green" /> -<circle cx="329.38" cy="370.13" r="5.33" fill="green" /> -<circle cx="33.09" cy="337.76" r="5.33" fill="red" /> -<circle cx="38.42" cy="347.00" r="5.33" fill="red" /> -<circle cx="38.42" cy="328.53" r="5.33" fill="red" /> -<circle cx="43.75" cy="337.76" r="5.33" fill="red" /> -<circle cx="43.75" cy="319.30" r="5.33" fill="red" /> -<circle cx="49.08" cy="347.00" r="5.33" fill="red" /> -<circle cx="49.08" cy="328.53" r="5.33" fill="red" /> -<circle cx="54.41" cy="337.76" r="5.33" fill="red" /> -<circle cx="54.41" cy="319.30" r="5.33" fill="red" /> -<circle cx="59.74" cy="328.53" r="5.33" fill="red" /> -<circle cx="65.07" cy="337.76" r="5.33" fill="red" /> -<circle cx="362.69" cy="149.60" r="5.33" fill="red" /> -<circle cx="368.02" cy="158.83" r="5.33" fill="red" /> -<circle cx="368.02" cy="140.37" r="5.33" fill="red" /> -<circle cx="373.35" cy="149.60" r="5.33" fill="red" /> -<circle cx="378.68" cy="158.83" r="5.33" fill="red" /> -<circle cx="378.68" cy="140.37" r="5.33" fill="red" /> -<circle cx="384.01" cy="149.60" r="5.33" fill="red" /> -<circle cx="384.01" cy="131.14" r="5.33" fill="red" /> -<circle cx="389.34" cy="140.37" r="5.33" fill="red" /> -<circle cx="394.67" cy="149.60" r="5.33" fill="red" /> -<circle cx="352.67" cy="319.48" r="5.33" fill="red" /> -<circle cx="358.00" cy="328.71" r="5.33" fill="red" /> -<circle cx="358.00" cy="310.25" r="5.33" fill="red" /> -<circle cx="363.33" cy="319.48" r="5.33" fill="red" /> -<circle cx="368.66" cy="328.71" r="5.33" fill="red" /> -<circle cx="368.66" cy="310.25" r="5.33" fill="red" /> -<circle cx="373.99" cy="319.48" r="5.33" fill="red" /> -<circle cx="373.99" cy="301.02" r="5.33" fill="red" /> -<circle cx="379.32" cy="310.25" r="5.33" fill="red" /> -<circle cx="384.65" cy="319.48" r="5.33" fill="red" /> -<rect x="10" y="402.09999999999997" width="127.44" 
height="135" style="fill:white;stroke:black;stroke-width:2" /> -<text x="30" y="427.09999999999997" font-family="Arial" font-size="20">IGHG1</text> -<circle cx="20" cy="422.09999999999997" r="4" fill="red" /> -<text x="30" y="452.09999999999997" font-family="Arial" font-size="20">IGHG3</text> -<circle cx="20" cy="447.09999999999997" r="4" fill="green" /> -<text x="30" y="477.09999999999997" font-family="Arial" font-size="20">IGHA1</text> -<circle cx="20" cy="472.09999999999997" r="4" fill="blue" /> -<text x="30" y="502.09999999999997" font-family="Arial" font-size="20">IGHM</text> -<circle cx="20" cy="497.09999999999997" r="4" fill="orange" /> -<text x="30" y="527.0999999999999" font-family="Arial" font-size="20">unassigned</text> -<circle cx="20" cy="522.0999999999999" r="4" fill="black" /> -</svg> diff --git a/enclone_main/testx/inputs/outputs/enclone_test36_output b/enclone_main/testx/inputs/outputs/enclone_test36_output deleted file mode 100644 index e6397f9d4..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test36_output +++ /dev/null @@ -1,22 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 2 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌───────────┬───────────────────────────────────────┬──────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 73|IGHV1-3 ◆ 743|IGHJ6 │ 254|IGKV1D-39 ◆ 217|IGKJ4 │ -│ ├───────────────────────────────────────┼──────────────────────────────┤ -│ │ 11111111111111111111 │ 111111111111 │ -│ │ 11111122222222223333 │ 011111111112 │ -│ │ 45678901234567890123 │ 901234567890 │ -│ │ ════════CDR3════════ │ ════CDR3════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CQQ◦◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CQQ◦◦◦◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────────┼──────────────────────────────┤ -│# n │ .................... u const u_Σ │ ............ 
u const u_Σ│ -│1 2 │ CARDGMTTVTTTAYYGMDVW 8 IGHM 13 │ CQQSYSTPRVTF 22 IGKC 38│ -└───────────┴───────────────────────────────────────┴──────────────────────────────┘ -barcode,const1,const2,u_sum1,u_sum1 -ACTATCTCATGCCTTC-1,IGHM,IGKC,13,13 -GATGAAAGTTACGACT-1,IGHM,IGKC,13,13 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test37_output b/enclone_main/testx/inputs/outputs/enclone_test37_output deleted file mode 100644 index 4da2cd670..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test37_output +++ /dev/null @@ -1,27 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 8 CELLS - -[1.1] CLONOTYPE = 8 CELLS -┌─────────────────────┬───────────────────────────────────────────┬──────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 88|IGHV1-69D ◆ 743|IGHJ6 │ 286|IGKV3-20 ◆ 216|IGKJ3 │ -│ ├───────────────────────────────────────────┼──────────────────────────────────┤ -│ │ 111111111111111111111 │ 11111111111 1 │ -│ │ 7789 111111222222222233333 │ 1125 00111111111 2 │ -│ │ 1612 456789012345678901234 │ 6740 89012345678 3 │ -│ │ ═════════CDR3════════ │ ════CDR3═══ │ -│reference │ PAKE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ DTTS CQQ◦◦◦◦◦◦◦◦ K │ -│donor ref │ PAKE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ DTTS CQQ◦◦◦◦◦◦◦◦ K │ -├─────────────────────┼───────────────────────────────────────────┼──────────────────────────────────┤ -│# datasets n │ xxxx ..................... u const │ ...x ........... . 
u const│ -│1 123085,123089 4 │ PAKE CAVTIFGVRTALPYYYALDVW 2976 IGHG1 │ EIAG CQQYGSSPFTF R 16827 IGKC │ -│2 123085 2 │ LTEK CAVTIFGVRTALPYYYALDVW 882 IGHG1 │ EIAG CQQYGSSPFTF R 12807 IGKC │ -│3 123089 1 │ PAKK CAVTIFGVRTALPYYYALDVW 2427 IGHG1 │ EIAS CQQYGSSPFTF R 8646 IGKC │ -│4 123085 1 │ PAKE CAVTIFGVRTALPYYYALDVW 359 IGHG1 │ EIAS CQQYGSSPFTF R 1162 IGKC │ -└─────────────────────┴───────────────────────────────────────────┴──────────────────────────────────┘ -123085_barcodes,123089_barcodes -"CGTGAGCCATCGGTTA-1,TTCTCAAAGGTACTCT-1","AGTAGTCTCGCTTGTC-1,CAGCCGACAATCGAAA-1" -"TACGGGCTCCTGCTTG-1,TATCAGGAGCTGTTCA-1", -,GTTCATTCATGTCTCC-1 -CAGATCAGTTCTGGTA-1, - diff --git a/enclone_main/testx/inputs/outputs/enclone_test38_output b/enclone_main/testx/inputs/outputs/enclone_test38_output deleted file mode 100644 index 33a2e30c5..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test38_output +++ /dev/null @@ -1,31 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 8 CELLS - -[1.1] CLONOTYPE = 8 CELLS -┌─────────────────────┬───────────────────────────────────────────┬──────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 88|IGHV1-69D ◆ 743|IGHJ6 │ 286|IGKV3-20 ◆ 216|IGKJ3 │ -│ ├───────────────────────────────────────────┼──────────────────────────────────┤ -│ │ 111111111111111111111 │ 11111111111 1 │ -│ │ 7789 111111222222222233333 │ 1125 00111111111 2 │ -│ │ 1612 456789012345678901234 │ 6740 89012345678 3 │ -│ │ ═════════CDR3════════ │ ════CDR3═══ │ -│reference │ PAKE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ DTTS CQQ◦◦◦◦◦◦◦◦ K │ -│donor ref │ PAKE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ DTTS CQQ◦◦◦◦◦◦◦◦ K │ -├─────────────────────┼───────────────────────────────────────────┼──────────────────────────────────┤ -│# datasets n │ xxxx ..................... u const │ ...x ........... . 
u const│ -│1 123085,123089 4 │ PAKE CAVTIFGVRTALPYYYALDVW 2976 IGHG1 │ EIAG CQQYGSSPFTF R 16827 IGKC │ -│2 123085 2 │ LTEK CAVTIFGVRTALPYYYALDVW 882 IGHG1 │ EIAG CQQYGSSPFTF R 12807 IGKC │ -│3 123089 1 │ PAKK CAVTIFGVRTALPYYYALDVW 2427 IGHG1 │ EIAS CQQYGSSPFTF R 8646 IGKC │ -│4 123085 1 │ PAKE CAVTIFGVRTALPYYYALDVW 359 IGHG1 │ EIAS CQQYGSSPFTF R 1162 IGKC │ -└─────────────────────┴───────────────────────────────────────────┴──────────────────────────────────┘ -123085_barcode,123089_barcode -CGTGAGCCATCGGTTA-1, -TTCTCAAAGGTACTCT-1, -,AGTAGTCTCGCTTGTC-1 -,CAGCCGACAATCGAAA-1 -TACGGGCTCCTGCTTG-1, -TATCAGGAGCTGTTCA-1, -,GTTCATTCATGTCTCC-1 -CAGATCAGTTCTGGTA-1, - diff --git a/enclone_main/testx/inputs/outputs/enclone_test39_output b/enclone_main/testx/inputs/outputs/enclone_test39_output deleted file mode 100644 index 76f5dbcba..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test39_output +++ /dev/null @@ -1,22 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌──────────────────────────┬───────────────────────────────────┬────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 81|IGHV1-58 ◆ 58|IGHJ6 │ 254|IGKV1D-39 ◆ 215|IGKJ2 │ -│ ├───────────────────────────────────┼────────────────────────────┤ -│ │ 1111111111111111 │ 11111111111 │ -│ │ 1111112222222222 │ 01111111111 │ -│ │ 4567890123456789 │ 90123456789 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CQQ◦◦◦◦◦◦◦◦ │ -├──────────────────────────┼───────────────────────────────────┼────────────────────────────┤ -│# barcode n │ ................ u const r │ ........... 
u const r│ -│1 1 │ CAADGGGDQYYYMDVW 3 IGHD 478 │ CQQSYSTPYTF 3 IGKC 406│ -│ AGCTCCTCACCGTTGG-1 │ 3 478 │ 3 406│ -└──────────────────────────┴───────────────────────────────────┴────────────────────────────┘ -barcode u1 u_cell1 r2 r_cell2 -AGCTCCTCACCGTTGG-1 3 3 406 406 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test3_output b/enclone_main/testx/inputs/outputs/enclone_test3_output deleted file mode 100644 index 256653fbb..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test3_output +++ /dev/null @@ -1,39 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬────────────────────────────────────────────┬───────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 122.1.1|IGHV3-23 ◆ 55|IGHJ4 │ 330|IGLV1-47 ◆ 314|IGLJ2 │ -│ ├────────────────────────────────────────────┼───────────────────────────────────┤ -│ │ 11111111111111111111 │ 1111111111111 │ -│ │ 2 11111122222222223333 │ 6 0001111111111 │ -│ │ 3 45678901234567890123 │ 9 7890123456789 │ -│ │ ════════CDR3════════ │ ═════CDR3════ │ -│reference │ V ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ S CAAWD◦◦◦◦◦◦◦◦ │ -│donor ref │ L ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ S CAAWD◦◦◦◦◦◦◦◦ │ -├───────────┼────────────────────────────────────────────┼───────────────────────────────────┤ -│# n │ . .................... const u_Σ ulen │ . ............. 
const u_Σ ulen│ -│1 1 │ L CAKDHFAAAGTLGPYYFDYW IGHM 14 79 │ R CAAWDDSLSGVVF IGLC2 15 46│ -└───────────┴────────────────────────────────────────────┴───────────────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[2] GROUP = 1 CLONOTYPES = 1 CELLS - -[2.1] CLONOTYPE = 1 CELLS -┌───────────┬──────────────────────────────────────┬───────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 81|IGHV1-58 ◆ 58|IGHJ6 │ 254|IGKV1D-39 ◆ 215|IGKJ2 │ -│ ├──────────────────────────────────────┼───────────────────────────────┤ -│ │ 1111111111111111 │ 11111111111 │ -│ │ 1111112222222222 │ 01111111111 │ -│ │ 4567890123456789 │ 90123456789 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CQQ◦◦◦◦◦◦◦◦ │ -├───────────┼──────────────────────────────────────┼───────────────────────────────┤ -│# n │ ................ const u_Σ ulen │ ........... const u_Σ ulen│ -│1 1 │ CAADGGGDQYYYMDVW IGHD 3 61 │ CQQSYSTPYTF IGKC 3 23│ -└───────────┴──────────────────────────────────────┴───────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test40_output b/enclone_main/testx/inputs/outputs/enclone_test40_output deleted file mode 100644 index d1350f791..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test40_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 213 CELLS - -[1.1] CLONOTYPE = 213 CELLS -┌───────────┬─────────────────────────────────────────────────┬──────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 645|TRBV5-5 ◆ 550|TRBJ2-3 │ 458.1.1|TRAV1-2 ◆ 400|TRAJ11│ -│ ├─────────────────────────────────────────────────┼──────────────────────────────┤ -│ │ │ 5 │ -│ │ │ 9 │ -│ │ │ │ -│reference │ │ P │ -│donor ref │ │ P │ -├───────────┼─────────────────────────────────────────────────┼──────────────────────────────┤ -│# n │ u const notes │ . 
u const │ -│1 104 │ 10 TRBC2 │ P 3 TRAC │ -│2 108 │ 10 TRBC2 │ │ -│3 1 │ 3 TRBC2 gap from J stop to C start = 152 │ │ -└───────────┴─────────────────────────────────────────────────┴──────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test41_output b/enclone_main/testx/inputs/outputs/enclone_test41_output deleted file mode 100644 index 12cdd3e85..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test41_output +++ /dev/null @@ -1,23 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 2 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌───────────┬─────────────────────────────────┬─────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 122.1.2|IGHV3-23 ◆ 54|IGHJ4 │ 238|IGKV1-8 ◆ 218|IGKJ5│ -│ ├─────────────────────────────────┼─────────────────────────┤ -│ │ 11111111111111111 │ 11111111111 │ -│ │ 2 11111122222222223 │ 00011111111 │ -│ │ 3 45678901234567890 │ 78901234567 │ -│ │ ═══════CDR3══════ │ ════CDR3═══ │ -│reference │ V ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ L ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQQ◦◦◦◦◦◦◦◦ │ -├───────────┼─────────────────────────────────┼─────────────────────────┤ -│# n │ . ................. u const │ ........... 
u const │ -│1 1 │ L CAKAVAGKAVAGGWDYW 2 IGHD │ CQQYYSYPRTF 2 IGKC │ -│2 1 │ │ CQQYYSYPRTF 3 IGKC │ -└───────────┴─────────────────────────────────┴─────────────────────────┘ -gex_cell - 1928 - 1785 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test44_output b/enclone_main/testx/inputs/outputs/enclone_test44_output deleted file mode 100644 index 411a2b3a5..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test44_output +++ /dev/null @@ -1,24 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌───────────────────────┬──────────────────────────────────┬─────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2│ -│ ├──────────────────────────────────┼─────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 11111112222222222333 │ 00000111111 │ -│ │ 34567890123456789012 │ 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -├───────────────────────┼──────────────────────────────────┼─────────────────────────┤ -│# n gex RPS27_g_μ │ .................... u const │ ........... 
u const │ -│1 1 2743 26 │ CARSFFGDTAMVMFQAFDPW 2 IGHD │ CQAWDSSTVVF 8 IGLC2 │ -│2 2 2769 25 │ │ CQAWDSSTVVF 6 IGLC2 │ -└───────────────────────┴──────────────────────────────────┴─────────────────────────┘ -barcode gex_cell CD19_ab CD19_ab_cell -GCTGGGTTCAACCAAC-1 2743 1210 1210 -ACGTCAAAGTGGTAGC-1 2484 1054 850 -ATGTGTGAGAGTACCG-1 2769 1054 1054 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test45_output b/enclone_main/testx/inputs/outputs/enclone_test45_output deleted file mode 100644 index d379debd6..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test45_output +++ /dev/null @@ -1,35 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 44 CELLS - -[1.1] CLONOTYPE = 44 CELLS -┌───────────┬────────────────────────────────────────────────────────────────────────────────┬───────────────────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159|IGHV3-7 ◆ 53|IGHJ3 │ 379|IGLV5-37 ◆ 316|IGLJ3 │ -│ ├────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────────────────┤ -│ │ 11 1111111111111111111 │ 11 11111111111 │ -│ │ 23344445667777788999901 1111112222222222333 │ 23556679911 11111122222 │ -│ │ 22324893380156725357903 4567890123456789012 │ 17068902403 45678901234 │ -│ │ ════════CDR3═══════ │ ════CDR3═══ │ -│reference │ LPGAGSSSLNKQEKYVRANLLQ◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ VRSYLYYAAAY CMIW◦◦◦◦◦◦◦ │ -│donor ref │ LPGAGSSSLNKQEKYVRANLLQ◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ VRSYLYYAAAY CMIW◦◦◦◦◦◦◦ │ -├───────────┼────────────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────────────────┤ -│# n │ xxxxxxxxxxxxxxxxxxxxxxx .x......x........x. u const ndiff1vj ndiff2vj │ xxxxxxxxxxx ........... 
u const ndiff1vj ndiff2vj│ -│1 10 │ LPGAGSSSPNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW 2795 IGHG1 0 11 │ VRGYLYYAAAY CMIWPSNAWVF 14994 IGLC2 0 6│ -│2 8 │ LPGAGSSSLNKEEKYMRANLLQY CARDQNFDESSGYDAFDIW 3944 IGHG1 11 0 │ VRSYLYYAAAY CMIWPSNAWVF 12199 IGLC2 6 0│ -│3 7 │ LPGAKSNSLNKEQKYVRANLLQY CARDQNFDESSGYDAFDIW 2314 IGHG1 12 13 │ VRSYLYYTAAY CMIWPSNAWVF 10274 IGLC2 4 4│ -│4 5 │ LPGAGSSSLNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW 11 IGHG1 4 7 │ VRSYLYYAGAY CMIWPSNAWVF 26 IGLC2 6 6│ -│5 2 │ LPGAGSSSLNKEEKYMRANLLQY CARDQNFDESSGYDAFDIW 1290 IGHG1 10 1 │ VRSYLYYAAAY CMIWPSNAWVF 6825 IGLC2 6 0│ -│6 1 │ LPGAGSNSLNKEEIYVRANLLEY CTRDQNFDESSGYDAFDIW 4407 IGHG1 11 12 │ VRSYLYYAAAY CMIWPSNAWVF 14507 IGLC2 3 3│ -│7 1 │ LPGAGSSSLNKEEKYVRANLLQY CARDQNFDDSSGYDAFDIW 3893 IGHG1 6 8 │ VRSYLYYAAAY CMIWPSNAWVF 15622 IGLC2 4 2│ -│8 1 │ LPGAGSSSLNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW 3302 IGHG1 4 7 │ VRSYLYYAAAY CMIWPSNAWVF 5256 IGLC2 5 5│ -│9 1 │ LPGAGSSSPNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW 3067 IGHG1 2 12 │ VRGYLYYAAAY CMIWPSNAWVF 6429 IGLC2 0 6│ -│10 1 │ LPGAGRNSLNKEEKYVRGNLLQY CARDQNFDESSGYDAFDIW 2724 IGHG3 15 8 │ VRGYLYYAAAY CMIWPSNAWVF 5775 IGLC2 4 4│ -│11 1 │ LPGAGSSSLNKEEKYMRANLLQY CARDQNFDESSGYDAFDIW 2504 IGHA1 11 0 │ VRSYLYYAAAY CMIWPSNAWVF 14551 IGLC2 6 0│ -│12 1 │ LPGAGSSSLNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW 404 IGHG1 1 10 │ VRGYLYYAAAY CMIWPSNAWVF 3456 IGLC2 0 6│ -│13 1 │ LPGAGSSSLNKQEKYVRANLLQY CARDQNFDESSGYDAFDIW 136 IGHG1 6 7 │ VRSYLYYAAAY CMIWPSNAWVF 1023 IGLC2 4 4│ -│14 1 │ LPGAGSNSLNKEEIYVRANLLEY CTRDQNFDESSGYDAFDIW 96 IGHG1 11 12 │ VRSYLYYAAAY CMIWPSNAWVF 1762 IGLC2 5 5│ -│15 1 │ LPGAGSSSPNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW 27 IGHG1 1 10 │ VRSYLYYAAAY CMIWPSNAWVF 153 IGLC2 1 5│ -│16 1 │ │ VRSYLYYAAAY CMIWPSNAWVF 792 IGLC2 6 0│ -│17 1 │ │ VRSYLYYAAAY CMIWPSNAWVF 232 IGLC2 5 5│ -└───────────┴────────────────────────────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────┘ - diff --git 
a/enclone_main/testx/inputs/outputs/enclone_test46_output b/enclone_main/testx/inputs/outputs/enclone_test46_output deleted file mode 100644 index 5785c6d4e..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test46_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬─────────────────────────────────────────────────────────────────────────┬──────────────────────────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 81|IGHV1-58 ◆ 58|IGHJ6 │ 254|IGKV1D-39 ◆ 215|IGKJ2 │ -│ ├─────────────────────────────────────────────────────────────────────────┼──────────────────────────────────────────────────────────────────┤ -│ │ 1111111111111111 │ 11111111111 │ -│ │ 1111112222222222 │ 01111111111 │ -│ │ 4567890123456789 │ 90123456789 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦YMDVW │ CQQ◦◦◦◦◦◦◦◦ │ -├───────────┼─────────────────────────────────────────────────────────────────────────┼──────────────────────────────────────────────────────────────────┤ -│# n │ ................ u const u_μ u_min u_max r r_μ r_min r_max │ ........... 
u const u_μ u_min u_max r r_μ r_min r_max│ -│1 1 │ CAADGGGDQYYYMDVW 3 IGHD 3 3 3 478 478 478 478 │ CQQSYSTPYTF 3 IGKC 3 3 3 406 406 406 406│ -└───────────┴─────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────────────────────────────┘ -u_mean1 u_min1 u_max1 r2 r_mean2 r_min2 r_max2 - 3 3 3 406 406 406 406 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test48_output b/enclone_main/testx/inputs/outputs/enclone_test48_output deleted file mode 100644 index c167c20a3..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test48_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 13 CELLS - -[1.1] CLONOTYPE = 13 CELLS -┌─────────────────────┬─────────────────────────────────────────────┬──────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 181.1.1|IGHV4-30-2 ◆ 55|IGHJ4 │ 394|IGLV8-61 ◆ 314|IGLJ2 │ -│ ├─────────────────────────────────────────────┼──────────────────────────────────┤ -│ │ 1 1111111111111 │ 11 111111111111 1 │ -│ │ 2224555667889990 1111122222222 │ 56701 111111122222 3 │ -│ │ 0571358093471346 5678901234567 │ 54452 345678901234 3 │ -│ │ ═════CDR3════ │ ════CDR3════ │ -│reference │ LSSASSRPGHKVVRST ◦◦◦◦◦◦◦◦◦◦◦◦◦ │ STYAY CVLY◦◦◦◦◦◦◦◦ L │ -│donor ref │ VSPTYSRHGYKVVTST ◦◦◦◦◦◦◦◦◦◦◦◦◦ │ STYAY CVLY◦◦◦◦◦◦◦◦ L │ -├─────────────────────┼─────────────────────────────────────────────┼──────────────────────────────────┤ -│# n gex_μ gex_Σ │ ................ ............. u const │ ..... ............ . u const│ -│1 12 6231 74770 │ VSPTYIRHGYNVLTST CASRKSGNYIIYW 73 IGHG3 │ SSHAY CVLYMGSGIVVF L 187 IGLC2│ -│2 1 5251 5251 │ VSPTYIRHGYNVLTST CASRKSGNYIIYW 17 ? 
│ SSHAY CVLYMGSGIVVF L 27 IGLC2│ -└─────────────────────┴─────────────────────────────────────────────┴──────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test49_output b/enclone_main/testx/inputs/outputs/enclone_test49_output deleted file mode 100644 index 76c91929d..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test49_output +++ /dev/null @@ -1,61 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -<!-- --> -<html xmlns="http://www.w3.org/1999/xhtml"> -<head> -<meta http-equiv="Content-Type" content="application/xml+xhtml; charset=UTF-8"/> -<title>CAAWDDSLNGWVF - - - -
[1] GROUP = 1 CLONOTYPES = 1 CELLS
-
-[1.1] CLONOTYPE = 1 CELLS
-┌───────────┬──────────────────────────┐
-│           │  CHAIN 1                 │
-│           │  322|IGLV1-36 ◆ 316|IGLJ3│
-│           ├──────────────────────────┤
-│           │  1111111111111           │
-│           │  0001111111111           │
-│           │  7890123456789           │
-│           │  ═════CDR3════           │
-│reference  │  CAAWD◦◦◦◦◦◦◦◦           │
-│donor ref  │  CAAWD◦◦◦◦◦◦◦◦           │
-├───────────┼──────────────────────────┤
-│#  n         .............  u  const │
-│1  1       │  CAAWDDSLNGWVF  6  IGLC3 │
-└───────────┴──────────────────────────┘
->group1.clonotype1.exact1.chain1
-ATGGCCTGGTCCCCTCTCTTCCTCACCCTCATCACTCACTGTGCAGGGTCCTGGGCCCAGTCTGTGCTGACTCAGCCACCCTCGGTGTCTGAAGCCCCCAGGCAGAGGGTCACCATCTCCTGTTCTGGAAGCAGCTCCAACATCGGAAATAATGCTGTAAACTGGTACCAGCAGCTCCCAGGAAAGGCTCCCAAACTCCTCATCTATTATGATGATCTGCTGCCCTCAGGGGTCTCTGACCGATTCTCTGGCTCCAAGTCTGGCACCTCAGCCTCCCTGGCCATCAGTGGGCTCCAGTCTGAGGATGAGGCTGATTATTACTGTGCAGCATGGGATGACAGCCTGAATGGTTGGGTGTTCGGCGGAGGGACCAAGCTGACCGTCCTAGGTCAGCCCAAGGCTGCCCCCTCGGTCACTCTGTTCCCACCCTCCTCTGAGGAGCTTCAAGCCAACAAGGCCACACTGGTGTGTCTCATAAGTGACTTCTACCCGGGAGCCGTGACAGTGGCCTGGAAGGCAGATAGCAGCCCCGTCAAGGCGGGAGTGGAGACCACCACACCCTCCAAACAAAGCAACAACAAGTACGCGGCCAGCAGCTACCTGAGCCTGACGCCTGAGCAGTGGAAGTCCCACAAAAGCTACAGCTGCCAGGTCACGCATGAAGGGAGCACCGTGGAGAAGACAGTGGCCCCTACAGAATGTTCA
->group1.clonotype1.exact1.chain1
-MAWSPLFLTLITHCAGSWAQSVLTQPPSVSEAPRQRVTISCSGSSSNIGNNAVNWYQQLPGKAPKLLIYYDDLLPSGVSDRFSGSKSGTSASLAISGLQSEDEADYYCAAWDDSLNGWVFGGGTKLTVLGQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHKSYSCQVTHEGSTVEKTVAPTECS
-barcodes            n
-AGGGTGAAGCGTGAAC-1  1
-
- - - diff --git a/enclone_main/testx/inputs/outputs/enclone_test4_output b/enclone_main/testx/inputs/outputs/enclone_test4_output deleted file mode 100644 index 98694e381..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test4_output +++ /dev/null @@ -1,26 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -CHAIN 1 -• ACATGGGAAATACTTTCTGAGAGTCCTGGACCTCCTGTGCAAGAACATGAAACATCTGTGGTTCTTCCTTCTCCTGGTGGCAGCTCCCAGATGGGTCCTGTCCCAGGTGCAGCTGCAGGAGTCGGGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGTAGTTACTACTGGAGCTGGATCCGGCAGCCCCCAGGGAAGGGACTGGAGTGGATTGGGTATATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCCAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACCGCTGCGGACACGGCCGTGTATTACTGTGCGAGGTCCTTTTTCGGGGATACAGCTATGGTTATGTTCCAGGCGTTCGACCCCTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCAGCACCCACCAAGGCTCCGGATGTGTTCCCCATCATATCAGGGTGCAGACACCCAAAGGATAACAGCCCTGTGGTCCTGGCATGCTTGATAACTGGGTACCACC -CHAIN 2 -• GACACAGCTCCTCCTGCAGCAGCCCCTGACTGCTGATTTGCATCACGGGCCGCTCTTTCCAGCAAGGGGATAAGAGAGGCCTGGAAGAACCTGCCCAGCCTGGGCCTCAGGAAGCAGCATCGGAGGTGCCTCAGCCATGGCATGGATCCCTCTCTTCCTCGGCGTCCTTGCTTACTGCACAGGATCCGTGGCCTCCTATGAGCTGACTCAGCCACCCTCAGTGTCCGTGTCCCCAGGACAGACAGCCAGCATCACCTGCTCTGGAGATAAATTGGGGGATAAATATGCTTGCTGGTATCAGCAGAAGCCAGGCCAGTCCCCTGTGCTGGTCATCTATCAAGATAGCAAGCGGCCCTCAGGGATCCCTGAGCGATTCTCTGGCTCCAACTCTGGGAACACAGCCACTCTGACCATCAGCGGGACCCAGGCTATGGATGAGGCTGACTATTACTGTCAGGCGTGGGACAGCAGCACTGTGGTATTCGGCGGAGGGACCAAGCTGACCGTCCTAGGTCAGCCCAAGGCTGCCCCCTCGGTCACTCTGTTCCCGCCCTCCTCTGAGGAGCTTCAAGCCAACAAGGCCACACTGGTGTGTCTCATAAGTGACTTCTACCCGGGAGCCGTGACAGTGGCCTGGAAGGCAGATAGCAGCCCCGTCAAGGCGGGAGTGGAGACCACCACACCCTCCAAACAAAGCAACAACAAGTACGCGGCCAGCAGCTA -┌─────────────────────────────────────────────────────────────────────────┬───────────────────────────────────────────────┬────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2 │ -│ 
├───────────────────────────────────────────────┼────────────────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 11111112222222222333 │ 00000111111 │ -│ │ 34567890123456789012 │ 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -├─────────────────────────────────────────────────────────────────────────┼───────────────────────────────────────────────┼────────────────────────────────────┤ -│# n gex CD19_ab_μ CD25_ab_μ IGLV3-1_g_μ IGLV3-1_g_% RPS27_g_μ │ u const ulen udiff │ u const ulen udiff│ -│1 1 2743 1210 102 8 0.29 26 │ CARSFFGDTAMVMFQAFDPW 2 IGHD 46 │ CQAWDSSTVVF 8 IGLC2 136 +136│ -│2 2 2769 952 53 4 0.15 25 │ │ CQAWDSSTVVF 6 IGLC2 87 +87│ -│Σ 3 3114 207 16 0.60 76 │ │ │ -│μ 1.0 1038.0 69.0 5.3 0.20 25.3 │ │ │ -└─────────────────────────────────────────────────────────────────────────┴───────────────────────────────────────────────┴────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test52_output b/enclone_main/testx/inputs/outputs/enclone_test52_output deleted file mode 100644 index 5a406c45d..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test52_output +++ /dev/null @@ -1,22 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 17 CELLS - -[1.1] CLONOTYPE = 17 CELLS -┌──────────────────────────────────────────────────────────┬──────────────────────────────────────┬───────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 204|IGHV5-51 ◆ 53|IGHJ3 │ 330|IGLV1-47 ◆ 316|IGLJ3 │ -│ ├──────────────────────────────────────┼───────────────────────────────┤ -│ │ 1111111111111111 │ 11111111111 │ -│ │ 6777 1111112222222222 │ 467 00011111111 │ -│ │ 5257 4567890123456789 │ 490 78901234567 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ WGDR ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ SSN CAAWD◦◦◦◦◦◦ │ -│donor ref │ WGDR ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ SSN CAAWD◦◦◦◦◦◦ │ 
-├──────────────────────────────────────────────────────────┼──────────────────────────────────────┼───────────────────────────────┤ -│# n (IGHV5-51|IGLV1-47)_g_% IGH.*_g_% IG(K|L).*_g_% │ x... ................ u const │ ... ........... u const│ -│1 14 47.66 28.32 29.25 │ FAHT CARPKSDYIIDAFDIW 518 IGHG1 │ NRS CAAWDDSLWVF 1050 IGLC3│ -│2 1 67.69 32.21 40.14 │ FAHT CARPKSDYIIDAFDIW 8949 IGHA1 │ NRS CAAWDDSLWVF 11485 IGLC3│ -│3 1 1.64 1.28 1.76 │ FAHT CARPKSDYIIDAFDIW 20 IGHG1 │ NRS CAAWDDSLWVF 21 IGLC2│ -│4 1 1.17 25.57 18.17 │ WAHT CARPKSDYIIDAFDIW 15 IGHG1 │ NRS CAAWDDSLWVF 711 IGLC3│ -└──────────────────────────────────────────────────────────┴──────────────────────────────────────┴───────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test53_output b/enclone_main/testx/inputs/outputs/enclone_test53_output deleted file mode 100644 index 592f3394c..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test53_output +++ /dev/null @@ -1,26 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 6 CELLS - -[1.1] CLONOTYPE = 6 CELLS -┌───────────────────────────────────────────────────────────┬────────────────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 146.1.1|IGHV3-53 ◆ 51|IGHJ2 │ 282|IGKV3-11 ◆ 216|IGKJ3 │ -│ ├────────────────────────────────────────┼─────────────────────────────┤ -│ │ 11111111111111111111 │ 1111111111111 │ -│ │ 12 11111112222222222333 │ 0001111111111 │ -│ │ 35 34567890123456789012 │ 7890123456789 │ -│ │ ════════CDR3════════ │ ═════CDR3════ │ -│reference │ ST ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦LW │ CQQ◦◦◦◦◦◦◦◦◦◦ │ -│donor ref │ LS ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦LW │ CQQ◦◦◦◦◦◦◦◦◦◦ │ -├───────────────────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────────────────┤ -│# barcode n gex n_gex JCHAIN_g_% IG% │ .. ...........x........ u_μ const │ ............. 
u_μ const│ -│1 5 18224 5 3.15 65.29 │ LS CAREGGVGVVTATDWYFDLW 3147 IGHM │ CQQRSNWPPLFTF 11514 IGKC │ -│ ACGCAGCGTCTGCAAT-1 8004 1 2.21 62.72 │ │ │ -│ AGTGAGGTCTGCGGCA-1 54206 1 4.23 67.71 │ │ │ -│ TAAGTGCAGGCCCGTT-1 25692 1 2.83 74.28 │ │ │ -│ TACGGATAGTCCGTAT-1 18224 1 0.81 37.19 │ │ │ -│ TGACTAGGTTGTGGCC-1 17268 1 3.16 75.13 │ │ │ -│2 1 16875 1 2.69 68.72 │ LS CAREGGVGVVTTTDWYFDLW 4935 IGHM │ CQQRSNWPPLFTF 11945 IGKC │ -│ AGCGTCGGTCTAGTGT-1 16875 1 2.69 68.72 │ │ │ -└───────────────────────────────────────────────────────────┴────────────────────────────────────────┴─────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test55_output b/enclone_main/testx/inputs/outputs/enclone_test55_output deleted file mode 100644 index 4a2bae0e2..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test55_output +++ /dev/null @@ -1,27 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌────────────────────────────────┬──────────────────────────────────┬─────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2│ -│ ├──────────────────────────────────┼─────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 11111112222222222333 │ 00000111111 │ -│ │ 34567890123456789012 │ 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -├────────────────────────────────┼──────────────────────────────────┼─────────────────────────┤ -│# barcode n cred │ .................... u const │ ........... 
u const │ -│1 1 92.6 │ CARSFFGDTAMVMFQAFDPW 2 IGHD │ CQAWDSSTVVF 8 IGLC2 │ -│ GCTGGGTTCAACCAAC-1 92.6 │ 2 │ 8 │ -│2 2 90.6 │ │ CQAWDSSTVVF 6 IGLC2 │ -│ ACGTCAAAGTGGTAGC-1 89.7 │ │ 6 │ -│ ATGTGTGAGAGTACCG-1 90.6 │ │ 2 │ -└────────────────────────────────┴──────────────────────────────────┴─────────────────────────┘ -cred_cell - 92.6 - 89.7 - 90.6 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test56_output b/enclone_main/testx/inputs/outputs/enclone_test56_output deleted file mode 100644 index f4b18d86f..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test56_output +++ /dev/null @@ -1,22 +0,0 @@ - - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬───────────────────────────────────┬──────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 67.1.1|IGHV1-2 ◆ 58|IGHJ6 │ 342|IGLV2-14 ◆ 312|IGLJ1 │ -│ ├───────────────────────────────────┼──────────────────────────────┤ -│ │ 1111111111111111111 │ 1111111111111 │ -│ │ 8 1111112222222222333 │ 7 0011111111112 │ -│ │ 5 4567890123456789012 │ 560 8901234567890 │ -│ │ ════════CDR3═══════ │ ═════CDR3════ │ -│reference │ W ◦◦◦◦◦◦◦◦◦◦◦◦◦◦YMDVWLLE CSSYTSS◦◦◦◦◦◦ │ -│donor ref │ R ◦◦◦◦◦◦◦◦◦◦◦◦◦◦YMDVWLLE CSSYTSS◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────┼──────────────────────────────┤ -# n . ................... u const ... ............. 
u const -│1 1 │ R CARDPRGWGVELLYYMDVW 9 IGHM │ LFD CSSYTSSSLPYVF 55 IGLC1│ -└───────────┴───────────────────────────────────┴──────────────────────────────┘ - - - diff --git a/enclone_main/testx/inputs/outputs/enclone_test57_output b/enclone_main/testx/inputs/outputs/enclone_test57_output deleted file mode 100644 index cae0cda42..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test57_output +++ /dev/null @@ -1,25 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌──────┬──────────────────────────┐ -│ │ CHAIN 1 │ -│ │ 392|IGLV7-46 ◆ 316|IGLJ3│ -│ ├──────────────────────────┤ -│# n │ u const │ -│1 1 │ 2 IGLC3 │ -└──────┴──────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[2] GROUP = 1 CLONOTYPES = 1 CELLS - -[2.1] CLONOTYPE = 1 CELLS -┌──────┬──────────────────────────┐ -│ │ CHAIN 1 │ -│ │ 366|IGLV3-25 ◆ 316|IGLJ3│ -│ ├──────────────────────────┤ -│# n │ u const │ -│1 1 │ 21 IGLC2 │ -└──────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test58_output b/enclone_main/testx/inputs/outputs/enclone_test58_output deleted file mode 100644 index eca7cd194..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test58_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1.1] CLONOTYPE = 1 CELLS -┌──────┬──────────────────────────┐ -│ │ CHAIN 1 │ -│ │ 392|IGLV7-46 ◆ 316|IGLJ3│ -│ ├──────────────────────────┤ -│# n │ u const │ -│1 1 │ 2 IGLC3 │ -└──────┴──────────────────────────┘ - -╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ - -[2.1] CLONOTYPE = 1 CELLS -┌──────┬──────────────────────────┐ -│ │ CHAIN 1 │ -│ │ 366|IGLV3-25 ◆ 316|IGLJ3│ -│ ├──────────────────────────┤ -│# n │ u const │ -│1 1 │ 21 IGLC2 │ -└──────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test59_output b/enclone_main/testx/inputs/outputs/enclone_test59_output deleted file mode 100644 index c88bb8153..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test59_output +++ /dev/null @@ -1,61 
+0,0 @@ - - - - - - -enclone output - - - -
[1] GROUP = 1 CLONOTYPES = 1 CELLS
-
-[1.1] CLONOTYPE = 1 CELLS
-┌──────┬──────────────────────────┐
-│      │  CHAIN 1                 │
-│      │  392|IGLV7-46 ◆ 316|IGLJ3│
-│      ├──────────────────────────┤
-│#  n  │  u  const                │
-│1  1  │  2  IGLC3                │
-└──────┴──────────────────────────┘
-
-╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸
-
-[2] GROUP = 1 CLONOTYPES = 1 CELLS
-
-[2.1] CLONOTYPE = 1 CELLS
-┌──────┬──────────────────────────┐
-│      │  CHAIN 1                 │
-│      │  366|IGLV3-25 ◆ 316|IGLJ3│
-│      ├──────────────────────────┤
-│#  n  │   u  const               │
-│1  1  │  21  IGLC2               │
-└──────┴──────────────────────────┘
-
- - - diff --git a/enclone_main/testx/inputs/outputs/enclone_test5_output b/enclone_main/testx/inputs/outputs/enclone_test5_output deleted file mode 100644 index c1765fb64..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test5_output +++ /dev/null @@ -1,23 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 274 CELLS - -[1.1] CLONOTYPE = 274 CELLS -┌───────────┬─────────────────────────────────────────────────────────────┬───────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 621|TRBV29-1 ◆ 541|TRBJ1-2 │ 466|TRAV12-3 ◆ 429|TRAJ39│ -│ ├─────────────────────────────────────────────────────────────┼───────────────────────────┤ -│ │ 1111111111111 │ 11111111111 │ -│ │ 0001111111111 │ 11111111112 │ -│ │ 7890123456789 │ 01234567890 │ -│ │ ═════CDR3════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦F │ ◦◦◦◦◦◦NMLTF │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦F │ ◦◦◦◦◦◦NMLTF │ -├───────────┼─────────────────────────────────────────────────────────────┼───────────────────────────┤ -│# n │ ............. u const notes │ ........... u const │ -│1 240 │ CSVDLEANYGYTF 6 TRBC1 │ CAIGPGNMLTF 4 TRAC │ -│2 2 │ CSVDLEANYGYTF 7 TRBC1 gap from J stop to C start = 293 │ CAIGPGNMLTF 9 TRAC │ -│3 1 │ CSVDLEANYGYTF 3 TRBC1 gap from J stop to C start = 157 │ CAIGPGNMLTF 5 TRAC │ -│4 10 │ CSVDLEANYGYTF 4 TRBC1 │ │ -│5 21 │ │ CAIGPGNMLTF 3 TRAC │ -└───────────┴─────────────────────────────────────────────────────────────┴───────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test60_output b/enclone_main/testx/inputs/outputs/enclone_test60_output deleted file mode 100644 index 7595852a5..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test60_output +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - -enclone output - - - -
[1.1] CLONOTYPE = 1 CELLS
-┌──────┬──────────────────────────┐
-│      │  CHAIN 1                 │
-│      │  392|IGLV7-46 ◆ 316|IGLJ3│
-│      ├──────────────────────────┤
-│#  n  │  u  const                │
-│1  1  │  2  IGLC3                │
-└──────┴──────────────────────────┘
-
-╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸
-
-[2.1] CLONOTYPE = 1 CELLS
-┌──────┬──────────────────────────┐
-│      │  CHAIN 1                 │
-│      │  366|IGLV3-25 ◆ 316|IGLJ3│
-│      ├──────────────────────────┤
-│#  n  │   u  const               │
-│1  1  │  21  IGLC2               │
-└──────┴──────────────────────────┘
-
- - - diff --git a/enclone_main/testx/inputs/outputs/enclone_test61_output b/enclone_main/testx/inputs/outputs/enclone_test61_output deleted file mode 100644 index 1ead283be..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test61_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 2 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌──────┬───────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 122.1.1|IGHV3-23 ◆ 55|IGHJ4 │ 284|IGKV3-15 ◆ 214|IGKJ1│ -│ ├───────────────────────────────┼──────────────────────────┤ -│# n │ u const │ u const │ -│1 2 │ 2 IGHM │ 17 IGKC │ -└──────┴───────────────────────────────┴──────────────────────────┘ - -[1.2] CLONOTYPE = 1 CELLS -┌──────┬─────────────────────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 122.1.1|IGHV3-23 ◆ 33|IGHD6-19 ◆ 55|IGHJ4 │ 284|IGKV3-15 ◆ 214|IGKJ1│ -│ ├─────────────────────────────────────────────┼──────────────────────────┤ -│# n │ u const │ u const │ -│1 1 │ 5 IGHM │ 28 IGKC │ -└──────┴─────────────────────────────────────────────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test62_output b/enclone_main/testx/inputs/outputs/enclone_test62_output deleted file mode 100644 index ffd634904..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test62_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1.1] CLONOTYPE = 2 CELLS -┌──────┬───────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 122.1.1|IGHV3-23 ◆ 55|IGHJ4 │ 284|IGKV3-15 ◆ 214|IGKJ1│ -│ ├───────────────────────────────┼──────────────────────────┤ -│# n │ u const │ u const │ -│1 2 │ 2 IGHM │ 17 IGKC │ -└──────┴───────────────────────────────┴──────────────────────────┘ - -[1.2] CLONOTYPE = 1 CELLS -┌──────┬─────────────────────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 122.1.1|IGHV3-23 ◆ 33|IGHD6-19 ◆ 55|IGHJ4 │ 284|IGKV3-15 ◆ 214|IGKJ1│ -│ 
├─────────────────────────────────────────────┼──────────────────────────┤ -│# n │ u const │ u const │ -│1 1 │ 5 IGHM │ 28 IGKC │ -└──────┴─────────────────────────────────────────────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test63_output b/enclone_main/testx/inputs/outputs/enclone_test63_output deleted file mode 100644 index 7bc6c32ce..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test63_output +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - -enclone output - - - -
[1] GROUP = 2 CLONOTYPES = 3 CELLS
-
-[1.1] CLONOTYPE = 2 CELLS
-┌──────┬───────────────────────────────┬──────────────────────────┐
-│      │  CHAIN 1                      │  CHAIN 2                 │
-│      │  122.1.1|IGHV3-23 ◆ 55|IGHJ4  │  284|IGKV3-15 ◆ 214|IGKJ1│
-│      ├───────────────────────────────┼──────────────────────────┤
-│#  n  │  u  const                     │   u  const               │
-│1  2  │  2  IGHM                      │  17  IGKC                │
-└──────┴───────────────────────────────┴──────────────────────────┘
-
-[1.2] CLONOTYPE = 1 CELLS
-┌──────┬─────────────────────────────────────────────┬──────────────────────────┐
-│      │  CHAIN 1                                    │  CHAIN 2                 │
-│      │  122.1.1|IGHV3-23 ◆ 33|IGHD6-19 ◆ 55|IGHJ4  │  284|IGKV3-15 ◆ 214|IGKJ1│
-│      ├─────────────────────────────────────────────┼──────────────────────────┤
-│#  n  │  u  const                                   │   u  const               │
-│1  1  │  5  IGHM                                    │  28  IGKC                │
-└──────┴─────────────────────────────────────────────┴──────────────────────────┘
-
- - - diff --git a/enclone_main/testx/inputs/outputs/enclone_test64_output b/enclone_main/testx/inputs/outputs/enclone_test64_output deleted file mode 100644 index 4c0865817..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test64_output +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - -enclone output - - - -
[1.1] CLONOTYPE = 2 CELLS
-┌──────┬───────────────────────────────┬──────────────────────────┐
-│      │  CHAIN 1                      │  CHAIN 2                 │
-│      │  122.1.1|IGHV3-23 ◆ 55|IGHJ4  │  284|IGKV3-15 ◆ 214|IGKJ1│
-│      ├───────────────────────────────┼──────────────────────────┤
-│#  n  │  u  const                     │   u  const               │
-│1  2  │  2  IGHM                      │  17  IGKC                │
-└──────┴───────────────────────────────┴──────────────────────────┘
-
-[1.2] CLONOTYPE = 1 CELLS
-┌──────┬─────────────────────────────────────────────┬──────────────────────────┐
-│      │  CHAIN 1                                    │  CHAIN 2                 │
-│      │  122.1.1|IGHV3-23 ◆ 33|IGHD6-19 ◆ 55|IGHJ4  │  284|IGKV3-15 ◆ 214|IGKJ1│
-│      ├─────────────────────────────────────────────┼──────────────────────────┤
-│#  n  │  u  const                                   │   u  const               │
-│1  1  │  5  IGHM                                    │  28  IGKC                │
-└──────┴─────────────────────────────────────────────┴──────────────────────────┘
-
- - - diff --git a/enclone_main/testx/inputs/outputs/enclone_test65_output b/enclone_main/testx/inputs/outputs/enclone_test65_output deleted file mode 100644 index c25633cb2..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test65_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 2 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌───────────┬─────────────────────────────────────────────────────┬──────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 126|IGHV3-33 ◆ 743|IGHJ6 │ 240|IGKV1-9 ◆ 218|IGKJ5 │ -│ ├─────────────────────────────────────────────────────┼──────────────────────────────────┤ -│ │ 11 1111111111111111111 │ 1 11111111111 │ -│ │ 2444456667779901 1111112222222222333 │ 3455790 01111111111 │ -│ │ 2468911494697821 4567890123456789012 │ 6949106 90123456789 │ -│ │ ════════CDR3═══════ │ ════CDR3═══ │ -│reference │ LGTSSGKEISKALYNV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ VGLQATT CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ LGTSSGKEISKALYNV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ VGLQATT CQQ◦◦◦◦◦◦◦◦ │ -├───────────┼─────────────────────────────────────────────────────┼──────────────────────────────────┤ -│# n │ ................ ................... u const │ ....... ........... 
u const│ -│1 1 │ LGTGRDREVSQILFDV CAKTATTLGGYYSHGLDVW 1658 IGHA1 │ VDLRGAA CQQVIRSPLTF 3315 IGKC │ -│2 1 │ │ VDLRGAA CQQVIRSPLTF 56 IGKC │ -└───────────┴─────────────────────────────────────────────────────┴──────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test66_output b/enclone_main/testx/inputs/outputs/enclone_test66_output deleted file mode 100644 index 0a7755d84..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test66_output +++ /dev/null @@ -1,24 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 2 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌──────────────────────────────────────────┬─────────────────────────────────────┬────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 122.1.2|IGHV3-23 ◆ 51|IGHJ2 │ 299|IGKV4-1 ◆ 214|IGKJ1 │ -│ ├─────────────────────────────────────┼────────────────────────────┤ -│ │ 1111111111111111111 │ 1 11111111111 │ -│ │ 2 1111112222222222333 │ 1 11111112222 │ -│ │ 3 4567890123456789012 │ 0 34567890123 │ -│ │ ════════CDR3═══════ │ ════CDR3═══ │ -│reference │ V ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦LW │ V CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ L ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦LW │ V CQQ◦◦◦◦◦◦◦◦ │ -├──────────────────────────────────────────┼─────────────────────────────────────┼────────────────────────────┤ -│# barcode n gex cred T │ . ................... u const │ . ........... 
u const│ -│1 2 24404 66.5 │ L CAKAGPTESGYYVWYFDLW 918 IGHG1 │ L CQQYYNTPWAF 9223 IGKC │ -│ ACAGCTAGTGGCTCCA-1 19081 66.5 │ 918 │ 8174 │ -│ CATATGGTCAGTTGAC-1 24404 37.4 ◯ │ 511 │ 9223 │ -└──────────────────────────────────────────┴─────────────────────────────────────┴────────────────────────────┘ -barcode T -ACAGCTAGTGGCTCCA-1 -CATATGGTCAGTTGAC-1 ◯ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test67_output b/enclone_main/testx/inputs/outputs/enclone_test67_output deleted file mode 100644 index e69de29bb..000000000 diff --git a/enclone_main/testx/inputs/outputs/enclone_test68_output b/enclone_main/testx/inputs/outputs/enclone_test68_output deleted file mode 100644 index d2a74034a..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test68_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬───────────────────────────────────────┐ -│ │ CHAIN 1 │ -│ │ 88|IGHV1-69D ◆ 14|IGHD2-2 ◆ 743|IGHJ6│ -│ ├───────────────────────────────────────┤ -│ │ 111111111111111111111111111 │ -│ │ 111111222222222233333333334 │ -│ │ 456789012345678901234567890 │ -│ │ ════════════CDR3═══════════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ -├───────────┼───────────────────────────────────────┤ -│# n │ ........................... 
u const│ -│1 1 │ CARENHPVEYCSSTSCYKAYYYGMDVW 7 IGHD │ -└───────────┴───────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test69_output b/enclone_main/testx/inputs/outputs/enclone_test69_output deleted file mode 100644 index d2a74034a..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test69_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬───────────────────────────────────────┐ -│ │ CHAIN 1 │ -│ │ 88|IGHV1-69D ◆ 14|IGHD2-2 ◆ 743|IGHJ6│ -│ ├───────────────────────────────────────┤ -│ │ 111111111111111111111111111 │ -│ │ 111111222222222233333333334 │ -│ │ 456789012345678901234567890 │ -│ │ ════════════CDR3═══════════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ -├───────────┼───────────────────────────────────────┤ -│# n │ ........................... u const│ -│1 1 │ CARENHPVEYCSSTSCYKAYYYGMDVW 7 IGHD │ -└───────────┴───────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test6_output b/enclone_main/testx/inputs/outputs/enclone_test6_output deleted file mode 100644 index b86d333ec..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test6_output +++ /dev/null @@ -1,13 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌──────┬──────────────────────────────────────────────────────────────────────────────────────────┬─────────────────────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2 │ -│ ├──────────────────────────────────────────────────────────────────────────────────────────┼─────────────────────────────────────────────────────────────┤ -│# n │ u r cdr3_dna cdr3_len vjlen │ u r cdr3_dna cdr3_len vjlen│ -│1 1 │ 2 1292 TGTGCGAGGTCCTTTTTCGGGGATACAGCTATGGTTATGTTCCAGGCGTTCGACCCCTGG 20 430 │ 8 2766 TGTCAGGCGTGGGACAGCAGCACTGTGGTATTC 11 376│ -│2 2 │ │ 6 1964 
TGTCAGGCGTGGGACAGCAGCACTGTGGTATTC 11 376│ -└──────┴──────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test71_output b/enclone_main/testx/inputs/outputs/enclone_test71_output deleted file mode 100644 index 003e04965..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test71_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌──────────────────────────────┬──────────────────────────────────┬─────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2│ -│ ├──────────────────────────────────┼─────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 11111112222222222333 │ 00000111111 │ -│ │ 34567890123456789012 │ 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -├──────────────────────────────┼──────────────────────────────────┼─────────────────────────┤ -│# datasets n donors gex │ .................... u const │ ........... 
u const │ -│1 toast 1 d 2743 │ CARSFFGDTAMVMFQAFDPW 2 IGHD │ CQAWDSSTVVF 8 IGLC2 │ -│2 toast 2 d 2769 │ │ CQAWDSSTVVF 6 IGLC2 │ -└──────────────────────────────┴──────────────────────────────────┴─────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test72_output b/enclone_main/testx/inputs/outputs/enclone_test72_output deleted file mode 100644 index 98c3d7668..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test72_output +++ /dev/null @@ -1,4 +0,0 @@ - -middle_mean_umis_heavy,middle_mean_umis_light,n_twothreesie -4.47,7.44,259 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test73_output b/enclone_main/testx/inputs/outputs/enclone_test73_output deleted file mode 100644 index 574ba2411..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test73_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬──────────────────────────────────────────────────┬────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 88|IGHV1-69D ◆ 743|IGHJ6 │ 220.1.1|IGKV1-12 ◆ 217|IGKJ4 │ -│ ├──────────────────────────────────────────────────┼────────────────────────────────────────┤ -│ │ 11 11111111111111111 │ 11 11111111111 111 │ -│ │ 344445556778889900 11111122222222223 │ 334445579900 01111111111 222 │ -│ │ 602891238260385612 45678901234567890 │ 040391201725 90123456789 456 │ -│ │ ═══════CDR3══════ │ ════CDR3═══ │ -│reference │ VCASSAISGIAQQISTLS ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ SAVTGSSYDSEA CQQ◦◦◦◦◦◦◦◦ KVE │ -│donor ref │ VCASSAISGIAQQISTLS ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ SAISPSRFHNEA CQQ◦◦◦◦◦◦◦◦ KVE │ -├───────────┼──────────────────────────────────────────────────┼────────────────────────────────────────┤ -│# n │ .................. ................. u const │ ............ ........... ... 
u const│ -│1 1 │ MCTNSTVNATTHKINTMR CARGRNWNHVPYGMDVW 3 IGHG1 │ SAISPSRFHNEA CQQANSYPLTF KLD 2 IGKC │ -└───────────┴──────────────────────────────────────────────────┴────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test74_output b/enclone_main/testx/inputs/outputs/enclone_test74_output deleted file mode 100644 index 6bea08b23..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test74_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 9 CELLS - -[1.1] CLONOTYPE = 9 CELLS -┌───────────┬─────────────────────────────────┬───────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 126|IGHV3-33 ◆ 743|IGHJ6 │ 275|IGKV2D-28 ◆ 214|IGKJ1│ -│ ├─────────────────────────────────┼───────────────────────────┤ -│ │ 11111111111111111 │ 11111111111 │ -│ │ 11111122222222223 │ 11111111222 │ -│ │ 45678901234567890 │ 23456789012 │ -│ │ ═══════CDR3══════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CMQ◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CMQ◦◦◦◦◦◦◦◦ │ -├───────────┼─────────────────────────────────┼───────────────────────────┤ -│# n │ ................. u const │ ........... 
u const │ -│1 9 │ CARGYEDFTMKYGMDVW 166 IGHG1 │ CMQALQTPQTF 398 IGKC │ -└───────────┴─────────────────────────────────┴───────────────────────────┘ -utr_id2 - 274 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test75_output b/enclone_main/testx/inputs/outputs/enclone_test75_output deleted file mode 100644 index 68e563143..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test75_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬───────────────────────────────────────────────────────────────┬───────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 116|IGHV3-20 ◆ 32|IGHD6-13 ◆ 54|IGHJ4 │ 254|IGKV1D-39 ◆ 214|IGKJ1│ -│ ├───────────────────────────────────────────────────────────────┼───────────────────────────┤ -│ │ 11111 111111111111111 │ 11111111111 │ -│ │ 112233344555666777777999900011 111111222222222 │ 01111111111 │ -│ │ 237098913406136279035679235636901 456789012345678 │ 90123456789 │ -│ │ ══════CDR3═════ │ ════CDR3═══ │ -│reference │ FGVVEGVRGGFTGSRGSINGSTGANANSSATAL ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ FGVVEGVRGGFTGSRGSINGSTGANANSSATAL ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQQ◦◦◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────────────────────────────────┼───────────────────────────┤ -│# n │ ................................. ............... u const │ ........... 
u const │ -│1 1 │ LGILEGLQGRCSAHRGSISSGIGANSSTSASAV CAKDGYSSSWYVVDW 4 IGHG1 │ CQQSYSTPRTF 7 IGKC │ -└───────────┴───────────────────────────────────────────────────────────────┴───────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test76_output b/enclone_main/testx/inputs/outputs/enclone_test76_output deleted file mode 100644 index 30a53c380..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test76_output +++ /dev/null @@ -1,594 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -IGHA1 - -IGHA2 - -IGHD - -IGHE - -IGHG1 - -IGHG2 - -IGHG3 - -IGHG4 - -IGHM - -undetermined - - diff --git a/enclone_main/testx/inputs/outputs/enclone_test7_output b/enclone_main/testx/inputs/outputs/enclone_test7_output deleted file mode 100644 index b4afa6384..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test7_output +++ /dev/null @@ -1,25 +0,0 @@ - -[1] GROUP 
= 1 CLONOTYPES = 49 CELLS - -[1.1] CLONOTYPE = 49 CELLS -┌──────────────────┬─────────────────────────────────────────────────────────────┬─────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159.1.1|IGHV3-7 ◆ 21|IGHD3-3 ◆ 55|IGHJ4 │ 377|IGLV4-69 ◆ 316|IGLJ3 │ -│ ├─────────────────────────────────────────────────────────────┼─────────────────────────────────────────────┤ -│ │ 1 111111111111111111 11123 │ 11111111111 │ -│ │ 571 111111222222222233 55533 │ 11111111122 │ -│ │ 1270 456789012345678901 556722 │ 12345678901 │ -│ │ ═══════CDR3═══════ │ ════CDR3═══ │ -│reference │ EMYA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQT◦◦◦◦◦◦◦◦ │ -│donor ref │ EMYA ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ CQT◦◦◦◦◦◦◦◦ │ -├──────────────────┼─────────────────────────────────────────────────────────────┼─────────────────────────────────────────────┤ -│# n near far │ xxxx .................. u const var clen cdiff │ ........... u const var clen cdiff│ -│1 34 0 3 │ EMFA CAREPLYYDFWSAYFDYW 897 IGHG1 AGATTT 182 67G │ CQTWGTGIRVF 4715 IGLC3 211 │ -│2 2 1 4 │ EMFA CAREPLYYDFWSAYFDYW 6378 IGHG1 GGATTT 182 67G │ CQTWGTGIRVF 15594 IGLC3 211 │ -│3 2 1 4 │ EMYA CAREPLYYDFWSAYFDYW 913 IGHG1 AGATAT 182 67G │ CQTWGTGIRVF 695 IGLC3 211 │ -│4 1 1 4 │ EMFA CAREPLYYDFWSAYFDYW 315 IGHG1 AGATTC 182 67G │ CQTWGTGIRVF 1348 IGLC3 211 │ -│5 1 3 4 │ E-FA CAREPLYYDFWSAYFDYW 53 IGHG1 A---TT 182 67G │ CQTWGTGIRVF 833 IGLC3 211 │ -│6 1 0 3 │ EMFA CAREPLYYDFWSAYFDYW 15 ? 
AGATTT 15 +15 │ CQTWGTGIRVF 890 IGLC3 211 │ -│7 8 │ │ CQTWGTGIRVF 700 IGLC3 211 │ -└──────────────────┴─────────────────────────────────────────────────────────────┴─────────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test81_output b/enclone_main/testx/inputs/outputs/enclone_test81_output deleted file mode 100644 index d6b9d8080..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test81_output +++ /dev/null @@ -1,24 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌───────────┬──────────────────────────────────┬─────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2│ -│ ├──────────────────────────────────┼─────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 11111112222222222333 │ 00000111111 │ -│ │ 34567890123456789012 │ 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -├───────────┼──────────────────────────────────┼─────────────────────────┤ -│# n │ .................... u const │ ........... 
u const │ -│1 1 │ CARSFFGDTAMVMFQAFDPW 2 IGHD │ CQAWDSSTVVF 8 IGLC2 │ -│2 2 │ │ CQAWDSSTVVF 6 IGLC2 │ -└───────────┴──────────────────────────────────┴─────────────────────────┘ -barcode IG IG.*_g_%_cell IGN IG.*_g_% -GCTGGGTTCAACCAAC-1 0.55 0.55 0.55 0.55 -ACGTCAAAGTGGTAGC-1 0.24 0.24 0.27 0.27 -ATGTGTGAGAGTACCG-1 0.29 0.29 0.27 0.27 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test82_output b/enclone_main/testx/inputs/outputs/enclone_test82_output deleted file mode 100644 index 6cf22abd9..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test82_output +++ /dev/null @@ -1,31 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 5 CELLS - -[1.1] CLONOTYPE = 5 CELLS -┌───────────────────────────────────┬───────────────────────────────────────────────────────┬───────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 85|IGHV1-69 ◆ 743|IGHJ6 │ 251|IGKV1D-33 ◆ 215|IGKJ2 │ -│ ├───────────────────────────────────────────────────────┼───────────────────────────────────────────┤ -│ │ 1111 111111111111111111111 11 │ 111 11111111111 1 │ -│ │ 4577778990001 111111222222222233333 34 │ 5567777899000 01111111111 2 │ -│ │ 5124677261241 456789012345678901234 94 │ 3814578968246 90123456789 6 │ -│ │ ═════════CDR3════════ │ ════CDR3═══ │ -│reference │ GAIGANTKTLSLV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW TS │ YQPNLTGGISEIT CQQ◦◦◦◦◦◦◦◦ E │ -│donor ref │ GAIGANTKTLSLV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW TS │ YQPNLTGGISEIT CQQ◦◦◦◦◦◦◦◦ E │ -├───────────────────────────────────┼───────────────────────────────────────────────────────┼───────────────────────────────────────────┤ -│# barcode n entropy │ xxxxxx....... ..................... .. u const │ ............. ........... . 
u const│ -│1 4 8.31 │ DSVGPNTQILNLV CARAQRHDFWGGYYHYGMDVW TS 2055 IGHG1 │ HQPNLAGGVGGVT CQQYDNLPHTF G 17260 IGKC │ -│ AACTGGTTCGCCAAAT-1 8.31 │ 65 │ 172 │ -│ CACATAGCAGCTGTAT-1 6.98 │ 93 │ 502 │ -│ GACTACAAGTTGTAGA-1 7.75 │ 3312 │ 17657 │ -│ TCGCGAGAGGCCCTTG-1 9.01 │ 2055 │ 17260 │ -│2 1 5.72 │ GAIGANTQILNLV CARAQRHDFWGGYYHYGMDVW TS 30 IGHG1 │ HQPNLAGGVGGVT CQQYDNLPHTF G 3210 IGKC │ -│ AAGTCTGCATTCTTAC-1 5.72 │ 30 │ 3210 │ -└───────────────────────────────────┴───────────────────────────────────────────────────────┴───────────────────────────────────────────┘ -barcode entropy entropy_cell -AACTGGTTCGCCAAAT-1 8.31 8.31 -CACATAGCAGCTGTAT-1 8.31 6.98 -GACTACAAGTTGTAGA-1 8.31 7.75 -TCGCGAGAGGCCCTTG-1 8.31 9.01 -AAGTCTGCATTCTTAC-1 5.72 5.72 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test83_output b/enclone_main/testx/inputs/outputs/enclone_test83_output deleted file mode 100644 index f5e87767c..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test83_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌────────────┬──────────────────────────────────┬─────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2│ -│ ├──────────────────────────────────┼─────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 11111112222222222333 │ 00000111111 │ -│ │ 34567890123456789012 │ 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -├────────────┼──────────────────────────────────┼─────────────────────────┤ -│# n dref │ .................... u const │ ........... 
u const │ -│1 1 0 │ CARSFFGDTAMVMFQAFDPW 2 IGHD │ CQAWDSSTVVF 8 IGLC2 │ -└────────────┴──────────────────────────────────┴─────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test84_output b/enclone_main/testx/inputs/outputs/enclone_test84_output deleted file mode 100644 index 19ad0a51e..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test84_output +++ /dev/null @@ -1,48 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌───────────┬──────────────────────────────────────────────────────┬───────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 81|IGHV1-58 ◆ 743|IGHJ6 │ 266|IGKV2-28 ◆ 214|IGKJ1 │ -│ ├──────────────────────────────────────────────────────┼───────────────────────────────────┤ -│ │ 1 11111111111111111111111 1 │ 11111111111 1 │ -│ │ 444577778990 11111122222222223333333 4 │ 344557 11111111222 2 │ -│ │ 689923473047 45678901234567890123456 5 │ 336123 23456789012 9 │ -│ │ ══════════CDR3═════════ │ ════CDR3═══ │ -│reference │ TTSRGSGNQRTE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW S │ TRQSNY CMQ◦◦◦◦◦◦◦◦ E │ -│donor ref │ TTSRGSGNQRTE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW S │ TRQSNY CMQ◦◦◦◦◦◦◦◦ E │ -├───────────┼──────────────────────────────────────────────────────┼───────────────────────────────────┤ -│# n │ .xxx.x....xx ....................... . u const │ .xxxxx ........... . 
u const│ -│1 1 │ NTTRGNSKHGTE CAADRQLWSRSPGDYIYYGMQVW A 107 IGHG1 │ IRQSNF CMQALQTPWTF E 454 IGKC │ -│2 1 │ NTSRGSSKHGTE CAADRQLWSRSPGDYIYYGMQVW A 73 IGHG1 │ IRHSYY CMQALQTPWTF E 2193 IGKC │ -│3 1 │ NTTRGNSKHGTE CAADRQLWSRSPGDYIYYGMQVW A 16 IGHG1 │ IKHSYF CMQALQTPWTF E 29 IGKC │ -└───────────┴──────────────────────────────────────────────────────┴───────────────────────────────────┘ - -CLUSTALW - -1.1.1 MDWIWRILFLVGAATGAHSQMQLVQSGPEVKKPGTSVKVSCKASGFNFTTSAVQWVRQAR 60 -1.1.2 MDWIWRILFLVGAATGAHSQMQLVQSGPEVKKPGTSVKVSCKASGFNFTSSAVQWVRQAR 60 -1.1.3 MDWIWRILFLVGAATGAHSQMQLVQSGPEVKKPGTSVKVSCKASGFNFTTSAVQWVRQAR 60 - *************************************************:********** - -1.1.1 GQRLEWIGWIVVGNSNTKYAQKFHERVTITGDMSTSTAYMELSSLRSEDTAVYYCAADRQ 60 -1.1.2 GQRLEWIGWIVVGSSNTKYAQKFHERVTITGDMSTSTAYMELSSLRSEDTAVYYCAADRQ 60 -1.1.3 GQRLEWIGWIVVGNSNTKYAQKFHERVTITGDMSTSTAYMELSSLRSEDTAVYYCAADRQ 60 - *************.********************************************** - -1.1.1 LWSRSPGDYIYYGMQVWGQGTTVTVASMRLPAQLLGLLMLWVSGSSGDIVMTQSPLSLPV 60 -1.1.2 LWSRSPGDYIYYGMQVWGQGTTVTVASMRLPAQLLGLLMLWVSGSSGDIVMTQSPLSLPV 60 -1.1.3 LWSRSPGDYIYYGMQVWGQGTTVTVASMRLPAQLLGLLMLWVSGSSGDIVMTQSPLSLPV 60 - ************************************************************ - -1.1.1 IPGEPASISCRSSQSLLHSNGYNYLDWYLQKPGQSPQLLIFLGSNRASGVPDRFSGSGSG 60 -1.1.2 IPGEPASISCRSSHSLLHSYGYNYLDWYLQKPGQSPQLLIYLGSNRASGVPDRFSGSGSG 60 -1.1.3 IPGEPASISCKSSHSLLHSYGYNYLDWYLQKPGQSPQLLIFLGSNRASGVPDRFSGSGSG 60 - **********:**:***** ********************:******************* - -1.1.1 TDFTLKISRVEAEDVGVYYCMQALQTPWTFGQGTKVEIK 39 -1.1.2 TDFTLKISRVEAEDVGVYYCMQALQTPWTFGQGTKVEIK 39 -1.1.3 TDFTLKISRVEAEDVGVYYCMQALQTPWTFGQGTKVEIK 39 - *************************************** - diff --git a/enclone_main/testx/inputs/outputs/enclone_test85_output b/enclone_main/testx/inputs/outputs/enclone_test85_output deleted file mode 100644 index 3e89f7104..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test85_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 1 
CLONOTYPES = 24 CELLS - -[1.1] CLONOTYPE = 24 CELLS -┌───────────┬───────────────────────────────────────────────────────────────┬────────────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 191.1.1|IGHV4-39 ◆ 743|IGHJ6 │ 284.1.1|IGKV3-15 ◆ 215|IGKJ2 │ -│ ├───────────────────────────────────────────────────────────────┼────────────────────────────────────────────┤ -│ │ 111 1111111111111111111111 │ 11 111111111111 │ -│ │ 23555566667778888999011 2222222233333333334444 │ 1123455556779900 000111111111 │ -│ │ 96258902485791237678909 2345678901234567890123 │ 5837002780566746 789012345678 │ -│ │ ═════════CDR3═════════ │ ════CDR3════ │ -│reference │ LLGSSYYGIPISYSGSNISVLSV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ PTMRLSLQKGTGSLVY CQQ◦◦◦◦◦◦◦◦◦ │ -│donor ref │ LLGSGYYGIPISYSGSNISVLSV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ PTMRLSLQKGTGSLVY CQQ◦◦◦◦◦◦◦◦◦ │ -├───────────┼───────────────────────────────────────────────────────────────┼────────────────────────────────────────────┤ -│# n │ ....................... ...................... u const │ ................ ............ 
u const│ -│1 1 │ LVDFAYYAVPLTYTGITLSILIL CARAPEDTSRWPQYNYSGLDVW 2865 IGHA1 │ PSLRVNLQKGAAGLVF CHQYNNWPTFTF 3919 IGKC │ -│2 14 │ LVDFAYYAVPLTYTGITLSILIL CARAPEDTSRWPQYNYSGLDVW 1 IGHA1 │ │ -│3 9 │ │ PSLRVNLQKGAAGLVF CHQYNNWPTFTF 1 IGKC │ -└───────────┴───────────────────────────────────────────────────────────────┴────────────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test86_output b/enclone_main/testx/inputs/outputs/enclone_test86_output deleted file mode 100644 index f6ed43577..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test86_output +++ /dev/null @@ -1,78 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌───────────┬──────────────────────────────────┬─────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 195|IGHV4-59 ◆ 57|IGHJ5 │ 352|IGLV3-1 ◆ 314|IGLJ2│ -│ ├──────────────────────────────────┼─────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111 │ -│ │ 11111112222222222333 │ 00000111111 │ -│ │ 34567890123456789012 │ 56789012345 │ -│ │ ════════CDR3════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ CQAWD◦◦◦◦◦◦ │ -├───────────┼──────────────────────────────────┼─────────────────────────┤ -│# n │ .................... u const │ ........... 
u const │ -│1 1 │ CARSFFGDTAMVMFQAFDPW 2 IGHD │ CQAWDSSTVVF 8 IGLC2 │ -│2 2 │ │ CQAWDSSTVVF 6 IGLC2 │ -└───────────┴──────────────────────────────────┴─────────────────────────┘ - -CLUSTALW - -1.1.1 ATGAAACATCTGTGGTTCTTCCTTCTCCTGGTGGCAGCTCCCAGATGGGTCCTGTCCCAG 60 -1.1.2 ------------------------------------------------------------ 60 - - -1.1.1 GTGCAGCTGCAGGAGTCGGGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACC 60 -1.1.2 ------------------------------------------------------------ 60 - - -1.1.1 TGCACTGTCTCTGGTGGCTCCATCAGTAGTTACTACTGGAGCTGGATCCGGCAGCCCCCA 60 -1.1.2 ------------------------------------------------------------ 60 - - -1.1.1 GGGAAGGGACTGGAGTGGATTGGGTATATCTATTACAGTGGGAGCACCAACTACAACCCC 60 -1.1.2 ------------------------------------------------------------ 60 - - -1.1.1 TCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCCAAGAACCAGTTCTCCCTGAAG 60 -1.1.2 ------------------------------------------------------------ 60 - - -1.1.1 CTGAGCTCTGTGACCGCTGCGGACACGGCCGTGTATTACTGTGCGAGGTCCTTTTTCGGG 60 -1.1.2 ------------------------------------------------------------ 60 - - -1.1.1 GATACAGCTATGGTTATGTTCCAGGCGTTCGACCCCTGGGGCCAGGGAACCCTGGTCACC 60 -1.1.2 ------------------------------------------------------------ 60 - - -1.1.1 GTCTCCTCAGATGGCATGGATCCCTCTCTTCCTCGGCGTCCTTGCTTACTGCACAGGATC 60 -1.1.2 ----------ATGGCATGGATCCCTCTCTTCCTCGGCGTCCTTGCTTACTGCACAGGATC 60 - ************************************************** - -1.1.1 CGTGGCCTCCTATGAGCTGACTCAGCCACCCTCAGTGTCCGTGTCCCCAGGACAGACAGC 60 -1.1.2 CGTGGCCTCCTATGAGCTGACTCAGCCACCCTCAGTGTCCGTGTCCCCAGGACAGACAGC 60 - ************************************************************ - -1.1.1 CAGCATCACCTGCTCTGGAGATAAATTGGGGGATAAATATGCTTGCTGGTATCAGCAGAA 60 -1.1.2 CAGCATCACCTGCTCTGGAGATAAATTGGGGGATAAATATGCTTGCTGGTATCAGCAGAA 60 - ************************************************************ - -1.1.1 GCCAGGCCAGTCCCCTGTGCTGGTCATCTATCAAGATAGCAAGCGGCCCTCAGGGATCCC 60 -1.1.2 GCCAGGCCAGTCCCCTGTGCTGGTCATCTATCAAGATAGCAAGCGGCCCTCAGGGATCCC 60 - 
************************************************************ - -1.1.1 TGAGCGATTCTCTGGCTCCAACTCTGGGAACACAGCCACTCTGACCATCAGCGGGACCCA 60 -1.1.2 TGAGCGATTCTCTGGCTCCAACTCTGGGAACACAGCCACTCTGACCATCAGCGGGACCCA 60 - ************************************************************ - -1.1.1 GGCTATGGATGAGGCTGACTATTACTGTCAGGCGTGGGACAGCAGCACTGTGGTATTCGG 60 -1.1.2 GGCTATGGATGAGGCTGACTATTACTGTCAGGCGTGGGACAGCAGCACTGTGGTATTCGG 60 - ************************************************************ - -1.1.1 CGGAGGGACCAAGCTGACCGTCCTAG 26 -1.1.2 CGGAGGGACCAAGCTGACCGTCCTAG 26 - ************************** - diff --git a/enclone_main/testx/inputs/outputs/enclone_test87_output b/enclone_main/testx/inputs/outputs/enclone_test87_output deleted file mode 100644 index e2b4c7e13..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test87_output +++ /dev/null @@ -1,26 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌───────────┬──────────────────────────────────────────────────────┬───────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 81|IGHV1-58 ◆ 743|IGHJ6 │ 266|IGKV2-28 ◆ 214|IGKJ1 │ -│ ├──────────────────────────────────────────────────────┼───────────────────────────────────┤ -│ │ 1 11111111111111111111111 1 │ 11111111111 1 │ -│ │ 444577778990 11111122222222223333333 4 │ 344557 11111111222 2 │ -│ │ 689923473047 45678901234567890123456 5 │ 336123 23456789012 9 │ -│ │ ══════════CDR3═════════ │ ════CDR3═══ │ -│reference │ TTSRGSGNQRTE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW S │ TRQSNY CMQ◦◦◦◦◦◦◦◦ E │ -│donor ref │ TTSRGSGNQRTE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW S │ TRQSNY CMQ◦◦◦◦◦◦◦◦ E │ -├───────────┼──────────────────────────────────────────────────────┼───────────────────────────────────┤ -│# n │ .xxx.x....xx ....................... . u const │ .xxxxx ........... . 
u const│ -│1 1 │ NTTRGNSKHGTE CAADRQLWSRSPGDYIYYGMQVW A 107 IGHG1 │ IRQSNF CMQALQTPWTF E 454 IGKC │ -│2 1 │ NTSRGSSKHGTE CAADRQLWSRSPGDYIYYGMQVW A 73 IGHG1 │ IRHSYY CMQALQTPWTF E 2193 IGKC │ -│3 1 │ NTTRGNSKHGTE CAADRQLWSRSPGDYIYYGMQVW A 16 IGHG1 │ IKHSYF CMQALQTPWTF E 29 IGKC │ -└───────────┴──────────────────────────────────────────────────────┴───────────────────────────────────┘ - -3 279 -1 MDWIWRILFLVGAATGAHSQMQLVQSGPEVKKPGTSVKVSCKASGFNFTTSAVQWVRQARGQRLEWIGWIVVGNSNTKYAQKFHERVTITGDMSTSTAYMELSSLRSEDTAVYYCAADRQLWSRSPGDYIYYGMQVWGQGTTVTVASMRLPAQLLGLLMLWVSGSSGDIVMTQSPLSLPVIPGEPASISCRSSQSLLHSNGYNYLDWYLQKPGQSPQLLIFLGSNRASGVPDRFSGSGSGTDFTLKISRVEAEDVGVYYCMQALQTPWTFGQGTKVEIK -2 MDWIWRILFLVGAATGAHSQMQLVQSGPEVKKPGTSVKVSCKASGFNFTSSAVQWVRQARGQRLEWIGWIVVGSSNTKYAQKFHERVTITGDMSTSTAYMELSSLRSEDTAVYYCAADRQLWSRSPGDYIYYGMQVWGQGTTVTVASMRLPAQLLGLLMLWVSGSSGDIVMTQSPLSLPVIPGEPASISCRSSHSLLHSYGYNYLDWYLQKPGQSPQLLIYLGSNRASGVPDRFSGSGSGTDFTLKISRVEAEDVGVYYCMQALQTPWTFGQGTKVEIK -3 MDWIWRILFLVGAATGAHSQMQLVQSGPEVKKPGTSVKVSCKASGFNFTTSAVQWVRQARGQRLEWIGWIVVGNSNTKYAQKFHERVTITGDMSTSTAYMELSSLRSEDTAVYYCAADRQLWSRSPGDYIYYGMQVWGQGTTVTVASMRLPAQLLGLLMLWVSGSSGDIVMTQSPLSLPVIPGEPASISCKSSHSLLHSYGYNYLDWYLQKPGQSPQLLIFLGSNRASGVPDRFSGSGSGTDFTLKISRVEAEDVGVYYCMQALQTPWTFGQGTKVEIK - diff --git a/enclone_main/testx/inputs/outputs/enclone_test88_output b/enclone_main/testx/inputs/outputs/enclone_test88_output deleted file mode 100644 index 735a9b4d6..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test88_output +++ /dev/null @@ -1,26 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 3 CELLS - -[1.1] CLONOTYPE = 3 CELLS -┌───────────┬──────────────────────────────────────────────────────┬───────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 81|IGHV1-58 ◆ 743|IGHJ6 │ 266|IGKV2-28 ◆ 214|IGKJ1 │ -│ ├──────────────────────────────────────────────────────┼───────────────────────────────────┤ -│ │ 1 11111111111111111111111 1 │ 11111111111 1 │ -│ │ 444577778990 11111122222222223333333 4 │ 344557 11111111222 2 │ -│ │ 
689923473047 45678901234567890123456 5 │ 336123 23456789012 9 │ -│ │ ══════════CDR3═════════ │ ════CDR3═══ │ -│reference │ TTSRGSGNQRTE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW S │ TRQSNY CMQ◦◦◦◦◦◦◦◦ E │ -│donor ref │ TTSRGSGNQRTE ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW S │ TRQSNY CMQ◦◦◦◦◦◦◦◦ E │ -├───────────┼──────────────────────────────────────────────────────┼───────────────────────────────────┤ -│# n │ ..x..x...... ....................... . u const │ .xx.xx ........... . u const│ -│1 1 │ NTTRGNSKHGTE CAADRQLWSRSPGDYIYYGMQVW A 107 IGHG1 │ IRQSNF CMQALQTPWTF E 454 IGKC │ -│2 1 │ NTSRGSSKHGTE CAADRQLWSRSPGDYIYYGMQVW A 73 IGHG1 │ IRHSYY CMQALQTPWTF E 2193 IGKC │ -│3 1 │ NTTRGNSKHGTE CAADRQLWSRSPGDYIYYGMQVW A 16 IGHG1 │ IKHSYF CMQALQTPWTF E 29 IGKC │ -└───────────┴──────────────────────────────────────────────────────┴───────────────────────────────────┘ - -3 839 -1 ATGGACTGGATTTGGAGGATCCTCTTCTTGGTGGGAGCAGCGACAGGTGCCCACTCCCAAATGCAGCTGGTGCAGTCTGGGCCTGAGGTGAAGAAGCCTGGGACCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGATTCAACTTTACGACCTCTGCTGTGCAGTGGGTGCGACAGGCTCGAGGACAACGCCTTGAGTGGATAGGATGGATCGTCGTTGGAAATAGTAACACAAAGTACGCACAGAAGTTCCACGAAAGAGTCACCATTACCGGGGACATGTCCACGAGCACAGCCTACATGGAGCTGAGCAGCCTGAGATCCGAAGACACGGCCGTGTATTACTGTGCGGCAGATAGGCAGCTATGGTCACGGAGTCCAGGGGACTACATCTACTACGGTATGCAAGTCTGGGGCCAAGGGACCACGGTCACCGTCGCCTCAGATGAGGCTCCCTGCTCAGCTCCTGGGGCTGCTAATGCTCTGGGTCTCTGGATCCAGTGGGGATATTGTGATGACTCAGTCTCCACTCTCCCTGCCCGTCATACCTGGAGAGCCGGCCTCCATCTCCTGCAGGTCTAGTCAGAGCCTCCTGCATAGTAATGGATACAACTATTTGGATTGGTACCTGCAGAAGCCAGGGCAGTCTCCACAGCTCCTGATCTTTTTGGGTTCTAATCGGGCCTCCGGGGTCCCTGACAGGTTCAGTGGCAGTGGATCAGGCACAGATTTTACACTGAAAATCAGCAGAGTGGAGGCTGAGGATGTTGGGGTTTATTACTGCATGCAAGCTCTACAAACTCCGTGGACGTTCGGCCAAGGGACCAAGGTGGAGATCAAAC 839 -2 
ATGGACTGGATTTGGAGGATCCTCTTCTTGGTGGGAGCAGCGACAGGTGCCCACTCCCAAATGCAGCTGGTGCAGTCTGGGCCTGAGGTGAAGAAGCCTGGGACCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGATTCAACTTTACTTCCTCTGCTGTGCAGTGGGTGCGACAGGCTCGTGGACAACGCCTTGAGTGGATAGGATGGATCGTCGTTGGAAGTAGTAACACAAAGTACGCACAGAAGTTCCACGAAAGAGTCACCATTACCGGGGACATGTCCACAAGCACAGCCTACATGGAGCTGAGCAGCCTGAGATCCGAGGACACGGCCGTGTATTACTGTGCGGCAGATAGGCAGCTATGGTCACGGAGTCCAGGGGACTACATCTACTACGGTATGCAAGTCTGGGGCCAAGGGACCACGGTCACCGTCGCCTCAGATGAGGCTCCCTGCTCAGCTCCTGGGGCTGCTAATGCTCTGGGTCTCTGGATCCAGTGGGGATATTGTGATGACTCAGTCTCCACTCTCCCTGCCCGTCATACCTGGAGAGCCGGCCTCCATCTCCTGCAGGTCTAGTCACAGCCTCCTGCATAGTTATGGATACAACTATTTGGATTGGTACCTGCAGAAGCCAGGGCAGTCTCCACAGCTCCTGATCTATTTGGGTTCTAATCGGGCCTCCGGGGTCCCTGACAGGTTCAGTGGCAGTGGATCAGGCACAGATTTTACACTGAAAATCAGCAGAGTGGAGGCTGAGGATGTTGGGGTTTATTACTGCATGCAAGCTCTACAAACTCCGTGGACGTTCGGCCAAGGGACCAAGGTGGAGATCAAAC 839 -3 ATGGACTGGATTTGGAGGATCCTCTTCTTGGTGGGAGCAGCGACAGGTGCCCACTCCCAAATGCAGCTGGTGCAGTCTGGGCCTGAGGTGAAGAAGCCTGGGACCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGATTCAACTTTACGACCTCTGCTGTGCAGTGGGTGCGACAGGCTCGAGGACAACGCCTTGAGTGGATAGGATGGATCGTCGTTGGAAATAGTAACACAAAGTACGCACAGAAGTTCCACGAAAGAGTCACCATTACCGGGGACATGTCCACGAGCACAGCCTACATGGAGCTGAGCAGCCTGAGATCCGAAGACACGGCCGTGTATTACTGTGCGGCAGATAGGCAGCTATGGTCACGGAGTCCAGGGGACTACATCTACTACGGTATGCAAGTCTGGGGCCAAGGGACCACGGTCACCGTCGCCTCAGATGAGGCTCCCTGCTCAGCTCCTGGGGCTGCTAATGCTCTGGGTCTCTGGATCCAGTGGGGATATTGTGATGACTCAGTCTCCACTCTCCCTGCCCGTCATACCTGGAGAGCCGGCCTCCATCTCCTGCAAATCTAGTCACAGCCTCCTGCATAGCTATGGATACAACTATTTGGATTGGTACCTGCAGAAGCCAGGGCAGTCTCCACAGCTCCTGATCTTTTTGGGTTCTAATCGGGCCTCCGGGGTCCCTGACAGGTTCAGTGGCAGTGGATCAGGCACAGATTTTACACTGAAAATCAGCAGAGTGGAGGCTGAGGATGTTGGGGTTTATTACTGCATGCAAGCTCTACAAACTCCGTGGACGTTCGGCCAAGGGACCAAGGTGGAGATCAAAC 839 - diff --git a/enclone_main/testx/inputs/outputs/enclone_test89_output b/enclone_main/testx/inputs/outputs/enclone_test89_output deleted file mode 100644 index 6a7d7a48b..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test89_output +++ /dev/null @@ -1,51 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 14 
CELLS - -[1.1] CLONOTYPE = 14 CELLS -┌───────────┬──────────────────────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 88|IGHV1-69D ◆ 56|IGHJ5 │ 234|IGKV1-5 ◆ 214|IGKJ1 │ -│ ├──────────────────────────────────────────────┼─────────────────────────────┤ -│ │ 111 1111111111111111 │ 11111111111 │ -│ │ 234456789001 1111112222222222 │ 7 01111111111 │ -│ │ 878932242071 4567890123456789 │ 1 90123456789 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ EKSSSGIGEEEV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ K CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ EKSSSGIGEEEV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ K CQQ◦◦◦◦◦◦◦◦ │ -├───────────┼──────────────────────────────────────────────┼─────────────────────────────┤ -│# n │ xxxxxxxxxx.. ................ u const │ . ........... u const│ -│1 4 │ DRSSNGLGEDDI CARDLGGRYYGSKDPW 3366 IGHG1 │ E CQQYNSYSWTF 6451 IGKC │ -│2 2 │ DKSSSGLGEDDI CARDLGGRYYGSKDPW 3417 IGHG1 │ E CQQYNSYSWTF 2874 IGKC │ -│3 1 │ ERTSNGLGEDDI CARDLGGRYYGSKDPW 4648 IGHG1 │ E CQQYNSYSWTF 6686 IGKC │ -│4 1 │ DRSSNGLGEDDI CARDLGGRYYGSKDPW 4323 IGHG1 │ E CQQYNSYSWTF 18435 IGKC │ -│5 1 │ DRSSNGLGEEDI CARDLGGRYYGSKDPW 3409 IGHG1 │ E CQQYNSYSWTF 12342 IGKC │ -│6 1 │ EKSSSGLGEDDI CARDLGGRYYGSKDPW 1809 IGHG1 │ E CQQYNSYSWTF 2810 IGKC │ -│7 1 │ DRSSSGLGEDDI CARDLGGRYYGSKDPW 1253 IGHG1 │ E CQQYNSYSWTF 1837 IGKC │ -│8 1 │ EKSSSGIGEDDI CARDLGGRYYGSKDPW 382 IGHG1 │ E CQQYNSYSWTF 881 IGKC │ -│9 1 │ EKSSNGLGEDDI CARDLGGRYYGSKDPW 73 IGHG1 │ E CQQYNSYSWTF 294 IGKC │ -│10 1 │ EKSSNGIGEEDI CARDLGGRYYGSKDPW 72 IGHG1 │ E CQQYNSYSWTF 249 IGKC │ -└───────────┴──────────────────────────────────────────────┴─────────────────────────────┘ - -((8:0.19,(((((1:0.25,5:0.75)I2:0.50,(3:1.44,4:0.56)I1:1.50)I3:1.12,10:1.88)I4:0.57,9:0.68)I5:1.31,((2:0.06,7:0.94)I6:0.84,6:0.16)I9:0.38)I8:0.62)I7:6.81)0; - -• -╚═════════════════════════════════════ • [6.81] - ╠═ 8 [0.19] - ╚═══ • [0.62] - ╠═══════ • [1.31] - ║ ╠═══ • [0.57] - ║ ║ ╠══════ • [1.12] - ║ ║ ║ ╠═══ • [0.50] - ║ ║ ║ ║ ╠═ 1 [0.25] - ║ ║ ║ ║ ╚════ 5 [0.75] - ║ 
║ ║ ╚════════ • [1.50] - ║ ║ ║ ╠════════ 3 [1.44] - ║ ║ ║ ╚═══ 4 [0.56] - ║ ║ ╚══════════ 10 [1.88] - ║ ╚════ 9 [0.68] - ╚══ • [0.38] - ╠═════ • [0.84] - ║ ╠═ 2 [0.06] - ║ ╚═════ 7 [0.94] - ╚═ 6 [0.16] - diff --git a/enclone_main/testx/inputs/outputs/enclone_test8_output b/enclone_main/testx/inputs/outputs/enclone_test8_output deleted file mode 100644 index 316b2705a..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test8_output +++ /dev/null @@ -1,13 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 20 CELLS - -[1.1] CLONOTYPE = 20 CELLS -┌───────┬─────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 629|TRBV4-1 ◆ 551|TRBJ2-4 │ 534|TRAV9-2 ◆ 420|TRAJ30│ -│ ├─────────────────────────────┼──────────────────────────┤ -│# n │ u const │ u const │ -│1 16 │ 5 TRBC2 │ 5 TRAC │ -│2 4 │ 7 TRBC2 │ │ -└───────┴─────────────────────────────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test90_output b/enclone_main/testx/inputs/outputs/enclone_test90_output deleted file mode 100644 index 353b0b0de..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test90_output +++ /dev/null @@ -1,20 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌──────────────────────────────────────────┬────────────────────────────────┬──────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 126|IGHV3-33 ◆ 743|IGHJ6 │ 266|IGKV2-28 ◆ 214|IGKJ1│ -│ ├────────────────────────────────┼──────────────────────────┤ -│ │ 11111111111111111 │ 11111111111 │ -│ │ 11111122222222223 │ 11111111222 │ -│ │ 45678901234567890 │ 23456789012 │ -│ │ ═══════CDR3══════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CMQ◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CMQ◦◦◦◦◦◦◦◦ │ -├──────────────────────────────────────────┼────────────────────────────────┼──────────────────────────┤ -│# barcode n gex cred T │ ................. u const │ ........... 
u const │ -│1 1 10548 1.7 │ CARGYEDFTMKYGMDVW 22 IGHG1 │ CMQALQTPQTF 138 IGKC │ -│ CATATGGCATATACCG-1 10548 1.7 ◯ │ 22 │ 138 │ -└──────────────────────────────────────────┴────────────────────────────────┴──────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test91_output b/enclone_main/testx/inputs/outputs/enclone_test91_output deleted file mode 100644 index dc7735b29..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test91_output +++ /dev/null @@ -1,27 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 8 CELLS - -[1.1] CLONOTYPE = 8 CELLS -┌──────────────────────────────────────────┬──────────────────────────────────┬───────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 126|IGHV3-33 ◆ 743|IGHJ6 │ 266|IGKV2-28 ◆ 214|IGKJ1 │ -│ ├──────────────────────────────────┼───────────────────────────┤ -│ │ 11111111111111111 │ 11111111111 │ -│ │ 11111122222222223 │ 11111111222 │ -│ │ 45678901234567890 │ 23456789012 │ -│ │ ═══════CDR3══════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CMQ◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CMQ◦◦◦◦◦◦◦◦ │ -├──────────────────────────────────────────┼──────────────────────────────────┼───────────────────────────┤ -│# barcode n gex cred T │ ................. u const │ ........... 
u const│ -│1 8 4913 65.8 │ CARGYEDFTMKYGMDVW 431 IGHG1 │ CMQALQTPQTF 3794 IGKC │ -│ AACACGTTCACAAACC-1 14668 70.2 │ 3363 │ 13547 │ -│ AGCCTAAGTGGACGAT-1 2416 64.4 │ 130 │ 398 │ -│ ATGTGTGCATACCATG-1 4913 63.9 │ 166 │ 386 │ -│ ATTGGTGAGCGGATCA-1 1829 65.8 │ 33 │ 338 │ -│ CCTCTGAAGATGGGTC-1 2258 64.0 │ 44 │ 239 │ -│ GGCTCGAGTAGCACGA-1 2940 64.8 │ 1500 │ 4283 │ -│ GGTGAAGCACTACAGT-1 10447 71.0 │ 1998 │ 14816 │ -│ TCAGATGAGTCCAGGA-1 6759 69.5 │ 431 │ 3794 │ -└──────────────────────────────────────────┴──────────────────────────────────┴───────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test92_output b/enclone_main/testx/inputs/outputs/enclone_test92_output deleted file mode 100644 index fd6cd2cd9..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test92_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 64 CELLS - -[1.1] CLONOTYPE = 64 CELLS -┌───────────┬────────────────────────────────┐ -│ │ CHAIN 1 │ -│ │ 226|IGKV1-27 ◆ 217|IGKJ4 │ -│ ├────────────────────────────────┤ -│ │ 11111111111 │ -│ │ 23334679 01111111111 │ -│ │ 00160628 90123456789 │ -│ │ ════CDR3═══ │ -│reference │ RSSVVKAS CQK◦◦◦◦◦◦◦◦ │ -│donor ref │ RSSVVKAS CQK◦◦◦◦◦◦◦◦ │ -├───────────┼────────────────────────────────┤ -│# n │ ........ ........... 
u const│ -│1 64 │ SFFIIKAG CQKYDSAPLTF 5 IGKC │ -└───────────┴────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test93_output b/enclone_main/testx/inputs/outputs/enclone_test93_output deleted file mode 100644 index 64ba46dc0..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test93_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 11 CELLS - -[1.1] CLONOTYPE = 11 CELLS -┌──────────────────────────────────────────┬─────────────────────────────────────────┬────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 124|IGHV3-30 ◆ 55|IGHJ4 │ 268|IGKV2-30 ◆ 216|IGKJ3 │ -│ ├─────────────────────────────────────────┼────────────────────────────────┤ -│ │ 1111111111111111 │ 111111111111 1 │ -│ │ 77788999 1111112222222222 │ 25 111111112222 3 │ -│ │ 67905256 4567890123456789 │ 10 234567890123 2 │ -│ │ ══════CDR3══════ │ ════CDR3════ │ -│reference │ KYADRNNT ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ VY CMQ◦◦◦◦◦◦◦◦◦ K │ -│donor ref │ KYADRNNT ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ VY CMQ◦◦◦◦◦◦◦◦◦ K │ -├──────────────────────────────────────────┼─────────────────────────────────────────┼────────────────────────────────┤ -│# datasets n n_123089 n_123085 │ ........ ................ u const │ .. ............ . 
u const│ -│1 123085,123089 11 10 1 │ EYVDRDST CATGGDFWGEGDFDFW 658 IGHG1 │ IH CMQGTHWPPFTF R 3479 IGKC │ -└──────────────────────────────────────────┴─────────────────────────────────────────┴────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test94_output b/enclone_main/testx/inputs/outputs/enclone_test94_output deleted file mode 100644 index 194777752..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test94_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 6 CELLS - -[1.1] CLONOTYPE = 6 CELLS -┌───────────┬──────────────────────────────────────────┬───────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 204|IGHV5-51 ◆ 22|IGHD3-9 ◆ 743|IGHJ6 │ 254|IGKV1D-39 ◆ 217|IGKJ4│ -│ ├──────────────────────────────────────────┼───────────────────────────┤ -│ │ 1111111111111111111111111 │ 11111111111 │ -│ │ 1111112222222222333333333 │ 01111111111 │ -│ │ 4567890123456789012345678 │ 90123456789 │ -│ │ ═══════════CDR3══════════ │ ════CDR3═══ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CQQ◦◦◦◦◦◦◦◦ │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦GMDVW │ CQQ◦◦◦◦◦◦◦◦ │ -├───────────┼──────────────────────────────────────────┼───────────────────────────┤ -│# n │ ......................... u const │ ........... 
u const │ -│1 6 │ CARLYYDILTGYYAEDYYYYGMDVW 1943 IGHG1 │ CQQSYSTPLTF 3941 IGKC │ -└───────────┴──────────────────────────────────────────┴───────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test95_output b/enclone_main/testx/inputs/outputs/enclone_test95_output deleted file mode 100644 index 184d0b3d7..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test95_output +++ /dev/null @@ -1,34 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 19 CELLS - -[1.1] CLONOTYPE = 19 CELLS -┌───────────┬──────────────────────────────────────┬───────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 204|IGHV5-51 ◆ 53|IGHJ3 │ 330|IGLV1-47 ◆ 316|IGLJ3 │ -│ ├──────────────────────────────────────┼───────────────────────────────┤ -│ │ 1111111111111111 │ 11111111111 │ -│ │ 6777 1111112222222222 │ 467 00011111111 │ -│ │ 5257 4567890123456789 │ 490 78901234567 │ -│ │ ══════CDR3══════ │ ════CDR3═══ │ -│reference │ WGDR ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ SSN CAAWD◦◦◦◦◦◦ │ -│donor ref │ WGDR ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ SSN CAAWD◦◦◦◦◦◦ │ -├───────────┼──────────────────────────────────────┼───────────────────────────────┤ -│# n │ x... ................ u const │ ... ........... 
u const│ -│1 15 │ FAHT CARPKSDYIIDAFDIW 105 IGHG1 │ NRS CAAWDDSLWVF 539 IGLC3│ -│2 1 │ FAHT CARPKSDYIIDAFDIW 8949 IGHA1 │ NRS CAAWDDSLWVF 11485 IGLC3│ -│3 1 │ FAHT CARPKSDYIIDAFDIW 20 IGHG1 │ NRS CAAWDDSLWVF 21 IGLC2│ -│4 1 │ WAHT CARPKSDYIIDAFDIW 15 IGHG1 │ NRS CAAWDDSLWVF 711 IGLC3│ -│5 1 │ FAHT CARPKSDYIIDAFDIW 6 IGHG1 │ NRS CAAWDDSLWVF 5 IGLC1│ -└───────────┴──────────────────────────────────────┴───────────────────────────────┘ - -• -╚═════════════════════════════════════════════════ • [6.00] - ╠═ 4 [0.00,IGHG1] - ╚════════════════ • [2.00] - ╠═ 1 [0.00,IGHG1] - ╚═ • [0.00] - ╠═ 2 [0.00,IGHA1] - ╚═ • [0.00] - ╠═ 3 [0.00,IGHG1] - ╚═ 5 [0.00,IGHG1] - diff --git a/enclone_main/testx/inputs/outputs/enclone_test96_output b/enclone_main/testx/inputs/outputs/enclone_test96_output deleted file mode 100644 index 6691890ad..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test96_output +++ /dev/null @@ -1,32 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 41 CELLS - -[1.1] CLONOTYPE = 41 CELLS -┌───────────┬─────────────────────────────────────────────────────────┬─────────────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 159|IGHV3-7 ◆ 53|IGHJ3 │ 379|IGLV5-37 ◆ 316|IGLJ3 │ -│ ├─────────────────────────────────────────────────────────┼─────────────────────────────────────┤ -│ │ 1 1111111111111111111 │ 11111111111 │ -│ │ 23344445667778899991 1111112222222222333 │ 235566799 11111122222 │ -│ │ 22324893381572535793 4567890123456789012 │ 170689024 45678901234 │ -│ │ ════════CDR3═══════ │ ════CDR3═══ │ -│reference │ LPGAGSSSLNQEYVRANLL◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ VRSYLYYAA CMIW◦◦◦◦◦◦◦ │ -│donor ref │ LPGAGSSSLNQEYVRANLL◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W │ VRSYLYYAA CMIW◦◦◦◦◦◦◦ │ -├───────────┼─────────────────────────────────────────────────────────┼─────────────────────────────────────┤ -│# n │ xxxxxxxxxxxxxxxxxxxx ........x........x. u const │ xxxxxxxxx ........... 
u const│ -│1 10 │ LPGAGSSSPNEEYVRANLLY CARDQNFDESSGYDAFDIW 2795 IGHG1 │ VRGYLYYAA CMIWPSNAWVF 14994 IGLC2│ -│2 8 │ LPGAGSSSLNEEYMRANLLY CARDQNFDESSGYDAFDIW 3944 IGHG1 │ VRSYLYYAA CMIWPSNAWVF 12199 IGLC2│ -│3 7 │ LPGAKSNSLNEQYVRANLLY CARDQNFDESSGYDAFDIW 2314 IGHG1 │ VRSYLYYTA CMIWPSNAWVF 10274 IGLC2│ -│4 5 │ LPGAGSSSLNEEYVRANLLY CARDQNFDESSGYDAFDIW 11 IGHG1 │ VRSYLYYAG CMIWPSNAWVF 26 IGLC2│ -│5 2 │ LPGAGSSSLNEEYMRANLLY CARDQNFDESSGYDAFDIW 1290 IGHG1 │ VRSYLYYAA CMIWPSNAWVF 6825 IGLC2│ -│6 1 │ LPGAGSSSLNEEYVRANLLY CARDQNFDDSSGYDAFDIW 3893 IGHG1 │ VRSYLYYAA CMIWPSNAWVF 15622 IGLC2│ -│7 1 │ LPGAGSSSLNEEYVRANLLY CARDQNFDESSGYDAFDIW 3302 IGHG1 │ VRSYLYYAA CMIWPSNAWVF 5256 IGLC2│ -│8 1 │ LPGAGSSSPNEEYVRANLLY CARDQNFDESSGYDAFDIW 3067 IGHG1 │ VRGYLYYAA CMIWPSNAWVF 6429 IGLC2│ -│9 1 │ LPGAGRNSLNEEYVRGNLLY CARDQNFDESSGYDAFDIW 2724 IGHG3 │ VRGYLYYAA CMIWPSNAWVF 5775 IGLC2│ -│10 1 │ LPGAGSSSLNEEYMRANLLY CARDQNFDESSGYDAFDIW 2504 IGHA1 │ VRSYLYYAA CMIWPSNAWVF 14551 IGLC2│ -│11 1 │ LPGAGSSSLNEEYVRANLLY CARDQNFDESSGYDAFDIW 404 IGHG1 │ VRGYLYYAA CMIWPSNAWVF 3456 IGLC2│ -│12 1 │ LPGAGSSSLNQEYVRANLLY CARDQNFDESSGYDAFDIW 136 IGHG1 │ VRSYLYYAA CMIWPSNAWVF 1023 IGLC2│ -│13 1 │ LPGAGSSSPNEEYVRANLLY CARDQNFDESSGYDAFDIW 27 IGHG1 │ VRSYLYYAA CMIWPSNAWVF 153 IGLC2│ -│14 1 │ │ VRSYLYYAA CMIWPSNAWVF 792 IGLC2│ -└───────────┴─────────────────────────────────────────────────────────┴─────────────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test97_output b/enclone_main/testx/inputs/outputs/enclone_test97_output deleted file mode 100644 index a24cc3f29..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test97_output +++ /dev/null @@ -1,23 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 10 CELLS - -[1.1] CLONOTYPE = 10 CELLS -┌───────────┬──────────────────────────────────────┬────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 67.1.1|IGHV1-2 ◆ 55|IGHJ4 │ 284|IGKV3-15 ◆ 215|IGKJ2 │ -│ ├──────────────────────────────────────┼────────────────────────────┤ -│ 
│ 11 1111111111111111 │ 11111111 │ -│ │ 48900 1111112222222222 │ 88999 00011111 │ -│ │ 25502 4567890123456789 │ 59789 78901234 │ -│ │ ══════CDR3══════ │ ══CDR3══ │ -│reference │ AWSES ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ GELQS CQQ◦◦◦◦◦ │ -│donor ref │ ARSES ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ GELQS CQQ◦◦◦◦◦ │ -├───────────┼──────────────────────────────────────┼────────────────────────────┤ -│# n │ x.xxx .........x...... u const │ xxxxx ........ u const│ -│1 4 │ ARSES CARESVVGLLPIFDYW 189 IGHG1 │ GELQS CQQFPSTF 331 IGKC │ -│2 3 │ ARIET CARESVVGLLPIFDYW 39 IGHG1 │ VELQS CQQFPSTF 339 IGKC │ -│3 1 │ ARIET CARESVVGLLPIFDYW 256 IGHG1 │ VELEP CQQFPSTF 479 IGKC │ -│4 1 │ ARSES CARESVVGLLPIFDYW 219 IGHG1 │ GDLEP CQQFPSTF 453 IGKC │ -│5 1 │ ARSES CARESVVGLLPIFDYW 145 IGHG1 │ EELQS CQQFPSTF 166 IGKC │ -└───────────┴──────────────────────────────────────┴────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test98_output b/enclone_main/testx/inputs/outputs/enclone_test98_output deleted file mode 100644 index 2caaad6bd..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test98_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌───────────┬─────────────────────────────────────────┬───────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 637|TRBV5-1 ◆ 736|TRBD2 ◆ 550|TRBJ2-3 │ 470|TRAV13-2 ◆ 422|TRAJ32│ -│ ├─────────────────────────────────────────┼───────────────────────────┤ -│ │ 11111111111111111111 │ 11111111111111 │ -│ │ 01111111111222222222 │ 01111111111222 │ -│ │ 90123456789012345678 │ 90123456789012 │ -│ │ ════════CDR3════════ │ ═════CDR3═════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦YF │ ◦◦◦◦◦◦◦◦TNKLIF │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦YF │ ◦◦◦◦◦◦◦◦TNKLIF │ -├───────────┼─────────────────────────────────────────┼───────────────────────────┤ -│# n │ .................... u const │ .............. 
u const │ -│1 1 │ CASSPAGTSGKVWGTDTQYF 16 TRBC2 │ CAESAPFPTNKLIF 4 TRAC │ -└───────────┴─────────────────────────────────────────┴───────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test99_output b/enclone_main/testx/inputs/outputs/enclone_test99_output deleted file mode 100644 index 6735ea0ef..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test99_output +++ /dev/null @@ -1,19 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 1 CELLS - -[1.1] CLONOTYPE = 1 CELLS -┌─────────────┬──────────────────────────────┬──────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 600|TRBV20-1 ◆ 540|TRBJ1-1 │ 458.1.1|TRAV1-2 ◆ 423|TRAJ33│ -│ ├──────────────────────────────┼──────────────────────────────┤ -│ │ 111111111111 │ 111111111111 │ -│ │ 000111111111 │ 5 000000111111 │ -│ │ 789012345678 │ 9 456789012345 │ -│ │ ════CDR3════ │ ════CDR3════ │ -│reference │ ◦◦◦◦◦◦◦◦◦◦◦F │ P ◦◦◦◦◦◦◦◦◦LIW │ -│donor ref │ ◦◦◦◦◦◦◦◦◦◦◦F │ P ◦◦◦◦◦◦◦◦◦LIW │ -├─────────────┼──────────────────────────────┼──────────────────────────────┤ -│# n mait │ ............ u const │ . ............ 
u const │ -│1 1 𝝰gj𝝱g │ CSAGQGDTEAFF 5 TRBC1 │ P CAVMDSNYQLIW 1 TRAC │ -└─────────────┴──────────────────────────────┴──────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/outputs/enclone_test9_output b/enclone_main/testx/inputs/outputs/enclone_test9_output deleted file mode 100644 index 5d5b34d60..000000000 --- a/enclone_main/testx/inputs/outputs/enclone_test9_output +++ /dev/null @@ -1,21 +0,0 @@ - -[1] GROUP = 1 CLONOTYPES = 2 CELLS - -[1.1] CLONOTYPE = 2 CELLS -┌──────────────────────────┬──────────────────────────────────────────┬─────────────────────────────┐ -│ │ CHAIN 1 │ CHAIN 2 │ -│ │ 122.1.1|IGHV3-23 ◆ 55|IGHJ4 │ 284|IGKV3-15 ◆ 214|IGKJ1 │ -│ ├──────────────────────────────────────────┼─────────────────────────────┤ -│ │ 1111111111111111111 │ 1111111111 │ -│ │ 12444569 1111112222222222333 │ 11466 0001111111 │ -│ │ 73689985 4567890123456789012 │ 16809 7890123456 │ -│ │ ════════CDR3═══════ │ ═══CDR3═══ │ -│reference │ QVTSSPAN ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ LDVGG CQQ◦◦◦◦◦◦◦ │ -│donor ref │ QLTSSPAN ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦ │ LDVGG CQQ◦◦◦◦◦◦◦ │ -├──────────────────────────┼──────────────────────────────────────────┼─────────────────────────────┤ -│# barcode n │ ........ ................... u const │ ..... .......... 
u const│ -│1 2 │ QLSSAPTS CAKGDRTGYSYGGGIFDYW 2 IGHM │ LDIGA CQQYNNWWTF 17 IGKC │ -│ GGTATTGTCTGCAGTA-1 │ 1 │ 17 │ -│ TTCCCAGTCGGTTAAC-1 │ 2 │ 15 │ -└──────────────────────────┴──────────────────────────────────────────┴─────────────────────────────┘ - diff --git a/enclone_main/testx/inputs/test11_meta b/enclone_main/testx/inputs/test11_meta deleted file mode 100644 index 8215af634..000000000 --- a/enclone_main/testx/inputs/test11_meta +++ /dev/null @@ -1,3 +0,0 @@ -bcr,gex,origin,donor -zip:123085,,a,b -toast:86237,85679,c,d diff --git a/enclone_main/testx/inputs/test35_bc b/enclone_main/testx/inputs/test35_bc deleted file mode 100644 index 465789d69..000000000 --- a/enclone_main/testx/inputs/test35_bc +++ /dev/null @@ -1,579 +0,0 @@ -barcode,origin,donor,color -AAACGGGAGGGATACC-1,s,d,red -AAAGATGAGAGGTAGA-1,s,d,red -AACCATGAGCAATATG-1,s,d,red -AAGCCGCAGAGTACAT-1,s,d,red -AATCCAGTCATTGCGA-1,s,d,red -ACACCGGTCAACACCA-1,s,d,red -ACAGCTATCGCCTGAG-1,s,d,red -ACATACGGTACCATCA-1,s,d,red -ACGATACGTTATCGGT-1,s,d,red -ACGATGTAGGCTATCT-1,s,d,red -ACGCAGCGTCTAGGTT-1,s,d,red -ACGTCAACATGACATC-1,s,d,red -ACGTCAAGTGCGGTAA-1,s,d,red -ACTGAACCACATAACC-1,s,d,red -ACTTGTTGTCACTTCC-1,s,d,red -ACTTGTTTCGATAGAA-1,s,d,red -AGAGTGGCAGTCACTA-1,s,d,red -AGATTGCGTGGACGAT-1,s,d,red -AGCAGCCAGGATTCGG-1,s,d,red -AGCATACCAATGCCAT-1,s,d,red -AGCGGTCGTTAAGTAG-1,s,d,red -AGGCCGTAGTGCAAGC-1,s,d,red -AGTGGGAGTTTGTTTC-1,s,d,red -AGTTGGTAGGTGCAAC-1,s,d,red -ATAACGCTCTGGTGTA-1,s,d,red -ATCTGCCGTAGCAAAT-1,s,d,red -ATTCTACCAGTGACAG-1,s,d,red -ATTCTACTCAAGGTAA-1,s,d,red -ATTGGACTCAACACTG-1,s,d,red -ATTGGTGTCAGTACGT-1,s,d,red -ATTTCTGAGGAATTAC-1,s,d,red -CAACTAGGTCGCTTTC-1,s,d,red -CACACCTAGCCGATTT-1,s,d,red -CACACCTTCGCGATCG-1,s,d,red -CACCAGGCACTGTTAG-1,s,d,red -CACTCCACATTCACTT-1,s,d,red -CACTCCAGTACAGACG-1,s,d,red -CAGAATCAGCTGCGAA-1,s,d,red -CAGAGAGTCGCTGATA-1,s,d,red -CAGTCCTAGCAATCTC-1,s,d,red -CATCGAATCCCATTTA-1,s,d,red -CATGACACATGCCACG-1,s,d,red -CCAATCCTCAACACTG-1,s,d,red 
-CCACCTACAATTGCTG-1,s,d,red -CCACGGATCCTGTACC-1,s,d,red -CCCAGTTCAAGGTTTC-1,s,d,red -CCCTCCTTCTTAGAGC-1,s,d,red -CCTAAAGCACCTCGTT-1,s,d,red -CCTCTGAAGACCTTTG-1,s,d,red -CCTTCGACATGTCTCC-1,s,d,red -CCTTCGAGTGTCGCTG-1,s,d,red -CGACCTTCATGCAATC-1,s,d,red -CGCTATCGTCCAAGTT-1,s,d,red -CGCTGGACAGGAACGT-1,s,d,red -CGCTGGATCTGGCGAC-1,s,d,red -CGGGTCATCTGTCTCG-1,s,d,red -CGTCAGGAGCCCAATT-1,s,d,red -CGTGAGCAGTGGTCCC-1,s,d,red -CGTTAGACACAAGACG-1,s,d,red -CTACATTTCAGGATCT-1,s,d,red -CTCACACGTATAGGTA-1,s,d,red -CTCACACGTTAAGGGC-1,s,d,red -CTCGGAGGTCACTTCC-1,s,d,red -CTCGGGAAGGGTTTCT-1,s,d,red -CTCTAATGTTACTGAC-1,s,d,red -CTCTACGAGTGTTTGC-1,s,d,red -CTGATCCTCGAACGGA-1,s,d,red -CTGCCTAAGTCTCGGC-1,s,d,red -CTGCGGAAGTTCGATC-1,s,d,red -CTTAACTCAAGTTAAG-1,s,d,red -CTTCTCTCAGTGACAG-1,s,d,red -GAACGGATCCAATGGT-1,s,d,red -GAATAAGCAGGCAGTA-1,s,d,red -GACACGCTCACCATAG-1,s,d,red -GACCTGGTCTGACCTC-1,s,d,red -GACGGCTAGTCATCCA-1,s,d,red -GACTAACAGGACATTA-1,s,d,red -GACTGCGCAACAACCT-1,s,d,red -GAGTCCGTCGTTACAG-1,s,d,red -GATCGATTCACTGGGC-1,s,d,red -GATCGCGGTATTACCG-1,s,d,red -GATCGTAAGCTGATAA-1,s,d,red -GATCGTAGTACCGGCT-1,s,d,red -GATGAAACACGGTAGA-1,s,d,red -GATGCTACATTACGAC-1,s,d,red -GCAAACTGTTACCGAT-1,s,d,red -GCAGCCATCCTATGTT-1,s,d,red -GCATACAGTGCCTGTG-1,s,d,red -GCATGATCAAACTGCT-1,s,d,red -GCATGCGAGCGTTTAC-1,s,d,red -GCCTCTATCTAGAGTC-1,s,d,red -GCGCAGTAGAGTCTGG-1,s,d,red -GCGCAGTGTGTGGTTT-1,s,d,red -GCTCCTACAGGTCGTC-1,s,d,red -GGAGCAATCCTGCAGG-1,s,d,red -GGATGTTAGAAGGTGA-1,s,d,red -GGCAATTCAAACCCAT-1,s,d,red -GGGACCTGTTTCGCTC-1,s,d,red -GGGAGATGTTAAGTAG-1,s,d,red -GGGAGATTCCCAAGAT-1,s,d,red -GGTGAAGCACAAGACG-1,s,d,red -GGTGCGTCACCAACCG-1,s,d,red -GGTGCGTCACCAGCAC-1,s,d,red -GTCACAAAGGAGTTGC-1,s,d,red -GTCACGGGTGCCTGGT-1,s,d,red -GTCCTCATCTCCCTGA-1,s,d,red -GTCTTCGAGCGTTTAC-1,s,d,red -GTGTTAGCAAACCTAC-1,s,d,red -GTTCATTAGTGAAGTT-1,s,d,red -TAAACCGGTACCGTTA-1,s,d,red -TACCTATAGCGAAGGG-1,s,d,red -TACCTATAGCGTTCCG-1,s,d,red -TACCTTACAGGGTATG-1,s,d,red -TACTCATAGGCGACAT-1,s,d,red 
-TACTCATCATTTGCCC-1,s,d,red -TAGAGCTCACCAGTTA-1,s,d,red -TAGTTGGCAAACGTGG-1,s,d,red -TATGCCCCACCATCCT-1,s,d,red -TCAACGACATGCCACG-1,s,d,red -TCGTAGAAGTAGGTGC-1,s,d,red -TCTCATAGTGCAACGA-1,s,d,red -TCTTCGGCACGAAGCA-1,s,d,red -TCTTTCCAGCCAGTTT-1,s,d,red -TGACTAGCACAGACTT-1,s,d,red -TGAGGGAAGACACTAA-1,s,d,red -TGAGGGACAATAAGCA-1,s,d,red -TGCCAAATCCTAGTGA-1,s,d,red -TGCCCATAGAGTGAGA-1,s,d,red -TGCCCTACATGCATGT-1,s,d,red -TGCTACCAGTATCGAA-1,s,d,red -TGGCCAGGTTATCGGT-1,s,d,red -TGGCTGGAGTCCGGTC-1,s,d,red -TGGGCGTTCCGTTGTC-1,s,d,red -TGTTCCGCACGACGAA-1,s,d,red -TTAGGCAGTTGAACTC-1,s,d,red -TTATGCTTCACCTTAT-1,s,d,red -TTCGAAGCACTTACGA-1,s,d,red -TTCTACAGTGCAGGTA-1,s,d,red -TTCTCAAAGATATGGT-1,s,d,red -TTCTCAACATACTACG-1,s,d,red -TTCTCCTAGGCTCTTA-1,s,d,red -TTCTCCTCATGGTCTA-1,s,d,red -TTCTCCTGTTGTGGAG-1,s,d,red -TTGCCGTTCTGTGCAA-1,s,d,red -TTGGAACTCTTATCTG-1,s,d,red -TTTCCTCAGCCGCCTA-1,s,d,red -TTTCCTCGTAAACCTC-1,s,d,red -AACACGTCACCGCTAG-1,s,d,red -ACCCACTGTTGTCGCG-1,s,d,red -ACTGAGTAGAGGGATA-1,s,d,red -AGCGTCGCACTACAGT-1,s,d,red -AGCTTGAGTAAAGTCA-1,s,d,red -GACCAATAGTGGTAGC-1,s,d,red -TGACTTTAGTGTACGG-1,s,d,red -TGGACGCAGGCGATAC-1,s,d,red -TTCTTAGAGCGTCAAG-1,s,d,red -CAGCAGCAGACCGGAT-1,s,d,red -GCACTCTCACCAACCG-1,s,d,red -CACCTTGAGGTGATTA-1,s,d,red -CGCTATCCACTCTGTC-1,s,d,red -CCAATCCCATGCATGT-1,s,d,red -GCATGCGCATTTGCCC-1,s,d,red -CTTAACTAGTAGGCCA-1,s,d,red -TCAGGATGTGCAACTT-1,s,d,red -CGTCCATGTCTCCACT-1,s,d,red -GCATGATCAGCTCGAC-1,s,d,red -CGGAGCTAGCGCCTTG-1,s,d,red -ACTTTCAGTTAGAACA-1,s,d,red -GAAATGAGTCTGCCAG-1,s,d,red -CATATTCAGTCCGTAT-1,s,d,red -GTGTTAGAGTCGATAA-1,s,d,red -CTTCTCTTCCGTCATC-1,s,d,blue -TAGCCGGAGTAGGCCA-1,s,d,red -GTACGTATCGCCATAA-1,s,d,red -CGCGTTTAGTATCTCG-1,s,d,red -AGCGTCGAGGGTCGAT-1,s,d,red -CATCGGGTCTCAAACG-1,s,d,red -CGTCCATAGCTGAACG-1,s,d,red -GTAGTCAAGTTCGATC-1,s,d,red -AAGCCGCAGAAACGAG-1,s,d,red -CGTGTAAGTCCATGAT-1,s,d,red -AATCGGTCATATGCTG-1,s,d,red -AGCCTAAAGCGTCTAT-1,s,d,red -ATGCGATAGATGTGGC-1,s,d,red -CACCAGGCAGGGAGAG-1,s,d,red 
-CCACTACCATACCATG-1,s,d,red -CCGTACTGTATAGGGC-1,s,d,red -CCGTTCATCGGTGTTA-1,s,d,red -CTAGCCTAGCCTATGT-1,s,d,red -CTGTTTATCTGTCTAT-1,s,d,red -GATGCTACAATTCCTT-1,s,d,red -GATGCTACAGACAGGT-1,s,d,red -GGACATTAGGACACCA-1,s,d,red -TCTGAGATCACCCTCA-1,s,d,red -TGGGCGTTCCACTCCA-1,s,d,red -AAGACCTCAAGGTTTC-1,s,d,red -AAACCTGTCGGAAATA-1,s,d,red -AACGTTGCATTATCTC-1,s,d,red -AACTCAGGTCAGGACA-1,s,d,red -AAGCCGCCAGGTCTCG-1,s,d,red -ACGGAGAAGGCCCTCA-1,s,d,red -ACGGCCAAGGCAATTA-1,s,d,red -ACGGGTCTCTTGTACT-1,s,d,red -ACTTACTAGGATATAC-1,s,d,red -AGAATAGCATTTGCCC-1,s,d,red -AGCAGCCCAGTGGGAT-1,s,d,red -AGCAGCCTCACGGTTA-1,s,d,red -AGTTGGTTCCACGTGG-1,s,d,red -ATAACGCAGGGATCTG-1,s,d,red -ATAGACCCAATGTTGC-1,s,d,red -CACACAACACGACGAA-1,s,d,red -CACATTTTCCACGTGG-1,s,d,red -CAGCTAATCTTATCTG-1,s,d,red -CATGGCGAGGGCACTA-1,s,d,red -CATGGCGCATATGAGA-1,s,d,red -CCATTCGCACGGTAAG-1,s,d,red -CCGTTCACAATGGTCT-1,s,d,red -CCTTACGCACTCAGGC-1,s,d,red -CCTTACGTCGATAGAA-1,s,d,red -CGAGCACTCTTCCTTC-1,s,d,red -CGTTAGAGTATGAAAC-1,s,d,red -CTAACTTGTTAAAGTG-1,s,d,red -CTAATGGTCTTGAGAC-1,s,d,red -CTACCCAAGCTCCCAG-1,s,d,red -CTCACACTCGTACCGG-1,s,d,red -CTCGTCACAATGACCT-1,s,d,red -CTGGTCTAGTGAAGTT-1,s,d,red -GACCAATGTCTTCTCG-1,s,d,red -GACGTTAAGGTACTCT-1,s,d,red -GAGGTGAGTCTTGATG-1,s,d,red -GCAGTTACATTCTTAC-1,s,d,red -GCGCAACAGTCCTCCT-1,s,d,red -GCGCAGTGTTAAAGTG-1,s,d,red -GCTTCCATCGTTGACA-1,s,d,red -GGAAAGCCATCACGAT-1,s,d,red -GGCTCGAAGGTGATTA-1,s,d,red -GGGCATCTCAGCCTAA-1,s,d,red -GGGTCTGCACGAAAGC-1,s,d,red -GTACTTTTCGGCCGAT-1,s,d,red -GTTCGGGTCTGAAAGA-1,s,d,red -TATTACCAGATGTAAC-1,s,d,red -TCAATCTTCGAATGGG-1,s,d,red -TCAGCTCCATTCGACA-1,s,d,red -TGAGCATAGTTCGATC-1,s,d,red -TGAGCCGGTTTGTGTG-1,s,d,red -TGCTGCTCATTGAGCT-1,s,d,red -TGGCTGGTCGGCGCAT-1,s,d,red -TTCTTAGTCACCTTAT-1,s,d,red -TTGAACGTCCTCCTAG-1,s,d,red -TTGCGTCAGGCATGGT-1,s,d,red -CGGAGCTGTTATCGGT-1,s,d,red -AACCGCGCACTCGACG-1,s,d,red -AACTCAGAGTGTACTC-1,s,d,red -AAGACCTCAAGCGTAG-1,s,d,red -ACACCAACAATGCCAT-1,s,d,red -ACATGGTCAGCTCCGA-1,s,d,red 
-ACGATGTGTGGCTCCA-1,s,d,red -AGCATACGTCATATGC-1,s,d,red -ATAGACCGTGCGAAAC-1,s,d,red -ATCCACCGTAAGTAGT-1,s,d,red -ATGAGGGGTCTACCTC-1,s,d,red -ATTGGACCACGTTGGC-1,s,d,red -CAACCTCCATGACATC-1,s,d,red -CACATAGAGGACACCA-1,s,d,red -CAGCAGCAGCTCCTTC-1,s,d,red -CAGCAGCGTACAGCAG-1,s,d,red -CAGCGACAGTGGTAAT-1,s,d,red -CATATGGCACCGATAT-1,s,d,red -CCAATCCCACTATCTT-1,s,d,red -CCACCTAGTTGATTGC-1,s,d,red -CCGTACTGTCTTGCGG-1,s,d,red -CGCCAAGAGCTAGTGG-1,s,d,red -CGCGGTAAGCTGCGAA-1,s,d,red -CTCTAATAGCTACCTA-1,s,d,red -CTGCTGTAGGACAGAA-1,s,d,red -CTGGTCTGTCCATCCT-1,s,d,red -CTTAGGATCGCGGATC-1,s,d,red -GAATAAGAGAAGGGTA-1,s,d,red -GATCGTAGTGACAAAT-1,s,d,red -GCATGCGGTTAGGGTG-1,s,d,red -GCATGTACAATGCCAT-1,s,d,red -GCGCAGTTCGCGCCAA-1,s,d,red -GCTGCAGCAAATTGCC-1,s,d,red -GGAATAAGTGAGGGTT-1,s,d,red -GGTATTGTCAGGATCT-1,s,d,red -GTAACGTCACAGGCCT-1,s,d,red -GTAACGTTCCCAAGAT-1,s,d,red -GTGCTTCAGAGTGACC-1,s,d,red -TCAGATGAGGCGATAC-1,s,d,red -TCAGATGTCTCTGTCG-1,s,d,red -TCCCGATAGGCAGTCA-1,s,d,red -TCTTCGGTCTACCTGC-1,s,d,red -TGCGGGTTCAGCCTAA-1,s,d,red -TGGACGCGTCACCCAG-1,s,d,red -TTCGAAGAGCAATCTC-1,s,d,red -TTCGAAGGTGCCTGTG-1,s,d,red -TTGCCGTGTTGCGTTA-1,s,d,red -TTGCGTCTCCATGAGT-1,s,d,red -TTGGCAAAGGTGCTAG-1,s,d,red -CGATTGAGTATTAGCC-1,s,d,red -CTCGGAGAGCTCTCGG-1,s,d,red -TGGGCGTAGTCGCCGT-1,s,d,red -GACGCGTAGCTAGTCT-1,s,d,red -CAGCGACTCCTATTCA-1,s,d,red -AACACGTGTCACCCAG-1,s,d,red -AACTCTTAGGAGCGAG-1,s,d,red -AAGGCAGGTTGTCGCG-1,s,d,red -ACATGGTCATCCAACA-1,s,d,red -ACGAGCCGTAAGTAGT-1,s,d,red -AGGCCGTGTCGTTGTA-1,s,d,red -AGGTCATCACAGGCCT-1,s,d,red -AGTCTTTGTCATACTG-1,s,d,red -CAAGGCCTCGAGGTAG-1,s,d,red -CACAAACAGATATGCA-1,s,d,red -CAGAGAGCAGCGTAAG-1,s,d,red -CAGCCGACAGTAAGAT-1,s,d,red -CGCTTCATCTCAAGTG-1,s,d,red -CTACACCCACGTCTCT-1,s,d,red -CTCACACCAAGTTAAG-1,s,d,red -CTGAAACAGAAACCGC-1,s,d,red -CTGGTCTCACGGCTAC-1,s,d,red -GACCAATGTTACGCGC-1,s,d,red -GCACTCTCACTTCGAA-1,s,d,red -GCTGGGTGTCCTCTTG-1,s,d,red -GGACAGACACGCTTTC-1,s,d,red -GGCTGGTCATGAGCGA-1,s,d,red -GTCTTCGGTACCAGTT-1,s,d,red 
-TACTTACAGATGGGTC-1,s,d,red -TAGACCACATGGGACA-1,s,d,red -TCAGGATAGGACTGGT-1,s,d,red -TCGCGTTCAATCCGAT-1,s,d,red -TCTCTAAGTCAATACC-1,s,d,red -TGAGCCGAGGCGATAC-1,s,d,red -TGCGGGTAGACGACGT-1,s,d,red -TGGACGCAGTGTACCT-1,s,d,red -TTCGGTCAGTAGGTGC-1,s,d,red -TTCTTAGAGGGATACC-1,s,d,red -TTTGTCACAAAGGTGC-1,s,d,red -TCAGATGAGCACCGTC-1,s,d,red -TGGCGCAGTGCATCTA-1,s,d,red -GCTTCCAAGGACATTA-1,s,d,red -TGTCCCACATTGGCGC-1,s,d,red -AGACGTTAGAGTGACC-1,s,d,red -AGATCTGTCATCACCC-1,s,d,red -TGCGTGGAGTACGACG-1,s,d,red -AGTAGTCTCAGCTCTC-1,s,d,red -GCACATATCAGCGATT-1,s,d,red -TATTACCGTCAACTGT-1,s,d,red -TGCGCAGCACCAACCG-1,s,d,red -AACTCAGAGGTGCACA-1,s,d,red -ACACCCTCATTTGCCC-1,s,d,red -ACCTTTACACAGTCGC-1,s,d,red -CACATTTAGACAGAGA-1,s,d,red -CTAGTGAGTAAATACG-1,s,d,red -GCAGTTAAGGAGTTTA-1,s,d,red -GGGATGAGTTGTGGCC-1,s,d,red -GTGCGGTTCCATGCTC-1,s,d,red -TATGCCCTCTCGATGA-1,s,d,red -TGTGTTTAGAAGCCCA-1,s,d,red -AAAGCAAAGCTGATAA-1,s,d,red -ACACTGAGTTCCGTCT-1,s,d,red -AGTGTCATCGACGGAA-1,s,d,red -CTAATGGAGTTAGGTA-1,s,d,red -CTTTGCGCATCATCCC-1,s,d,red -TCACAAGCAATGGTCT-1,s,d,red -TGCCCATAGCGATCCC-1,s,d,red -TTAGGACAGGAGTAGA-1,s,d,red -CGGAGCTGTTGTGGAG-1,s,d,red -CTAGAGTTCGCAAGCC-1,s,d,red -GAAGCAGGTGTGTGCC-1,s,d,red -GCTTCCACACAAGACG-1,s,d,red -GTAACTGTCCGCGGTA-1,s,d,red -TATCTCAGTAGCTGCC-1,s,d,red -TGCCCTAGTTCCCTTG-1,s,d,red -ATGAGGGTCTCCCTGA-1,s,d,red -CGAACATAGCGCTCCA-1,s,d,red -GATGAGGGTCTCTCTG-1,s,d,red -GATGAGGTCTCCCTGA-1,s,d,red -GTCTTCGCAAGGACAC-1,s,d,red -CAGTAACCACCAACCG-1,s,d,red -TCAGATGTCATCGCTC-1,s,d,red -GTGAAGGCACCACGTG-1,s,d,red -CGCTTCAAGGACATTA-1,s,d,red -GCGGGTTAGAGCCCAA-1,s,d,red -GCTGCGATCGCTGATA-1,s,d,red -AAAGTAGCAACTGCGC-1,s,d,green -TTCTCCTGTGTTCGAT-1,s,d,blue -TATGCCCCACGGCCAT-1,s,d,red -CGACCTTGTCCGTGAC-1,s,d,red -ACATGGTCAGCCACCA-1,s,d,red -CATCAGATCACTATTC-1,s,d,red -ACGAGGAGTTCGCTAA-1,s,d,red -CACAGTATCCACGACG-1,s,d,red -CAGCAGCTCGGCGGTT-1,s,d,red -CGTCAGGCAGGACGTA-1,s,d,red -GAATAAGCATCCCACT-1,s,d,red -GCGCAGTAGGGTTTCT-1,s,d,red -TTCCCAGCAAAGGCGT-1,s,d,red 
-TTTCCTCTCATCACCC-1,s,d,red -CTTGGCTAGAGTCTGG-1,s,d,red -GGAAAGCGTTTGGCGC-1,s,d,red -GTACTCCTCCACTGGG-1,s,d,red -AACCATGAGGAGTCTG-1,s,d,orange -ACATCAGAGTTAACGA-1,s,d,orange -ACATCAGCAATGCCAT-1,s,d,orange -ACGGCCAAGTGCTGCC-1,s,d,orange -ACTGAACGTGCAGGTA-1,s,d,orange -AGAATAGCAAACCCAT-1,s,d,orange -AGAATAGCACCTTGTC-1,s,d,orange -AGTGTCACATCTACGA-1,s,d,orange -ATGCGATAGGGATGGG-1,s,d,orange -ATGGGAGTCAACACGT-1,s,d,orange -ATTTCTGTCCGCATCT-1,s,d,orange -CAGAATCCATTCACTT-1,s,d,orange -CCACCTACAGTTTACG-1,s,d,orange -CCTACACTCGGAGGTA-1,s,d,orange -CGAGCCATCTTGTATC-1,s,d,orange -CGATGGCCACCGCTAG-1,s,d,orange -CGCCAAGTCAGCTTAG-1,s,d,orange -CGCGGTACATTGCGGC-1,s,d,orange -CGCTGGACAACTGCTA-1,s,d,orange -CGGAGTCAGAATAGGG-1,s,d,orange -CGTAGGCAGTCCGGTC-1,s,d,orange -CTAGTGACATGCCTTC-1,s,d,orange -CTCAGAAAGGGATACC-1,s,d,orange -CTCGGAGCACCAGATT-1,s,d,orange -GGGCATCGTAAGGATT-1,s,d,orange -GTACGTAAGGGCTCTC-1,s,d,orange -GTAGGCCGTCGCCATG-1,s,d,orange -GTCTTCGTCTTCGGTC-1,s,d,orange -GTGTTAGGTCTTCGTC-1,s,d,orange -TACCTATGTGTGAAAT-1,s,d,orange -TGATTTCAGACTGTAA-1,s,d,orange -TGCACCTGTTTGACTG-1,s,d,orange -AGCAGCCCATTAGGCT-1,s,d,red -ATCGAGTGTCTGGAGA-1,s,d,red -CAACCAAGTACAGTTC-1,s,d,red -CAAGTTGTCTGGCGAC-1,s,d,red -CACCTTGAGGCCCTCA-1,s,d,red -CCGGGATCACGGCTAC-1,s,d,red -CGATGGCTCATGCAAC-1,s,d,red -GGAAAGCTCAGATAAG-1,s,d,red -GGATGTTTCGATCCCT-1,s,d,red -GGCCGATGTCCATGAT-1,s,d,red -GTACTTTTCAATAAGG-1,s,d,red -TCGTACCAGTGAATTG-1,s,d,red -TGAAAGACAAAGCAAT-1,s,d,red -TGCGTGGGTAGCGTCC-1,s,d,red -TTAACTCGTCCATGAT-1,s,d,red -CAGCTAACAACAACCT-1,s,d,blue -TCATTTGTCAAGGCTT-1,s,d,red -ATGCGATCACTTCTGC-1,s,d,red -TGCACCTTCGATCCCT-1,s,d,red -ACGCAGCCACTAGTAC-1,s,d,green -ACTGATGTCTCTGTCG-1,s,d,green -CAGCATATCGCATGAT-1,s,d,green -CAGTAACCAAAGAATC-1,s,d,green -CGTAGCGAGACTCGGA-1,s,d,green -CGTAGGCTCCGCGGTA-1,s,d,green -GATGAGGCAATGAATG-1,s,d,green -GATTCAGGTAGCGTGA-1,s,d,green -GCATGCGCAATAGCGG-1,s,d,green -GGTGCGTCAAGCTGGA-1,s,d,green -GTAACGTAGGATATAC-1,s,d,green -GTAACTGTCTTGTTTG-1,s,d,green 
-GTATCTTTCTGCGACG-1,s,d,green -GTTCTCGCAAGCGTAG-1,s,d,green -TGCCAAACAAGGGTCA-1,s,d,green -CCCAGTTTCTTCAACT-1,s,d,green -TACCTATTCTGCGACG-1,s,d,green -AACGTTGGTTGGTTTG-1,s,d,red -AAGGCAGAGAGTGAGA-1,s,d,red -ATCTGCCCAGTATAAG-1,s,d,red -ATTTCTGTCTCACATT-1,s,d,red -CGAGCCACATCGGAAG-1,s,d,red -CTGTTTAGTGTTCTTT-1,s,d,red -GACTAACGTACAAGTA-1,s,d,red -GCTGCTTTCCGAATGT-1,s,d,red -GGGTCTGAGGAGTACC-1,s,d,red -GTCACAAAGGCCATAG-1,s,d,red -GTGTGCGAGCTCTCGG-1,s,d,red -TATTACCAGACTAGGC-1,s,d,red -TCGTACCTCAACACGT-1,s,d,red -TGGTTAGTCGTAGATC-1,s,d,red -TTGAACGAGCTAACAA-1,s,d,red -CATGCCTAGGGAGTAA-1,s,d,red -TACGGGCTCTACTTAC-1,s,d,red -CACTCCAAGCTGCAAG-1,s,d,red -CCTTCCCGTCCATGAT-1,s,d,red -CGGAGCTTCCTTGCCA-1,s,d,red -CGTCACTGTGACGGTA-1,s,d,red -CTCTGGTAGGGATACC-1,s,d,red -CTGCTGTGTTCACGGC-1,s,d,red -GTAGTCAGTAGCGCTC-1,s,d,red -GTATCTTTCAACACGT-1,s,d,red -TGGCCAGGTCTTCAAG-1,s,d,red -TGGGAAGTCAGCTCTC-1,s,d,red -TGAAAGAAGGCTCAGA-1,s,d,blue -TGAAAGAGTTCCACAA-1,s,d,red -CGTCCATCACTTGGAT-1,s,d,red -GTTCATTTCCTTGCCA-1,s,d,red -AACTTTCGTCAAACTC-1,s,d,red -ACATCAGAGCTGTTCA-1,s,d,red -AGCTCCTGTGTTGGGA-1,s,d,red -AGTGAGGCACAGGCCT-1,s,d,red -ATCTACTCATCACGTA-1,s,d,red -CGCTGGAGTTATGTGC-1,s,d,red -CTAAGACAGGATGTAT-1,s,d,red -CTCTACGAGGAATTAC-1,s,d,red -CTTACCGCAGGATTGG-1,s,d,red -GACGTGCCATGGATGG-1,s,d,red -GGAACTTCAGTCCTTC-1,s,d,red -GGGATGAGTCTTCGTC-1,s,d,red -GGGTCTGCATCCCATC-1,s,d,red -GTTAAGCAGTGGGTTG-1,s,d,red -TTAGGACAGATCCGAG-1,s,d,red -ACATGGTAGTACGCCC-1,s,d,red -GTACTTTAGATCCTGT-1,s,d,red -GTCACAACAGCCTTTC-1,s,d,red -GTTACAGCATTTCAGG-1,s,d,red -ACGGCCACATACCATG-1,s,d,red -CATATGGGTACGAAAT-1,s,d,red -AGGGTGATCAAACAAG-1,s,d,red -CCGTGGAAGTGAAGAG-1,s,d,red -AAGGAGCTCTTCAACT-1,s,d,red -CGAGCACCATTCTTAC-1,s,d,red -TCTTTCCGTTCCACAA-1,s,d,red -AGGGATGAGATATGGT-1,s,d,red -ACATGGTGTAACGCGA-1,s,d,red -AACACGTCAGTAACGG-1,s,d,red -ATGCGATGTCTCAACA-1,s,d,red -CAACTAGGTAGAGGAA-1,s,d,red -CATCCACCAGCGAACA-1,s,d,red -CCTTTCTAGGACGAAA-1,s,d,red -CGATCGGCATTGGCGC-1,s,d,red -CTCGAAACAAGCCGTC-1,s,d,red 
-TAAGCGTTCAAAGACA-1,s,d,red -TGGCTGGAGGATGTAT-1,s,d,red -TGTATTCAGGTCGGAT-1,s,d,red -GATCAGTGTCGAGTTT-1,s,d,red -CACCAGGAGCTCAACT-1,s,d,red -CGACTTCAGCAGGCTA-1,s,d,red -CGACTTCCATGCGCAC-1,s,d,red -CGATGGCAGATGTAAC-1,s,d,red -CGTTAGATCAACCAAC-1,s,d,red -CGTTCTGCAGTTCCCT-1,s,d,red -GGCGACTCAGCGTCCA-1,s,d,red -TGCCAAATCCGGCACA-1,s,d,red -ATTATCCGTTACGGAG-1,s,d,red -GTCACGGCAGGATTGG-1,s,d,red -TCGTAGAAGGTGACCA-1,s,d,red -TTCTCCTCACAGAGGT-1,s,d,red -AAAGCAACAAGCCATT-1,s,d,red -AGGGATGGTAGCGATG-1,s,d,red -CTGTTTAAGCCCGAAA-1,s,d,red -GAGGTGAAGTTCCACA-1,s,d,red -CCGGTAGCATCTCCCA-1,s,d,red -TGCCCATAGCTGCAAG-1,s,d,red -CACAGGCTCTTAGCCC-1,s,d,red -AACTCAGTCCGCAGTG-1,s,d,red -AGCGGTCTCTGGCGTG-1,s,d,red -CATGACAGTAGCTCCG-1,s,d,red -CTGAAGTGTTACGGAG-1,s,d,red -TTCCCAGGTTAAGGGC-1,s,d,red -ACTGAGTCACGGCCAT-1,s,d,red -CTGCCTATCACCTCGT-1,s,d,red -TGGGAAGGTAGCACGA-1,s,d,red -CAGAGAGGTCCAGTGC-1,s,d,red -AGCATACTCAGAGGTG-1,s,d,red -CGTGAGCGTATATGGA-1,s,d,red -CGTTAGAAGGAGTAGA-1,s,d,red -CTGCCTATCAATAAGG-1,s,d,red -GAAATGAAGATATGGT-1,s,d,red -GACGTTACAGGATCGA-1,s,d,red -GAGTCCGAGGCCCTTG-1,s,d,red -GCTGCTTCATATGGTC-1,s,d,red -TACTTACGTAAGGGAA-1,s,d,red -CCGGTAGTCGGTTCGG-1,s,d,red diff --git a/enclone_main/testx/inputs/test35_meta b/enclone_main/testx/inputs/test35_meta deleted file mode 100644 index 8e93fcf1d..000000000 --- a/enclone_main/testx/inputs/test35_meta +++ /dev/null @@ -1,2 +0,0 @@ -bcr,origin,donor,bc -123085,s,d,testx/inputs/test35_bc diff --git a/enclone_print/Cargo.toml b/enclone_print/Cargo.toml index 8c2b47246..47c29ad34 100644 --- a/enclone_print/Cargo.toml +++ b/enclone_print/Cargo.toml @@ -1,50 +1,59 @@ [package] name = "enclone_print" -version = "0.4.49" +version = "0.5.219" authors = ["""David Jaffe , + Nigel Delaney , Keri Dockter , + Jessica Hamel , + Lance Hepler , Shaun Jackman , Sreenath Krishnan , Meryl Lewis , + Alvin Liang , Patrick Marks , Wyatt McDonnell """] -edition = "2018" -license = "LICENSE.txt" +edition = "2021" +license-file = "LICENSE.txt" publish 
= false # Please do not edit crate versions within this file. Instead edit the file master.toml # in the root of the enclone repo. +# To keep compile time down, this file should *not* access the enclone crate. + [dependencies] -amino = "0.1.1" -ansi_escape = "0.1.0" -bio = "0.31.0" -bytes = "0.5.5" -byteorder = "1.3.2" -debruijn = "0.3.2" +amino = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +ansi_escape = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +bio_edit = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +debruijn = "0.3" +enclone_args = { path = "../enclone_args" } enclone_core = { path = "../enclone_core" } enclone_proto = { path = "../enclone_proto" } -equiv = "0.1.1" -failure = "0.1.5" -io_utils = "0.2" -itertools = "0.9.0" -mirror_sparse_matrix = "0.1.4" -ndarray = "0.13" -permutation = "0.2.5" -pretty_trace = "0.3.2" -rayon = "1.0.2" -serde = "1.0.90" -serde_derive = "1.0.102" -serde_json = "*" -stats_utils = "0.1.1" -string_utils = "0.1.1" -tables = "0.1.2" -vdj_ann = { git = "https://github.com/10XGenomics/rust-toolbox.git", rev="183e2d657e6436494072a32cf8da4f7b753d1e69" } -vector_utils = "0.1.3" +enclone_vars = { path = "../enclone_vars" } +equiv = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +expr_tools = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +io_utils = { version = "0.3", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +itertools.workspace = true +ndarray = "0.15" +permutation = "0.4" +qd = { git = "https://github.com/Barandis/qd" } +rayon = "1" +regex = { version = "1", default-features = false, features = ["std", "perf"] } +serde_json = "1" +stats_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +string_utils = 
{ version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +tables = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +triple_accel = "0.4" +vdj_ann = { version = "0.4", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vector_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } -[dependencies.hdf5] +[target.'cfg(not(windows))'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" features = ["conda"] -git = "https://github.com/pmarks/hdf5-rs.git" -rev = "0c98e57b2af1f4247708c198b324ba3a8bc18dba" - +default-features = false +[target.'cfg(windows)'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" +default-features = false diff --git a/enclone_print/LICENSE.txt b/enclone_print/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/enclone_print/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/enclone_print/src/build_table_stuff.rs b/enclone_print/src/build_table_stuff.rs new file mode 100644 index 000000000..773f8ac37 --- /dev/null +++ b/enclone_print/src/build_table_stuff.rs @@ -0,0 +1,359 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +use crate::print_utils1::insert_position_rows; +use ansi_escape::bold; +use enclone_core::defs::{justification, ColInfo, EncloneControl, ExactClonotype}; +use itertools::Itertools; +use string_utils::{strme, TextUtils}; +use vector_utils::unique_sort; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn build_table_stuff( + ctl: &EncloneControl, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + rsi: &ColInfo, + vars: &[Vec], + show_aa: &[Vec], + field_types: &[Vec], + row1: &mut Vec, + justify: &mut Vec, + drows: &mut Vec>, + rows: &mut Vec>, + lvars: &[String], +) { + // Build lead header row and justification to match. + + let cols = rsi.vids.len(); + let nexacts = exacts.len(); + if !ctl.clono_print_opt.bu { + row1.push("#".to_string()); + } else { + row1.push("# barcode".to_string()); + } + justify.push(b'l'); + for x in lvars { + let x = if x.contains(':') { + x.before(":") + } else { + x.as_str() + }; + row1.push(x.to_string()); + justify.push(justification(x)); + } + + // Insert main chain row. Then insert chain info row if we're using CHAIN_SPLIT. 
+ + let mut row = vec!["".to_string(); row1.len()]; + for j in 0..cols { + if rsi.chain_descrip[j].contains(&"IGH".to_string()) + || rsi.chain_descrip[j].contains(&"TRB".to_string()) + { + row.push(bold(&format!("CHAIN {}", j + 1))); + } else { + row.push(format!("CHAIN {}", j + 1)); + } + for _ in 1..rsi.cvars[j].len() { + row.push("\\ext".to_string()); + } + } + rows.push(row); + let mut row = vec!["".to_string(); row1.len()]; + for j in 0..cols { + if !ctl.gen_opt.fold_headers { + if rsi.chain_descrip[j].contains(&"IGH".to_string()) + || rsi.chain_descrip[j].contains(&"TRB".to_string()) + { + row.push(bold(&rsi.chain_descrip[j].to_string())); + } else { + row.push(rsi.chain_descrip[j].to_string()); + } + } else if rsi.chain_descrip[j].contains(&"IGH".to_string()) + || rsi.chain_descrip[j].contains(&"TRB".to_string()) + { + row.push(bold(rsi.chain_descrip[j].before(" ◆ "))); + } else { + row.push(rsi.chain_descrip[j].before(" ◆ ").to_string()); + } + for _ in 1..rsi.cvars[j].len() { + row.push("\\ext".to_string()); + } + } + rows.push(row); + if ctl.gen_opt.fold_headers { + let mut row = vec!["".to_string(); row1.len()]; + for j in 0..cols { + let mut next = rsi.chain_descrip[j].after(" ◆ ").to_string(); + if next.contains(" ◆ ") { + next = next.before(" ◆ ").to_string(); + } + if rsi.chain_descrip[j].contains(&"IGH".to_string()) + || rsi.chain_descrip[j].contains(&"TRB".to_string()) + { + row.push(bold(&format!("◆ {next}"))); + } else { + row.push(format!("◆ {next}")); + } + for _ in 1..rsi.cvars[j].len() { + row.push("\\ext".to_string()); + } + } + rows.push(row); + let mut have_last = false; + for j in 0..cols { + if rsi.chain_descrip[j].after(" ◆ ").contains(" ◆ ") { + have_last = true; + } + } + if have_last { + let mut row = vec!["".to_string(); row1.len()]; + for j in 0..cols { + let mut last = String::new(); + if rsi.chain_descrip[j].after(" ◆ ").contains(" ◆ ") { + last = rsi.chain_descrip[j].after(" ◆ ").after(" ◆ ").to_string(); + } + if 
last.is_empty() { + row.push(String::new()); + } else if rsi.chain_descrip[j].contains(&"IGH".to_string()) + || rsi.chain_descrip[j].contains(&"TRB".to_string()) + { + row.push(bold(&format!("◆ {last}"))); + } else { + row.push(format!("◆ {last}")); + } + for _ in 1..rsi.cvars[j].len() { + row.push("\\ext".to_string()); + } + } + rows.push(row); + } + } + + // Insert divider row (horizontal line across the chains). + + let mut row = vec!["".to_string(); lvars.len() + 1]; + let mut ncall = 0; + for j in 0..cols { + ncall += rsi.cvars[j].len(); + } + row.append(&mut vec!["\\hline".to_string(); ncall]); + rows.push(row); + + // Insert position rows. + + *drows = insert_position_rows(ctl, rsi, show_aa, field_types, vars, row1); + let mut drows2 = drows.clone(); + rows.append(&mut drows2); + + // Insert main per-chain header row. + + let mut row = vec!["".to_string(); row1.len()]; + for cx in 0..cols { + let show = &show_aa[cx]; + for j in 0..rsi.cvars[cx].len() { + if rsi.cvars[cx][j] != *"amino" { + if drows.is_empty() { + row.push(rsi.cvars[cx][j].to_string()); + } else { + row.push("".to_string()); + } + continue; + } + for u in 0..nexacts { + let m = rsi.mat[cx][u]; + if m.is_none() { + continue; + } + let m = m.unwrap(); + let mut n = show.len(); + for k in 1..show.len() { + if field_types[cx][k] != field_types[cx][k - 1] && !ctl.gen_opt.nospaces { + n += 1; + } + } + let mut ch = vec![' '; n]; + let amino = &ctl.clono_print_opt.amino; + let ex = &exact_clonotypes[exacts[u]]; + let x = &ex.share[m]; + let mut cs1 = 0; + if rsi.cdr1_starts[cx].is_some() { + cs1 = rsi.cdr1_starts[cx].unwrap(); + } + let mut cs2 = 0; + if rsi.cdr2_starts[cx].is_some() { + cs2 = rsi.cdr2_starts[cx].unwrap(); + } + let mut fs2 = 0; + if rsi.fr2_starts[cx].is_some() { + fs2 = rsi.fr2_starts[cx].unwrap(); + } + let mut fs3 = 0; + if rsi.fr3_starts[cx].is_some() { + fs3 = rsi.fr3_starts[cx].unwrap(); + } + let fields = [ + ( + "fwr1", + rsi.fr1_starts[cx], + cs1, + 
rsi.cdr1_starts[cx].is_some(), + ), + ( + "fwr2", + fs2, + cs2, + rsi.fr2_starts[cx].is_some() && rsi.cdr2_starts[cx].is_some(), + ), + ( + "fwr3", + fs3, + rsi.cdr3_starts[cx], + rsi.fr3_starts[cx].is_some(), + ), + ( + "cdr1", + cs1, + fs2, + rsi.cdr1_starts[cx].is_some() && rsi.fr2_starts[cx].is_some(), + ), + ( + "cdr2", + cs2, + fs3, + rsi.cdr2_starts[cx].is_some() && rsi.fr3_starts[cx].is_some(), + ), + ( + "cdr3", + rsi.cdr3_starts[cx], + rsi.cdr3_starts[cx] + x.cdr3_aa.len() * 3, + true, + ), + ( + "fwr4", + rsi.cdr3_starts[cx] + x.cdr3_aa.len() * 3, + rsi.seq_del_lens[cx] - 1, + true, + ), + ]; + for field in fields { + if field.3 && field.1 <= field.2 && amino.contains(&field.0.to_string()) { + let cs1 = field.1 / 3; + let mut ch_start = 0; + for (k, (&s, &t)) in show.iter().zip(field_types[cx].iter()).enumerate() { + if k > 0 && t != field_types[cx][k - 1] && !ctl.gen_opt.nospaces { + ch_start += 1; + } + if s == cs1 { + break; + } + ch_start += 1; + } + let q = (field.2 - field.1) / 3; + + // Catch an error condition that has happened a few times. + + if ch_start + q > ch.len() { + let mut ds = Vec::<&str>::new(); + for clone in &ex.clones { + let li = clone[m].dataset_index; + ds.push(ctl.origin_info.dataset_id[li].as_str()); + } + unique_sort(&mut ds); + let fields_msg = format!( + "fields[z].0 = {}, fields[z].1 = {}, fields[z].2 = {},", + field.0, field.1, field.2, + ); + panic!( + "Internal error, out of range in \ + build_table_stuff, CDR3 = {}, datasets = {},\n\ + ch_start = {}, q = {}, ch.len() = {},\n\ + {}\n\ + show = {},\n\ + field_types = {}.", + x.cdr3_aa, + ds.iter().format(","), + ch_start, + q, + ch.len(), + fields_msg, + show.iter().format(","), + field_types[cx].iter().format(","), + ); + } + + // Do the work. + + let t = field.0.to_ascii_uppercase(); + let t = t.as_bytes(); + // The second form of this gets converted below. 
+ let c = if t[0] == b'C' || !ctl.gen_opt.nospaces { + "═" + } else { + "┅" + }; + let mut s = String::new(); + if q >= 4 { + let left = (q - 3) / 2; + let right = q - left - 4; + s.reserve((left + right) * c.len() + t.len()); + for _ in 0..left { + s += c; + } + s += strme(t); + for _ in 0..right { + s += c; + } + } else if q == 3 { + s += strme(&t[0..1]); + s += strme(&t[2..4]); + } else if q == 2 { + s += strme(&t[0..1]); + s += strme(&t[3..4]); + } else if q == 1 { + s += strme(&t[3..4]); + } + let schars = s.chars().take(q).collect::>(); + ch[ch_start..(q + ch_start)].copy_from_slice(&schars); + } + } + let mut s = String::new(); + for c in ch { + s.push(c); + } + s = s.trim_end().to_string(); + + // Convert ┅ to ═ in different color. + + if ctl.gen_opt.nospaces { + let chars = s.chars().collect::>(); + s.clear(); + for (i, &char_i) in chars.iter().enumerate() { + if (i == 0 || (i > 0 && chars[i - 1] != '┅')) && char_i == '┅' { + s += ""; + s.push('═'); + } else if char_i == '┅' { + s.push('═'); + if i == chars.len() - 1 || (i < chars.len() - 1 && chars[i + 1] != '┅') + { + s += ""; + } + } else { + s.push(char_i); + } + } + s = s.replace("FWR1", "FWR1"); + s = s.replace("FWR2", "FWR2"); + s = s.replace("FWR3", "FWR3"); + s = s.replace("FWR4", "FWR4"); + } + + // Save. + + row.push(s); + break; + } + } + } + rows.push(row); +} diff --git a/enclone_print/src/define_mat.rs b/enclone_print/src/define_mat.rs new file mode 100644 index 000000000..1490fe37b --- /dev/null +++ b/enclone_print/src/define_mat.rs @@ -0,0 +1,441 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype, PotentialJoin}; +use enclone_core::join_one::join_one; +use enclone_proto::types::DonorReferenceItem; +use equiv::EquivRel; +use qd::Double; +use std::cmp::max; +use std::collections::HashMap; +use vdj_ann::refx::RefData; +use vector_utils::{bin_position, next_diff12_3, next_diff1_3, unique_sort}; + +// Define an equivalence relation on the chains, introducing connections defined by the +// raw joins. Also join where there are identical V..J sequences. + +fn joiner( + infos: &[usize], + info: &[CloneInfo], + to_exacts: &HashMap, + raw_joinsx: &[Vec], + chains: &[(usize, usize)], + seq_chains: &[(Vec, usize, usize)], +) -> EquivRel { + let mut e = EquivRel::new(chains.len() as i32); + for i1 in 0..infos.len() { + let j1 = infos[i1]; + let u1 = info[j1].clonotype_index; + let v1 = to_exacts[&u1]; + let m1s = &info[j1].exact_cols; + for i2 in raw_joinsx[i1].iter() { + let j2 = infos[*i2]; + let u2 = info[j2].clonotype_index; + let v2 = to_exacts[&u2]; + let m2s = &info[j2].exact_cols; + for j in 0..2 { + let z1 = bin_position(chains, &(v1, m1s[j])); + let z2 = bin_position(chains, &(v2, m2s[j])); + e.join(z1, z2); + } + } + } + let mut i = 0; + while i < seq_chains.len() { + let j = next_diff1_3(seq_chains, i as i32) as usize; + for k in i + 1..j { + let (x1, x2) = (&seq_chains[i], &seq_chains[k]); + let z1 = bin_position(chains, &(x1.1, x1.2)); + let z2 = bin_position(chains, &(x2.1, x2.2)); + e.join(z1, z2); + } + i = j; + } + e +} + +pub fn setup_define_mat( + orbit: &[i32], + info: &[CloneInfo], +) -> (Vec<(Vec, usize, i32)>, Vec) { + let mut od = Vec::<(Vec, usize, i32)>::new(); + for id in orbit.iter() { + let x: &CloneInfo = &info[*id as usize]; + od.push((x.origin.clone(), x.clonotype_id, *id)); + } + od.sort(); + let mut exacts = Vec::::new(); + let mut j = 0; + while j < od.len() { + let k = next_diff12_3(&od, j as i32) as usize; + exacts.push(od[j].1); + j = k; + } + (od, exacts) +} + 
+// This generates a cols x nexacts matrices for a given clonotype, where cols is defined by the +// algorithm, and is the number of columns (chains) in the clonotype table. + +pub fn define_mat( + is_bcr: bool, + to_bc: &HashMap<(usize, usize), Vec>, + sr: &[Vec], + ctl: &EncloneControl, + exact_clonotypes: &[ExactClonotype], + exacts: &[usize], + od: &[(Vec, usize, i32)], + info: &[CloneInfo], + raw_joins: &[Vec], + refdata: &RefData, + dref: &[DonorReferenceItem], +) -> Vec>> { + // Define map of indices into exacts. + + let nexacts = exacts.len(); + let to_exacts: HashMap = + exacts.iter().enumerate().map(|(u, &x)| (x, u)).collect(); + + // Get the info indices corresponding to this clonotype. + + let mut infos: Vec = od + .iter() + .filter_map(|i| { + let x = i.2 as usize; + if to_exacts.contains_key(&info[x].clonotype_index) { + Some(x) + } else { + None + } + }) + .collect(); + infos.sort_unstable(); + + // Define map of exacts to infos. + + let mut to_infos = vec![Vec::::new(); nexacts]; + for (i, &inf) in infos.iter().enumerate() { + let u = to_exacts[&info[inf].clonotype_index]; + to_infos[u].push(i); + } + + // Form the set of all chains that appear in an exact subclonotypes of this clonotype, and + // also track the V..J sequences for the chains. + + let mut chains = Vec::<(usize, usize)>::new(); + let mut seq_chains = Vec::<(Vec, usize, usize)>::new(); + for (u, &exu) in exacts.iter().enumerate() { + let ex = &exact_clonotypes[exu]; + for (m, share) in ex.share.iter().enumerate() { + chains.push((u, m)); + seq_chains.push((share.seq.clone(), u, m)); + } + } + seq_chains.sort(); + + // Gather the raw joins. + + let mut raw_joinsx = vec![Vec::::new(); infos.len()]; + for (&j1, raw_i1) in infos.iter().zip(raw_joinsx.iter_mut()) { + for x in raw_joins[j1].iter() { + let i2 = bin_position(&infos, x); + if i2 >= 0 { + raw_i1.push(i2 as usize); + } + } + } + + // Look for additional raw joins. 
In the join stage, for efficiency reasons, we don't try to + // make a join if two info elements are already in the same equivalence class. This causes + // us to miss raw joins for two reasons: + // • One reason is that where we have two exact subclonotypes, and at least one one has more + // than two chains, then we may miss a raw join that is needed here. + // • Another reason is that exact subclonotypes may have been deleted since the original + // equivalence relation was built. This we address partially. + + let mut extras = Vec::<(usize, usize)>::new(); + for (i1, (raw_i1, &j1)) in raw_joinsx.iter().zip(infos.iter()).enumerate() { + for &i2 in raw_i1.iter() { + let j2 = infos[i2]; + let (u1, u2) = (info[j1].clonotype_index, info[j2].clonotype_index); + let (ex1, ex2) = (&exact_clonotypes[u1], &exact_clonotypes[u2]); + let (v1, v2) = (to_exacts[&u1], to_exacts[&u2]); + if ex1.share.len() > 2 || ex2.share.len() > 2 { + let (s1, s2) = (&to_infos[v1], &to_infos[v2]); + for k1 in s1.iter() { + for k2 in s2.iter() { + let (k1, k2) = (*k1, *k2); + if (k1 == i1 && k2 == i2) || (k1 == i2 && k2 == i1) { + continue; + } + let (l1, l2) = (infos[k1], infos[k2]); + if info[l1].lens == info[l2].lens { + let mut pot = Vec::::new(); + if join_one( + is_bcr, + l1, + l2, + ctl, + exact_clonotypes, + info, + to_bc, + sr, + &mut pot, + refdata, + dref, + ) { + extras.push((k1, k2)); + } + } + } + } + } + } + } + for x in extras.iter() { + raw_joinsx[x.0].push(x.1); + } + + // Define an initial equivalence relation on the chains, and get orbit representatives. + + let mut e = joiner(&infos, info, &to_exacts, &raw_joinsx, &chains, &seq_chains); + let mut r = Vec::::new(); + e.orbit_reps(&mut r); + + // First for each pair of chain orbits with one "heavy" and one "light", pick some info + // entries, if there are any. This is effectively at random. A parameter governs how + // much we pick. 
+ + let mut rxi = Vec::<(usize, usize, usize)>::new(); // (heavy orbit, light orbit, infos index) + for (i, &inf_i) in infos.iter().enumerate() { + let z = &info[inf_i]; + let u = z.clonotype_index; + let v = to_exacts[&u]; + if z.exact_cols.len() != 2 { + continue; + } + let (m1, m2) = (z.exact_cols[0], z.exact_cols[1]); + let ex = &exact_clonotypes[u]; + if !ex.share[m1].left || ex.share[m2].left { + continue; // maybe never happens + } + let p1 = e.class_id(bin_position(&chains, &(v, m1))); + let p2 = e.class_id(bin_position(&chains, &(v, m2))); + let q1 = bin_position(&r, &p1) as usize; + let q2 = bin_position(&r, &p2) as usize; + rxi.push((q1, q2, i)); + } + rxi.sort_unstable(); + const MAX_USE: usize = 5; // knob set empirically + let mut rxir = Vec::<(usize, usize, usize)>::new(); // (heavy orbit, light orbit, info index) + let mut i = 0; + while i < rxi.len() { + let j = next_diff12_3(&rxi, i as i32) as usize; + rxir.extend(&rxi[i..j.min(i + MAX_USE)]); + i = j; + } + + // Now for each pair of these, if they are not effectively joined, attempt to join them. + // This partially addresses the "second reason" described above. It is partial because we + // picked an info entry above at random, rather than trying them all. + + for f1 in rxir.iter() { + for f2 in rxir.iter() { + if f1.0 != f2.0 || f1.1 != f2.1 { + let (i1, i2) = (infos[f1.2], infos[f2.2]); + if info[i1].lens != info[i2].lens { + continue; + } + let mut pot = Vec::::new(); + if join_one( + is_bcr, + i1, + i2, + ctl, + exact_clonotypes, + info, + to_bc, + sr, + &mut pot, + refdata, + dref, + ) { + e.join(r[f1.0], r[f2.0]); + e.join(r[f1.1], r[f2.1]); + } + } + } + } + + // Find the exact subclonotypes having three chains and list their orbits, allowing for order + // to vary. 
+ + let mut r = Vec::::new(); + e.orbit_reps(&mut r); + let mut threes = Vec::>::new(); + let mut threesp = HashMap::, usize>::new(); + for u in 0..nexacts { + let ex = &exact_clonotypes[exacts[u]]; + if ex.share.len() == 3 { + let zs = [ + [0, 1, 2], + [0, 2, 1], + [1, 0, 2], + [1, 2, 0], + [2, 0, 1], + [2, 1, 0], + ]; + for z in zs.iter() { + if ex.share[z[0]].left && !ex.share[z[2]].left { + let p1 = e.class_id(bin_position(&chains, &(u, z[0]))); + let p2 = e.class_id(bin_position(&chains, &(u, z[1]))); + let p3 = e.class_id(bin_position(&chains, &(u, z[2]))); + let q1 = bin_position(&r, &p1) as usize; + let q2 = bin_position(&r, &p2) as usize; + let q3 = bin_position(&r, &p3) as usize; + threes.push(vec![q1, q2, q3]); + threesp.insert(vec![q1, q2, q3], u); + } + } + } + } + unique_sort(&mut threes); + + // There is one more case to deal with. This is where we have two exact subclonotypes, each + // with three chains, and we joined two of their chains, but not the third. And where the + // join algorithm would not have joined the third. In this case, if the third chains are + // "close enough", we join them anyway. As before, we only test representatives. 
+ + for t1 in threes.iter() { + 't2_loop: for t2 in threes.iter() { + if t1 == t2 { + continue; + } + let (mut matches, mut mismatch) = (0, 0); + for i in 0..3 { + if t1[i] == t2[i] { + matches += 1; + } else { + mismatch = i; + } + } + if matches != 2 { + continue; + } + let (u1, u2) = (threesp[t1], threesp[t2]); + let (ex1, ex2) = (&exact_clonotypes[exacts[u1]], &exact_clonotypes[exacts[u2]]); + for (m1, ex1_sm1) in ex1.share.iter().enumerate() { + let p1 = bin_position(&chains, &(u1, m1)); + let q1 = bin_position(&r, &p1) as usize; + if q1 == t1[mismatch] { + for (m2, ex2_sm2) in ex2.share.iter().enumerate() { + let p2 = bin_position(&chains, &(u2, m2)); + let q2 = bin_position(&r, &p2) as usize; + if q2 == t2[mismatch] { + let (seq1, seq2) = (&ex1_sm1.seq, &ex2_sm2.seq); + if seq1.len() == seq2.len() { + const MAX_DIFFS: usize = 10; + let diffs = seq1 + .iter() + .zip(seq2.iter()) + .filter(|(&s1, &s2)| s1 != s2) + .count(); + if diffs <= MAX_DIFFS { + e.join(p1, p2); + break 't2_loop; + } + } + } + } + } + } + } + } + + // Get representatives for the chain orbits. + + let mut r = Vec::::new(); + e.orbit_reps(&mut r); + + // Reorder the chains. This is done to get the heavy chains before the light chains and also + // to mimic the behavior of the previous version of this algorithm, to minimiize churn. Then + // update the representatives. 
+ + let mut chainsp = Vec::<(String, usize, usize, usize)>::with_capacity(exacts.len()); + for (u, &exu) in exacts.iter().enumerate() { + let ex = &exact_clonotypes[exu]; + for (m, share_m) in ex.share.iter().enumerate() { + let mut c = share_m.chain_type.clone(); + if c.starts_with("TRB") { + c = c.replacen("TRB", "TRX", 1); + } else if c.starts_with("TRA") { + c = c.replacen("TRA", "TRY", 1); + } + chainsp.push((format!("{c}:{}", share_m.cdr3_aa), share_m.seq.len(), u, m)); + } + } + chainsp.sort(); + let mut chainso = Vec::<(usize, usize)>::new(); + let mut chainsox = Vec::<(usize, usize, usize)>::new(); + for (i, c) in chainsp.into_iter().enumerate() { + chainso.push((c.2, c.3)); + chainsox.push((c.2, c.3, i)); + } + chainsox.sort_unstable(); + for ri in r.iter_mut() { + *ri = chainsox[*ri as usize].2 as i32; + } + r.sort_unstable(); + + // Create rmap, that sends + // (index into exact subclonotypes for this clonotype, + // index into chains for one of these exact subclonotypes) + // to an index into the orbit reps for the chains. + + let mut rpos = HashMap::<(usize, usize), usize>::new(); + for (i, chain) in chains.into_iter().enumerate() { + let c = e.class_id(i as i32); + let f = chainsox[c as usize].2 as i32; + let q = bin_position(&r, &f) as usize; + rpos.insert((chain.0, chain.1), q); + } + + // Find the maximum multiplicity of each orbit, and the number of columns. + + let mut mm = vec![0; r.len()]; + for (u, &exu) in exacts.iter().enumerate() { + let ex = &exact_clonotypes[exu]; + let mut mm0 = vec![0; r.len()]; + for m in 0..ex.share.len() { + mm0[rpos[&(u, m)]] += 1; + } + for i in 0..r.len() { + mm[i] = max(mm[i], mm0[i]); + } + } + let cols = mm.iter().sum(); + + // Define a matrix mat[col][ex] which is the column of the exact subclonotype ex corresponding + // to the given column col of the clonotype, which may or may not be defined. 
+ + let mut mat = vec![vec![None; nexacts]; cols]; + for (cx, cc) in mat.iter_mut().enumerate() { + // for every column + 'exact: for (u, &exu) in exacts.iter().enumerate() { + // for every exact subclonotype + let ex = &exact_clonotypes[exu]; + let mut mm0 = vec![0; r.len()]; + for m in 0..ex.share.len() { + // for every chain in the exact subclonotype: + let q = rpos[&(u, m)]; + let mut col = mm0[q]; + col += mm.iter().take(q).sum::(); + mm0[q] += 1; + if col == cx { + cc[u] = Some(m); + continue 'exact; + } + } + } + } + mat +} diff --git a/enclone_print/src/filter.rs b/enclone_print/src/filter.rs index a1c0757f5..951ec850c 100644 --- a/enclone_print/src/filter.rs +++ b/enclone_print/src/filter.rs @@ -1,24 +1,28 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // Test a clonotype to see if it passes the filters. // See also enclone_core src for a list of these filters and // the related struct. -use vdj_ann::*; +use vdj_ann::refx; -use self::refx::*; -use enclone_core::defs::*; -use std::cmp::*; -use string_utils::*; -use vector_utils::*; +use self::refx::RefData; +use enclone_core::defs::{ColInfo, EncloneControl, ExactClonotype, GexInfo}; +use enclone_core::opt_d::opt_d; +use enclone_proto::types::DonorReferenceItem; +use std::cmp::{max, min}; +use string_utils::TextUtils; +use triple_accel::levenshtein; +use vector_utils::{make_freq, next_diff, unique_sort}; pub fn survives_filter( - exacts: &Vec, + exacts: &[usize], rsi: &ColInfo, ctl: &EncloneControl, - exact_clonotypes: &Vec, + exact_clonotypes: &[ExactClonotype], refdata: &RefData, gex_info: &GexInfo, + dref: &[DonorReferenceItem], ) -> bool { let mut mults = Vec::::new(); for i in 0..exacts.len() { @@ -28,11 +32,15 @@ pub fn survives_filter( if n == 0 { return false; } + // Clonotypes with at least n cells + if n < ctl.clono_filt_opt.ncells_low { return false; } + // Clonotypes having iNKT or MAIT evidence + if 
ctl.clono_filt_opt.inkt { let mut evidence = false; for s in exacts.iter() { @@ -65,7 +73,9 @@ pub fn survives_filter( return false; } } + // Clonotypes marked by heuristics + if ctl.clono_filt_opt.marked { let mut marked = false; for s in exacts.iter() { @@ -80,8 +90,10 @@ pub fn survives_filter( return false; } } + // Marked clonotypes which are also B cells by annotation - if ctl.clono_filt_opt.marked_b { + + if ctl.clono_filt_opt_def.marked_b { let mut marked_b = false; for s in exacts.iter() { let ex = &exact_clonotypes[*s]; @@ -89,10 +101,10 @@ pub fn survives_filter( if ex.clones[i][0].marked { let li = ex.clones[i][0].dataset_index; let bc = &ex.clones[i][0].barcode; - if gex_info.cell_type[li].contains_key(&bc.clone()) { - if gex_info.cell_type[li][&bc.clone()].starts_with('B') { - marked_b = true; - } + if gex_info.cell_type[li].contains_key(&bc.clone()) + && gex_info.cell_type[li][&bc.clone()].starts_with('B') + { + marked_b = true; } } } @@ -102,8 +114,29 @@ pub fn survives_filter( } } let cols = rsi.vids.len(); + + // DATASET=... 
+ + if ctl.clono_filt_opt.dataset.is_some() { + let mut ok = false; + for d in ctl.clono_filt_opt.dataset.as_ref().unwrap().iter() { + for s in exacts.iter() { + let ex = &exact_clonotypes[*s]; + for i in 0..ex.clones.len() { + if ctl.origin_info.dataset_id[ex.clones[i][0].dataset_index] == *d { + ok = true; + } + } + } + } + if !ok { + return false; + } + } + // Barcode required - if ctl.clono_filt_opt.barcode.len() > 0 { + + if !ctl.clono_filt_opt.barcode.is_empty() { let mut ok = false; for s in exacts.iter() { let ex = &exact_clonotypes[*s]; @@ -119,7 +152,9 @@ pub fn survives_filter( return false; } } + // Clonotypes with deletions + if ctl.clono_filt_opt.del { let mut ok = false; for s in exacts.iter() { @@ -134,11 +169,13 @@ pub fn survives_filter( return false; } } + // Clonotypes with same V gene in 2 chains + if ctl.clono_filt_opt.vdup { let mut dup = false; let mut x = rsi.vids.clone(); - x.sort(); + x.sort_unstable(); let mut i = 0; while i < x.len() { let j = next_diff(&x, i); @@ -151,7 +188,9 @@ pub fn survives_filter( return false; } } + // Clonotypes with constant region differences + if ctl.clono_filt_opt.cdiff { let mut cdiff = false; for s in exacts.iter() { @@ -159,9 +198,8 @@ pub fn survives_filter( for m in 0..ex.share.len() { let cstart = ex.share[m].j_stop; let clen = ex.share[m].full_seq.len() - cstart; - let cid = ex.share[m].c_ref_id; - if cid.is_some() { - let r = &refdata.refs[cid.unwrap()]; + if let Some(cid) = ex.share[m].c_ref_id { + let r = &refdata.refs[cid]; for i in 0..min(clen, r.len()) { let tb = ex.share[m].full_seq[cstart + i]; let rb = r.to_ascii_vec()[i]; @@ -176,7 +214,9 @@ pub fn survives_filter( return false; } } + // Clonotypes with onesie exact subclonotypes + if ctl.clono_filt_opt.have_onesie { let mut have = false; for i in 0..exacts.len() { @@ -188,7 +228,9 @@ pub fn survives_filter( return false; } } + // Clonotypes with full length V..J + if !ctl.clono_filt_opt.vj.is_empty() { let mut have_vj = false; for s in 
exacts.iter() { @@ -203,15 +245,24 @@ pub fn survives_filter( return false; } } + // Clonotypes with no more than n cells + if n > ctl.clono_filt_opt.ncells_high { return false; } - // Clonotypes with at least n chains + + // Clonotypes with at least n chains or at most n chains + if exacts.len() < ctl.clono_filt_opt.min_exacts { return false; } + if exacts.len() > ctl.clono_filt_opt.max_exacts { + return false; + } + // Clonotypes with given V gene name + for i in 0..ctl.clono_filt_opt.seg.len() { let mut hit = false; for j in 0..ctl.clono_filt_opt.seg[i].len() { @@ -219,9 +270,7 @@ pub fn survives_filter( if refdata.name[rsi.vids[cx]] == ctl.clono_filt_opt.seg[i][j] { hit = true; } - let did = rsi.dids[cx]; - if did.is_some() { - let did = did.unwrap(); + if let Some(did) = rsi.dids[cx] { if refdata.name[did] == ctl.clono_filt_opt.seg[i][j] { hit = true; } @@ -229,8 +278,8 @@ pub fn survives_filter( if refdata.name[rsi.jids[cx]] == ctl.clono_filt_opt.seg[i][j] { hit = true; } - if rsi.cids[cx].is_some() { - if refdata.name[rsi.cids[cx].unwrap()] == ctl.clono_filt_opt.seg[i][j] { + if let Some(cid) = rsi.cids[cx] { + if refdata.name[cid] == ctl.clono_filt_opt.seg[i][j] { hit = true; } } @@ -240,7 +289,35 @@ pub fn survives_filter( return false; } } + for i in 0..ctl.clono_filt_opt.nseg.len() { + let mut hit = false; + for j in 0..ctl.clono_filt_opt.nseg[i].len() { + for cx in 0..cols { + if refdata.name[rsi.vids[cx]] == ctl.clono_filt_opt.nseg[i][j] { + hit = true; + } + if let Some(did) = rsi.dids[cx] { + if refdata.name[did] == ctl.clono_filt_opt.nseg[i][j] { + hit = true; + } + } + if refdata.name[rsi.jids[cx]] == ctl.clono_filt_opt.nseg[i][j] { + hit = true; + } + if let Some(cid) = rsi.cids[cx] { + if refdata.name[cid] == ctl.clono_filt_opt.nseg[i][j] { + hit = true; + } + } + } + } + if hit { + return false; + } + } + // Clonotypes with given V gene number/allele + for i in 0..ctl.clono_filt_opt.segn.len() { let mut hit = false; for j in 
0..ctl.clono_filt_opt.segn[i].len() { @@ -248,9 +325,7 @@ pub fn survives_filter( if refdata.id[rsi.vids[cx]] == ctl.clono_filt_opt.segn[i][j].force_i32() { hit = true; } - let did = rsi.dids[cx]; - if did.is_some() { - let did = did.unwrap(); + if let Some(did) = rsi.dids[cx] { if refdata.id[did] == ctl.clono_filt_opt.segn[i][j].force_i32() { hit = true; } @@ -258,10 +333,8 @@ pub fn survives_filter( if refdata.id[rsi.jids[cx]] == ctl.clono_filt_opt.segn[i][j].force_i32() { hit = true; } - if rsi.cids[cx].is_some() { - if refdata.id[rsi.cids[cx].unwrap()] - == ctl.clono_filt_opt.segn[i][j].force_i32() - { + if let Some(cid) = rsi.cids[cx] { + if refdata.id[cid] == ctl.clono_filt_opt.segn[i][j].force_i32() { hit = true; } } @@ -271,7 +344,35 @@ pub fn survives_filter( return false; } } + for i in 0..ctl.clono_filt_opt.nsegn.len() { + let mut hit = false; + for j in 0..ctl.clono_filt_opt.nsegn[i].len() { + for cx in 0..cols { + if refdata.id[rsi.vids[cx]] == ctl.clono_filt_opt.nsegn[i][j].force_i32() { + hit = true; + } + if let Some(did) = rsi.dids[cx] { + if refdata.id[did] == ctl.clono_filt_opt.nsegn[i][j].force_i32() { + hit = true; + } + } + if refdata.id[rsi.jids[cx]] == ctl.clono_filt_opt.nsegn[i][j].force_i32() { + hit = true; + } + if let Some(cid) = rsi.cids[cx] { + if refdata.id[cid] == ctl.clono_filt_opt.nsegn[i][j].force_i32() { + hit = true; + } + } + } + } + if hit { + return false; + } + } + // Clonotypes with at least n cells + if mults.iter().sum::() < ctl.clono_filt_opt.ncells_low { return false; } @@ -280,7 +381,9 @@ pub fn survives_filter( let ex = &exact_clonotypes[exacts[i]]; numi = max(numi, ex.max_umi_count()); } + // Clonotypes with at least n UMIs for contig + if numi < ctl.clono_filt_opt.min_umi { return false; } @@ -290,11 +393,37 @@ pub fn survives_filter( lis.append(&mut z); } unique_sort(&mut lis); + // Clonotypes found in at least n datasets + if lis.len() < ctl.clono_filt_opt.min_datasets { return false; } + + // Clonotypes found in 
at least n origins + + let mut origins = Vec::::new(); + for id in lis.iter() { + origins.push(ctl.origin_info.origin_id[*id].clone()); + } + unique_sort(&mut origins); + if origins.len() < ctl.clono_filt_opt.min_origins { + return false; + } + + // Clonotypes found in at least n donors + + let mut donors = Vec::::new(); + for id in lis.iter() { + donors.push(ctl.origin_info.donor_id[*id].clone()); + } + unique_sort(&mut donors); + if donors.len() < ctl.clono_filt_opt.min_donors { + return false; + } + // Clonotypes in no more than n datasets + if lis.len() > ctl.clono_filt_opt.max_datasets { return false; } @@ -309,7 +438,7 @@ pub fn survives_filter( datasets.push(ex.clones[j][0].dataset_index); } } - datasets.sort(); + datasets.sort_unstable(); let mut freq = Vec::<(u32, usize)>::new(); make_freq(&datasets, &mut freq); if freq.len() == 1 @@ -320,10 +449,13 @@ pub fn survives_filter( } // Clonotypes with no more and no less than min and max chains + if cols < ctl.clono_filt_opt.min_chains || cols > ctl.clono_filt_opt.max_chains { return false; } + // Clonotypes with given junction AA sequence + if ctl.clono_filt_opt.cdr3.is_some() { let mut ok = false; for s in exacts.iter() { @@ -344,18 +476,184 @@ pub fn survives_filter( return false; } } - let mut donors = Vec::::new(); - for u in 0..exacts.len() { - let ex = &exact_clonotypes[exacts[u]]; - for m in 0..ex.clones.len() { - if ex.clones[m][0].donor_index.is_some() { - donors.push(ex.clones[m][0].donor_index.unwrap()); - } + + // Clonotypes having given CDR3 (given by Levenshtein distance pattern). 
+ + if !ctl.clono_filt_opt.cdr3_lev.is_empty() { + let fields = ctl.clono_filt_opt.cdr3_lev.split('|'); + let cdr3 = fields + .map(|field| { + let (c, d) = field.split_once('~').unwrap(); + (c, d.force_usize()) + }) + .collect::>(); + let ok = exacts.iter().any(move |s| { + exact_clonotypes[*s].share.iter().any(|share| { + cdr3.iter().any(|(cdr, dist)| { + levenshtein(share.cdr3_aa.as_bytes(), cdr.as_bytes()) as usize <= *dist + }) + }) + }); + if !ok { + return false; } } + + // Donors. + + let mut donors = exacts + .iter() + .flat_map(|&u| { + exact_clonotypes[u] + .clones + .iter() + .filter_map(|clone| clone[0].donor_index) + }) + .collect::>(); unique_sort(&mut donors); if ctl.clono_filt_opt.fail_only && donors.len() <= 1 { return false; } - return true; + + // Inconsistent D genes. + + if ctl.clono_filt_opt.d_inconsistent { + let mut inconsistent = false; + for col in 0..rsi.mat.len() { + let mut dvotes = Vec::>::new(); + for u in 0..exacts.len() { + let ex = &exact_clonotypes[exacts[u]]; + if let Some(m) = rsi.mat[col][u] { + if ex.share[m].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col], + ); + let mut opt = Vec::new(); + if !ds.is_empty() { + opt = ds[0].clone(); + } + dvotes.push(opt); + } + } + } + unique_sort(&mut dvotes); + if dvotes.len() >= 2 { + inconsistent = true; + } + } + if !inconsistent { + return false; + } + } + + // None D genes. 
+ + if ctl.clono_filt_opt.d_none { + let mut none = false; + for col in 0..rsi.mat.len() { + for u in 0..exacts.len() { + let ex = &exact_clonotypes[exacts[u]]; + if let Some(m) = rsi.mat[col][u] { + if ex.share[m].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col], + ); + let mut opt = Vec::new(); + if !ds.is_empty() { + opt = ds[0].clone(); + } + if opt.is_empty() { + none = true; + } + } + } + } + } + if !none { + return false; + } + } + + // Second D genes. + + if ctl.clono_filt_opt.d_second { + let mut second = false; + for col in 0..rsi.mat.len() { + for u in 0..exacts.len() { + let ex = &exact_clonotypes[exacts[u]]; + if let Some(m) = rsi.mat[col][u] { + if ex.share[m].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col], + ); + let mut opt = Vec::new(); + if !ds.is_empty() { + opt = ds[0].clone(); + } + if opt.len() == 2 { + second = true; + } + } + } + } + } + if !second { + return false; + } + } + + // Done. 
+ + true } diff --git a/enclone_print/src/finish_table.rs b/enclone_print/src/finish_table.rs new file mode 100644 index 000000000..5cec60f1e --- /dev/null +++ b/enclone_print/src/finish_table.rs @@ -0,0 +1,319 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +use crate::build_table_stuff::build_table_stuff; +use crate::print_utils1::make_table; +use crate::print_utils3::{add_header_text, insert_reference_rows}; +use crate::print_utils5::{build_diff_row, insert_consensus_row}; +use enclone_core::defs::{justification, ColInfo, EncloneControl, ExactClonotype}; +use enclone_proto::types::DonorReferenceItem; +use std::collections::HashMap; +use std::fmt::Write; +use string_utils::TextUtils; +use vdj_ann::refx::RefData; +use vector_utils::bin_member; + +pub fn finish_table( + n: usize, + ctl: &EncloneControl, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + rsi: &ColInfo, + vars: &[Vec], + show_aa: &[Vec], + field_types: &[Vec], + lvars: &[String], + refdata: &RefData, + dref: &[DonorReferenceItem], + peer_groups: &[Vec<(usize, u8, u32)>], + mlog: &mut Vec, + logz: &mut String, + stats: &[(String, Vec)], + sr: &mut [(Vec, Vec>, Vec>, usize)], + extra_args: &[String], + pcols_sort: &[String], + out_data: &mut Vec>, + rord: &[usize], + pass: usize, + cdr3_con: &[Vec], +) { + // Fill in exact_subclonotype_id, reorder. + + let nexacts = exacts.len(); + if !ctl.parseable_opt.pout.is_empty() || !extra_args.is_empty() { + for u in 0..nexacts { + macro_rules! 
speak { + ($u:expr, $var:expr, $val:expr) => { + if pass == 2 && (ctl.parseable_opt.pout.len() > 0 || !extra_args.is_empty()) { + if pcols_sort.is_empty() + || bin_member(&pcols_sort, &$var.to_string()) + || bin_member(&extra_args, &$var.to_string()) + { + out_data[$u].insert($var.to_string(), $val); + } + } + }; + } + speak![rord[u], "exact_subclonotype_id", format!("{}", u + 1)]; + } + let mut out_data2 = vec![HashMap::::new(); nexacts]; + for v in 0..nexacts { + out_data2[v] = out_data[rord[v]].clone(); + } + *out_data = out_data2; + } + + // Add header text to mlog. + + let mat = &rsi.mat; + add_header_text(ctl, exacts, exact_clonotypes, rord, mat, mlog); + + // Build table stuff. + + let mut row1 = Vec::::new(); + let mut justify = Vec::::new(); + let mut rows = Vec::>::new(); + let mut drows = Vec::>::new(); + build_table_stuff( + ctl, + exacts, + exact_clonotypes, + rsi, + vars, + show_aa, + field_types, + &mut row1, + &mut justify, + &mut drows, + &mut rows, + lvars, + ); + + // Insert universal and donor reference rows. + + insert_reference_rows( + ctl, + rsi, + show_aa, + field_types, + refdata, + dref, + &row1, + &mut drows, + &mut rows, + exacts, + exact_clonotypes, + peer_groups, + cdr3_con, + ); + + // Insert consensus row. + + insert_consensus_row( + ctl, + rsi, + exacts.len(), + field_types, + show_aa, + &row1, + &mut rows, + ); + + // Insert horizontal line. + + let cols = rsi.mat.len(); + if !drows.is_empty() { + let mut width = 1 + lvars.len(); + for col in 0..cols { + width += rsi.cvars[col].len(); + } + rows.push(vec!["\\hline".to_string(); width]); + } + + // Build the diff row. + + build_diff_row( + ctl, + rsi, + &mut rows, + &mut drows, + &row1, + exacts.len(), + field_types, + show_aa, + ); + + // Finish building table content. + + for (j, srj) in sr.iter_mut().enumerate() { + srj.0[0] = format!("{}", j + 1); // row number (#) + rows.push(srj.0.clone()); + rows.extend(srj.1.clone()); + } + + // Add sum and mean rows. 
+ + if ctl.clono_print_opt.sum { + let mut row = vec!["Σ".to_string()]; + for lvar in lvars { + let mut x = lvar.as_str(); + if x.contains(':') { + x = x.before(":"); + } + let mut found = false; + let mut total = 0.0; + for stat in stats { + if stat.0 == x { + found = true; + total += stat + .1 + .iter() + .filter_map(|statk| statk.parse::().ok()) + .sum::(); + } + } + if !found { + row.push(String::new()); + } else if !lvar.ends_with("_%") { + row.push(format!("{}", total.round() as usize)); + } else { + row.push(format!("{total:.2}")); + } + } + // This is necessary but should not be: + for cx in 0..cols { + for _ in 0..rsi.cvars[cx].len() { + row.push(String::new()); + } + } + rows.push(row); + } + if ctl.clono_print_opt.mean { + let mut row = vec!["μ".to_string()]; + for lvar in lvars { + let mut x = lvar.as_str(); + if x.contains(':') { + x = x.before(":"); + } + let mut found = false; + let mut total = 0.0; + for stat in stats { + if stat.0 == x { + found = true; + total += stat + .1 + .iter() + .filter_map(|statk| statk.parse::().ok()) + .sum::(); + } + } + let mean = total / n as f64; + if !found { + row.push(String::new()); + } else if !lvar.ends_with("_%") { + row.push(format!("{mean:.1}")); + } else { + row.push(format!("{mean:.2}")); + } + } + // This is necessary but should not be: + for cx in 0..cols { + for _ in 0..rsi.cvars[cx].len() { + row.push(String::new()); + } + } + rows.push(row); + } + + // Make table. + + for row in rows.iter_mut() { + for v in row.iter_mut() { + *v = v.replace("|TRX", "TRB").replace("|TRY", "TRA"); + } + } + for cx in 0..cols { + justify.push(b'|'); + for m in 0..rsi.cvars[cx].len() { + justify.push(justification(&rsi.cvars[cx][m])); + } + } + make_table(ctl, &mut rows, &justify, mlog, logz); + + // Add phylogeny. 
+ + let nexacts = exacts.len(); + if ctl.gen_opt.toy { + let mut vrefs = Vec::>::new(); + let mut jrefs = Vec::>::new(); + for cx in 0..cols { + let (mut vref, mut jref) = (Vec::::new(), Vec::::new()); + for (&exact, &m) in exacts.iter().zip(rsi.mat[cx].iter()) { + if let Some(m) = m { + jref = exact_clonotypes[exact].share[m].js.to_ascii_vec(); + } + let vseq1 = refdata.refs[rsi.vids[cx]].to_ascii_vec(); + if rsi.vpids[cx].is_some() { + vref = dref[rsi.vpids[cx].unwrap()].nt_sequence.clone(); + } else { + vref = vseq1.clone(); + } + } + vrefs.push(vref); + jrefs.push(jref); + } + for u1 in 0..nexacts { + let ex1 = &exact_clonotypes[exacts[u1]]; + for u2 in u1 + 1..nexacts { + let ex2 = &exact_clonotypes[exacts[u2]]; + let (mut d1, mut d2) = (0, 0); + let mut d = 0; + for cx in 0..cols { + let (m1, m2) = (rsi.mat[cx][u1], rsi.mat[cx][u2]); + if m1.is_none() || m2.is_none() { + continue; + } + let (m1, m2) = (m1.unwrap(), m2.unwrap()); + let (s1, s2) = (&ex1.share[m1].seq_del, &ex2.share[m2].seq_del); + let n = s1.len(); + let (vref, jref) = (&vrefs[cx], &jrefs[cx]); + for j in 0..vars[cx].len() { + let p = vars[cx][j]; + if s1[p] != s2[p] { + if p < vref.len() - ctl.heur.ref_v_trim { + if s1[p] == vref[p] { + d1 += 1; + } else if s2[p] == vref[p] { + d2 += 1; + } + } else if p >= n - (jref.len() - ctl.heur.ref_j_trim) { + if s1[p] == jref[jref.len() - (n - p)] { + d1 += 1; + } else if s2[p] == jref[jref.len() - (n - p)] { + d2 += 1; + } + } else { + d += 1; + } + } + } + } + if (d1 == 0) ^ (d2 == 0) { + if d1 == 0 { + write!(*logz, "{} ==> {}", u1 + 1, u2 + 1).unwrap(); + } else { + write!(*logz, "{} ==> {}", u2 + 1, u1 + 1).unwrap(); + } + let s = format!( + "; u1 = {}, u2 = {}, d1 = {}, d2 = {}, d = {}\n", + u1 + 1, + u2 + 1, + d1, + d2, + d + ); + *logz += &s; + } + } + } + } +} diff --git a/enclone_print/src/gene_scan.rs b/enclone_print/src/gene_scan.rs new file mode 100644 index 000000000..68c18615a --- /dev/null +++ b/enclone_print/src/gene_scan.rs @@ -0,0 
+1,125 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +use enclone_core::defs::EncloneControl; + +pub fn gene_scan_test( + ctl: &EncloneControl, + stats: &[(String, Vec)], + stats_orig: &[(String, Vec)], + nexacts: usize, + n: usize, + in_test: &mut Vec, + in_control: &mut Vec, +) { + // See if we're in the test and control sets for gene scan (non-exact case). + + if let Some(ref scan_test) = ctl.gen_opt.gene_scan_test { + if !ctl.gen_opt.gene_scan_exact { + let x = scan_test; + let means = x + .var + .iter() + .take(x.n()) + .map(|xn| { + stats + .iter() + .filter_map(|stat| { + if stat.0 == *xn { + Some( + stat.1 + .iter() + .filter_map(|k| k.parse::().ok()) + .sum::(), + ) + } else { + None + } + }) + .next() + .unwrap_or_default() + / n as f64 + }) + .collect::>(); + + in_test.push(x.satisfied(&means)); + let x = ctl.gen_opt.gene_scan_control.as_ref().unwrap(); + let means = x + .var + .iter() + .take(x.n()) + .map(|xn| { + stats + .iter() + .filter_map(|stat| { + if stat.0 == *xn { + Some( + stat.1 + .iter() + .filter_map(|k| k.parse::().ok()) + .sum::(), + ) + } else { + None + } + }) + .next() + .unwrap_or_default() + / n as f64 + }) + .collect::>(); + in_control.push(x.satisfied(&means)); + } + } + + // See if we're in the test and control sets for gene scan (exact case). 
+ + if ctl.gen_opt.gene_scan_test.is_some() && ctl.gen_opt.gene_scan_exact { + let x = ctl.gen_opt.gene_scan_test.clone().unwrap(); + for k in 0..nexacts { + let mut means = Vec::::new(); + for xn in x.var.iter().take(x.n()) { + let mut vals = Vec::::new(); + let mut count = 0; + for stat in stats_orig { + if stat.0 == *xn { + if count == k { + for k in &stat.1 { + if let Ok(v) = k.parse::() { + vals.push(v); + } + } + break; + } else { + count += 1; + } + } + } + let n = vals.len() as f64; + means.push(vals.into_iter().sum::() / n); + } + in_test.push(x.satisfied(&means)); + let x = ctl.gen_opt.gene_scan_control.clone().unwrap(); + let mut means = Vec::::new(); + for xn in x.var.iter().take(x.n()) { + let mut vals = Vec::::new(); + let mut count = 0; + for stat in stats_orig { + if stat.0 == *xn { + if count == k { + for k in &stat.1 { + if let Ok(v) = k.parse::() { + vals.push(v); + } + } + break; + } else { + count += 1; + } + } + } + means.push(vals.into_iter().sum::() / n as f64); + } + in_control.push(x.satisfied(&means)); + } + } +} diff --git a/enclone_print/src/lib.rs b/enclone_print/src/lib.rs index 09f07f685..a6d9b6171 100644 --- a/enclone_print/src/lib.rs +++ b/enclone_print/src/lib.rs @@ -1,6 +1,10 @@ -// Copyright (c) 2020 10x Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. +pub mod build_table_stuff; +pub mod define_mat; pub mod filter; +pub mod finish_table; +pub mod gene_scan; pub mod loupe; pub mod print_clonotypes; pub mod print_utils1; @@ -8,3 +12,6 @@ pub mod print_utils2; pub mod print_utils3; pub mod print_utils4; pub mod print_utils5; +pub mod proc_cvar_auto; +pub mod proc_lvar2; +pub mod proc_lvar_auto; diff --git a/enclone_print/src/loupe.rs b/enclone_print/src/loupe.rs index 1fffe247b..89c3c63b0 100644 --- a/enclone_print/src/loupe.rs +++ b/enclone_print/src/loupe.rs @@ -1,38 +1,49 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. 
All rights reserved. +// // This set of functions writes a protobuf data structure that // Loupe uses to access clonotype data. use enclone_proto::proto_io::write_proto; use enclone_proto::PROTO_VERSION; -use vdj_ann::*; +use vdj_ann::refx; -use self::refx::*; -use amino::*; -use bio::alignment::pairwise::*; +use self::refx::RefData; +use amino::codon_to_aa; +use bio_edit::alignment::pairwise::Aligner; -use debruijn::dna_string::*; -use enclone_core::defs::*; -use enclone_proto::types::*; -use io_utils::*; -use vector_utils::*; +use debruijn::dna_string::DnaString; +use enclone_core::defs::{ColInfo, EncloneControl, ExactClonotype}; +use enclone_proto::types::{ + Alignment, Clonotype, ClonotypeChain, DonorReference, DonorReferenceItem, EncloneOutputs, + ExactSubClonotype, ExactSubClonotypeChain, ExactSubClonotypeChainInfo, + InvariantTCellAnnotation, Metadata, Region, UniversalReference, UniversalReferenceItem, +}; +use io_utils::write_obj; +use std::fmt::Write; // Export donor reference/inferred alt allele sequences pub fn make_donor_refs( - alt_refs: &Vec<(usize, usize, DnaString)>, + alt_refs: &[(usize, usize, DnaString, usize, bool)], refdata: &RefData, ) -> Vec { let mut drefs = Vec::::new(); let mut i = 0; while i < alt_refs.len() { - let j = next_diff12_3(&alt_refs, i as i32) as usize; - for k in i..j { - let x = &alt_refs[k]; + let mut j = i + 1; + while j < alt_refs.len() { + if alt_refs[j].0 != alt_refs[i].0 || alt_refs[j].1 != alt_refs[i].1 { + break; + } + j += 1; + } + // let j = next_diff12_5(alt_refs, i as i32) as usize; + for (k, x) in alt_refs.iter().enumerate().take(j).skip(i) { let donor_id = x.0; let ref_id = x.1; let alt = x.2.to_ascii_vec(); let alt_name = format!( "{}, donor {}, alt allele {}", - refdata.name[ref_id].clone(), + refdata.name[ref_id].as_str(), donor_id, k - i + 1 ); @@ -44,14 +55,14 @@ pub fn make_donor_refs( matches += 1; } else { if matches > 0 { - cigar.push_str(&format!("{}=", matches)); + write!(cigar, 
"{matches}=").unwrap(); } cigar.push_str("1X"); matches = 0; } } if matches > 0 { - cigar.push_str(&format!("{}=", matches)); + write!(cigar, "{matches}=").unwrap(); } drefs.push(DonorReferenceItem { universal_idx: ref_id as u32, @@ -61,7 +72,7 @@ pub fn make_donor_refs( nt_sequence: alt, universal_aln: Alignment { ref_start: 0, - cigar: cigar, + cigar, }, }); } @@ -71,18 +82,16 @@ pub fn make_donor_refs( } fn amino_acid(seq: &[u8], start: usize) -> Vec { - seq[start..] - .chunks_exact(3) - .map(|codon| codon_to_aa(&codon)) - .collect() + seq[start..].chunks_exact(3).map(codon_to_aa).collect() } pub fn make_loupe_clonotype( - exact_clonotypes: &Vec, - exacts: &Vec, + exact_clonotypes: &[ExactClonotype], + exacts: &[usize], rsi: &ColInfo, refdata: &RefData, - dref: &Vec, + dref: &[DonorReferenceItem], + ctl: &EncloneControl, ) -> Clonotype { // Define concatenated universal and donor reference sequences. @@ -134,6 +143,7 @@ pub fn make_loupe_clonotype( if mat[cx][z].is_some() { u0 = z; m0 = mat[cx][z].unwrap(); + break; } } let ex = &exact_clonotypes[exacts[u0]]; @@ -154,21 +164,42 @@ pub fn make_loupe_clonotype( let aa_sequence = amino_acid(&nt_sequence, ex.share[m0].v_start); let aa_sequence_universal = amino_acid(&universal_reference, vstartu[cx]); let aa_sequence_donor = amino_acid(&donor_reference, vstartd[cx]); + + let v_start = ex.share[m0].v_start; + let fwr1_start = Some((v_start + ex.share[m0].fr1_start) as u32); + let mut cdr1_start = None; + if ex.share[m0].cdr1_start.is_some() { + cdr1_start = Some((v_start + ex.share[m0].cdr1_start.unwrap()) as u32); + } + let mut fwr2_start = None; + if ex.share[m0].fr2_start.is_some() { + fwr2_start = Some((v_start + ex.share[m0].fr2_start.unwrap()) as u32); + } + let mut cdr2_start = None; + if ex.share[m0].cdr2_start.is_some() { + cdr2_start = Some((v_start + ex.share[m0].cdr2_start.unwrap()) as u32); + } + let mut fwr3_start = None; + if ex.share[m0].fr3_start.is_some() { + fwr3_start = Some((v_start + 
ex.share[m0].fr3_start.unwrap()) as u32); + } + let fwr4_end = Some((ex.share[m0].v_start + ex.share[m0].seq.len()) as u32); + xchains.push(ClonotypeChain { - nt_sequence: nt_sequence, - aa_sequence: aa_sequence, + nt_sequence, + aa_sequence, u_idx: rsi.uids[cx].map(|idx| idx as u32), v_idx: rsi.vids[cx] as u32, d_idx: rsi.dids[cx].map(|idx| idx as u32), j_idx: rsi.jids[cx] as u32, c_idx: rsi.cids[cx].map(|idx| idx as u32), donor_v_idx: donor_v_idx.map(|idx| idx as u32), - donor_j_idx: donor_j_idx, - universal_reference: universal_reference, - universal_reference_aln: universal_reference_aln, + donor_j_idx, + universal_reference, + universal_reference_aln, aa_sequence_universal, - donor_reference: donor_reference, - donor_reference_aln: donor_reference_aln, + donor_reference, + donor_reference_aln, aa_sequence_donor, v_start: ex.share[m0].v_start as u32, v_end: ex.share[m0].v_stop as u32, @@ -176,10 +207,16 @@ pub fn make_loupe_clonotype( j_start: ex.share[m0].j_start as u32, j_start_ref: ex.share[m0].j_start_ref as u32, j_end: ex.share[m0].j_stop as u32, + fwr1_start, + cdr1_start, + fwr2_start, + cdr2_start, + fwr3_start, cdr3_start: ex.share[m0].v_start as u32 + ex.share[m0].cdr3_start as u32, cdr3_end: ex.share[m0].v_start as u32 + (ex.share[m0].cdr3_start + 3 * ex.share[m0].cdr3_aa.len()) as u32, - chain_type: chain_type, + fwr4_end, + chain_type, }); } @@ -191,7 +228,7 @@ pub fn make_loupe_clonotype( let ex = &exact_clonotypes[exacts[j]]; for cx in 0..cols { let m = mat[cx][j]; - if !m.is_some() { + if m.is_none() { chains.push(None); continue; } @@ -206,8 +243,26 @@ pub fn make_loupe_clonotype( } let j_end = ex.share[m].j_stop; let c_region_idx = rsi.cids[cx]; + let fwr1_start = Some((v_start + ex.share[m].fr1_start) as u32); + let mut cdr1_start = None; + if ex.share[m].cdr1_start.is_some() { + cdr1_start = Some((v_start + ex.share[m].cdr1_start.unwrap()) as u32); + } + let mut fwr2_start = None; + if ex.share[m].fr2_start.is_some() { + fwr2_start = 
Some((v_start + ex.share[m].fr2_start.unwrap()) as u32); + } + let mut cdr2_start = None; + if ex.share[m].cdr2_start.is_some() { + cdr2_start = Some((v_start + ex.share[m].cdr2_start.unwrap()) as u32); + } + let mut fwr3_start = None; + if ex.share[m].fr3_start.is_some() { + fwr3_start = Some((v_start + ex.share[m].fr3_start.unwrap()) as u32); + } let cdr3_start = v_start + ex.share[m].cdr3_start; let cdr3_end = cdr3_start + ex.share[m].cdr3_dna.len(); + let fwr4_end = Some((ex.share[m].v_start + ex.share[m].seq.len()) as u32); let mut umi_counts = Vec::::new(); let mut read_counts = Vec::::new(); let mut contig_ids = Vec::::new(); @@ -222,29 +277,108 @@ pub fn make_loupe_clonotype( let score = |a: u8, b: u8| if a == b { 1i32 } else { -1i32 }; let mut aligner = Aligner::new(-6, -1, &score); - let al = aligner.semiglobal(&ex.share[m].seq, &xchains[cx].nt_sequence); + let al = aligner.semiglobal(&nt_sequence, &xchains[cx].nt_sequence); let clonotype_consensus_aln = Alignment::from(&al); - let al = aligner.semiglobal(&ex.share[m].seq, &concatu[cx]); + let al = aligner.semiglobal(&nt_sequence, &concatu[cx]); let universal_reference_aln = Alignment::from(&al); - let al = aligner.semiglobal(&ex.share[m].seq, &concatd[cx]); + let al = aligner.semiglobal(&nt_sequence, &concatd[cx]); let donor_reference_aln = Alignment::from(&al); + // Compute nucleotide percent identity. 
+ + let xm = &ex.share[m]; + let mut diffs = 0; + let mut denom = 0; + let seq = &xm.seq_del_amino; + let mut vref = refdata.refs[xm.v_ref_id].to_ascii_vec(); + if xm.v_ref_id_donor_alt_id.is_some() { + vref = dref[xm.v_ref_id_donor.unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[xm.j_ref_id].to_ascii_vec(); + let z = seq.len(); + for p in 0..z { + let b = seq[p]; + if b == b'-' { + diffs += 1; + denom += 1; + continue; + } + if p < vref.len() - ctl.heur.ref_v_trim { + denom += 1; + if b != vref[p] { + diffs += 1; + } + } + if p >= z - (jref.len() - ctl.heur.ref_j_trim) { + denom += 1; + if b != jref[jref.len() - (z - p)] { + diffs += 1; + } + } + } + let dna_percent = 100.0 * (denom - diffs) as f32 / denom as f32; + + // Compute amino acid percent identity. + + let xm = &ex.share[m]; + let mut diffs = 0; + let mut denom = 0; + let aa_seq = &xm.aa_mod_indel; + let mut vref = refdata.refs[xm.v_ref_id].to_ascii_vec(); + if xm.v_ref_id_donor_alt_id.is_some() { + vref = dref[xm.v_ref_id_donor.unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[xm.j_ref_id].to_ascii_vec(); + let z = 3 * aa_seq.len() + 1; + for p in 0..aa_seq.len() { + if aa_seq[p] == b'-' { + diffs += 1; + denom += 1; + continue; + } + if 3 * p + 3 <= vref.len() - ctl.heur.ref_v_trim { + denom += 1; + if aa_seq[p] != codon_to_aa(&vref[3 * p..3 * p + 3]) { + diffs += 1; + } + } + if 3 * p > z - (jref.len() - ctl.heur.ref_j_trim) + 3 { + denom += 1; + if aa_seq[p] + != codon_to_aa( + &jref[jref.len() - (z - 3 * p)..jref.len() - (z - 3 * p) + 3], + ) + { + diffs += 1; + } + } + } + let aa_percent = 100.0 * (denom - diffs) as f32 / denom as f32; + // Finally, define the ExactClonotypeChain. 
chains.push(Some(ExactSubClonotypeChain { - nt_sequence: nt_sequence, - aa_sequence: aa_sequence, + nt_sequence, + aa_sequence, v_start: v_start as u32, j_end: j_end as u32, c_region_idx: c_region_idx.map(|idx| idx as u32), + fwr1_start, + cdr1_start, + fwr2_start, + cdr2_start, + fwr3_start, cdr3_start: cdr3_start as u32, cdr3_end: cdr3_end as u32, - umi_counts: umi_counts, - read_counts: read_counts, - contig_ids: contig_ids, - clonotype_consensus_aln: clonotype_consensus_aln, - donor_reference_aln: donor_reference_aln, - universal_reference_aln: universal_reference_aln, + fwr4_end, + umi_counts, + read_counts, + contig_ids, + clonotype_consensus_aln, + donor_reference_aln, + universal_reference_aln, + dna_percent, + aa_percent, })); } let mut cell_barcodes = Vec::::new(); @@ -274,18 +408,15 @@ pub fn make_loupe_clonotype( }) }) .collect(), - cell_barcodes: cell_barcodes, - inkt_evidence: inkt_evidence, - mait_evidence: mait_evidence, + cell_barcodes, + inkt_evidence, + mait_evidence, }); } // Build Clonotype. 
- let mut n = 0; - for i in 0..ecl.len() { - n += ecl[i].cell_barcodes.len(); - } + let n = ecl.iter().map(|e| e.cell_barcodes.len()).sum::(); Clonotype { chains: xchains, exact_clonotypes: ecl, @@ -297,15 +428,15 @@ pub fn loupe_out( ctl: &EncloneControl, all_loupe_clonotypes: Vec, refdata: &RefData, - dref: &Vec, + dref: &[DonorReferenceItem], ) { - if ctl.gen_opt.binary.len() > 0 || ctl.gen_opt.proto.len() > 0 { + if !ctl.gen_opt.binary.is_empty() || !ctl.gen_opt.proto.is_empty() { let mut uref = Vec::new(); for i in 0..refdata.refs.len() { uref.push(UniversalReferenceItem { ref_idx: refdata.id[i] as u32, display_name: refdata.name[i].clone(), - region: match refdata.segtype[i].as_str() { + region: match refdata.segtype[i] { "U" => Region::U.into(), "V" => Region::V.into(), "D" => Region::D.into(), @@ -318,9 +449,10 @@ pub fn loupe_out( } let metadata = match &ctl.gen_opt.proto_metadata { Some(fname) => serde_json::from_reader( - std::fs::File::open(fname).expect(&format!("Error while reading {}", fname)), + std::fs::File::open(fname) + .unwrap_or_else(|_| panic!("Error while reading {fname}")), ) - .expect(&format!("Unable to deserialize Metadata from {}", fname)), + .unwrap_or_else(|_| panic!("Unable to deserialize Metadata from {fname}")), None => Metadata::default(), }; let enclone_outputs = EncloneOutputs { @@ -333,10 +465,10 @@ pub fn loupe_out( items: dref.to_vec(), }, }; - if ctl.gen_opt.binary.len() > 0 { + if !ctl.gen_opt.binary.is_empty() { write_obj(&enclone_outputs, &ctl.gen_opt.binary); } - if ctl.gen_opt.proto.len() > 0 { + if !ctl.gen_opt.proto.is_empty() { write_proto(enclone_outputs, &ctl.gen_opt.proto).unwrap(); } } diff --git a/enclone_print/src/print_clonotypes.rs b/enclone_print/src/print_clonotypes.rs index 63cc90b43..b21e3c8e8 100644 --- a/enclone_print/src/print_clonotypes.rs +++ b/enclone_print/src/print_clonotypes.rs @@ -1,25 +1,41 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. 
+// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // This file supplies the single function print_clonotypes. It prints clonotypes, but also // does some filtering to remove 'noise' clonotypes. // // Problem: stack traces from this file consistently do not go back to the main program. -use crate::filter::*; -use crate::loupe::*; -use crate::print_utils1::*; -use crate::print_utils2::*; -use crate::print_utils3::*; -use crate::print_utils4::*; -use crate::print_utils5::*; -use enclone_core::defs::*; -use enclone_proto::types::*; +use crate::define_mat::define_mat; +use crate::filter::survives_filter; +use crate::finish_table::finish_table; +use crate::gene_scan::gene_scan_test; +use crate::loupe::{loupe_out, make_loupe_clonotype}; +use crate::print_utils1::{compute_field_types, extra_args, start_gen}; +use crate::print_utils2::row_fill; +use crate::print_utils3::{ + consensus_codon_cdr3, define_column_info, get_extra_parseables, process_complete, +}; +use crate::print_utils4::{build_show_aa, compute_bu, compute_some_stats}; +use crate::print_utils5::{delete_weaks, vars_and_shares}; +use enclone_args::proc_args_check::involves_gex_fb; +use enclone_core::allowed_vars::{CVARS_ALLOWED, CVARS_ALLOWED_PCELL, LVARS_ALLOWED}; +use enclone_core::barcode_fate::BarcodeFate; +use enclone_core::defs::{AlleleData, CloneInfo, ColInfo, EncloneControl, ExactClonotype, GexInfo}; +use enclone_core::mammalian_fixed_len::mammalian_fixed_len_peer_groups; +use enclone_core::set_speakers::set_speakers; +use enclone_proto::types::{Clonotype, DonorReferenceItem}; use equiv::EquivRel; +use hdf5::Reader; +use itertools::izip; +use qd::Double; use rayon::prelude::*; -use std::collections::HashMap; -use string_utils::*; -use vdj_ann::refx::*; -use vector_utils::*; +use std::cmp::max; +use std::collections::{HashMap, HashSet}; +use std::fs::File; +use std::io::BufWriter; +use string_utils::TextUtils; +use vdj_ann::refx::RefData; +use vector_utils::{bin_member, bin_position, erase_if, 
next_diff12_3, unique_sort}; // Print clonotypes. A key challenge here is to define the columns that represent shared // chains. This is given below by the code that forms an equivalence relation on the CDR3_AAs. @@ -35,47 +51,99 @@ use vector_utils::*; // eq = equivalence relation on info pub fn print_clonotypes( + is_bcr: bool, + to_bc: &HashMap<(usize, usize), Vec>, + sr: &[Vec], refdata: &RefData, - dref: &Vec, + dref: &[DonorReferenceItem], ctl: &EncloneControl, - exact_clonotypes: &Vec, - info: &Vec, - orbits: &Vec>, + exact_clonotypes: &[ExactClonotype], + info: &[CloneInfo], + orbits: &[Vec], + raw_joins: &[Vec], gex_info: &GexInfo, - vdj_cells: &Vec>, - d_readers: &Vec>, - ind_readers: &Vec>, - h5_data: &Vec<(usize, Vec, Vec)>, + vdj_cells: &[Vec], + d_readers: &[Option], + ind_readers: &[Option], + h5_data: &[(usize, Vec, Vec)], pics: &mut Vec, exacts: &mut Vec>, + in_center: &mut Vec, rsi: &mut Vec, out_datas: &mut Vec>>, tests: &mut Vec, controls: &mut Vec, -) { - // Make an abbreviation. - + fate: &mut [HashMap], + allele_data: &AlleleData, +) -> Result<(), String> { let lvars = &ctl.clono_print_opt.lvars; - // Compute total cells. + // Compute extra args. - let mut total_cells = 0; - for i in 0..exact_clonotypes.len() { - total_cells += exact_clonotypes[i].ncells(); - } + let extra_args = extra_args(ctl); + + // Determine if any lvars need gex info. + + let need_gex = { + lvars.iter().map(String::as_str).any(involves_gex_fb) + || { + if ctl.parseable_opt.pout.is_empty() { + false + } else if ctl.parseable_opt.pcols.is_empty() { + LVARS_ALLOWED.into_iter().any(involves_gex_fb) + } else { + ctl.parseable_opt + .pcols + .iter() + .map(String::as_str) + .any(involves_gex_fb) + } + } + || extra_args.iter().map(String::as_str).any(involves_gex_fb) + }; // Define parseable output columns. The entire machinery for parseable output is controlled // by macros that begin with "speak". 
+ let mut max_chains = 4; + // This seems like a bug, since rsi is uninitialized upon entry to print_clonotypes. + for r in rsi.iter() { + max_chains = max(max_chains, r.mat.len()); + } let mut parseable_fields = Vec::::new(); - set_speakers(&ctl, &mut parseable_fields); + set_speakers(ctl, &mut parseable_fields, max_chains); let pcols_sort = &ctl.parseable_opt.pcols_sort; + // Identify certain extra parseable variables. These arise from parameterizable cvars. + + let mut extra_parseables = get_extra_parseables(ctl, pcols_sort); + + // Compute all_vars. + + let rsi_vars = &ctl.clono_print_opt.cvars; + let mut all_vars = rsi_vars.iter().map(String::as_str).collect::>(); + for var in CVARS_ALLOWED { + if !rsi_vars.contains(&var.to_string()) { + all_vars.push(var); + } + } + for var in CVARS_ALLOWED_PCELL { + if !rsi_vars.contains(&var.to_string()) { + all_vars.push(var); + } + } + all_vars.append(&mut extra_parseables); + for x in extra_args.iter() { + if !rsi_vars.contains(x) { + all_vars.push(x.as_str()); + } + } + // Test for presence of GEX/FB data. let mut have_gex = false; for i in 0..ctl.origin_info.gex_path.len() { - if ctl.origin_info.gex_path[i].len() > 0 { + if !ctl.origin_info.gex_path[i].is_empty() { have_gex = true; } } @@ -93,16 +161,25 @@ pub fn print_clonotypes( // Compute number of vdj cells that are gex. let mut n_vdj_gex = Vec::::new(); - for li in 0..ctl.origin_info.n() { + for (gex, vdj) in gex_info + .pca + .iter() + .zip(vdj_cells.iter()) + .take(ctl.origin_info.n()) + { let mut n = 0; - for y in gex_info.pca[li].iter() { - if bin_member(&vdj_cells[li], &y.0) { + for y in gex.iter() { + if bin_member(vdj, y.0) { n += 1; } } n_vdj_gex.push(n); } + // Compute peer groups. + + let peer_groups = mammalian_fixed_len_peer_groups(refdata); + // Traverse the orbits. 
// 0: index in reps @@ -110,7 +187,7 @@ pub fn print_clonotypes( // 2: vector of some clonotype info // [parallel to 1] // next to last three entries = whitelist contam, denominator for that, low gex count - // added out_datas + // added out_datas (used to be next to last three, now one more) let mut results = Vec::<( usize, Vec, @@ -123,6 +200,9 @@ pub fn print_clonotypes( isize, Vec, Vec, + Vec<(usize, String, BarcodeFate)>, + Vec, + String, )>::new(); for i in 0..orbits.len() { results.push(( @@ -137,6 +217,9 @@ pub fn print_clonotypes( 0, Vec::::new(), Vec::::new(), + Vec::new(), + Vec::new(), + String::new(), )); } results.par_iter_mut().for_each(|res| { @@ -155,47 +238,18 @@ pub fn print_clonotypes( // Capture these data into parallel data structures, one per exact subclonotype: // exacts: the exact subclonotype ids // mults: number of cells [redundant, might remove] - // cdr3s: sorted list of (chain_type:cdr3) - // js: indices into od (BE VERY CARFUL ABOUT USING THIS) let mut exacts = Vec::::new(); let mut mults = Vec::::new(); - let mut cdr3s = Vec::>::new(); - let mut cdr3s_len = Vec::>::new(); - let mut js = Vec::::new(); let mut j = 0; let loupe_clonotypes = &mut res.6; while j < od.len() { let k = next_diff12_3(&od, j as i32) as usize; - let mut mult = 0 as usize; - let mut z = Vec::::new(); - let mut z_len = Vec::<(String, usize)>::new(); + let mut mult = 0_usize; for l in j..k { let x: &CloneInfo = &info[od[l].2 as usize]; let m = x.clonotype_index; mult = exact_clonotypes[m].clones.len(); - for m in 0..x.cdr3_aa.len() { - // Do something EXTREMELY ugly. To force TRB columns to come before TRA - // columns, rename then TRX and TRY. This is reversed at the very end. 
- - let mut c = x.chain_types[m].clone(); - if c.starts_with("TRB") { - c = c.replacen("TRB", "TRX", 1); - } else if c.starts_with("TRA") { - c = c.replacen("TRA", "TRY", 1); - } - z.push(format!("{}:{}", c, x.cdr3_aa[m])); - z_len.push((format!("{}:{}", c, x.cdr3_aa[m]), x.lens[m])); - } - } - unique_sort(&mut z); - unique_sort(&mut z_len); - cdr3s.push(z); - cdr3s_len.push(z_len); - js.push(j); - let mut x = Vec::::new(); - for l in j..k { - x.push(l); } mults.push(mult); exacts.push(od[j].1); @@ -205,53 +259,53 @@ pub fn print_clonotypes( // There are two passes. On the first pass we only identify the exact subclonotypes that // are junk. On the second pass we remove those and then print the orbit. - let mut bads = vec![false; cdr3s.len()]; + let mut bads = vec![false; exacts.len()]; + let mut stats_pass1 = Vec::)>>::new(); for pass in 1..=2 { // Delete weak exact subclonotypes. if pass == 2 && !ctl.clono_filt_opt.protect_bads { - erase_if(&mut cdr3s, &bads); - erase_if(&mut cdr3s_len, &bads); - erase_if(&mut js, &bads); erase_if(&mut mults, &bads); erase_if(&mut exacts, &bads); } // Sort exact subclonotypes. 
- let mat = define_mat(&ctl, &exact_clonotypes, &cdr3s_len, &js, &od, &info); - let mut priority = Vec::<(Vec, usize, usize)>::new(); - for u in 0..exacts.len() { - let mut typex = vec![false; mat.len()]; - for col in 0..mat.len() { - if mat[col][u].is_some() { - typex[col] = true; - } - } - let clonotype_id = exacts[u]; - let ex = &exact_clonotypes[clonotype_id]; - let mut utot0 = 0; - let mid = mat[0][u]; - if mid.is_some() { - let mid = mid.unwrap(); + let mat = define_mat( + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + &exacts, + &od, + info, + raw_joins, + refdata, + dref, + ); + let priority = exacts + .iter() + .enumerate() + .map(|(u, &exact)| { + let typex = mat.iter().map(|col| col[u].is_some()).collect::>(); + let clonotype_id = exact; let ex = &exact_clonotypes[clonotype_id]; - for j in 0..ex.clones.len() { - utot0 += ex.clones[j][mid].umi_count; + let mut utot0 = 0; + if let Some(mid) = mat[0][u] { + let ex = &exact_clonotypes[clonotype_id]; + for j in 0..ex.clones.len() { + utot0 += ex.clones[j][mid].umi_count; + } } - } - priority.push((typex.clone(), ex.ncells(), utot0)); - } + (typex, ex.ncells(), utot0) + }) + .collect::>(); let permutation = permutation::sort(&priority[..]); exacts = permutation.apply_slice(&exacts[..]); mults = permutation.apply_slice(&mults[..]); - cdr3s = permutation.apply_slice(&cdr3s[..]); - cdr3s_len = permutation.apply_slice(&cdr3s_len[..]); - js = permutation.apply_slice(&js[..]); exacts.reverse(); mults.reverse(); - cdr3s.reverse(); - cdr3s_len.reverse(); - js.reverse(); // Define a matrix mat[col][ex] which is the column of the exact subclonotype // corresponding to the given column col of the clonotype, which may or may not be @@ -259,9 +313,21 @@ pub fn print_clonotypes( // reference sequence identifiers, CDR3 start positions, and the like. 
let nexacts = exacts.len(); - let mat = define_mat(&ctl, &exact_clonotypes, &cdr3s_len, &js, &od, &info); + let mat = define_mat( + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + &exacts, + &od, + info, + raw_joins, + refdata, + dref, + ); let cols = mat.len(); - let mut rsi = define_column_info(&ctl, &exacts, &exact_clonotypes, &mat, &refdata); + let mut rsi = define_column_info(ctl, &exacts, exact_clonotypes, &mat, refdata); rsi.mat = mat; let mat = &rsi.mat; @@ -271,21 +337,35 @@ pub fn print_clonotypes( // Filter. + let mut in_center = true; if pass == 2 - && !survives_filter(&exacts, &rsi, &ctl, &exact_clonotypes, &refdata, &gex_info) + && !survives_filter( + &exacts, + &rsi, + ctl, + exact_clonotypes, + refdata, + gex_info, + dref, + ) { - continue; + if ctl.clono_group_opt.asymmetric_center == "from_filters" { + in_center = false; + } else { + continue; + } } // Generate Loupe data. - if (ctl.gen_opt.binary.len() > 0 || ctl.gen_opt.proto.len() > 0) && pass == 2 { + if (!ctl.gen_opt.binary.is_empty() || !ctl.gen_opt.proto.is_empty()) && pass == 2 { loupe_clonotypes.push(make_loupe_clonotype( - &exact_clonotypes, + exact_clonotypes, &exacts, &rsi, - &refdata, - &dref, + refdata, + dref, + ctl, )); } @@ -300,18 +380,19 @@ pub fn print_clonotypes( // ◼ some unsavory workarounds below. let mut mlog = Vec::::new(); - if n >= ctl.clono_filt_opt.ncells_low { + if n >= ctl.clono_filt_opt.ncells_low + || ctl.clono_group_opt.asymmetric_center == "from_filters" + { // Start to generate parseable output. 
if pass == 2 { start_gen( - &ctl, + ctl, &exacts, - &exact_clonotypes, - &refdata, - &rsi, + exact_clonotypes, &mut out_data, &mut mlog, + &extra_args, ); } @@ -320,67 +401,68 @@ pub fn print_clonotypes( let mut vars = Vec::>::new(); let mut vars_amino = Vec::>::new(); let mut shares_amino = Vec::>::new(); + let mut ref_diff_pos = Vec::>>::new(); vars_and_shares( pass, - &ctl, + ctl, &exacts, - &exact_clonotypes, + exact_clonotypes, &rsi, - &refdata, - &dref, + refdata, + dref, &mut vars, &mut vars_amino, &mut shares_amino, + &mut ref_diff_pos, &mut out_data, ); // Mark some weak exact subclonotypes for deletion. if pass == 1 { - delete_weaks( - &ctl, - &exacts, - &mults, - &exact_clonotypes, - total_cells, - &mat, - &vars, - &mut bads, - ); + delete_weaks(ctl, &exacts, exact_clonotypes, mat, refdata, &mut bads); } - // Done unless on second pass. Unless there are bounds or COMPLETE specified. + // Done unless on second pass. Unless there are bounds or COMPLETE specified + // or VAR_DEF specified. - if pass == 1 && ctl.clono_filt_opt.bounds.len() == 0 && !ctl.gen_opt.complete { + if pass == 1 + && ctl.clono_filt_opt.bounds.is_empty() + && !ctl.gen_opt.complete + && ctl.gen_opt.var_def.is_empty() + { continue; } // Define amino acid positions to show. let show_aa = build_show_aa( - &ctl, + ctl, &rsi, &vars_amino, &shares_amino, - &refdata, - &dref, + refdata, + dref, &exacts, - &exact_clonotypes, + exact_clonotypes, ); - // Build varmat. + // Define field types corresponding to the amino acid positions to show. + let field_types = compute_field_types(ctl, &rsi, &show_aa); + // Build varmat matrix of size (nexacts, cols). 
let mut varmat = vec![vec![vec![b'-']; cols]; nexacts]; - for col in 0..cols { - for u in 0..nexacts { - let m = mat[col][u]; - if m.is_some() { - let mut v = Vec::::new(); - let seq = rsi.seqss[col][u].clone(); - for p in vars[col].iter() { - v.push(seq[*p]); - } - varmat[u][col] = v; + for (col, (mat_slice, seqss_slice, vars_slice)) in + izip!(mat, &rsi.seqss, &vars).take(cols).enumerate() + { + for (varmat_u, m, seq) in izip!(&mut varmat, mat_slice, seqss_slice) { + varmat_u[col] = if m.is_some() { + vars_slice + .iter() + .map(|&p| *seq.get(p).unwrap_or(&b'?')) + .collect() + } else { + vec![b'-'] } } } @@ -395,11 +477,9 @@ pub fn print_clonotypes( && x.after("nd").force_usize() >= 1 { lvarsc.clear(); - for m in 0..i { - lvarsc.push(lvars[m].clone()); - } + lvarsc.extend(lvars.iter().take(i).cloned()); let k = x.after("nd").force_usize(); - let mut n = vec![0 as usize; ctl.origin_info.n()]; + let mut n = vec![0_usize; ctl.origin_info.n()]; for u in 0..nexacts { let ex = &exact_clonotypes[exacts[u]]; for l in 0..ex.ncells() { @@ -421,58 +501,55 @@ pub fn print_clonotypes( break; } } - for l in 0..k { + for (l, ds) in datasets.iter().take(k).enumerate() { if l >= n.len() { break; } - nd_fields.push(format!("n_{}", datasets[l].clone())); - lvarsc.push(format!("n_{}", datasets[l].clone())); + nd_fields.push(format!("n_{}", ds.as_str())); + lvarsc.push(format!("n_{}", ds.as_str())); } if n.len() > k { nd_fields.push("n_other".to_string()); lvarsc.push("n_other".to_string()); } - for m in i + 1..lvars.len() { - lvarsc.push(lvars[m].clone()); - } + lvarsc.extend(lvars.iter().skip(i + 1).cloned()); + break; } } let lvars = lvarsc.clone(); + let mut lvarsh = HashSet::::new(); + for x in lvars.iter() { + lvarsh.insert(x.to_string()); + } // Now build table content. 
let mut sr = Vec::<(Vec, Vec>, Vec>, usize)>::new(); let mut groups = HashMap::>::new(); - for i in 0..lvars.len() { - if lvars[i].starts_with('g') && lvars[i].after("g").parse::().is_ok() { - let d = lvars[i].after("g").force_usize(); + for lvar in &lvars { + if let Some(Ok(d)) = lvar.strip_prefix('g').map(str::parse::) { if groups.contains_key(&d) { continue; } let mut e: EquivRel = EquivRel::new(nexacts as i32); - for u1 in 0..nexacts { - let ex1 = &exact_clonotypes[exacts[u1]]; - for u2 in u1 + 1..nexacts { + for (u1, &e1) in exacts.iter().take(nexacts).enumerate() { + let ex1 = &exact_clonotypes[e1]; + for (u2, &e2) in exacts.iter().enumerate().take(nexacts).skip(u1 + 1) { if e.class_id(u1 as i32) == e.class_id(u2 as i32) { continue; } - let ex2 = &exact_clonotypes[exacts[u2]]; + let ex2 = &exact_clonotypes[e2]; let mut diffs = 0; - 'comp: for cx in 0..cols { - let m1 = mat[cx][u1]; - if m1.is_none() { - continue; - } - let m2 = mat[cx][u2]; - if m2.is_none() { - continue; - } - let (m1, m2) = (m1.unwrap(), m2.unwrap()); - for p in vars[cx].iter() { - if ex1.share[m1].seq_del[*p] != ex2.share[m2].seq_del[*p] { - diffs += 1; - if diffs > d { - break 'comp; + 'comp: for (mm, vars) in mat.iter().zip(vars.iter()).take(cols) { + if let (Some(m1), Some(m2)) = (mm[u1], mm[u2]) { + let (s1, s2) = + (&ex1.share[m1].seq_del, &ex2.share[m2].seq_del); + for &p in vars { + if s1[p] != s2[p] { + diffs += 1; + if diffs > d { + break 'comp; + } } } } @@ -494,223 +571,27 @@ pub fn print_clonotypes( // Set up to record stats that assign a value to each cell for a given variable. - let mut stats = Vec::<(String, Vec)>::new(); - - // Compute "cred" stats (credibility/# of neighboring cells that are also - // B cells). 
- - let mut cred = vec![Vec::::new(); lvars.len()]; - for k in 0..lvars.len() { - if lvars[k] == "cred".to_string() { - for u in 0..nexacts { - let clonotype_id = exacts[u]; - let ex = &exact_clonotypes[clonotype_id]; - for l in 0..ex.clones.len() { - let bc = &ex.clones[l][0].barcode; - let li = ex.clones[l][0].dataset_index; - if gex_info.pca[li].contains_key(&bc.clone()) { - let mut creds = 0; - let mut z = Vec::<(f64, String)>::new(); - let x = &gex_info.pca[li][&bc.clone()]; - for y in gex_info.pca[li].iter() { - let mut dist2 = 0.0; - for m in 0..x.len() { - dist2 += (y.1[m] - x[m]) * (y.1[m] - x[m]); - } - z.push((dist2, y.0.clone())); - } - z.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let top = n_vdj_gex[li]; - for i in 0..top { - if bin_member(&vdj_cells[li], &z[i].1) { - creds += 1; - } - } - let pc = 100.0 * creds as f64 / top as f64; - cred[k].push(format!("{:.1}", pc)); - } else { - cred[k].push("".to_string()); - } - } - } - } - } - - // Compute pe (PCA distance). - - let mut pe = vec![Vec::::new(); lvars.len()]; - for k in 0..lvars.len() { - if lvars[k].starts_with("pe") { - let n = lvars[k].after("pe").force_usize(); - let mut bcs = Vec::::new(); - let mut count = 0; - let mut to_index = Vec::::new(); - for u in 0..nexacts { - let clonotype_id = exacts[u]; - let ex = &exact_clonotypes[clonotype_id]; - for l in 0..ex.clones.len() { - let bc = &ex.clones[l][0].barcode; - let li = ex.clones[l][0].dataset_index; - if gex_info.pca[li].contains_key(&bc.clone()) { - bcs.push(bc.to_string()); - to_index.push(count); - } - count += 1; - } - } - let mut e: EquivRel = EquivRel::new(bcs.len() as i32); - let li = 0; // BEWARE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- let mut mat = vec![Vec::::new(); bcs.len()]; - for i in 0..bcs.len() { - mat[i] = gex_info.pca[li][&bcs[i].clone()].clone(); - } - for i1 in 0..bcs.len() { - for i2 in i1 + 1..bcs.len() { - if e.class_id(i1 as i32) != e.class_id(i2 as i32) { - let mut d = 0.0; - for j in 0..mat[i1].len() { - d += (mat[i1][j] - mat[i2][j]) * (mat[i1][j] - mat[i2][j]); - } - d = d.sqrt(); - if d <= n as f64 { - e.join(i1 as i32, i2 as i32); - } - } - } - } - pe[k] = vec![String::new(); count]; - let mut ids = Vec::::new(); - for i in 0..bcs.len() { - ids.push(e.class_id(i as i32)); - } - unique_sort(&mut ids); - let mut reps = Vec::::new(); - e.orbit_reps(&mut reps); - reps.sort(); - for i in 0..bcs.len() { - pe[k][to_index[i]] = - format!("{}", bin_position(&ids, &e.class_id(i as i32))); - } - } - } - - // Compute ppe (PCA distance). - - let mut ppe = vec![Vec::::new(); lvars.len()]; - for k in 0..lvars.len() { - if lvars[k].starts_with("ppe") { - let n = lvars[k].after("ppe").force_usize(); - let mut bcs = Vec::::new(); - let mut count = 0; - let mut to_index = Vec::::new(); - for u in 0..nexacts { - let clonotype_id = exacts[u]; - let ex = &exact_clonotypes[clonotype_id]; - for l in 0..ex.clones.len() { - let bc = &ex.clones[l][0].barcode; - let li = ex.clones[l][0].dataset_index; - if gex_info.pca[li].contains_key(&bc.clone()) { - bcs.push(bc.to_string()); - to_index.push(count); - } - count += 1; - } - } - let li = 0; // BEWARE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- let mut mat = vec![Vec::::new(); bcs.len()]; - for i in 0..bcs.len() { - mat[i] = gex_info.pca[li][&bcs[i].clone()].clone(); - } - let mut matg = Vec::>::new(); - for i in gex_info.pca[li].iter() { - matg.push(i.1.to_vec()); - } - let mut x = vec![0; bcs.len()]; - for i1 in 0..mat.len() { - for i2 in 0..matg.len() { - let m1 = &mat[i1]; - let m2 = &matg[i2]; - let mut d = 0.0; - for j in 0..m1.len() { - d += (m1[j] - m2[j]) * (m1[j] - m2[j]); - } - d = d.sqrt(); - if d <= n as f64 { - x[i1] += 1; - } - } - } - let mut y = vec![0; bcs.len()]; - for i1 in 0..mat.len() { - for i2 in 0..mat.len() { - let m1 = &mat[i1]; - let m2 = &mat[i2]; - let mut d = 0.0; - for j in 0..m1.len() { - d += (m1[j] - m2[j]) * (m1[j] - m2[j]); - } - d = d.sqrt(); - if d <= n as f64 { - y[i1] += 1; - } - } - } - ppe[k] = vec![String::new(); count]; - for i in 0..bcs.len() { - ppe[k][to_index[i]] = - format!("{:.1}", 100.0 * y[i] as f64 / x[i] as f64); - } - } - } + let mut stats = Vec::<(String, Vec)>::new(); - // Compute npe (PCA distance). + // Compute some stats; - let mut npe = vec![Vec::::new(); lvars.len()]; - for k in 0..lvars.len() { - if lvars[k].starts_with("npe") { - let n = lvars[k].after("npe").force_usize(); - let mut bcs = Vec::::new(); - let mut count = 0; - let mut to_index = Vec::::new(); - for u in 0..nexacts { - let clonotype_id = exacts[u]; - let ex = &exact_clonotypes[clonotype_id]; - for l in 0..ex.clones.len() { - let bc = &ex.clones[l][0].barcode; - let li = ex.clones[l][0].dataset_index; - if gex_info.pca[li].contains_key(&bc.clone()) { - bcs.push(bc.to_string()); - to_index.push(count); - } - count += 1; - } - } - let li = 0; // BEWARE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- let mut mat = vec![Vec::::new(); bcs.len()]; - for i in 0..bcs.len() { - mat[i] = gex_info.pca[li][&bcs[i].clone()].clone(); - } - let mut y = vec![0; bcs.len()]; - for i1 in 0..mat.len() { - for i2 in 0..mat.len() { - let m1 = &mat[i1]; - let m2 = &mat[i2]; - let mut d = 0.0; - for j in 0..m1.len() { - d += (m1[j] - m2[j]) * (m1[j] - m2[j]); - } - d = d.sqrt(); - if d <= n as f64 { - y[i1] += 1; - } - } - } - npe[k] = vec![String::new(); count]; - for i in 0..bcs.len() { - npe[k][to_index[i]] = format!("{}", y[i]); - } - } - } + let mut cred = Vec::>::new(); + let mut pe = Vec::>::new(); + let mut ppe = Vec::>::new(); + let mut npe = Vec::>::new(); + compute_some_stats( + ctl, + &lvars, + &exacts, + exact_clonotypes, + gex_info, + vdj_cells, + &n_vdj_gex, + &mut cred, + &mut pe, + &mut ppe, + &mut npe, + ); // Precompute for near and far. @@ -723,6 +604,13 @@ pub fn print_clonotypes( } } + // Form CDR3 consensus sequences. + + let mut cdr3_con = Vec::>::new(); + if ctl.gen_opt.color == "codon-diffs" { + cdr3_con = consensus_codon_cdr3(&rsi, &exacts, exact_clonotypes); + } + // Build rows. 
let mut cell_count = 0; @@ -738,19 +626,22 @@ pub fn print_clonotypes( let ex = &exact_clonotypes[clonotype_id]; let mut d_all = vec![Vec::::new(); ex.clones.len()]; let mut ind_all = vec![Vec::::new(); ex.clones.len()]; - row_fill( + let mut these_stats = Vec::<(String, Vec)>::new(); + let resx = row_fill( pass, u, - &ctl, + ctl, &exacts, &mults, - &exact_clonotypes, - &gex_info, - &refdata, + exact_clonotypes, + gex_info, + refdata, &varmat, &fp, &vars_amino, &show_aa, + &ref_diff_pos, + &field_types, &mut bads, &mut gex_low, &mut row, @@ -759,25 +650,41 @@ pub fn print_clonotypes( &mut d_all, &mut ind_all, &rsi, - &dref, + dref, &groups, - &d_readers, - &ind_readers, - &h5_data, - &mut stats, - &vdj_cells, + d_readers, + ind_readers, + h5_data, + &mut these_stats, + &stats_pass1, + vdj_cells, &n_vdj_gex, &lvars, + &lvarsh, &nd_fields, + &peer_groups, + &extra_args, + &all_vars, + need_gex, + fate, + &cdr3_con, + allele_data, ); - let mut bli = Vec::<(String, usize, usize)>::new(); - for l in 0..ex.clones.len() { - bli.push(( - ex.clones[l][0].barcode.clone(), - ex.clones[l][0].dataset_index, - l, - )); - } + stats.append(&mut these_stats.clone()); + if pass == 1 { + stats_pass1.push(these_stats.clone()); + } + these_stats.sort_by(|a, b| a.0.cmp(&b.0)); + if let Err(e) = resx { + res.13 = e; + return; + } + let mut bli = ex + .clones + .iter() + .enumerate() + .map(|(l, clone)| (clone[0].barcode.clone(), clone[0].dataset_index, l)) + .collect::>(); // WHY ARE WE SORTING HERE? bli.sort(); for col in 0..cols { @@ -785,290 +692,45 @@ pub fn print_clonotypes( typex[col] = true; } } - for r in 0..cx.len() { - for s in 0..cx[r].len() { - row.push(cx[r][s].clone()); - } + for mut cxr in cx { + row.append(&mut cxr); } res.5 = gex_low; - // Very bad computation because of embedded binary search. 
- - let mut subrows = Vec::>::new(); - if ctl.clono_print_opt.bu { - for bcl in bli.iter() { - let mut row = Vec::::new(); - let bc = &bcl.0; - let li = bcl.1; - let di = ex.clones[bcl.2][0].dataset_index; - row.push(format!("$ {}", bc.clone())); - let ex = &exact_clonotypes[exacts[u]]; - for k in 0..lvars.len() { - let nr = row.len(); - let mut filled = false; - for l in 0..ctl.origin_info.n() { - if lvars[k] == format!("n_{}", ctl.origin_info.dataset_id[l]) { - let mut n = 0; - if di == l { - n = 1; - } - row.push(format!("{}", n)); - filled = true; - } - } - if filled { - } else if lvars[k] == "n_b".to_string() { - let mut n = 0; - let li = ex.clones[bcl.2][0].dataset_index; - if gex_info.cell_type[li].contains_key(&bc.clone()) { - if gex_info.cell_type[li][&bc.clone()].starts_with('B') { - n = 1; - } - } - row.push(format!("{}", n)); - } else if lvars[k] == "n_other".to_string() { - let mut n = 0; - let di = ex.clones[bcl.2][0].dataset_index; - let f = format!("n_{}", ctl.origin_info.dataset_id[di]); - let mut found = false; - for i in 0..nd_fields.len() { - if f == nd_fields[i] { - found = true; - } - } - if !found { - n = 1; - } - row.push(format!("{}", n)); - } else if lvars[k] == "sec".to_string() { - let mut n = 0; - if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { - n = ctl.origin_info.secmem[li][&bc.clone()].0; - } - row.push(format!("{}", n)); - } else if lvars[k] == "mem".to_string() { - let mut n = 0; - if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { - n = ctl.origin_info.secmem[li][&bc.clone()].1; - } - row.push(format!("{}", n)); - } else if bin_member(&alt_bcs, &lvars[k]) { - let mut val = String::new(); - let alt = &ctl.origin_info.alt_bc_fields[li]; - for j in 0..alt.len() { - if alt[j].0 == lvars[k] { - if alt[j].1.contains_key(&bc.clone()) { - val = alt[j].1[&bc.clone()].clone(); - } - } - } - row.push(val); - } else if lvars[k] == "datasets".to_string() { - row.push(format!("{}", ctl.origin_info.dataset_id[li].clone())); - } 
else if lvars[k] == "clust".to_string() && have_gex { - let mut cid = 0; - if gex_info.cluster[li].contains_key(&bc.clone()) { - cid = gex_info.cluster[li][&bc.clone()]; - } - row.push(format!("{}", cid)); - } else if lvars[k].starts_with("pe") && have_gex { - row.push(format!("{}", pe[k][cell_count + bcl.2])); - } else if lvars[k].starts_with("npe") && have_gex { - row.push(format!("{}", npe[k][cell_count + bcl.2])); - } else if lvars[k].starts_with("ppe") && have_gex { - row.push(format!("{}", ppe[k][cell_count + bcl.2])); - } else if lvars[k] == "cred".to_string() && have_gex { - row.push(format!("{}", cred[k][cell_count + bcl.2])); - } else if lvars[k] == "type".to_string() && have_gex { - let mut cell_type = "".to_string(); - if gex_info.cell_type[li].contains_key(&bc.clone()) { - cell_type = gex_info.cell_type[li][&bc.clone()].clone(); - } - row.push(cell_type); - } else if lvars[k] == "n_gex".to_string() && have_gex { - let mut n_gex = 0; - if bin_member(&gex_info.gex_cell_barcodes[li], &bc) { - n_gex = 1; - } - row.push(format!("{}", n_gex)); - } else if lvars[k] == "mark".to_string() { - let mut mark = String::new(); - if ex.clones[bcl.2][0].marked { - mark = "x".to_string(); - } - row.push(mark); - } else if lvars[k] == "entropy".to_string() && have_gex { - // NOTE DUPLICATION WITH CODE BELOW. 
- let mut gex_count = 0; - let p = bin_position(&gex_info.gex_barcodes[li], &bc); - if p >= 0 { - let mut raw_count = 0; - if gex_info.gex_matrices[li].initialized() { - let row = gex_info.gex_matrices[li].row(p as usize); - for j in 0..row.len() { - let f = row[j].0; - let n = row[j].1; - if gex_info.is_gex[li][f] { - raw_count += n; - } - } - } else { - let l = bcl.2; - for j in 0..d_all[l].len() { - if gex_info.is_gex[li][ind_all[l][j] as usize] { - raw_count += d_all[l][j] as usize; - } - } - } - gex_count = raw_count; - } - let mut entropy = 0.0; - if p >= 0 { - if gex_info.gex_matrices[li].initialized() { - let row = gex_info.gex_matrices[li].row(p as usize); - for j in 0..row.len() { - let f = row[j].0; - let n = row[j].1; - if gex_info.is_gex[li][f] { - let q = n as f64 / gex_count as f64; - entropy -= q * q.log2(); - } - } - } else { - let l = bcl.2; - for j in 0..d_all[l].len() { - if gex_info.is_gex[li][ind_all[l][j] as usize] { - let n = d_all[l][j] as usize; - let q = n as f64 / gex_count as f64; - entropy -= q * q.log2(); - } - } - } - } - row.push(format!("{:.2}", entropy)); - } else if have_gex { - // this calc isn't needed except in _% case below - // TODO: ELIMINATE UNNEEDED CALC - let mut gex_count = 0.0; - let p = bin_position(&gex_info.gex_barcodes[li], &bc); - if p >= 0 { - let mut raw_count = 0 as f64; - if gex_info.gex_matrices[li].initialized() { - let row = gex_info.gex_matrices[li].row(p as usize); - for j in 0..row.len() { - let f = row[j].0; - let n = row[j].1; - if gex_info.is_gex[li][f] { - raw_count += n as f64; - } - } - } else { - let l = bcl.2; - for j in 0..d_all[l].len() { - if gex_info.is_gex[li][ind_all[l][j] as usize] { - raw_count += d_all[l][j] as f64; - } - } - } - if !ctl.gen_opt.full_counts { - gex_count = raw_count * gex_info.gex_mults[li]; - } else { - gex_count = raw_count; - } - } - if lvars[k] == "gex".to_string() { - row.push(format!("{}", gex_count.round())); - } else { - let mut y = lvars[k].clone(); - if 
y.contains(':') { - y = y.after(":").to_string(); - } - let y0 = y.clone(); - let suffixes = ["_min", "_max", "_μ", "_Σ", "_cell", "_%"]; - for s in suffixes.iter() { - if y.ends_with(s) { - y = y.rev_before(&s).to_string(); - break; - } - } - let p = bin_position(&gex_info.gex_barcodes[li], &bc); - let mut computed = false; - let mut count = 0.0; - let l = bcl.2; - if p >= 0 { - let mut ux = Vec::::new(); - if ctl.clono_print_opt.regex_match[li].contains_key(&y) - { - ux = - ctl.clono_print_opt.regex_match[li][&y].clone(); - } - if ux.len() > 0 { - computed = true; - for fid in ux.iter() { - let counti = get_gex_matrix_entry( - &ctl, &gex_info, *fid, &d_all, &ind_all, - li, l, p as usize, &y, - ); - count += counti; - } - } else if gex_info.feature_id[li].contains_key(&y) { - computed = true; - let fid = gex_info.feature_id[li][&y]; - count = get_gex_matrix_entry( - &ctl, &gex_info, fid, &d_all, &ind_all, li, l, - p as usize, &y, - ); - } - } - if computed { - // note unneeded calculation above in certain cases - // TODO: ELIMINATE! 
- if y0.ends_with("_min") { - } else if y0.ends_with("_max") { - } else if y0.ends_with("_μ") { - } else if y0.ends_with("_Σ") { - } else if y0.ends_with("_%") { - row.push(format!( - "{:.2}", - (100.0 * count) / gex_count - )); - } else { - row.push(format!("{}", count.round())); - } - } - } - } - if row.len() == nr { - row.push("".to_string()); - } - } - let mut ncall = 0; - for k in 0..cols { - ncall += rsi.cvars[k].len(); - } - let mut cx = vec!["".to_string(); ncall]; - let mut cp = 0; - for col in 0..cols { - let m = mat[col][u]; - if m.is_some() { - let m = m.unwrap(); - for p in 0..rsi.cvars[col].len() { - if rsi.cvars[col][p] == "u".to_string() { - let numi = ex.clones[bcl.2][m].umi_count; - cx[cp + p] = format!("{}", numi); - } else if rsi.cvars[col][p] == "r".to_string() { - let r = ex.clones[bcl.2][m].read_count; - cx[cp + p] = format!("{}", r); - } - } - } - cp += rsi.cvars[col].len(); - } - row.append(&mut cx); - subrows.push(row); - } + // Compute per-cell entries. + + if pass == 2 { + let mut subrows = Vec::>::new(); + compute_bu( + u, + cell_count, + &exacts, + &lvars, + ctl, + &bli, + ex, + exact_clonotypes, + &mut row, + &mut subrows, + &varmat, + have_gex, + gex_info, + &rsi, + &mut sr, + fate, + &nd_fields, + &alt_bcs, + &cred, + &pe, + &ppe, + &npe, + &d_all, + &ind_all, + mat, + &these_stats, + refdata, + ); } - sr.push((row, subrows, varmat[u].clone(), u)); cell_count += ex.clones.len(); } let mut rord = Vec::::new(); // note that this is now superfluous @@ -1076,21 +738,14 @@ pub fn print_clonotypes( rord.push(j); } - // Apply bounds. Before sorting we check for non-numbers because otherwise you'll - // get an inscrutable traceback. + // Combine stats for the same variable. This is needed because each exact + // subclonotype contributes. Note that we don't care about the order of the + // values here (other than stability) because what we're going to do with them is + // compute the mean or max. 
- for i in 0..stats.len() { - for j in 0..stats[i].1.len() { - if !stats[i].1[j].is_finite() { - panic!( - "About to sort but there's a non-finite value, which would \ - cause the sort to fail. This is a bug." - ); - } - } - } - stats.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let mut stats2 = Vec::<(String, Vec)>::new(); + stats.sort_by(|a, b| a.0.cmp(&b.0)); + let stats_orig = stats.clone(); + let mut stats2 = Vec::<(String, Vec)>::new(); let mut i = 0; while i < stats.len() { let mut j = i + 1; @@ -1100,391 +755,148 @@ pub fn print_clonotypes( } j += 1; } - let mut all = Vec::::new(); - for k in i..j { - all.append(&mut stats[k].1.clone()); - } + let all = stats[i..j] + .iter() + .flat_map(|s| s.1.iter().cloned()) + .collect(); stats2.push((stats[i].0.clone(), all)); i = j; } stats = stats2; - for i in 0..ctl.clono_filt_opt.bounds.len() { - let x = &ctl.clono_filt_opt.bounds[i]; + + // Traverse the bounds and apply them. + // Notes: + // 1. This seems to run during both pass 1 and 2, and should only run + // during pass 1. + // 2. The results of this can be counterintuitive, because the filtering is + // applied during pass 1, when there could be cells in the clonotype, that + // are removed by other filters. 
+ + for bi in 0..ctl.clono_filt_opt.bounds.len() { + let x = &ctl.clono_filt_opt.bounds[bi]; let mut means = Vec::::new(); - for i in 0..x.n() { - let mut vals = Vec::::new(); - // let mut found = false; - for j in 0..stats.len() { - if stats[j].0 == x.var[i] { - vals.append(&mut stats[j].1.clone()); - // found = true; + let mut mins = Vec::::new(); + let mut maxs = Vec::::new(); + // traverse the coefficients on the left hand side (each having a variable) + let mut fail = false; + for vi in x.var.iter().take(x.n()) { + let mut vals = Vec::::new(); // the stats for the variable + for stat in &stats { + if stat.0 == *vi { + for sk in &stat.1 { + if let Ok(sk) = sk.parse::() { + vals.push(sk); + } + } break; } } - /* - if !found { - eprintln!( - "\nFailed to find the variable {} used in a \ - bound. Please see \"enclone help filter\".\n", - x.var[i] - ); - std::process::exit(1); - } - */ + let mut min = 1_000_000_000.0_f64; let mut mean = 0.0; - for j in 0..vals.len() { - mean += vals[j]; - } - mean /= n as f64; - means.push(mean); - } - if !x.satisfied(&means) { - for u in 0..nexacts { - bads[u] = true; - } - } - } - - // Process COMPLETE. 
- - if ctl.gen_opt.complete { - let mut used = vec![false; cols]; - for u in 0..nexacts { - if !bads[u] { - for m in 0..cols { - if mat[m][u].is_some() { - used[m] = true; - } + let mut max = -1_000_000_000.0_f64; + let mut count = 0; + for val in vals { + if !val.is_nan() { + min = min.min(val); + mean += val; + max = max.max(val); + count += 1; } } + if count == 0 { + fail = true; + } else { + mins.push(min); + mean /= count as f64; + means.push(mean); + maxs.push(max); + } } - for u in 0..nexacts { - for m in 0..cols { - if used[m] && mat[m][u].is_none() { - bads[u] = true; + if ctl.clono_filt_opt.bound_type[bi] == "mean" && (fail || !x.satisfied(&means)) + { + if ctl.clono_group_opt.asymmetric_center == "from_filters" { + in_center = false; + } else { + for b in bads.iter_mut().take(nexacts) { + *b = true; } } } - } - - // See if we're in the test and control sets for gene scan. - // uses: ctl, stats - - if ctl.gen_opt.gene_scan_test.is_some() { - let x = ctl.gen_opt.gene_scan_test.clone().unwrap(); - let mut means = Vec::::new(); - for i in 0..x.n() { - let mut vals = Vec::::new(); - for j in 0..stats.len() { - if stats[j].0 == x.var[i] { - vals.append(&mut stats[j].1.clone()); - // found = true; - break; + if ctl.clono_filt_opt.bound_type[bi] == "min" && (fail || !x.satisfied(&mins)) { + if ctl.clono_group_opt.asymmetric_center == "from_filters" { + in_center = false; + } else { + for b in bads.iter_mut().take(nexacts) { + *b = true; } } - let mut mean = 0.0; - for j in 0..vals.len() { - mean += vals[j]; - } - mean /= n as f64; - means.push(mean); } - res.9.push(x.satisfied(&means)); - let x = ctl.gen_opt.gene_scan_control.clone().unwrap(); - let mut means = Vec::::new(); - for i in 0..x.n() { - let mut vals = Vec::::new(); - for j in 0..stats.len() { - if stats[j].0 == x.var[i] { - vals.append(&mut stats[j].1.clone()); - break; + if ctl.clono_filt_opt.bound_type[bi] == "max" && (fail || !x.satisfied(&maxs)) { + if ctl.clono_group_opt.asymmetric_center == 
"from_filters" { + in_center = false; + } else { + for b in bads.iter_mut().take(nexacts) { + *b = true; } } - let mut mean = 0.0; - for j in 0..vals.len() { - mean += vals[j]; - } - mean /= n as f64; - means.push(mean); } - res.10.push(x.satisfied(&means)); } + // Process COMPLETE. + + process_complete(ctl, nexacts, &mut bads, mat); + // Done unless on second pass. if pass == 1 { continue; } - // Fill in exact_subclonotype_id, reorder. - - if ctl.parseable_opt.pout.len() > 0 { - for u in 0..nexacts { - macro_rules! speak { - ($u:expr, $var:expr, $val:expr) => { - if pass == 2 && ctl.parseable_opt.pout.len() > 0 { - if pcols_sort.is_empty() - || bin_member(&pcols_sort, &$var.to_string()) - { - out_data[$u].insert($var.to_string(), $val); - } - } - }; - } - speak![rord[u], "exact_subclonotype_id", format!("{}", u + 1)]; - } - let mut out_data2 = vec![HashMap::::new(); nexacts]; - for v in 0..nexacts { - out_data2[v] = out_data[rord[v]].clone(); - } - out_data = out_data2; - } - - // Add header text to mlog. + // See if we're in the test and control sets for gene scan. - add_header_text(&ctl, &exacts, &exact_clonotypes, &rord, &mat, &mut mlog); + gene_scan_test( + ctl, + &stats, + &stats_orig, + nexacts, + n, + &mut res.9, + &mut res.10, + ); - // Build table stuff. + // Make the table. - let mut row1 = Vec::::new(); - let mut justify = Vec::::new(); - let mut rows = Vec::>::new(); - let mut drows = Vec::>::new(); - build_table_stuff( - &ctl, + let mut logz = String::new(); + finish_table( + n, + ctl, &exacts, - &exact_clonotypes, + exact_clonotypes, &rsi, &vars, &show_aa, - &mut row1, - &mut justify, - &mut drows, - &mut rows, + &field_types, &lvars, + refdata, + dref, + &peer_groups, + &mut mlog, + &mut logz, + &stats, + &mut sr, + &extra_args, + pcols_sort, + &mut out_data, + &rord, + pass, + &cdr3_con, ); - // Insert universal and donor reference rows. 
- - insert_reference_rows( - &ctl, - &rsi, - &show_aa, - &refdata, - &dref, - &row1, - &mut drows, - &mut rows, - &exacts, - &exact_clonotypes, - ); - - // Insert horizontal line. - - if !drows.is_empty() { - let mut width = 1 + lvars.len(); - for col in 0..cols { - width += rsi.cvars[col].len(); - } - rows.push(vec!["\\hline".to_string(); width]); - } - - // Insert placeholder for dots row. - - let diff_pos = rows.len(); - if !drows.is_empty() { - let row = Vec::::new(); - rows.push(row); - } - - // Finish building table content. - - for j in 0..sr.len() { - sr[j].0[0] = format!("{}", j + 1); // row number (#) - rows.push(sr[j].0.clone()); - rows.append(&mut sr[j].1.clone()); - } - - // Add sum and mean rows. - - if ctl.clono_print_opt.sum { - let mut row = Vec::::new(); - row.push("Σ".to_string()); - for i in 0..lvars.len() { - let mut x = lvars[i].clone(); - if x.contains(':') { - x = x.before(":").to_string(); - } - let mut found = false; - let mut total = 0.0; - for j in 0..stats.len() { - if stats[j].0 == x { - found = true; - for k in 0..stats[j].1.len() { - total += stats[j].1[k]; - } - } - } - if !found { - row.push(String::new()); - } else { - if !lvars[i].ends_with("_%") { - row.push(format!("{}", total.round() as usize)); - } else { - row.push(format!("{:.2}", total)); - } - } - } - // This is necessary but should not be: - for cx in 0..cols { - for _ in 0..rsi.cvars[cx].len() { - row.push(String::new()); - } - } - rows.push(row); - } - if ctl.clono_print_opt.mean { - let mut row = Vec::::new(); - row.push("μ".to_string()); - for i in 0..lvars.len() { - let mut x = lvars[i].clone(); - if x.contains(':') { - x = x.before(":").to_string(); - } - let mut found = false; - let mut total = 0.0; - for j in 0..stats.len() { - if stats[j].0 == x { - found = true; - for k in 0..stats[j].1.len() { - total += stats[j].1[k]; - } - } - } - let mean = total / n as f64; - if !found { - row.push(String::new()); - } else { - if !lvars[i].ends_with("_%") { - 
row.push(format!("{:.1}", mean)); - } else { - row.push(format!("{:.2}", mean)); - } - } - } - // This is necessary but should not be: - for cx in 0..cols { - for _ in 0..rsi.cvars[cx].len() { - row.push(String::new()); - } - } - rows.push(row); - } - - // Make the diff row. - - make_diff_row(&ctl, &rsi, cols, diff_pos, &drows, &mut row1, &mut rows); - - // Make table. - - for i in 0..rows.len() { - for j in 0..rows[i].len() { - rows[i][j] = rows[i][j].replace("|TRX", "TRB"); - rows[i][j] = rows[i][j].replace("|TRY", "TRA"); - } - } - for cx in 0..cols { - justify.push(b'|'); - for m in 0..rsi.cvars[cx].len() { - justify.push(justification(&rsi.cvars[cx][m])); - } - } - let mut logz = String::new(); - make_table(&ctl, &mut rows, &justify, &mlog, &mut logz); - - // Add phylogeny. - - if ctl.toy { - let mut vrefs = Vec::>::new(); - let mut jrefs = Vec::>::new(); - for cx in 0..cols { - let (mut vref, mut jref) = (Vec::::new(), Vec::::new()); - for u in 0..nexacts { - let m = rsi.mat[cx][u]; - if m.is_some() { - let m = m.unwrap(); - jref = exact_clonotypes[exacts[u]].share[m].js.to_ascii_vec(); - } - let vseq1 = refdata.refs[rsi.vids[cx]].to_ascii_vec(); - if rsi.vpids[cx].is_some() { - vref = dref[rsi.vpids[cx].unwrap()].nt_sequence.clone(); - } else { - vref = vseq1.clone(); - } - } - vrefs.push(vref); - jrefs.push(jref); - } - for u1 in 0..nexacts { - let ex1 = &exact_clonotypes[exacts[u1]]; - for u2 in u1 + 1..nexacts { - let ex2 = &exact_clonotypes[exacts[u2]]; - let (mut d1, mut d2) = (0, 0); - let mut d = 0; - for cx in 0..cols { - let (m1, m2) = (rsi.mat[cx][u1], rsi.mat[cx][u2]); - if m1.is_none() || m2.is_none() { - continue; - } - let (m1, m2) = (m1.unwrap(), m2.unwrap()); - let (s1, s2) = (&ex1.share[m1].seq_del, &ex2.share[m2].seq_del); - let n = s1.len(); - let (vref, jref) = (&vrefs[cx], &jrefs[cx]); - for j in 0..vars[cx].len() { - let p = vars[cx][j]; - if s1[p] != s2[p] { - if p < vref.len() - ctl.heur.ref_v_trim { - if s1[p] == vref[p] { - d1 += 1; 
- } else if s2[p] == vref[p] { - d2 += 1; - } - } else if p >= n - (jref.len() - ctl.heur.ref_j_trim) { - if s1[p] == jref[jref.len() - (n - p)] { - d1 += 1; - } else if s2[p] == jref[jref.len() - (n - p)] { - d2 += 1; - } - } else { - d += 1; - } - } - } - } - if (d1 == 0) ^ (d2 == 0) { - if d1 == 0 { - logz += &format!("{} ==> {}", u1 + 1, u2 + 1); - } else { - logz += &format!("{} ==> {}", u2 + 1, u1 + 1); - } - let s = format!( - "; u1 = {}, u2 = {}, d1 = {}, d2 = {}, d = {}\n", - u1 + 1, - u2 + 1, - d1, - d2, - d - ); - logz += &s; - } - } - } - } - // Save. res.1.push(logz); res.2.push((exacts.clone(), rsi.clone())); + res.12.push(in_center); for u in 0..exacts.len() { res.8 += exact_clonotypes[exacts[u]].ncells() as isize; } @@ -1494,6 +906,18 @@ pub fn print_clonotypes( } } }); + let exacts = exacts; + for r in &results { + if !r.13.is_empty() { + return Err(r.13.clone()); + } + } + + for ri in &results { + for vj in &ri.11 { + fate[vj.0].insert(vj.1.clone(), vj.2.clone()); + } + } // Sort results in descending order by number of cells. @@ -1502,36 +926,56 @@ pub fn print_clonotypes( // Write loupe output. let mut all_loupe_clonotypes = Vec::::new(); - for i in 0..results.len() { - all_loupe_clonotypes.append(&mut results[i].6); + for r in results.iter_mut() { + all_loupe_clonotypes.append(&mut r.6); + } + loupe_out(ctl, all_loupe_clonotypes, refdata, dref); + + // Write out the fate of each filtered barcode. + if !ctl.gen_opt.fate_file.is_empty() { + let mut wtr = BufWriter::new( + File::create(&ctl.gen_opt.fate_file).expect("Unable to open FATE_FILE for writing"), + ); + serde_json::to_writer_pretty(&mut wtr, fate).map_err(|e| e.to_string())?; + } + + // Set up to group and print clonotypes. 
+ + for ri in results.iter_mut().take(orbits.len()) { + for (v1, (v2, &v12)) in ri.1.iter().zip(ri.2.iter().zip(ri.12.iter())) { + pics.push(v1.clone()); + exacts.push(v2.0.clone()); + rsi.push(v2.1.clone()); + in_center.push(v12); + } + out_datas.append(&mut ri.7); } - loupe_out(&ctl, all_loupe_clonotypes, &refdata, &dref); // Gather some data for gene scan. - if ctl.gen_opt.gene_scan_test.is_some() { - let mut count = 0; - for i in 0..orbits.len() { - for j in 0..results[i].1.len() { - if results[i].9[j] { - tests.push(count); + if ctl.gen_opt.gene_scan_test.is_some() && !ctl.gen_opt.gene_scan_exact { + for (i, r) in results.iter().take(orbits.len()).enumerate() { + for (&v9, &v10) in r.9.iter().zip(r.10.iter()) { + if v9 { + tests.push(i); } - if results[i].10[j] { - controls.push(count); + if v10 { + controls.push(i); } - count += 1; } } } - - // Set up to group and print clonotypes. - - for i in 0..orbits.len() { - for j in 0..results[i].1.len() { - pics.push(results[i].1[j].clone()); - exacts.push(results[i].2[j].0.clone()); - rsi.push(results[i].2[j].1.clone()); + if ctl.gen_opt.gene_scan_test.is_some() && ctl.gen_opt.gene_scan_exact { + for (r, e) in results.iter().zip(exacts.iter()) { + for (&ej, (&v9, &v10)) in e.iter().zip(r.9.iter().zip(r.10.iter())) { + if v9 { + tests.push(ej); + } + if v10 { + controls.push(ej); + } + } } - out_datas.append(&mut results[i].7); } + Ok(()) } diff --git a/enclone_print/src/print_utils1.rs b/enclone_print/src/print_utils1.rs index 4001fda98..ffc90a84b 100644 --- a/enclone_print/src/print_utils1.rs +++ b/enclone_print/src/print_utils1.rs @@ -1,80 +1,323 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -use ansi_escape::*; -use enclone_core::defs::*; -use io_utils::*; -use itertools::*; +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +use amino::codon_to_aa; +use ansi_escape::{ + emit_bold_escape, emit_eight_bit_color_escape, emit_end_escape, emit_red_escape, +}; +use enclone_core::cell_color::CellColor; +use enclone_core::defs::{ColInfo, EncloneControl, ExactClonotype, GexInfo, TigData1, POUT_SEP}; +use enclone_core::print_tools::{color_by_property, emit_codon_color_escape}; +use enclone_vars::decode_arith; +use expr_tools::vars_of_node; +use io_utils::{fwrite, fwriteln}; +use itertools::Itertools; use std::cmp::max; use std::collections::HashMap; +use std::fmt::Write as _; use std::io::Write; -use string_utils::*; -use tables::*; -use vdj_ann::refx::*; -use vector_utils::*; +use string_utils::{stringme, strme}; +use tables::{print_tabular_vbox, visible_width}; +use vector_utils::{bin_member, lower_bound1_3, meet_size, unique_sort, upper_bound1_3, VecUtils}; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn test_internal_error_seq(seq: &[u8], dna: &[u8], cdr3: &str) -> Result<(), String> { + let mut found = false; + for i in 0..seq.len() { + if seq[i..].starts_with(dna) { + found = true; + } + } + if !found { + return Err(format!( + "\nInternal error, failed to find {}, CDR3 = {}.\n", + strme(dna), + cdr3 + )); + } + Ok(()) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Code to compute VDJ features. 
+ +pub fn get_cdr1(x: &TigData1, left: i64, right: i64) -> Option { + let left = left * 3; + let right = right * 3; + if x.cdr1_start.is_some() + && x.fr2_start.is_some() + && x.cdr1_start.unwrap() <= x.fr2_start.unwrap() + { + let mut dna = Vec::::new(); + if x.cdr1_start.unwrap() as i64 - left >= 0 + && x.cdr1_start.unwrap() as i64 - left < x.seq_del_amino.len() as i64 + && x.fr2_start.unwrap() as i64 + right > 0 + && x.fr2_start.unwrap() as i64 + right <= x.seq_del_amino.len() as i64 + { + for p in x.cdr1_start.unwrap() as i64 - left..x.fr2_start.unwrap() as i64 + right { + let p = p as usize; + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa).unwrap(); + return Some(stringme(&dna)); + } + } + None +} + +pub fn get_cdr2(x: &TigData1, left: i64, right: i64) -> Option { + let left = left * 3; + let right = right * 3; + if x.cdr2_start.is_some() + && x.fr3_start.is_some() + && x.cdr2_start.unwrap() <= x.fr3_start.unwrap() + { + let mut dna = Vec::::new(); + if x.cdr2_start.unwrap() as i64 - left >= 0 + && x.cdr2_start.unwrap() as i64 - left < x.seq_del_amino.len() as i64 + && x.fr3_start.unwrap() as i64 + right > 0 + && x.fr3_start.unwrap() as i64 + right <= x.seq_del_amino.len() as i64 + { + for p in x.cdr2_start.unwrap() as i64 - left..x.fr3_start.unwrap() as i64 + right { + let p = p as usize; + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa).unwrap(); + return Some(stringme(&dna)); + } + } + None +} + +pub fn get_cdr3(x: &TigData1, left: i64, right: i64) -> Option { + let left = left * 3; + let right = right * 3; + let mut dna = Vec::::new(); + if x.cdr3_start as i64 - left >= 0 + && 
x.cdr3_start as i64 - left < x.seq_del_amino.len() as i64 + && x.cdr3_start as i64 + 3 * x.cdr3_aa.len() as i64 + right > 0 + && x.cdr3_start as i64 + 3 * x.cdr3_aa.len() as i64 + right <= x.seq_del_amino.len() as i64 + { + for p in + x.cdr3_start as i64 - left..x.cdr3_start as i64 + 3 * x.cdr3_aa.len() as i64 + right + { + let p = p as usize; + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa).unwrap(); + return Some(stringme(&dna)); + } + None +} + +pub fn get_fwr1(x: &TigData1) -> Option { + if x.cdr1_start.is_some() && x.fr1_start <= x.cdr1_start.unwrap() { + let mut dna = Vec::::new(); + for p in x.fr1_start..x.cdr1_start.unwrap() { + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa).unwrap(); + return Some(stringme(&dna)); + } + None +} + +pub fn get_fwr2(x: &TigData1) -> Option { + if x.fr2_start.unwrap() <= x.cdr2_start.unwrap() { + let mut dna = Vec::::new(); + for p in x.fr2_start.unwrap()..x.cdr2_start.unwrap() { + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa).unwrap(); + return Some(stringme(&dna)); + } + None +} + +pub fn get_fwr3(x: &TigData1) -> Option { + // not sure if it makes sense to substract x.ins.len() in two places + if x.fr3_start.is_some() && x.fr3_start.unwrap() <= x.cdr3_start - x.ins_len() { + let mut dna = Vec::::new(); + let mut cdr3_start = x.cdr3_start; + for p in 0..x.cdr3_start { + if x.seq_del_amino[p] == b'-' { + cdr3_start += 1; + } + } + for p in 
x.fr3_start.unwrap()..cdr3_start - x.ins_len() { + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa).unwrap(); + return Some(stringme(&dna)); + } + None +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn compute_field_types( + ctl: &EncloneControl, + rsi: &ColInfo, + show_aa: &[Vec], +) -> Vec> { + let cols = rsi.mat.len(); + let mut field_types = vec![Vec::new(); cols]; + for cx in 0..cols { + let mut ft = vec![0_u8; show_aa[cx].len()]; + let cs1 = rsi.cdr1_starts[cx]; + let cs2 = rsi.cdr2_starts[cx]; + let cs3 = rsi.cdr3_starts[cx]; + let n3 = rsi.cdr3_lens[cx]; + let fs1 = rsi.fr1_starts[cx]; + let fs2 = rsi.fr2_starts[cx]; + let fs3 = rsi.fr3_starts[cx]; + let show_cdr1 = cs1.is_some() + && fs2.is_some() + && cs1.unwrap() <= fs2.unwrap() + && ctl.clono_print_opt.amino.contains(&"cdr1".to_string()); + let show_cdr2 = cs2.is_some() + && fs3.is_some() + && cs2.unwrap() <= fs3.unwrap() + && ctl.clono_print_opt.amino.contains(&"cdr2".to_string()); + let show_cdr3 = ctl.clono_print_opt.amino.contains(&"cdr3".to_string()); + let show_fwr1 = cs1.is_some() + && rsi.fr1_starts[cx] <= cs1.unwrap() + && ctl.clono_print_opt.amino.contains(&"fwr1".to_string()); + let show_fwr2 = fs2.is_some() + && cs2.is_some() + && fs2.unwrap() <= cs2.unwrap() + && ctl.clono_print_opt.amino.contains(&"fwr2".to_string()); + let show_fwr3 = fs3.is_some() + && fs3.unwrap() <= rsi.cdr3_starts[cx] + && ctl.clono_print_opt.amino.contains(&"fwr3".to_string()); + let show_fwr4 = ctl.clono_print_opt.amino.contains(&"fwr4".to_string()); + for (j, p) in show_aa[cx].iter().enumerate() { + if show_cdr1 && *p >= cs1.unwrap() / 3 && *p < fs2.unwrap() / 3 { + ft[j] = 1; + } else if show_cdr2 && *p >= cs2.unwrap() / 3 && *p < fs3.unwrap() / 3 { + ft[j] = 2; 
+ } else if show_cdr3 && *p >= cs3 / 3 && *p < cs3 / 3 + n3 { + ft[j] = 3; + } else if show_fwr1 && *p >= fs1 / 3 && *p < cs1.unwrap() / 3 { + ft[j] = 4; + } else if show_fwr2 && *p >= fs2.unwrap() / 3 && *p < cs2.unwrap() / 3 { + ft[j] = 5; + } else if show_fwr3 && *p >= fs3.unwrap() / 3 && *p < rsi.cdr3_starts[cx] / 3 { + ft[j] = 6; + } else if show_fwr4 && *p >= cs3 / 3 + n3 { + ft[j] = 7; + } + } + field_types[cx] = ft; + } + field_types +} // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ pub fn make_table( ctl: &EncloneControl, - rows: &mut Vec>, - justify: &Vec, - mlog: &Vec, + rows: &mut [Vec], + justify: &[u8], + mlog: &[u8], logz: &mut String, ) { // In plain mode, strip escape characters. if !ctl.pretty { - for i in 0..rows.len() { - for j in 0..rows[i].len() { + for row in rows.iter_mut() { + for rj in row { let mut x = Vec::::new(); let mut escaped = false; - let s = rows[i][j].as_bytes(); - for l in 0..s.len() { - if s[l] == b'' { + let s = rj.as_bytes(); + for &byte in s { + if byte == b'' { escaped = true; } if escaped { - if s[l] == b'm' { + if byte == b'm' { escaped = false; } continue; } - x.push(s[l]); + x.push(byte); } - rows[i][j] = stringme(&x); + *rj = stringme(&x); } } } // Make table. 
- let log0 = stringme(&mlog); + let log0 = stringme(mlog); let mut log = String::new(); if ctl.debug_table_printing { - for i in 0..rows.len() { - println!(""); - for j in 0..rows[i].len() { + for (i, row) in rows.iter().enumerate() { + println!(); + for (j, rj) in row.iter().enumerate() { println!( "row = {}, col = {}, entry = {}, vis width = {}", i, j, - rows[i][j], - visible_width(&rows[i][j]) + rj, + visible_width(rj) ); } } - println!(""); + println!(); } - print_tabular_vbox( - &mut log, - &rows, - 2, - &justify, - ctl.debug_table_printing, - false, - ); + print_tabular_vbox(&mut log, rows, 2, justify, ctl.debug_table_printing, false); if ctl.debug_table_printing { - println!("{}", log); + println!("{log}"); } let mut cs = vec![Vec::::new(); rows.len() + 2]; let mut row = 0; @@ -89,10 +332,8 @@ pub fn make_table( // Process each row. - for i in 0..cs.len() { - for j in 0..cs[i].len() { - log.push(cs[i][j]); - } + for ci in cs { + log.extend(&ci); log.push('\n'); } @@ -100,10 +341,7 @@ pub fn make_table( let mut barcode = false; let mut header = false; - let mut x = Vec::::new(); - for c in log.chars() { - x.push(c); - } + let x: Vec = log.chars().collect(); let mut j = 0; while j < x.len() { // DEFAULT @@ -130,8 +368,12 @@ pub fn make_table( // *** bullets now off *** if c == '$' { if ctl.pretty { - *logz += &format!("[38;5;{}m[48;5;{}m ", TEXTCOLOR, BACKGROUND); - barcode = true; + if !ctl.nogray { + write!(*logz, "[38;5;{TEXTCOLOR}m[48;5;{BACKGROUND}m ").unwrap(); + barcode = true; + } else { + logz.push(' '); + } } else { logz.push('•'); } @@ -143,8 +385,8 @@ pub fn make_table( // In a barcode line, hop around │ symbols, which should not be colorized. 
} else if barcode && c == '│' && x[j + 1] != '\n' { // *logz += "│"; - *logz += &format!("[48;5;{}m│", BACKGROUND); - *logz += &format!("[38;5;{}m[48;5;{}m", TEXTCOLOR, BACKGROUND); + write!(*logz, "[48;5;{BACKGROUND}m│").unwrap(); + write!(*logz, "[38;5;{TEXTCOLOR}m[48;5;{BACKGROUND}m").unwrap(); } else if barcode && c == '│' && x[j + 1] == '\n' { *logz += "│"; // *logz += &format!("[48;5;{}m│", BACKGROUND); @@ -153,7 +395,7 @@ pub fn make_table( // Do similar things for header line, but bold the line instead. } else if c == '#' { if ctl.pretty { - *logz += &format!("#"); + *logz += "#"; header = true; } else { logz.push('#'); @@ -167,7 +409,7 @@ pub fn make_table( // In a header line, hop around │ symbols, which should not be colorized. } else if header && c == '│' && x[j + 1] != '\n' { *logz += "│"; - *logz += &format!(""); + *logz += ""; } else if header && c == '│' && x[j + 1] == '\n' { *logz += "│"; header = false; @@ -184,33 +426,31 @@ pub fn make_table( pub fn print_digit(p: usize, i: usize, digits: usize, ds: &mut String) { if digits == 1 { - *ds += &format!("{}", p); + write!(*ds, "{p}").unwrap(); } else if digits == 2 { if i == 0 { if p >= 10 { - *ds += &format!("{}", p / 10); + write!(*ds, "{}", p / 10).unwrap(); } else { ds.push(' '); } } else { - *ds += &format!("{}", p % 10); + write!(*ds, "{}", p % 10).unwrap(); } - } else { - if i == 0 { - if p >= 100 { - *ds += &format!("{}", p / 100); - } else { - ds.push(' '); - } - } else if i == 1 { - if p >= 10 { - *ds += &format!("{}", (p % 100) / 10); - } else { - ds.push(' '); - } + } else if i == 0 { + if p >= 100 { + write!(*ds, "{}", p / 100).unwrap(); } else { - *ds += &format!("{}", p % 10); + ds.push(' '); } + } else if i == 1 { + if p >= 10 { + write!(*ds, "{}", (p % 100) / 10).unwrap(); + } else { + ds.push(' '); + } + } else { + write!(*ds, "{}", p % 10).unwrap(); } } @@ -230,261 +470,96 @@ pub fn ndigits(n: usize) -> usize { // 
▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -pub fn make_diff_row( - ctl: &EncloneControl, - rsi: &ColInfo, - cols: usize, - diff_pos: usize, - drows: &Vec>, - row1: &mut Vec, - rows: &mut Vec>, -) { - let mut xrow = vec!["".to_string(); row1.len()]; - let mut xrow_filled = vec![false; row1.len()]; - for cx in 0..cols { - for j in 0..rsi.cvars[cx].len() { - if rsi.cvars[cx][j] != "amino".to_string() { - xrow.push(rsi.cvars[cx][j].to_string()); - xrow_filled.push(true); - } else { - xrow.push("".to_string()); - xrow_filled.push(false); - } - } - } - let nc = row1.len(); - if !drows.is_empty() { - let mut ncall = 0; - for j in 0..cols { - for z in 0..rsi.cvars[j].len() { - let mut c = Vec::>::new(); - let mut start = 5 + drows.len(); - if drows.len() >= 1 { - start += 3; - } - if ctl.clono_print_opt.sum { - start += 1; - } - if ctl.clono_print_opt.mean { - start += 1; - } - for k in start..rows.len() { - if rows[k][0].contains("$") { - continue; - } - if rows[k][ncall + z + nc].len() > 0 { - c.push(rows[k][ncall + z + nc].as_bytes().to_vec()); - } - } - - // Package characters with ANSI escape codes that come before them. - // The is a dorky way of identifying codons that are different, by - // virtue of them being shown as colored amino acids. - - let mut c2 = Vec::>>::new(); - for i in 0..c.len() { - c2.push(package_characters_with_escapes(&c[i])); - } - - // Proceed. - - if (rsi.cvars[j][z] != "amino" && rsi.cvars[j][z] != "var") || c.len() == 0 { - row1.push("".to_string()); - continue; - } - let mut dots = Vec::::new(); - for m in 0..c2[0].len() { - let mut digits_or_blanks = true; - for l in 0..c2.len() { - if c2[l].is_empty() { - // needed? 
- continue; - } - if !(c2[l][m] == b" ".to_vec() - || (c2[l][m] >= b"0".to_vec() && c2[l][m] <= b"9".to_vec())) - { - digits_or_blanks = false; - } - if c2[l][m].contains(&b' ') { - digits_or_blanks = true; - } - } - let mut same = true; - for l in 1..c2.len() { - if c2[l].is_empty() { - // needed? - continue; - } - if c2[l][m] != c2[0][m] { - same = false; - } - } - let mut sep = true; - for l in 0..c2.len() { - if c2[l].is_empty() { - // needed? - continue; - } - if c2[l][m] != b"|" { - sep = false; - } - } - if sep { - } else if digits_or_blanks { - dots.push(b' '); - } else if same { - dots.push(b'.'); - } else { - dots.push(b'x'); - } - } - row1.push(format!("{}", strme(&dots))); - } - ncall += rsi.cvars[j].len(); - } - if !drows.is_empty() { - for i in 0..row1.len() { - if xrow_filled[i] { - row1[i] = xrow[i].clone(); - } - } - } - rows[diff_pos] = row1.to_vec(); - } else { - if !drows.is_empty() { - for i in 0..row1.len() { - if xrow_filled[i] { - row1[i] = xrow[i].clone(); - } - } - } - for i in 0..row1.len() { - rows[diff_pos - 1][i] = row1[i].clone(); - } - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - // Start to generate parseable output, warn about multi-donor clonotypes, and other things. pub fn start_gen( ctl: &EncloneControl, - exacts: &Vec, - exact_clonotypes: &Vec, - refdata: &RefData, - rsi: &ColInfo, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], out_data: &mut Vec>, mut mlog: &mut Vec, + extra_args: &[String], ) { let pcols_sort = &ctl.parseable_opt.pcols_sort; macro_rules! 
speak { ($u:expr, $var:expr, $val:expr) => { - if ctl.parseable_opt.pout.len() > 0 { - if pcols_sort.is_empty() || bin_member(&pcols_sort, &$var.to_string()) { + if ctl.parseable_opt.pout.len() > 0 || extra_args.len() > 0 { + if pcols_sort.is_empty() + || bin_member(&pcols_sort, &$var.to_string()) + || bin_member(&extra_args, &$var.to_string()) + { out_data[$u].insert($var.to_string(), $val); } } }; } - macro_rules! speakc { - ($u:expr, $col:expr, $var:expr, $val:expr) => { - if ctl.parseable_opt.pout.len() > 0 && $col + 1 <= ctl.parseable_opt.pchains { - let varc = format!("{}{}", $var, $col + 1); - if pcols_sort.is_empty() || bin_member(&pcols_sort, &varc) { - out_data[$u].insert(varc, format!("{}", $val)); - } - } - }; - } - let nexacts = exacts.len(); let mut n = 0; - for u in 0..nexacts { - n += exact_clonotypes[exacts[u]].ncells(); - } - if ctl.parseable_opt.pout.len() > 0 { - *out_data = vec![HashMap::::new(); nexacts]; + for &eu in exacts { + n += exact_clonotypes[eu].ncells(); } - let cols = rsi.vids.len(); - let mut ncells = 0; - for u in 0..exacts.len() { - ncells += exact_clonotypes[exacts[u]].ncells(); + if !ctl.parseable_opt.pout.is_empty() || !extra_args.is_empty() { + *out_data = vec![HashMap::::new(); exacts.len()]; } - for u in 0..exacts.len() { - speak!(u, "nchains", format!("{}", cols)); - speak!(u, "clonotype_ncells", format!("{}", ncells)); - let mut bc = Vec::::new(); - for x in exact_clonotypes[exacts[u]].clones.iter() { - bc.push(x[0].barcode.clone()); - } - bc.sort(); + for (u, &eu) in exacts.iter().enumerate() { + let mut bc = exact_clonotypes[eu] + .clones + .iter() + .map(|x| x[0].barcode.as_str()) + .collect::>(); + bc.sort_unstable(); speak!(u, "barcodes", format!("{}", bc.iter().format(","))); for d in ctl.origin_info.dataset_list.iter() { - if d.len() > 0 { - let mut bc = Vec::::new(); - for i in 0..exact_clonotypes[exacts[u]].clones.len() { - let q = &exact_clonotypes[exacts[u]].clones[i]; + if !d.is_empty() { + let mut bc = 
Vec::<&str>::new(); + for q in &exact_clonotypes[eu].clones { if ctl.origin_info.dataset_id[q[0].dataset_index] == *d { - bc.push(q[0].barcode.clone()); + bc.push(q[0].barcode.as_str()); } } speak!( u, - &format!("{}_barcodes", d), + &format!("{d}_barcodes"), format!("{}", bc.iter().format(",")) ); } } if ctl.parseable_opt.pbarcode { - let mut bc = Vec::::new(); - for x in exact_clonotypes[exacts[u]].clones.iter() { - bc.push(x[0].barcode.clone()); - } - speak!(u, "barcode", format!("{}", bc.iter().format(";"))); + let bc = exact_clonotypes[eu] + .clones + .iter() + .map(|x| x[0].barcode.as_str()) + .collect::>(); + speak!(u, "barcode", format!("{}", bc.iter().format(POUT_SEP))); for d in ctl.origin_info.dataset_list.iter() { - if d.len() > 0 { - let mut bc = Vec::::new(); - for i in 0..exact_clonotypes[exacts[u]].clones.len() { - let q = &exact_clonotypes[exacts[u]].clones[i]; + if !d.is_empty() { + let mut bc = Vec::<&str>::new(); + for i in 0..exact_clonotypes[eu].clones.len() { + let q = &exact_clonotypes[eu].clones[i]; if ctl.origin_info.dataset_id[q[0].dataset_index] == *d { - bc.push(q[0].barcode.clone()); + bc.push(q[0].barcode.as_str()); } else { - bc.push("".to_string()); + bc.push(""); } } speak!( u, - &format!("{}_barcode", d), - format!("{}", bc.iter().format(";")) + &format!("{d}_barcode"), + format!("{}", bc.iter().format(POUT_SEP)) ); } } } - for cx in 0..cols { - let vid = rsi.vids[cx]; - speakc!(u, cx, "v_name", refdata.name[vid]); - speakc!(u, cx, "v_id", refdata.id[vid]); - let did = rsi.dids[cx]; - if did.is_some() { - let did = did.unwrap(); - speakc!(u, cx, "d_name", refdata.name[did]); - speakc!(u, cx, "d_id", refdata.id[did]); - } - let jid = rsi.jids[cx]; - speakc!(u, cx, "j_name", refdata.name[jid]); - speakc!(u, cx, "j_id", refdata.id[jid]); - } } // Start to print the clonotype. 
let mut donors = Vec::::new(); - for u in 0..exacts.len() { - let ex = &exact_clonotypes[exacts[u]]; - for m in 0..ex.clones.len() { - if ex.clones[m][0].donor_index.is_some() { - let d = ex.clones[m][0].donor_index.unwrap(); - if ctl.origin_info.donor_list[d].len() > 0 { + for &eu in exacts { + let ex = &exact_clonotypes[eu]; + for cm in &ex.clones { + if cm[0].donor_index.is_some() { + let d = cm[0].donor_index.unwrap(); + if !ctl.origin_info.donor_list[d].is_empty() { donors.push(d); } } @@ -500,29 +575,50 @@ pub fn start_gen( } fwrite!(&mut mlog, "██"); if ctl.pretty { - emit_end_escape(&mut mlog); + emit_end_escape(mlog); } fwriteln!( &mut mlog, " WARNING: This clonotype contains cells from multiple donors." ); - let mut donor_names = Vec::::new(); - for i in 0..donors.len() { - donor_names.push(ctl.origin_info.donor_list[donors[i]].clone()); + let mut mixes = 0; + if ctl.origin_info.donor_list.len() > 1 && ctl.clono_filt_opt_def.donor { + for (j1, &ej1) in exacts.iter().enumerate() { + let ex1 = &exact_clonotypes[ej1]; + for (j2, &ej2) in exacts.iter().enumerate().skip(j1) { + let ex2 = &exact_clonotypes[ej2]; + for (k1, ck1) in ex1.clones.iter().enumerate() { + let x1 = &ck1[0]; + if let Some(donor1) = x1.donor_index { + for (k2, ck2) in ex2.clones.iter().enumerate() { + if (j1, k1) < (j2, k2) { + let x2 = &ck2[0]; + if let Some(donor2) = x2.donor_index { + if donor1 != donor2 { + mixes += 1; + } + } + } + } + } + } + } + } } + fwriteln!(&mut mlog, "total mixed cell pairs = {}", mixes); + let donor_names: Vec<&str> = donors + .iter() + .map(|&donor| ctl.origin_info.donor_list[donor].as_str()) + .collect(); fwriteln!(&mut mlog, "donors = {}", donor_names.iter().format(",")); fwriteln!(&mut mlog, "datasets in which these donors appear:"); - for i in 0..donors.len() { - let mut datasets = Vec::::new(); - for u in 0..nexacts { - let ex = &exact_clonotypes[exacts[u]]; - for l in 0..ex.clones.len() { - if ex.clones[l][0].donor_index.is_some() { - if 
ex.clones[l][0].donor_index.unwrap() == donors[i] { - datasets.push( - ctl.origin_info.dataset_id[ex.clones[l][0].dataset_index].clone(), - ); - } + for (i, donor) in donors.into_iter().enumerate() { + let mut datasets = Vec::<&str>::new(); + for &eu in exacts { + let ex = &exact_clonotypes[eu]; + for clone in &ex.clones { + if clone[0].donor_index.is_some() && clone[0].donor_index.unwrap() == donor { + datasets.push(ctl.origin_info.dataset_id[clone[0].dataset_index].as_str()); } } } @@ -540,12 +636,10 @@ pub fn start_gen( // Print barcodes. if ctl.clono_print_opt.barcodes { - let mut bc = Vec::::new(); - for u in 0..nexacts { - let ex = &exact_clonotypes[exacts[u]]; - for l in 0..ex.clones.len() { - bc.push(ex.clones[l][0].barcode.clone()); - } + let mut bc = Vec::<&str>::new(); + for &eu in exacts { + let ex = &exact_clonotypes[eu]; + bc.extend(ex.clones.iter().map(|clone| clone[0].barcode.as_str())); } unique_sort(&mut bc); fwriteln!(&mut mlog, "• {}", bc.iter().format(",")); @@ -555,10 +649,12 @@ pub fn start_gen( // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ pub fn insert_position_rows( + ctl: &EncloneControl, rsi: &ColInfo, - show_aa: &Vec>, - vars: &Vec>, - row1: &Vec, + show_aa: &[Vec], + field_types: &[Vec], + vars: &[Vec], + row1: &[String], ) -> Vec> { let cols = rsi.cdr3_starts.len(); let mut drows = Vec::>::new(); @@ -567,60 +663,42 @@ pub fn insert_position_rows( if zpass == 2 { drows = vec![vec![String::new(); row1.len()]; digits]; } - for cx in 0..cols { - let cs1 = rsi.cdr1_starts[cx]; - let n1 = rsi.cdr1_lens[cx]; - let cs2 = rsi.cdr2_starts[cx]; - let n2 = rsi.cdr2_lens[cx]; - let cs3 = rsi.cdr3_starts[cx]; - let n3 = rsi.cdr3_lens[cx]; - for m in 0..rsi.cvars[cx].len() { + for (cvar, (aa, (var, field_type))) in rsi + .cvars + .iter() + .zip(show_aa.iter().zip(vars.iter().zip(field_types.iter()))) + .take(cols) + { + for rsim in cvar { if zpass == 1 { - if rsi.cvars[cx][m] == 
"amino".to_string() { - for p in show_aa[cx].iter() { + if rsim == "amino" { + for p in aa { digits = max(digits, ndigits(*p)); } - } else if rsi.cvars[cx][m] == "var".to_string() { - for p in vars[cx].iter() { + } else if rsim == "var" { + for p in var { digits = max(digits, ndigits(*p)); } } } else { - for i in 0..digits { - if rsi.cvars[cx][m] == "amino".to_string() { + for (i, drow) in drows.iter_mut().enumerate() { + if rsim == "amino" { let mut ds = String::new(); - for (j, p) in show_aa[cx].iter().enumerate() { - if j > 0 && cs1.is_some() && *p == cs1.unwrap() / 3 { + for (j, (&p, &t)) in aa.iter().zip(field_type.iter()).enumerate() { + if j > 0 && t != field_type[j - 1] && !ctl.gen_opt.nospaces { ds += " "; } - if j > 0 && cs2.is_some() && *p == cs2.unwrap() / 3 { - ds += " "; - } - if j > 0 && *p == cs3 / 3 { - ds += " "; - } - print_digit(*p, i, digits, &mut ds); - if j < show_aa[cx].len() - 1 { - if cs1.is_some() && *p == cs1.unwrap() / 3 + n1.unwrap() - 1 { - ds += " "; - } - if cs2.is_some() && *p == cs2.unwrap() / 3 + n2.unwrap() - 1 { - ds += " "; - } - if *p == cs3 / 3 + n3 - 1 { - ds += " "; - } - } + print_digit(p, i, digits, &mut ds); } - drows[i].push(ds); - } else if rsi.cvars[cx][m] == "var".to_string() { + drow.push(ds); + } else if rsim == "var" { let mut ds = String::new(); - for p in vars[cx].iter() { + for p in var { print_digit(*p, i, digits, &mut ds); } - drows[i].push(ds); + drow.push(ds); } else { - drows[i].push(String::new()); + drow.push(String::new()); } } } @@ -629,3 +707,212 @@ pub fn insert_position_rows( } drows } + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn color_codon( + ctl: &EncloneControl, + seq_amino: &[u8], + ref_diff_pos: &[Vec>], + x: &[(usize, u8, u32)], + col: usize, + mid: usize, + p: usize, + u: usize, + last_color: &mut String, + last: bool, + cdr3_con: &[Vec], + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], +) -> Vec { + let mut log = 
Vec::::new(); + let codon = &seq_amino[3 * p..3 * p + 3]; + let aa = codon_to_aa(codon); + if ctl.gen_opt.color == *"codon" || ctl.gen_opt.color == *"codon-diffs" { + let mut diff = false; + if !ref_diff_pos.is_empty() && ctl.gen_opt.color == *"codon-diffs" { + for j in 0..3 { + if bin_member(&ref_diff_pos[col][u], &(3 * p + j)) { + diff = true; + } + } + let cdr3_start = exact_clonotypes[exacts[u]].share[mid].cdr3_start; + let cdr3 = &exact_clonotypes[exacts[u]].share[mid].cdr3_dna.as_bytes(); + if 3 * p >= cdr3_start && 3 * p < cdr3_start + cdr3.len() { + let cdr3_con = &cdr3_con[col]; + for j in 0..3 { + let cp = 3 * p - cdr3_start + j; + if cdr3[cp] != cdr3_con[cp] { + diff = true; + } + } + } + } + if !ref_diff_pos.is_empty() && !diff && ctl.gen_opt.color == *"codon-diffs" { + log.append(&mut b"".to_vec()); + } else { + emit_codon_color_escape(codon, &mut log); + } + log.push(aa); + emit_end_escape(&mut log); + } else if ctl.gen_opt.color == *"property" { + color_by_property(&[aa], &mut log); + } else { + let (low, high) = (lower_bound1_3(x, &p), upper_bound1_3(x, &p)); + let (mut total, mut this) = (0.0, 0.0); + for u in low..high { + total += x[u as usize].2 as f64; + if x[u as usize].1 == aa { + this = x[u as usize].2 as f64; + } + } + let mut color = "black".to_string(); + if total > 0.0 && 100.0 * this / total <= ctl.gen_opt.color_by_rarity_pc { + if this == 0.0 { + color = "red".to_string(); + } else { + color = "blue".to_string(); + } + } + if color != *last_color { + if color == *"black" { + emit_end_escape(&mut log); + } else { + if color == *"red" { + emit_red_escape(&mut log); + } else { + emit_eight_bit_color_escape(&mut log, 6); + } + emit_bold_escape(&mut log); + } + *last_color = color; + } + fwrite!(log, "{}", aa as char); + } + if last && *last_color != "black" { + emit_end_escape(&mut log); + } + log +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn aa_classes() -> Vec<(char, 
&'static [u8])> { + vec![ + ('B', b"DN"), + ('Z', b"EQ"), + ('J', b"IL"), + ('-', b"DE"), + ('+', b"KHR"), + ('Ψ', b"ILMV"), + ('π', b"AGPS"), + ('Ω', b"FHWY"), + ('Φ', b"IFLMVWY"), + ('ζ', b"DEHKNQRST"), + ('X', b"ACDEFGHIKLMNPQRSTVWY"), + ] +} + +pub fn cdr3_aa_con( + style: &str, + col: usize, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + rsi: &ColInfo, +) -> String { + let mat = &rsi.mat; + let mut cdr3s = Vec::::new(); + for v in 0..exacts.len() { + if let Some(m) = mat[col][v] { + let ex = &exact_clonotypes[exacts[v]]; + cdr3s.push(ex.share[m].cdr3_aa.clone()); + } + } + let classes = aa_classes(); + let mut c = String::new(); + for i in 0..cdr3s[0].len() { + let mut vals: Vec = cdr3s.iter().map(|cdr| cdr.as_bytes()[i]).collect(); + unique_sort(&mut vals); + if vals.solo() { + c.push(vals[0] as char); + } else if style == "x" { + c.push('X'); + } else { + for m in classes.iter() { + if meet_size(&vals, m.1) == vals.len() { + c.push(m.0); + break; + } + } + } + } + c +} + +pub fn get_gex_matrix_entry( + ctl: &EncloneControl, + gex_info: &GexInfo, + fid: usize, + d_all: &[Vec], + ind_all: &[Vec], + li: usize, + l: usize, + p: usize, + y: &str, +) -> f64 { + let mut raw_count = 0 as f64; + if gex_info.gex_matrices[li].initialized() { + raw_count = gex_info.gex_matrices[li].value(p, fid) as f64; + } else { + for (&da, &ia) in d_all[l].iter().zip(ind_all[l].iter()) { + if ia == fid as u32 { + raw_count = da as f64; + break; + } + } + } + let mult = if y.ends_with("_g") { + gex_info.gex_mults[li] + } else { + gex_info.fb_mults[li] + }; + if !ctl.gen_opt.full_counts { + raw_count *= mult; + } + raw_count +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn extra_args(ctl: &EncloneControl) -> Vec { + let mut extra_args = ctl.gen_opt.tree.clone(); + if !ctl.plot_opt.plot_xy_filename.is_empty() { + extra_args.push(ctl.plot_opt.plot_xy_xvar.clone()); + 
extra_args.push(ctl.plot_opt.plot_xy_yvar.clone()); + } + match ctl.plot_opt.cell_color { + CellColor::ByVariableValue(ref x) => { + extra_args.push(x.var.clone()); + } + CellColor::ByCategoricalVariableValue(ref x) => { + extra_args.append(&mut x.vars.clone()); + } + _ => {} + }; + for i in 0..ctl.clono_filt_opt.bounds.len() { + extra_args.append(&mut ctl.clono_filt_opt.bounds[i].var.clone()); + } + if ctl.gen_opt.gene_scan_test.is_some() { + extra_args.append(&mut ctl.gen_opt.gene_scan_test.as_ref().unwrap().var.clone()); + extra_args.append(&mut ctl.gen_opt.gene_scan_control.as_ref().unwrap().var.clone()); + } + extra_args.append(&mut ctl.plot_opt.sim_mat_plot_vars.clone()); + for i in 0..ctl.gen_opt.var_def.len() { + let x = &ctl.gen_opt.var_def[i].2; + for v in vars_of_node(x).iter() { + extra_args.push(decode_arith(v)); + } + } + unique_sort(&mut extra_args); + extra_args +} diff --git a/enclone_print/src/print_utils2.rs b/enclone_print/src/print_utils2.rs index ca00a293c..6dd0960c4 100644 --- a/enclone_print/src/print_utils2.rs +++ b/enclone_print/src/print_utils2.rs @@ -1,57 +1,30 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. // This file contains the single function row_fill, // plus a small helper function get_gex_matrix_entry. 
-use amino::*; -use ansi_escape::*; -use bio::alignment::pairwise::*; -use bio::alignment::AlignmentOperation::*; -use enclone_core::defs::*; -use enclone_core::print_tools::*; -use enclone_proto::types::*; -use itertools::*; +use crate::print_utils1::color_codon; +use crate::proc_cvar_auto::proc_cvar_auto; +use crate::proc_lvar2::proc_lvar2; +use crate::proc_lvar_auto::proc_lvar_auto; +use amino::{aa_seq, codon_to_aa}; +use enclone_core::allowed_vars::LVARS_ALLOWED; +use enclone_core::barcode_fate::BarcodeFate; +use enclone_core::defs::{AlleleData, ColInfo, EncloneControl, ExactClonotype, GexInfo, POUT_SEP}; +use enclone_core::median::median_f64; +use enclone_proto::types::DonorReferenceItem; +use enclone_vars::decode_arith; +use expr_tools::{define_evalexpr_context, vars_of_node}; +use hdf5::Reader; +use itertools::Itertools; use ndarray::s; -use stats_utils::*; -use std::cmp::{max, min}; -use std::collections::HashMap; -use string_utils::*; -use vdj_ann::refx::*; -use vector_utils::*; - -pub fn get_gex_matrix_entry( - ctl: &EncloneControl, - gex_info: &GexInfo, - fid: usize, - d_all: &Vec>, - ind_all: &Vec>, - li: usize, - l: usize, - p: usize, - y: &str, -) -> f64 { - let mut raw_count = 0 as f64; - if gex_info.gex_matrices[li].initialized() { - raw_count = gex_info.gex_matrices[li].value(p as usize, fid) as f64; - } else { - for j in 0..d_all[l].len() { - if ind_all[l][j] == fid as u32 { - raw_count = d_all[l][j] as f64; - break; - } - } - } - let mult: f64; - if y.ends_with("_g") { - mult = gex_info.gex_mults[li]; - } else { - mult = gex_info.fb_mults[li]; - } - if !ctl.gen_opt.full_counts { - raw_count *= mult; - } - raw_count -} +use stats_utils::percent_ratio; +use std::collections::{HashMap, HashSet}; +use std::fmt::Write; +use string_utils::{stringme, strme, TextUtils}; +use vdj_ann::refx::RefData; +use vector_utils::next_diff12_4; +use vector_utils::{bin_member, bin_position, unique_sort}; // The following code creates a row in the enclone output table 
for a clonotype. Simultaneously // it generates a row of parseable output. And it does some other things that are not described @@ -63,64 +36,64 @@ pub fn row_fill( pass: usize, u: usize, ctl: &EncloneControl, - exacts: &Vec, - mults: &Vec, - exact_clonotypes: &Vec, + exacts: &[usize], + mults: &[usize], + exact_clonotypes: &[ExactClonotype], gex_info: &GexInfo, refdata: &RefData, - varmat: &Vec>>, - fp: &Vec>, - vars_amino: &Vec>, - show_aa: &Vec>, - bads: &mut Vec, + varmat: &[Vec>], + fp: &[Vec], + vars_amino: &[Vec], + show_aa: &[Vec], + ref_diff_pos: &[Vec>], + field_types: &[Vec], + bads: &mut [bool], gex_low: &mut usize, - row: &mut Vec, // row of human-readable output - out_data: &mut Vec>, // row of parseable output - cx: &mut Vec>, - d_all: &mut Vec>, - ind_all: &mut Vec>, + row: &mut Vec, // row of human-readable output + out_data: &mut [HashMap], // row of parseable output + cx: &mut [Vec], + d_all: &mut [Vec], + ind_all: &mut [Vec], rsi: &ColInfo, - dref: &Vec, + dref: &[DonorReferenceItem], groups: &HashMap>, - d_readers: &Vec>, - ind_readers: &Vec>, - h5_data: &Vec<(usize, Vec, Vec)>, - stats: &mut Vec<(String, Vec)>, - vdj_cells: &Vec>, - n_vdj_gex: &Vec, - lvarsc: &Vec, - nd_fields: &Vec, -) { + d_readers: &[Option], + ind_readers: &[Option], + h5_data: &[(usize, Vec, Vec)], + stats: &mut Vec<(String, Vec)>, + stats_pass1: &[Vec<(String, Vec)>], + vdj_cells: &[Vec], + n_vdj_gex: &[usize], + lvarsc: &[String], + lvarsh: &HashSet, + nd_fields: &[String], + peer_groups: &[Vec<(usize, u8, u32)>], + extra_args: &[String], + all_vars: &[&str], + need_gex: bool, + fate: &[HashMap], + cdr3_con: &[Vec], + allele_data: &AlleleData, +) -> Result<(), String> { // Redefine some things to reduce dependencies. let mat = &rsi.mat; let cvars = &ctl.clono_print_opt.cvars; - let lvars = lvarsc.clone(); + let lvars = lvarsc; let clonotype_id = exacts[u]; let ex = &exact_clonotypes[clonotype_id]; - macro_rules! 
speak { - ($u:expr, $var:expr, $val:expr) => { - if ctl.parseable_opt.pout.len() > 0 { - let mut v = $var.to_string(); - v = v.replace("_Σ", "_sum"); - v = v.replace("_μ", "_mean"); - if ctl.parseable_opt.pcols.is_empty() - || bin_member(&ctl.parseable_opt.pcols_sortx, &v) - { - out_data[$u].insert(v, $val); - } - } - }; - } let mut pcols_sort = ctl.parseable_opt.pcols_sort.clone(); - for i in 0..pcols_sort.len() { - pcols_sort[i] = pcols_sort[i].replace("_Σ", "_sum"); - pcols_sort[i] = pcols_sort[i].replace("_μ", "_mean"); + for pcol in pcols_sort.iter_mut() { + *pcol = pcol.replace("_Σ", "_sum").replace("_μ", "_mean"); } pcols_sort.sort(); macro_rules! speakc { ($u:expr, $col:expr, $var:expr, $val:expr) => { - if ctl.parseable_opt.pout.len() > 0 && $col + 1 <= ctl.parseable_opt.pchains { + if pass == 2 + && ctl.parseable_opt.pout.len() > 0 + && (ctl.parseable_opt.pchains == "max" + || $col < ctl.parseable_opt.pchains.force_usize()) + { let mut v = $var.clone(); v = v.replace("_Σ", "_sum"); v = v.replace("_μ", "_mean"); @@ -157,24 +130,14 @@ pub fn row_fill( } }; } - let cols = varmat[0].len(); - - // Set up lead variable macro. This is the mechanism for generating - // both human-readable and parseable output for lead variables. - macro_rules! lvar { - ($i: expr, $var:expr, $val:expr) => { - if $i < lvars.len() { - row.push($val) - } - if pass == 2 { - speak!(u, $var.to_string(), $val); - } - }; + let cols = varmat[0].len(); + if ctl.gen_opt.row_fill_verbose { + eprintln!(); } - // Compute dataset indices, gex, gex_min, gex_max, gex_mean, gex_sum, - // n_gex_cell, n_gex, entropy. + // Compute dataset indices, gex, gex_mean, gex_sum, + // n_gex_cell, n_gex. 
let mut dataset_indices = Vec::::new(); for l in 0..ex.clones.len() { @@ -187,25 +150,77 @@ pub fn row_fill( } row.push("".to_string()); // row number (#), filled in below let mut counts = Vec::::new(); - let mut fcounts = Vec::::new(); - let mut n_gex = 0; + let mut gex_counts_unsorted = Vec::::new(); + let mut gex_fcounts_unsorted = Vec::::new(); let mut n_gexs = Vec::::new(); - let mut total_counts = Vec::::new(); + + // It may not make any sense at all for this code to be here. + + if ctl.clono_filt_opt_def.whitef { + let mut bch = vec![Vec::<(usize, String, usize, usize)>::new(); 2]; + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = &ex.clones[l][0].barcode; + let mut numi = 0; + for j in 0..ex.clones[l].len() { + numi += ex.clones[l][j].umi_count; + } + bch[0].push((li, bc[0..8].to_string(), numi, l)); + bch[1].push((li, bc[8..16].to_string(), numi, l)); + } + let mut junk = 0; + let mut bad = vec![false; ex.clones.len()]; + for l in 0..2 { + bch[l].sort(); + let mut m = 0; + while m < bch[l].len() { + let n = next_diff12_4(&bch[l], m as i32) as usize; + for u1 in m..n { + for u2 in m..n { + if bch[l][u1].2 >= 10 * bch[l][u2].2 { + bad[bch[l][u2].3] = true; + } + } + } + m = n; + } + } + for b in bad { + if b { + junk += 1; + } + } + let junk_rate = percent_ratio(junk, ex.clones.len()); + // WRONG! THIS IS SUPPOSED TO BE EXECUTED ON PASS 1!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + if junk_rate == 0.0 { + bads[u] = true; + } + } + // It might be possible to speed this up a lot by pulling part of the "let d" and // "let ind" constructs out of the loop. 
- if lvars.contains(&"entropy".to_string()) { + + let (mut gex_mean, mut gex_sum) = (0.0, 0.0); + if need_gex { for l in 0..ex.clones.len() { let li = ex.clones[l][0].dataset_index; let bc = ex.clones[l][0].barcode.clone(); if !gex_info.gex_barcodes.is_empty() { + if bin_member(&gex_info.gex_cell_barcodes[li], &bc) { + n_gexs.push(1); + } else { + n_gexs.push(0); + } + let mut count = 0; + let mut fcount = 0.0; let p = bin_position(&gex_info.gex_barcodes[li], &bc); if p >= 0 { let mut raw_count = 0; if gex_info.gex_matrices[li].initialized() { let row = gex_info.gex_matrices[li].row(p as usize); - for j in 0..row.len() { - let f = row[j].0; - let n = row[j].1; + for r in row { + let f = r.0; + let n = r.1; if gex_info.is_gex[li][f] { raw_count += n; } @@ -234,663 +249,256 @@ pub fn row_fill( } for j in 0..d.len() { if gex_info.is_gex[li][ind[j] as usize] { - raw_count += d[j] as usize; + let n = d[j] as usize; + raw_count += n; } } d_all[l] = d; ind_all[l] = ind; } - total_counts.push(raw_count); - } - } - } - } - let mut entropies = Vec::::new(); - for l in 0..ex.clones.len() { - let li = ex.clones[l][0].dataset_index; - let bc = ex.clones[l][0].barcode.clone(); - if !gex_info.gex_barcodes.is_empty() { - if bin_member(&gex_info.gex_cell_barcodes[li], &bc) { - n_gex += 1; - n_gexs.push(1); - } else { - n_gexs.push(0); - } - let mut count = 0; - let mut fcount = 0.0; - let mut entropy = 0.0; - let p = bin_position(&gex_info.gex_barcodes[li], &bc); - if p >= 0 { - let mut raw_count = 0; - if gex_info.gex_matrices[li].initialized() { - let row = gex_info.gex_matrices[li].row(p as usize); - for j in 0..row.len() { - let f = row[j].0; - let n = row[j].1; - if gex_info.is_gex[li][f] { - if lvars.contains(&"entropy".to_string()) { - let q = n as f64 / total_counts[l] as f64; - entropy -= q * q.log2(); - } - raw_count += n; - } - } - } else { - let z1 = gex_info.h5_indptr[li][p as usize] as usize; - let z2 = gex_info.h5_indptr[li][p as usize + 1] as usize; // is p+1 OK?? 
- let d: Vec; - let ind: Vec; - if ctl.gen_opt.h5_pre { - d = h5_data[li].1[z1..z2].to_vec(); - ind = h5_data[li].2[z1..z2].to_vec(); + if !ctl.gen_opt.full_counts { + count = (raw_count as f64 * gex_info.gex_mults[li]).round() as usize; + fcount = raw_count as f64 * gex_info.gex_mults[li]; } else { - d = d_readers[li] - .as_ref() - .unwrap() - .read_slice(s![z1..z2]) - .unwrap() - .to_vec(); - ind = ind_readers[li] - .as_ref() - .unwrap() - .read_slice(s![z1..z2]) - .unwrap() - .to_vec(); - } - for j in 0..d.len() { - if gex_info.is_gex[li][ind[j] as usize] { - let n = d[j] as usize; - if lvars.contains(&"entropy".to_string()) { - let q = n as f64 / total_counts[l] as f64; - entropy -= q * q.log2(); - } - raw_count += n; - } + count = (raw_count as f64).round() as usize; + fcount = raw_count as f64; } - d_all[l] = d; - ind_all[l] = ind; - } - if !ctl.gen_opt.full_counts { - count = (raw_count as f64 * gex_info.gex_mults[li]).round() as usize; - fcount = raw_count as f64 * gex_info.gex_mults[li]; - } else { - count = (raw_count as f64).round() as usize; - fcount = raw_count as f64; } + counts.push(count); + gex_fcounts_unsorted.push(fcount); } - counts.push(count); - fcounts.push(fcount); - entropies.push(entropy); } - } - let count_unsorted = counts.clone(); - counts.sort(); - for n in counts.iter() { - if *n < 100 { - *gex_low += 1; + gex_counts_unsorted = counts.clone(); + counts.sort_unstable(); + for n in counts.iter() { + if *n < 100 { + *gex_low += 1; + } + } + if !counts.is_empty() { + gex_sum = gex_fcounts_unsorted.iter().sum::(); + gex_mean = gex_sum / gex_fcounts_unsorted.len() as f64; } - } - let (mut gex_median, mut gex_min, mut gex_max, mut gex_mean, mut gex_sum) = (0, 0, 0, 0.0, 0.0); - if counts.len() > 0 { - gex_median = counts[counts.len() / 2]; - gex_min = counts[0]; - gex_max = counts[counts.len() - 1]; - gex_sum = fcounts.iter().sum::(); - gex_mean = gex_sum / fcounts.len() as f64; - } - let entropies_unsorted = entropies.clone(); - 
entropies.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let mut entropy = 0.0; - if entropies.len() > 0 { - entropy = entropies[entropies.len() / 2]; } // Output lead variable columns. // WARNING! If you add lead variables, you may need to add them to the function // LinearCondition::require_valid_variables. - let mut all_lvars = lvars.clone(); - if ctl.parseable_opt.pout.len() == 0 { + let mut all_lvars = lvars.iter().map(String::as_str).collect::>(); + if ctl.parseable_opt.pout.is_empty() { } else if ctl.parseable_opt.pcols.is_empty() { - for i in 0..LVARS_ALLOWED.len() { - if !lvars.contains(&LVARS_ALLOWED[i].to_string()) { - all_lvars.push(LVARS_ALLOWED[i].to_string()); + for var in LVARS_ALLOWED { + if !lvarsh.contains(&var.to_string()) { + all_lvars.push(var); } } } else { for i in 0..ctl.parseable_opt.pcols.len() { - if !lvars.contains(&ctl.parseable_opt.pcols[i].to_string()) { - all_lvars.push(ctl.parseable_opt.pcols[i].to_string()); + if !lvarsh.contains(&ctl.parseable_opt.pcols[i].to_string()) { + all_lvars.push(ctl.parseable_opt.pcols[i].as_str()); } } } - let mut alt_bcs = Vec::::new(); + for x in extra_args { + if !lvarsh.contains(x) { + all_lvars.push(x.as_str()); + } + } + let mut alt_bcs = Vec::<&str>::new(); for li in 0..ctl.origin_info.alt_bc_fields.len() { for i in 0..ctl.origin_info.alt_bc_fields[li].len() { - alt_bcs.push(ctl.origin_info.alt_bc_fields[li][i].0.clone()); + alt_bcs.push(ctl.origin_info.alt_bc_fields[li][i].0.as_str()); } } unique_sort(&mut alt_bcs); - for i in 0..all_lvars.len() { - let x = &all_lvars[i]; - if x.starts_with('g') && x.after("g").parse::().is_ok() { - let d = x.after("g").force_usize(); - lvar![i, x, format!("{}", groups[&d][u] + 1)]; - } else if x == "origins" { - let mut origins = Vec::::new(); - for j in 0..ex.clones.len() { - if ex.clones[j][0].origin_index.is_some() { - origins.push( - ctl.origin_info.origin_id[ex.clones[j][0].origin_index.unwrap()].clone(), - ); - } else { - origins.push("?".to_string()); - } 
- } - unique_sort(&mut origins); - lvar![i, x, format!("{}", origins.iter().format(","))]; - } else if x == "datasets" { - lvar![i, x, format!("{}", lenas.iter().format(","))]; - } else if x == "donors" { - let mut donors = Vec::::new(); - for j in 0..ex.clones.len() { - if ex.clones[j][0].donor_index.is_some() { - donors.push( - ctl.origin_info.donor_list[ex.clones[j][0].donor_index.unwrap()].clone(), - ); - } else { - donors.push("?".to_string()); - } - } - unique_sort(&mut donors); - lvar![i, x, format!("{}", donors.iter().format(","))]; - } else if x == "n" { - lvar![i, x, format!("{}", mults[u])]; - let counts = vec![1.0; mults[u]]; - stats.push((x.to_string(), counts)); - } else if x == "clust" { - let mut clust = Vec::::new(); - for j in 0..ex.clones.len() { - let mut cid = 0; - let bc = &ex.clones[j][0].barcode; - let li = ex.clones[j][0].dataset_index; - if gex_info.cluster[li].contains_key(&bc.clone()) { - cid = gex_info.cluster[li][&bc.clone()]; - } - clust.push(cid); - } - clust.sort(); - lvar![i, x, format!("{}", abbrev_list(&clust))]; - } else if x == "n_other" { - let mut n = 0; - for j in 0..ex.clones.len() { - let mut found = false; - let di = ex.clones[j][0].dataset_index; - let f = format!("n_{}", ctl.origin_info.dataset_id[di]); - for i in 0..nd_fields.len() { - if f == nd_fields[i] { - found = true; - } - } - if !found { - n += 1; - } - } - lvar![i, x, format!("{}", n)]; - } else if x == "n_b" { - let mut n_b = 0; - for j in 0..ex.clones.len() { - let bc = &ex.clones[j][0].barcode; - let li = ex.clones[j][0].dataset_index; - if gex_info.cell_type[li].contains_key(&bc.clone()) { - if gex_info.cell_type[li][&bc.clone()].starts_with('B') { - n_b += 1; - } - } - } - lvar![i, x, format!("{}", n_b)]; - } else if x == "type" { - let mut cell_types = Vec::::new(); - /* - for j in 0..ex.clones.len() { - let mut cell_type = "".to_string(); - let bc = &ex.clones[j][0].barcode; - let li = ex.clones[j][0].dataset_index; - if 
gex_info.cell_type[li].contains_key(&bc.clone()) { - cell_type = gex_info.cell_type[li][&bc.clone()].clone(); - } - cell_types.push(cell_type); - } - */ - cell_types.sort(); - lvar![i, x, format!("{}", abbrev_list(&cell_types))]; - } else if x == "mark" { - let mut n = 0; - for j in 0..ex.clones.len() { - if ex.clones[j][0].marked { - n += 1; - } - } - lvar![i, x, format!("{}", n)]; - } else if x == "inkt" { - let mut s = String::new(); - let alpha_g = ex.share[0].inkt_alpha_chain_gene_match; - let alpha_j = ex.share[0].inkt_alpha_chain_junction_match; - let beta_g = ex.share[0].inkt_beta_chain_gene_match; - let beta_j = ex.share[0].inkt_beta_chain_junction_match; - if alpha_g || alpha_j { - s += "𝝰"; - if alpha_g { - s += "g"; - } - if alpha_j { - s += "j"; - } - } - if beta_g || beta_j { - s += "𝝱"; - if beta_g { - s += "g"; - } - if beta_j { - s += "j"; - } - } - lvar![i, x, s.clone()]; - } else if x == "mait" { - let mut s = String::new(); - let alpha_g = ex.share[0].mait_alpha_chain_gene_match; - let alpha_j = ex.share[0].mait_alpha_chain_junction_match; - let beta_g = ex.share[0].mait_beta_chain_gene_match; - let beta_j = ex.share[0].mait_beta_chain_junction_match; - if alpha_g || alpha_j { - s += "𝝰"; - if alpha_g { - s += "g"; - } - if alpha_j { - s += "j"; - } - } - if beta_g || beta_j { - s += "𝝱"; - if beta_g { - s += "g"; - } - if beta_j { - s += "j"; - } - } - lvar![i, x, s.clone()]; - } else if x.starts_with("pe") { - lvar![i, x, format!("")]; - } else if x.starts_with("npe") { - lvar![i, x, format!("")]; - } else if x.starts_with("ppe") { - lvar![i, x, format!("")]; - } else if x == "cred" || x == "cred_cell" { - let mut credsx = Vec::::new(); - for l in 0..ex.clones.len() { - let bc = &ex.clones[l][0].barcode; - let li = ex.clones[l][0].dataset_index; - if gex_info.pca[li].contains_key(&bc.clone()) { - let mut creds = 0; - let mut z = Vec::<(f64, String)>::new(); - let x = &gex_info.pca[li][&bc.clone()]; - for y in gex_info.pca[li].iter() { - let 
mut dist2 = 0.0; - for m in 0..x.len() { - dist2 += (y.1[m] - x[m]) * (y.1[m] - x[m]); - } - z.push((dist2, y.0.clone())); - } - z.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let top = n_vdj_gex[li]; - for i in 0..top { - if bin_member(&vdj_cells[li], &z[i].1) { - creds += 1; - } - } - let pc = 100.0 * creds as f64 / top as f64; - credsx.push(pc); - } else { - credsx.push(0.0); + + macro_rules! speak { + ($u:expr, $var:expr, $val:expr) => { + if pass == 2 && (ctl.parseable_opt.pout.len() > 0 || extra_args.len() > 0) { + let mut v = $var.to_string(); + v = v.replace("_Σ", "_sum"); + v = v.replace("_μ", "_mean"); + if ctl.parseable_opt.pcols.is_empty() + || bin_member(&ctl.parseable_opt.pcols_sortx, &v) + || bin_member(&extra_args, &v) + { + out_data[$u].insert(v, $val); } } - credsx.sort_by(|a, b| a.partial_cmp(b).unwrap()); - if x == "cred" { - if credsx.is_empty() { - lvar![i, x, format!("")]; - } else { - lvar![i, x, format!("{:.1}", credsx[credsx.len() / 2])]; - } - } else { + }; + } + + 'lvar_loop: for (i, &x) in all_lvars.iter().enumerate() { + // Process VAR_DEF variables. 
+ + for var_def in &ctl.gen_opt.var_def { + if x == var_def.0 && i < lvars.len() { if pass == 2 { - let mut r = Vec::::new(); - for j in 0..credsx.len() { - r.push(format!("{:.1}", credsx[j])); - } - speak!(u, x, format!("{}", r.iter().format(";"))); - } - } - } else if bin_member(&alt_bcs, x) { - lvar![i, x, format!("")]; - if pass == 2 { - let mut r = Vec::::new(); - for l in 0..ex.clones.len() { - let li = ex.clones[l][0].dataset_index; - let bc = ex.clones[l][0].barcode.clone(); - let mut val = String::new(); - let alt = &ctl.origin_info.alt_bc_fields[li]; - for j in 0..alt.len() { - if alt[j].0 == *x { - if alt[j].1.contains_key(&bc.clone()) { - val = alt[j].1[&bc.clone()].clone(); + let comp = &var_def.2; + let vars = vars_of_node(comp); // computing this here might be inefficient + let mut out_vals = Vec::::new(); + for k in 0..ex.clones.len() { + let mut in_vals = Vec::::new(); + for var in &vars { + let var = decode_arith(var); + let mut found = false; + for stat in stats.iter() { + if stat.0 == var { + in_vals.push(stat.1[k].clone()); + found = true; + break; + } + } + if !found { + for stat in &stats_pass1[u] { + if stat.0 == var { + in_vals.push(stat.1[k].clone()); + found = true; + break; + } + } + } + if !found { + in_vals.push(String::new()); } } - } - r.push(val); - } - speak!(u, x, format!("{}", r.iter().format(";"))); - } - } else if x.starts_with("n_") && !x.starts_with("n_gex") { - let name = x.after("n_"); - let mut count = 0; - let mut counts = Vec::::new(); - for j in 0..ex.clones.len() { - let x = &ex.clones[j][0]; - if ctl.origin_info.dataset_id[x.dataset_index] == name { - count += 1; - counts.push(1.0); - } else if x.origin_index.is_some() - && ctl.origin_info.origin_list[x.origin_index.unwrap()] == name - { - count += 1; - counts.push(1.0); - } else if x.donor_index.is_some() - && ctl.origin_info.donor_list[x.donor_index.unwrap()] == name - { - count += 1; - counts.push(1.0); - } else if x.tag_index.is_some() - && 
ctl.origin_info.tag_list[x.tag_index.unwrap()] == name - { - count += 1; - counts.push(1.0); - } - } - lvar![i, x, format!("{}", count)]; - stats.push((x.to_string(), counts)); - } else if x == "sec" && ctl.gen_opt.using_secmem { - let mut n = 0; - for l in 0..ex.clones.len() { - let li = ex.clones[l][0].dataset_index; - let bc = &ex.clones[l][0].barcode; - if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { - n += ctl.origin_info.secmem[li][&bc.clone()].0; - } - } - lvar![i, x, format!("{}", n)]; - } else if x == "mem" && ctl.gen_opt.using_secmem { - let mut n = 0; - for l in 0..ex.clones.len() { - let li = ex.clones[l][0].dataset_index; - let bc = &ex.clones[l][0].barcode; - if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { - n += ctl.origin_info.secmem[li][&bc.clone()].1; - } - } - lvar![i, x, format!("{}", n)]; - } else if x == "dref" { - let mut diffs = 0; - for m in 0..cols { - if mat[m][u].is_some() { - let r = mat[m][u].unwrap(); - let seq = &ex.share[r].seq_del_amino; - let mut vref = refdata.refs[rsi.vids[m]].to_ascii_vec(); - if rsi.vpids[m].is_some() { - vref = dref[rsi.vpids[m].unwrap()].nt_sequence.clone(); - } - let jref = refdata.refs[rsi.jids[m]].to_ascii_vec(); - let z = seq.len(); - for p in 0..z { - let b = seq[p]; - if p < vref.len() - ctl.heur.ref_v_trim && b != vref[p] { - diffs += 1; - } - if p >= z - (jref.len() - ctl.heur.ref_j_trim) - && b != jref[jref.len() - (z - p)] - { - diffs += 1; + let c = define_evalexpr_context(&vars, &in_vals); + let res = comp.eval_with_context(&c); + // if res.is_err() { + // eprintln!("\nInternal error, failed to compute {}.\n", x); + // std::process::exit(1); + // } + let val = res.unwrap(); + let val = val.as_number(); + out_vals.push(match val { + Err(_) => String::new(), + Ok(val) => format!("{val:.1}"), + }); + } + let mut median = String::new(); + let mut out_valsf = Vec::::new(); + let mut all_float = true; + for y in out_vals.iter() { + if y.parse::().is_err() { + all_float = false; + } 
else { + out_valsf.push(y.force_f64()); } } - } - } - lvar![i, x, format!("{}", diffs)]; - } else if x == "near" { - let mut dist = 1_000_000; - for i2 in 0..varmat.len() { - if i2 == u || fp[i2] != fp[u] { - continue; - } - let mut d = 0; - for c in fp[u].iter() { - for j in 0..varmat[u][*c].len() { - if varmat[u][*c][j] != varmat[i2][*c][j] { - d += 1; - } + if all_float { + out_valsf.sort_by(|a, b| a.partial_cmp(b).unwrap()); + median = format!("{:.1}", median_f64(&out_valsf)); } - } - dist = min(dist, d); - } - if dist == 1_000_000 { - lvar![i, x, "".to_string()]; - } else { - lvar![i, x, format!("{}", dist)]; - } - } else if x == "far" { - let mut dist = -1 as isize; - for i2 in 0..varmat.len() { - if i2 == u || fp[i2] != fp[u] { - continue; - } - let mut d = 0 as isize; - for c in fp[u].iter() { - for j in 0..varmat[u][*c].len() { - if varmat[u][*c][j] != varmat[i2][*c][j] { - d += 1; - } + if i < lvars.len() { + row.push(median.clone()) } - } - dist = max(dist, d); - } - if dist == -1 as isize { - lvar![i, x, "".to_string()]; - } else { - lvar![i, x, format!("{}", dist)]; - } - } else if x == "gex" { - lvar![i, x, format!("{}", gex_median)]; - } else if x == "gex_cell" { - if pass == 2 { - speak!(u, x, format!("{}", count_unsorted.iter().format(";"))); - } - } else if x == "n_gex" { - lvar![i, x, format!("{}", n_gex)]; - } else if x == "n_gex_cell" { - if i < lvars.len() { - row.push("".to_string()); - } - if pass == 2 { - speak!( - u, - "n_gex_cell".to_string(), - format!("{}", n_gexs.iter().format(";")) - ); - } - } else if x == "entropy" { - lvar![i, x, format!("{:.2}", entropy)]; - } else if x == "entropy_cell" { - let mut e = Vec::::new(); - for x in entropies_unsorted.iter() { - e.push(format!("{:.2}", x)); - } - speak!(u, x, format!("{}", e.iter().format(";"))); - } else if x == "gex_min" { - lvar![i, x, format!("{}", gex_min)]; - } else if x == "gex_max" { - lvar![i, x, format!("{}", gex_max)]; - } else if x == "gex_μ" { - lvar![i, x, format!("{}", 
gex_mean.round() as usize)]; - } else if x == "gex_Σ" { - lvar![i, x, format!("{}", gex_sum.round() as usize)]; - } else if x == "ext" { - let mut exts = Vec::::new(); - for l in 0..ex.clones.len() { - let li = ctl.origin_info.dataset_id[ex.clones[l][0].dataset_index].clone(); - let bc = ex.clones[l][0].barcode.clone(); - if ctl.gen_opt.extc.contains_key(&(li.clone(), bc.clone())) { - exts.push(ctl.gen_opt.extc[&(li, bc)].clone()); - } - } - exts.sort(); - let mut s = String::new(); - let mut j = 0; - while j < exts.len() { - let k = next_diff(&exts, j); - if j > 0 { - s += ","; - } - s += &format!( - "{}[{}/{}]", - exts[j], - k - j, - ctl.gen_opt.extn[&exts[j].clone()] - ); - j = k; - } - lvar![i, x, s.clone()]; - } else { - let (mut counts_sub, mut fcounts_sub) = (Vec::::new(), Vec::::new()); - let xorig = x.clone(); - let (mut x, mut y) = (x.to_string(), x.to_string()); - if x.contains(':') { - x = x.before(":").to_string(); - } - if y.contains(':') { - y = y.after(":").to_string(); - } - let y0 = y.clone(); - for _ in 1..=2 { - let suffixes = ["_min", "_max", "_μ", "_Σ", "_cell", "_%"]; - for s in suffixes.iter() { - if y.ends_with(s) { - y = y.rev_before(&s).to_string(); - break; - } - } - } - let mut computed = false; - for l in 0..ex.clones.len() { - let li = ex.clones[l][0].dataset_index; - let bc = ex.clones[l][0].barcode.clone(); - let mut ux = Vec::::new(); - if ctl.clono_print_opt.regex_match[li].contains_key(&y) { - ux = ctl.clono_print_opt.regex_match[li][&y].clone(); - } - if ux.len() > 0 { - let p = bin_position(&gex_info.gex_barcodes[li], &bc); - if p >= 0 { - computed = true; - let mut raw_count = 0.0; - for fid in ux.iter() { - let raw_counti = get_gex_matrix_entry( - &ctl, &gex_info, *fid, &d_all, &ind_all, li, l, p as usize, &y, - ); - raw_count += raw_counti; - } - counts_sub.push(raw_count.round() as f64); - fcounts_sub.push(raw_count); - } - } else { - if gex_info.feature_id[li].contains_key(&y) { - computed = true; - let p = 
bin_position(&gex_info.gex_barcodes[li], &bc); - if p >= 0 { - let fid = gex_info.feature_id[li][&y]; - let raw_count = get_gex_matrix_entry( - &ctl, &gex_info, fid, &d_all, &ind_all, li, l, p as usize, &y, + if pass == 2 { + if ctl.parseable_opt.pbarcode { + speak!( + u, + x.to_string(), + format!("{}", out_vals.iter().format(POUT_SEP)) ); - counts_sub.push(raw_count.round() as f64); - fcounts_sub.push(raw_count); + } else { + speak!(u, x.to_string(), median.clone()); } } + stats.push((x.to_string(), out_vals.clone())); + } else if i < lvars.len() { + row.push(String::new()); } + continue 'lvar_loop; } - if computed { - if !y0.ends_with("_%") { - stats.push((x.clone(), fcounts_sub.clone())); - } else { - let mut f = Vec::::new(); - for i in 0..fcounts_sub.len() { - let mut x = 0.0; - if gex_mean > 0.0 { - x = 100.0 * fcounts_sub[i] / gex_mean; - } - f.push(x); - } - stats.push((x.clone(), f)); - } - let mut counts_sub_sorted = counts_sub.clone(); - counts_sub_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let sum = fcounts_sub.iter().sum::(); - let mean = sum / counts_sub.len() as f64; + } - if xorig.ends_with("_%_cell") { - if pass == 2 { - let mut c = Vec::::new(); - for j in 0..counts_sub.len() { - c.push(format!("{:.2}", 100.0 * counts_sub[j] as f64 / fcounts[j])); - } - let val = format!("{}", c.iter().format(";")); - speak!(u, x, val); - } - } else if xorig.ends_with("_cell") { - if pass == 2 { - let val = format!("{}", counts_sub.iter().format(";")); - speak!(u, x, val); - } - } else { - if y0.ends_with("_min") { - lvar![i, x, format!("{}", counts_sub_sorted[0].round())]; - } else if y0.ends_with("_max") { - lvar![ - i, - x, - format!("{}", counts_sub_sorted[counts_sub.len() - 1].round()) - ]; - } else if y0.ends_with("_μ") { - lvar![i, x, format!("{}", mean.round())]; - } else if y0.ends_with("_Σ") { - lvar![i, x, format!("{}", sum.round())]; - } else if y0.ends_with("_%") { - lvar![i, x, format!("{:.2}", (100.0 * sum) / gex_sum)]; - } else { - let 
mut median = 0.0; - if counts_sub_sorted.len() > 0 { - median = counts_sub_sorted[counts_sub_sorted.len() / 2].round(); - } - lvar![i, x, format!("{}", median)]; - } - } - } + // Process other lvars. + + if !proc_lvar_auto( + i, + pass, + x, + exacts, + exact_clonotypes, + u, + rsi, + refdata, + ctl, + extra_args, + out_data, + stats, + lvars, + row, + fate, + dref, + varmat, + fp, + n_vdj_gex, + vdj_cells, + gex_info, + groups, + mults, + nd_fields, + &gex_counts_unsorted, + &gex_fcounts_unsorted, + &n_gexs, + d_readers, + ind_readers, + h5_data, + &alt_bcs, + )? { + let _ = proc_lvar2( + i, + x, + pass, + u, + ctl, + exacts, + exact_clonotypes, + gex_info, + row, + out_data, + d_all, + ind_all, + stats, + lvars, + &alt_bcs, + gex_mean, + gex_sum, + &gex_fcounts_unsorted, + extra_args, + ); } } // Sanity check. It's here because if it fails and that failure was not detected, something // exceptionally cryptic would happen downstream. - assert_eq!(row.len(), lvars.len() + 1); + if row.len() != lvars.len() + 1 { + let mut msg = format!( + "Oops, row.len() != lvars.len() + 1, as in fact we have\n\ + row.len() = {} and lvars.len() = {}, and in more detail,\n\ + row = {}\n\ + and lvars = {}.\nThis happened on a clonotype that included the barcode {}.", + row.len(), + lvars.len(), + row.iter().format(","), + lvars.iter().format(","), + ex.clones[0][0].barcode, + ); + if !ctl.gen_opt.row_fill_verbose { + write!( + msg, + "\n\nYou may find it helpful to add the options\n\ + BARCODE={} ROW_FILL_VERBOSE\n\ + to the command line. Depending on other arguments, you might also need to \ + add MAX_CORES=1.", + ex.clones[0][0].barcode + ) + .unwrap(); + } + panic!("{}", msg); + } // Get the relevant barcodes. @@ -907,85 +515,81 @@ pub fn row_fill( // Traverse the chains. for col in 0..cols { + // Process variables that need to be computed even if the chain entry is empty. + // NO: WHY? WHY WOULD WE WANT TO PRINT THESE? BEHAVIOR CHANGED. 
DON'T KNOW WHY + // WE EVER DID THIS> + + let rsi_vars = &ctl.clono_print_opt.cvars; + let have_notes = rsi.cvars[col].contains(&"notes".to_string()); + let mut notes_pos = 0; + let mut notes_in = false; + for (j, &var) in all_vars.iter().take(rsi_vars.len()).enumerate() { + if var == "notes" { + notes_pos = j; + notes_in = true; + } + } + // these lines moved to prevent printing if chain is absent let mid = mat[col][u]; if mid.is_none() { continue; } - let mid = mid.unwrap(); - let ex = &exact_clonotypes[clonotype_id]; - let seq_amino = rsi.seqss_amino[col][u].clone(); - - // Get UMI and read stats. - - let mut numis = Vec::::new(); - let mut nreads = Vec::::new(); - for j in 0..ex.clones.len() { - numis.push(ex.clones[j][mid].umi_count); - nreads.push(ex.clones[j][mid].read_count); + for (j, &var) in all_vars.iter().enumerate() { + let mut jj = j; + if !have_notes && notes_in && j >= notes_pos { + jj -= 1; + } + + // Decide if there is nothing to compute. This is almost certainly not optimal. + // Also largely duplicated below. 
+ + let mut needed = false; + let varc = format!("{var}{}", col + 1); + if jj < rsi.cvars[col].len() && cvars.contains(&var.to_string()) + || pass == 2 + && !ctl.parseable_opt.pout.is_empty() + && (ctl.parseable_opt.pchains == "max" + || col < ctl.parseable_opt.pchains.force_usize()) + && (pcols_sort.is_empty() || bin_member(&pcols_sort, &varc)) + || extra_args.contains(&varc) + { + needed = true; + } + if !needed { + continue; + } + let col_var = jj < rsi_vars.len(); + if !col_var && ctl.parseable_opt.pout.is_empty() && extra_args.is_empty() { + continue; + } } - numis.sort(); - let median_numis = numis[numis.len() / 2]; - let utot: usize = numis.iter().sum(); - let u_mean = (utot as f64 / numis.len() as f64).round() as usize; - let u_min = *numis.iter().min().unwrap(); - let u_max = *numis.iter().max().unwrap(); - nreads.sort(); - let rtot: usize = nreads.iter().sum(); - let r_mean = (rtot as f64 / nreads.len() as f64).round() as usize; - let r_min = *nreads.iter().min().unwrap(); - let r_max = *nreads.iter().max().unwrap(); - let median_nreads = nreads[nreads.len() / 2]; - // Set up chain variable macro. This is the mechanism for generating - // both human-readable and parseable output for chain variables. + // Keep going. - macro_rules! cvar { - ($i: expr, $var:expr, $val:expr) => { - if $i < rsi.cvars[col].len() && cvars.contains(&$var) { - cx[col][$i] = $val.clone(); - } - speakc!(u, col, $var, $val); - }; - } + let mid = mid.unwrap(); + let ex = &exact_clonotypes[clonotype_id]; + let seq_amino = rsi.seqss_amino[col][u].clone(); - // Speak quality score column entries. + // Speak some other column entries. 
- if ctl.parseable_opt.pout.len() > 0 && col + 1 <= ctl.parseable_opt.pchains { - for i in 0..pcols_sort.len() { - if pcols_sort[i].starts_with('q') - && pcols_sort[i].ends_with(&format!("_{}", col + 1)) - { - let n = pcols_sort[i].after("q").rev_before("_").force_usize(); - if n < ex.share[mid].seq.len() { - let mut quals = Vec::::new(); - for j in 0..ex.clones.len() { - quals.push(ex.clones[j][mid].quals[n]); - } - let q = format!("{}", quals.iter().format(",")); - out_data[u].insert(pcols_sort[i].clone(), q); - } + let xm = &ex.share[mid]; + speakc!(u, col, "vj_aa".to_string(), stringme(&aa_seq(&xm.seq, 0))); + speakc!(u, col, "vj_seq".to_string(), stringme(&xm.seq)); + let mut dna = Vec::::new(); + for p in xm.fr1_start..xm.seq_del_amino.len() { + for j in 0..xm.ins.len() { + if xm.ins[j].0 == p { + let mut z = xm.ins[j].1.clone(); + dna.append(&mut z); } } + if xm.seq_del_amino[p] != b'-' { + dna.push(xm.seq_del_amino[p]); + } } - - // Speak some other column entries. - - speakc!(u, col, "vj_seq".to_string(), stringme(&ex.share[mid].seq)); - speakc!(u, col, "seq".to_string(), stringme(&ex.share[mid].full_seq)); - speakc!(u, col, "v_start".to_string(), ex.share[mid].v_start); - let cid = ex.share[mid].c_ref_id; - if cid.is_some() { - let cid = cid.unwrap(); - speakc!(u, col, "const_id".to_string(), refdata.id[cid]); - } - let uid = ex.share[mid].u_ref_id; - if uid.is_some() { - let uid = uid.unwrap(); - speakc!(u, col, "utr_id".to_string(), refdata.id[uid]); - speakc!(u, col, "utr_name".to_string(), refdata.name[uid]); - } - speakc!(u, col, "cdr3_start".to_string(), ex.share[mid].cdr3_start); - speakc!(u, col, "cdr3_aa".to_string(), ex.share[mid].cdr3_aa); + speakc!(u, col, "vj_aa_nl".to_string(), stringme(&aa_seq(&dna, 0))); + speakc!(u, col, "vj_seq_nl".to_string(), stringme(&dna)); + speakc!(u, col, "seq".to_string(), stringme(&xm.full_seq)); let mut vv = Vec::::new(); for x in vars_amino[col].iter() { vv.push(*x / 3); @@ -1006,431 +610,118 @@ pub fn 
row_fill( // Create column entry. - let rsi_vars = &rsi.cvars[col]; - let mut all_vars = rsi_vars.clone(); - for j in 0..CVARS_ALLOWED.len() { - let var = &CVARS_ALLOWED[j]; - if !rsi_vars.contains(&var.to_string()) { - all_vars.push(var.to_string()); + for (j, &var) in all_vars.iter().enumerate() { + let mut jj = j; + if !have_notes && notes_in && j >= notes_pos { + jj -= 1; } - } - for j in 0..CVARS_ALLOWED_PCELL.len() { - let var = &CVARS_ALLOWED_PCELL[j]; - if !rsi_vars.contains(&var.to_string()) { - all_vars.push(var.to_string()); + if var == "notes" && !have_notes { + continue; } - } - for j in 0..all_vars.len() { - let col_var = j < rsi_vars.len(); - if !col_var && ctl.parseable_opt.pout.len() == 0 { + + // Decide if there is nothing to compute. This is almost certainly not optimal. + + let mut needed = false; + if !ex.share[mid].left + && (var == "d1_name" + || var == "d2_name" + || var == "d_delta" + || var == "d_Δ" + || var == "d1_score" + || var == "d2_score") + { + continue; + } + let varc = format!("{var}{}", col + 1); + if jj < rsi.cvars[col].len() && cvars.contains(&var.to_string()) + || pass == 2 + && !ctl.parseable_opt.pout.is_empty() + && (ctl.parseable_opt.pchains == "max" + || col < ctl.parseable_opt.pchains.force_usize()) + && (pcols_sort.is_empty() || bin_member(&pcols_sort, &varc)) + || var == "amino" + || var == "u_cell" + || var == "r_cell" + || var == "white" + || ctl.clono_filt_opt_def.whitef + || extra_args.contains(&varc) + { + needed = true; + } + if !needed { + continue; + } + let col_var = jj < rsi_vars.len(); + if !col_var && ctl.parseable_opt.pout.is_empty() && extra_args.is_empty() { continue; } - let var = &all_vars[j]; - if *var == "amino".to_string() && col_var { - let cs = rsi.cdr3_starts[col] / 3; - let n = rsi.cdr3_lens[col]; + + // Compute. 
+ + if !proc_cvar_auto( + jj, + pass, + var, + ex, + exacts, + exact_clonotypes, + mid, + col, + u, + rsi, + refdata, + dref, + ctl, + extra_args, + &pcols_sort, + cx, + varmat, + out_data, + stats, + allele_data, + )? && var == "amino" + && col_var + { + let mut last_color = "black".to_string(); for k in 0..show_aa[col].len() { let p = show_aa[col][k]; - if k > 0 && p == cs { - cx[col][j] += " "; + if k > 0 + && field_types[col][k] != field_types[col][k - 1] + && !ctl.gen_opt.nospaces + { + cx[col][jj] += " "; } if 3 * p + 3 <= seq_amino.len() && seq_amino[3 * p..3 * p + 3].to_vec() == b"---".to_vec() { - cx[col][j] += "-"; + cx[col][jj] += "-"; } else if 3 * p + 3 > seq_amino.len() || seq_amino[3 * p..3 * p + 3].contains(&b'-') { - cx[col][j] += "*"; - } else { - let mut log = Vec::::new(); - let aa = codon_to_aa(&seq_amino[3 * p..3 * p + 3]); - if ctl.gen_opt.color == "codon".to_string() { - emit_codon_color_escape(&seq_amino[3 * p..3 * p + 3], &mut log); - log.push(aa); - emit_end_escape(&mut log); - } else { - color_by_property(&vec![aa], &mut log); - } - cx[col][j] += strme(&log); - } - if k < show_aa[col].len() - 1 && p == cs + n - 1 { - cx[col][j] += " "; - } - } - } else if *var == "comp".to_string() || *var == "edit".to_string() { - let mut comp = 1000000; - let mut edit = String::new(); - let td = &ex.share[mid]; - let tig = &td.seq; - let score = |a: u8, b: u8| if a == b { 1i32 } else { -1i32 }; - let mut aligner = Aligner::new(-6, -1, &score); - - // Go through passes. If IGH/TRB, we go through every D segment. Otherwise - // there is just one pass. - - let mut z = 1; - if ex.share[mid].left { - z = refdata.ds.len(); - } - for di in 0..z { - let mut d = 0; - if ex.share[mid].left { - d = refdata.ds[di]; - } - - // Start to build reference concatenation. First append the V segment. 
- - let mut concat = Vec::::new(); - let mut vref = refdata.refs[rsi.vids[col]].to_ascii_vec(); - if rsi.vpids[col].is_none() { + cx[col][jj] += "*"; } else { - vref = dref[rsi.vpids[col].unwrap()].nt_sequence.clone(); - } - concat.append(&mut vref.clone()); - - // Append the D segment if IGH/TRB. - - if ex.share[mid].left { - let mut x = refdata.refs[d].to_ascii_vec(); - concat.append(&mut x); - } - - // Append the J segment. - - let mut x = refdata.refs[rsi.jids[col]].to_ascii_vec(); - concat.append(&mut x); - - // Align the V..J sequence on the contig to the reference concatenation. - - let al = aligner.semiglobal(&tig, &concat); - let mut m = 0; - let mut pos = al.xstart; - let mut rpos = (al.ystart as isize) - (vref.len() as isize); - let mut count = 0; - let start = td.cdr3_start; - let stop = td.j_stop - td.v_start; - let mut edits = Vec::::new(); - while m < al.operations.len() { - let n = next_diff(&al.operations, m); - match al.operations[m] { - Match => { - pos += 1; - rpos += 1; - } - Subst => { - if pos >= start && pos < stop { - count += 1; - edits.push(format!("S{}", rpos)); - } - pos += 1; - rpos += 1; - } - Del => { - if pos >= start && pos < stop { - count += 1; - edits.push(format!("D{}:{}", rpos, n - m)); - } - pos += n - m; - m = n - 1; - } - Ins => { - if pos >= start && pos < stop { - count += 1; - edits.push(format!("I{}:{}", rpos, n - m)); - } - rpos += (n - m) as isize; - m = n - 1; - } - _ => {} - }; - m += 1; - } - if count < comp { - comp = count; - edit = format!("{}", edits.iter().format("•")); - } - } - if *var == "comp".to_string() { - cvar![j, var, format!("{}", comp)]; - } else { - cvar![j, var, format!("{}", edit)]; - } - } else if *var == "cdr3_dna".to_string() { - cvar![j, var, ex.share[mid].cdr3_dna.clone()]; - } else if *var == "cdr3_len".to_string() { - cvar![j, var, ex.share[mid].cdr3_aa.len().to_string()]; - } else if *var == "ulen".to_string() { - cvar![j, *var, format!("{}", ex.share[mid].v_start)]; - } else if *var == 
"clen".to_string() { - cvar![ - j, - var, - format!("{}", ex.share[mid].full_seq.len() - ex.share[mid].j_stop) - ]; - } else if *var == "vjlen".to_string() { - cvar![ - j, - var, - format!("{}", ex.share[mid].j_stop - ex.share[mid].v_start) - ]; - } else if var.starts_with("ndiff") { - let u0 = var.between("ndiff", "vj").force_usize() - 1; - if u0 < exacts.len() && mat[col][u0].is_some() && mat[col][u].is_some() { - let m0 = mat[col][u0].unwrap(); - let m = mat[col][u].unwrap(); - let mut ndiff = 0; - let ex0 = &exact_clonotypes[exacts[u0]]; - let ex = &exact_clonotypes[exacts[u]]; - for p in 0..ex0.share[m0].seq_del.len() { - if ex0.share[m0].seq_del[p] != ex.share[m].seq_del[p] { - ndiff += 1; - } - } - cvar![j, *var, format!("{}", ndiff)]; - } else { - cvar![j, *var, "_".to_string()]; - } - } else if *var == "cdiff".to_string() { - let cstart = ex.share[mid].j_stop; - let clen = ex.share[mid].full_seq.len() - cstart; - let cid = ex.share[mid].c_ref_id; - let mut cdiff = String::new(); - let mut ndiffs = 0; - if cid.is_some() { - let r = &refdata.refs[cid.unwrap()]; - let mut extra = 0; - if clen > r.len() { - extra = clen - r.len(); - } - for i in 0..min(clen, r.len()) { - let tb = ex.share[mid].full_seq[cstart + i]; - let rb = r.to_ascii_vec()[i]; - if tb != rb { - ndiffs += 1; - if ndiffs <= 5 { - cdiff += &format!("{}{}", i, tb as char); - } - } - } - if ndiffs > 5 { - cdiff += "..."; - } - if extra > 0 { - cdiff += &format!("+{}", extra); - } - } else if clen > 0 { - cdiff = format!("+{}", clen); - } - cvar![j, var, cdiff]; - } else if *var == "udiff".to_string() { - let ulen = ex.share[mid].v_start; - let uid = ex.share[mid].u_ref_id; - let mut udiff = String::new(); - let mut ndiffs = 0; - if uid.is_some() { - let r = &refdata.refs[uid.unwrap()]; - let mut extra = 0; - if ulen > r.len() { - extra = ulen - r.len(); - } - for i in 0..ulen { - let mut rpos = i; - if ulen < r.len() { - rpos += r.len() - ulen; - } else { - if i + r.len() < ulen { - continue; - 
} - rpos -= ulen - r.len(); - } - let tb = ex.share[mid].full_seq[i]; - let rb = r.to_ascii_vec()[rpos]; - if tb != rb { - ndiffs += 1; - if ndiffs <= 5 { - udiff += &format!("{}{}", rpos, tb as char); - } - } - } - if ndiffs > 5 { - udiff += "..."; - } - if extra > 0 { - udiff += &format!("+{}", extra); - } - } else if ulen > 0 { - udiff = format!("+{}", ulen); - } - cvar![j, var, udiff]; - } else if *var == "d_univ".to_string() { - let vid = ex.share[mid].v_ref_id; - let vref = &refdata.refs[vid].to_ascii_vec(); - let jid = ex.share[mid].j_ref_id; - let jref = &refdata.refs[jid].to_ascii_vec(); - let tig = &ex.share[mid].seq_del; - let n = tig.len(); - let mut diffs = 0; - for p in 0..n { - if tig[p] == b'-' { - continue; - } - if p < vref.len() - ctl.heur.ref_v_trim && tig[p] != vref[p] { - diffs += 1; - } else if p >= n - (jref.len() - ctl.heur.ref_j_trim) - && tig[p] != jref[jref.len() - (n - p)] - { - diffs += 1; - } - } - cvar![j, var, format!("{}", diffs)]; - } else if *var == "d_donor".to_string() { - let vid = ex.share[mid].v_ref_id; - let mut vref = refdata.refs[vid].to_ascii_vec(); - if rsi.vpids[col].is_some() { - vref = dref[rsi.vpids[col].unwrap()].nt_sequence.clone(); - } - let jid = ex.share[mid].j_ref_id; - let jref = &refdata.refs[jid].to_ascii_vec(); - let tig = &ex.share[mid].seq_del; - let n = tig.len(); - let mut diffs = 0; - for p in 0..n { - if tig[p] == b'-' { - continue; - } - if p < vref.len() - ctl.heur.ref_v_trim && tig[p] != vref[p] { - diffs += 1; - } else if p >= n - (jref.len() - ctl.heur.ref_j_trim) - && tig[p] != jref[jref.len() - (n - p)] - { - diffs += 1; - } - } - cvar![j, var, format!("{}", diffs)]; - } else if *var == "notes".to_string() { - cvar![j, var, ex.share[mid].vs_notesx.clone()]; - } else if *var == "var".to_string() { - cvar![j, var, stringme(&varmat[u][col])]; - } else if *var == "u".to_string() { - cvar![j, var, format!("{}", median_numis)]; - } else if *var == "u_cell".to_string() { - let var = var.clone(); - if 
col + 1 <= ctl.parseable_opt.pchains { - let varc = format!("{}{}", var, col + 1); - if pcols_sort.is_empty() || bin_member(&pcols_sort, &varc) { - let mut vals = String::new(); - for k in 0..ex.ncells() { - if k > 0 { - vals += ";"; - } - vals += &format!("{}", ex.clones[k][mid].umi_count); - } - out_data[u].insert(varc, format!("{}", vals)); - } - } - } else if *var == "u_min".to_string() { - cvar![j, var, format!("{}", u_min)]; - } else if *var == "u_max".to_string() { - cvar![j, var, format!("{}", u_max)]; - } else if *var == "u_μ".to_string() { - cvar![j, var, format!("{}", u_mean)]; - } else if *var == "u_Σ".to_string() { - cvar![j, var, format!("{}", utot)]; - } else if *var == "r".to_string() { - cvar![j, var, format!("{}", median_nreads)]; - } else if *var == "r_min".to_string() { - cvar![j, var, format!("{}", r_min)]; - } else if *var == "r_max".to_string() { - cvar![j, var, format!("{}", r_max)]; - } else if *var == "r_μ".to_string() { - cvar![j, var, format!("{}", r_mean)]; - } else if *var == "r_Σ".to_string() { - cvar![j, var, format!("{}", rtot)]; - } else if *var == "r_cell".to_string() { - let var = var.clone(); - if col + 1 <= ctl.parseable_opt.pchains { - let varc = format!("{}{}", var, col + 1); - if pcols_sort.is_empty() || bin_member(&pcols_sort, &varc) { - let mut vals = String::new(); - for k in 0..ex.ncells() { - if k > 0 { - vals += ";"; - } - vals += &format!("{}", ex.clones[k][mid].read_count); - } - out_data[u].insert(varc, format!("{}", vals)); - } - } - } else if *var == "const".to_string() { - let mut constx = Vec::::new(); - let cid = ex.share[mid].c_ref_id; - if cid.is_some() { - constx.push(refdata.name[cid.unwrap()].clone()); - } else { - constx.push("?".to_string()); - } - unique_sort(&mut constx); - // This is overcomplicated because there is now at most one - // const entry per exact subclonotype. - cvar![j, var, format!("{}", constx.iter().format(","))]; - - // Compute potential whitelist contamination percent and filter. 
- // This is an undocumented option. - } else if *var == "white".to_string() || ctl.clono_filt_opt.whitef { - let mut bch = vec![Vec::<(usize, String, usize, usize)>::new(); 2]; - for l in 0..ex.clones.len() { - let li = ex.clones[l][0].dataset_index; - let bc = &ex.clones[l][0].barcode; - let mut numi = 0; - for j in 0..ex.clones[l].len() { - numi += ex.clones[l][j].umi_count; - } - bch[0].push((li, bc[0..8].to_string(), numi, l)); - bch[1].push((li, bc[8..16].to_string(), numi, l)); - } - let mut junk = 0; - let mut bad = vec![false; ex.clones.len()]; - for l in 0..2 { - bch[l].sort(); - let mut m = 0; - while m < bch[l].len() { - let n = next_diff12_4(&bch[l], m as i32) as usize; - for u1 in m..n { - for u2 in m..n { - if bch[l][u1].2 >= 10 * bch[l][u2].2 { - bad[bch[l][u2].3] = true; - } - } - } - m = n; - } - } - for u in 0..bad.len() { - if bad[u] { - junk += 1; + let x = &peer_groups[rsi.vids[col]]; + let last = k == show_aa[col].len() - 1; + let log = color_codon( + ctl, + &seq_amino, + ref_diff_pos, + x, + col, + mid, + p, + u, + &mut last_color, + last, + cdr3_con, + exacts, + exact_clonotypes, + ); + cx[col][jj] += strme(&log); } } - // Don't look at very large clones because of course they - // show overlap. - /* // BROKEN AND WAS UGLY ANYWAY - const MAX_WHITELIST_CLONE: usize = 100; - if ex.clones.len() <= MAX_WHITELIST_CLONE { - res.3 += junk; - res.4 += ex.clones.len(); - } - */ - let junk_rate = percent_ratio(junk, ex.clones.len()); - if *var == "white".to_string() && col_var { - cx[col][j] = format!("{:.1}", junk_rate); - } - // WRONG! THIS IS SUPPOSED TO BE EXECUTED ON PASS 1!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- if ctl.clono_filt_opt.whitef && junk_rate == 0.0 - /* && pass == 1 */ - { - bads[u] = true; - } } } } + Ok(()) } diff --git a/enclone_print/src/print_utils3.rs b/enclone_print/src/print_utils3.rs index 0baaefe19..663afdc90 100644 --- a/enclone_print/src/print_utils3.rs +++ b/enclone_print/src/print_utils3.rs @@ -1,17 +1,180 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -use crate::print_utils1::*; -use amino::*; -use ansi_escape::*; -use enclone_core::defs::*; -use enclone_core::print_tools::*; -use enclone_proto::types::*; -use io_utils::*; +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +use crate::print_utils1::color_codon; +use bio_edit::alignment::pairwise::Aligner; +use bio_edit::alignment::AlignmentOperation::{Del, Ins, Match, Subst}; +use enclone_core::allowed_vars::{CVARS_ALLOWED, CVARS_ALLOWED_PCELL}; +use enclone_core::defs::{ColInfo, EncloneControl, ExactClonotype}; +use enclone_proto::types::DonorReferenceItem; +use io_utils::fwriteln; use itertools::Itertools; +use std::fmt::Write as _; use std::io::Write; -use string_utils::*; -use vdj_ann::refx::*; -use vector_utils::*; +use string_utils::strme; +use vdj_ann::refx::RefData; +use vector_utils::{bin_member, erase_if, make_freq, next_diff, sort_sync2, unique_sort}; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn comp_edit( + ex: &ExactClonotype, + mid: usize, + col: usize, + refdata: &RefData, + dref: &[DonorReferenceItem], + rsi: &ColInfo, +) -> (usize, String) { + let mut comp = 1000000; + let mut edit = String::new(); + let td = &ex.share[mid]; + let tig = &td.seq; + let score = |a: u8, b: u8| if a == b { 1i32 } else { -1i32 }; + let mut aligner = Aligner::new(-6, -1, &score); + + // Go through passes. If IGH/TRB, we go through every D segment. Otherwise + // there is just one pass. 
+ + let mut z = 1; + if ex.share[mid].left { + z = refdata.ds.len(); + } + let mut ds = Vec::::new(); + let mut counts = Vec::::new(); + for di in 0..z { + let mut d = 0; + if ex.share[mid].left { + d = refdata.ds[di]; + } + + // Start to build reference concatenation. First append the V segment. + + let mut concat = Vec::::new(); + let mut vref = refdata.refs[rsi.vids[col]].to_ascii_vec(); + if rsi.vpids[col].is_none() { + } else { + vref = dref[rsi.vpids[col].unwrap()].nt_sequence.clone(); + } + concat.append(&mut vref.clone()); + + // Append the D segment if IGH/TRB. + + if ex.share[mid].left { + let mut x = refdata.refs[d].to_ascii_vec(); + concat.append(&mut x); + } + + // Append the J segment. + + let mut x = refdata.refs[rsi.jids[col]].to_ascii_vec(); + concat.append(&mut x); + + // Align the V..J sequence on the contig to the reference concatenation. + + let al = aligner.semiglobal(tig, &concat); + let mut m = 0; + let mut pos = al.xstart; + let mut rpos = (al.ystart as isize) - (vref.len() as isize); + let mut count = 0; + let start = td.cdr3_start - td.ins_len(); + let stop = td.j_stop - td.v_start; + let mut edits = Vec::::new(); + while m < al.operations.len() { + let n = next_diff(&al.operations, m); + match al.operations[m] { + Match => { + pos += 1; + rpos += 1; + } + Subst => { + if pos >= start && pos < stop { + count += 1; + edits.push(format!("S{rpos}")); + } + pos += 1; + rpos += 1; + } + Del => { + if pos >= start && pos < stop { + count += 1; + edits.push(format!("D{rpos}:{}", n - m)); + } + pos += n - m; + m = n - 1; + } + Ins => { + if pos >= start && pos < stop { + count += 1; + edits.push(format!("I{rpos}:{}", n - m)); + } + rpos += (n - m) as isize; + m = n - 1; + } + _ => {} + }; + m += 1; + } + counts.push(count); + ds.push(d); + if count < comp { + comp = count; + edit = format!("{}", edits.iter().format("•")); + } + } + sort_sync2(&mut counts, &mut ds); + let mut comp = 0; + if !counts.is_empty() { + comp = counts[0]; + } + (comp, 
edit) +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Form the consensus CDR3 DNA sequence for a clonotype. We define this by finding, for each +// chain, and each codon in CDR3, the most frequent one. These codons are then chained together +// to form a DNA sequence. If no codon has > 50% of the total, we report a tie, as XXX. +// This code is only used in processing COLOR=codon-diffs. + +pub fn consensus_codon_cdr3( + rsi: &ColInfo, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], +) -> Vec> { + let mut cons = Vec::>::new(); + let nexacts = exacts.len(); + let cols = rsi.vids.len(); + for cx in 0..cols { + let mut cdr3s = Vec::>::new(); + for u in 0..nexacts { + if let Some(m) = rsi.mat[cx][u] { + cdr3s.push( + exact_clonotypes[exacts[u]].share[m] + .cdr3_dna + .as_bytes() + .to_vec(), + ); + } + } + let n = cdr3s[0].len(); + let mut con = Vec::::new(); + for i in (0..n).step_by(3) { + let mut codons = Vec::>::new(); + for cdr3 in &cdr3s { + codons.push(cdr3[i..i + 3].to_vec()); + } + codons.sort(); + let mut freq = Vec::<(u32, Vec)>::new(); + make_freq(&codons, &mut freq); + if freq[0].0 as usize * 2 > codons.len() { + con.append(&mut freq[0].1.clone()); + } else { + con.append(&mut b"XXX".to_vec()); + } + } + cons.push(con); + } + cons +} // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ @@ -26,9 +189,9 @@ use vector_utils::*; pub fn define_column_info( ctl: &EncloneControl, - exacts: &Vec, - exact_clonotypes: &Vec, - mat: &Vec>>, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + mat: &[Vec>], refdata: &RefData, ) -> ColInfo { let cols = mat.len(); @@ -36,56 +199,65 @@ pub fn define_column_info( // Define cvars. 
let mut cvars = Vec::>::new(); - for cx in 0..cols { + for m in mat.iter().take(cols) { let mut have_notes = false; - for u in 0..exacts.len() { - let ex = &exact_clonotypes[exacts[u]]; - let m = mat[cx][u]; - if m.is_some() { - let m = m.unwrap(); + let mut left = false; + for (&e, &m) in exacts.iter().zip(m.iter()) { + let ex = &exact_clonotypes[e]; + if let Some(m) = m { let ex = &ex.share[m]; - if ex.vs_notesx.len() > 0 { + if ex.left { + left = true; + } + if !ex.vs_notesx.is_empty() { have_notes = true; } } } let mut cv = Vec::::new(); for i in 0..ctl.clono_print_opt.cvars.len() { - if ctl.clono_print_opt.cvars[i] == "notes" && !have_notes { + let var = &ctl.clono_print_opt.cvars[i]; + if var == "notes" && !have_notes { continue; } - cv.push(ctl.clono_print_opt.cvars[i].to_string()); + if !left + && (var == "d1_name" + || var == "d2_name" + || var == "d_delta" + || var == "d_Δ" + || var == "d1_score" + || var == "d2_score") + { + continue; + } + cv.push(var.to_string()); } cvars.push(cv); } // Compute CDR3 starts, etc. 
+ let mut fr1_starts = Vec::::new(); + let mut fr2_starts = Vec::>::new(); + let mut fr3_starts = Vec::>::new(); let mut cdr1_starts = Vec::>::new(); let mut cdr2_starts = Vec::>::new(); let mut cdr3_starts = Vec::::new(); - let mut cdr1_lens = Vec::>::new(); - let mut cdr2_lens = Vec::>::new(); let mut cdr3_lens = Vec::::new(); let mut seq_lens = Vec::::new(); let mut seq_del_lens = Vec::::new(); - for cx in 0..cols { - for u in 0..exacts.len() { - let ex = &exact_clonotypes[exacts[u]]; - let m = mat[cx][u]; - if m.is_some() { - let m = m.unwrap(); + for m in mat.iter().take(cols) { + for (&e, &m) in exacts.iter().zip(m.iter()) { + let ex = &exact_clonotypes[e]; + if let Some(m) = m { let exm = &ex.share[m]; - cdr1_lens.push(Some(exm.cdr1_aa.len())); - cdr2_lens.push(Some(exm.cdr2_aa.len())); cdr3_lens.push(exm.cdr3_aa.len()); seq_lens.push(exm.seq.len()); seq_del_lens.push(exm.seq_del.len()); // The logic below with testing i < start while incrementing start seems fishy. - if exm.cdr1_start.is_some() { - let mut start = exm.cdr1_start.unwrap(); + if let Some(mut start) = exm.cdr1_start { for (i, c) in exm.seq_del.iter().enumerate() { if i < start && *c == b'-' { start += 1; @@ -95,8 +267,7 @@ pub fn define_column_info( } else { cdr1_starts.push(None); } - if exm.cdr2_start.is_some() { - let mut start = exm.cdr2_start.unwrap(); + if let Some(mut start) = exm.cdr2_start { for (i, c) in exm.seq_del.iter().enumerate() { if i < start && *c == b'-' { start += 1; @@ -106,7 +277,36 @@ pub fn define_column_info( } else { cdr2_starts.push(None); } - let mut start = exm.cdr3_start; + let mut start = exm.fr1_start; + for (i, c) in exm.seq_del.iter().enumerate() { + if i < start && *c == b'-' { + start += 1; + } + } + fr1_starts.push(start); + if exm.fr2_start.is_some() { + let mut start = exm.fr2_start.unwrap(); + for (i, c) in exm.seq_del.iter().enumerate() { + if i < start && *c == b'-' { + start += 1; + } + } + fr2_starts.push(Some(start)); + } else { + 
fr2_starts.push(None); + } + if exm.fr3_start.is_some() { + let mut start = exm.fr3_start.unwrap(); + for (i, c) in exm.seq_del.iter().enumerate() { + if i < start && *c == b'-' { + start += 1; + } + } + fr3_starts.push(Some(start)); + } else { + fr3_starts.push(None); + } + let mut start = exm.cdr3_start - exm.ins_len(); for (i, c) in exm.seq_del.iter().enumerate() { if i < start && *c == b'-' { start += 1; @@ -128,86 +328,101 @@ pub fn define_column_info( let mut dids = vec![None; cols]; let mut jids = vec![0; cols]; let mut cids = vec![None; cols]; - for col in 0..cols { + let mut left = vec![false; cols]; + for (m, (left, ((uids, (vids, (vpids, (vpids_d, vpids_a)))), (dids, (jids, cids))))) in mat + .iter() + .zip( + left.iter_mut().zip( + uids.iter_mut() + .zip( + vids.iter_mut().zip( + vpids + .iter_mut() + .zip(vpids_d.iter_mut().zip(vpids_a.iter_mut())), + ), + ) + .zip(dids.iter_mut().zip(jids.iter_mut().zip(cids.iter_mut()))), + ), + ) + .take(cols) + { let mut u = Vec::::new(); let mut v = Vec::::new(); let mut vp = Vec::<(usize, Option, Option, Option)>::new(); let mut d = Vec::::new(); let mut j = Vec::::new(); let mut c = Vec::::new(); - for e in 0..exacts.len() { - let clonotype_id = exacts[e]; + for (&clonotype_id, &m) in exacts.iter().zip(m.iter()) { let ex = &exact_clonotypes[clonotype_id]; - let m = mat[col][e]; - if m.is_some() { - let x = &ex.share[m.unwrap()]; - if x.u_ref_id.is_some() { - for _ in 0..ex.ncells() { - u.push(x.u_ref_id.unwrap()); - } + if let Some(m) = m { + let x = &ex.share[m]; + if x.left { + *left = true; + } + let ncells = ex.ncells(); + if let Some(u_ref_id) = x.u_ref_id { + u.resize(u.len() + ncells, u_ref_id); } - for _ in 0..ex.ncells() { - v.push(x.v_ref_id); - vp.push(( + // This is not actually correct. It copies the consensus V gene assignment + // for an exact subclonotype, rather than fetch the per cell entries. However + // it would be very rare for this to make a difference. 
+ v.resize(v.len() + ncells, x.v_ref_id); + vp.resize( + vp.len() + ncells, + ( x.v_ref_id, x.v_ref_id_donor, x.v_ref_id_donor_donor, x.v_ref_id_donor_alt_id, - )); + ), + ); + if let Some(d_ref_id) = x.d_ref_id { + d.resize(d.len() + ncells, d_ref_id); } - if x.d_ref_id.is_some() { - for _ in 0..ex.ncells() { - d.push(x.d_ref_id.unwrap()); - } - } - for _ in 0..ex.ncells() { - j.push(x.j_ref_id); - } - if x.c_ref_id.is_some() { - for _ in 0..ex.ncells() { - c.push(x.c_ref_id.unwrap()); - } + j.resize(j.len() + ncells, x.j_ref_id); + if let Some(c_ref_id) = x.c_ref_id { + c.resize(c.len() + ncells, c_ref_id); } } } - u.sort(); - v.sort(); + u.sort_unstable(); + v.sort_unstable(); vp.sort(); - d.sort(); - j.sort(); - c.sort(); + d.sort_unstable(); + j.sort_unstable(); + c.sort_unstable(); let mut uf = Vec::<(u32, usize)>::new(); make_freq(&u, &mut uf); if !uf.is_empty() { - uids[col] = Some(uf[0].1); + *uids = Some(uf[0].1); } let mut vf = Vec::<(u32, usize)>::new(); make_freq(&v, &mut vf); - vids[col] = vf[0].1; + *vids = vf[0].1; let mut to_delete = vec![false; vp.len()]; for i in 0..vp.len() { - if vp[i].0 != vids[col] { + if vp[i].0 != *vids { to_delete[i] = true; } } erase_if(&mut vp, &to_delete); let mut vpf = Vec::<(u32, (usize, Option, Option, Option))>::new(); make_freq(&vp, &mut vpf); - vpids[col] = (vpf[0].1).1; - vpids_d[col] = (vpf[0].1).2; - vpids_a[col] = (vpf[0].1).3; + *vpids = (vpf[0].1).1; + *vpids_d = (vpf[0].1).2; + *vpids_a = (vpf[0].1).3; let mut df = Vec::<(u32, usize)>::new(); make_freq(&d, &mut df); if !df.is_empty() { - dids[col] = Some(df[0].1); + *dids = Some(df[0].1); } let mut jf = Vec::<(u32, usize)>::new(); make_freq(&j, &mut jf); - jids[col] = jf[0].1; + *jids = jf[0].1; let mut cf = Vec::<(u32, usize)>::new(); make_freq(&c, &mut cf); if !cf.is_empty() { - cids[col] = Some(cf[0].1); + *cids = Some(cf[0].1); } } @@ -216,15 +431,13 @@ pub fn define_column_info( let mut seqss = Vec::>>::new(); let mut seqss_amino = Vec::>>::new(); let 
nexacts = exacts.len(); - for cx in 0..cols { + for cx in mat.iter().take(cols) { let mut seqs = Vec::>::new(); let mut seqs_amino = Vec::>::new(); - for u in 0..nexacts { - let m = mat[cx][u]; - if m.is_some() { - let m = m.unwrap(); - seqs.push(exact_clonotypes[exacts[u]].share[m].seq_del.clone()); - seqs_amino.push(exact_clonotypes[exacts[u]].share[m].seq_del_amino.clone()); + for (&m, &e) in cx.iter().zip(exacts.iter()).take(nexacts) { + if let Some(m) = m { + seqs.push(exact_clonotypes[e].share[m].seq_del.clone()); + seqs_amino.push(exact_clonotypes[e].share[m].seq_del_amino.clone()); } else { seqs.push(Vec::::new()); seqs_amino.push(Vec::::new()); @@ -237,50 +450,62 @@ pub fn define_column_info( // Show segment names. We used ◼ as a separator character, but that does not render well // as a fixed-width character in Google Docs. So we changed it to ◆. - let mut chain_descrip = vec![String::new(); cols]; - for cx in 0..cols { - let vid = vids[cx]; - let mut vdescrip = format!("{}", refdata.id[vid]); - if vpids[cx].is_some() { - vdescrip = format!( - "{}.{}.{}", - vdescrip, - vpids_d[cx].unwrap() + 1, - vpids_a[cx].unwrap() + 1 - ); - } - chain_descrip[cx] = format!("{}|{}", vdescrip, refdata.name[vid]); - let did = dids[cx]; - if did.is_some() { - let did = did.unwrap(); - chain_descrip[cx] += &format!(" ◆ {}|{}", refdata.id[did], refdata.name[did]); - } - let jid = jids[cx]; - chain_descrip[cx] += &format!(" ◆ {}|{}", refdata.id[jid], refdata.name[jid]); - } + let chain_descrip = (0..cols) + .map(|cx| { + let vid = vids[cx]; + let mut vdescrip = format!("{}", refdata.id[vid]); + if vpids[cx].is_some() { + vdescrip = format!( + "{}.{}.{}", + vdescrip, + vpids_d[cx].unwrap() + 1, + vpids_a[cx].unwrap() + 1 + ); + } + let mut chain_descrip = format!("{vdescrip}|{}", refdata.name[vid]); + if let Some(did) = dids[cx] { + write!( + chain_descrip, + " ◆ {}|{}", + refdata.id[did], refdata.name[did] + ) + .unwrap(); + } + let jid = jids[cx]; + write!( + chain_descrip, 
+ " ◆ {}|{}", + refdata.id[jid], refdata.name[jid] + ) + .unwrap(); + chain_descrip + }) + .collect(); // Return. ColInfo { - uids: uids, - vids: vids, - vpids: vpids, - dids: dids, - jids: jids, - cids: cids, - cdr1_starts: cdr1_starts, - cdr1_lens: cdr1_lens, - cdr2_starts: cdr2_starts, - cdr2_lens: cdr2_lens, - cdr3_starts: cdr3_starts, - cdr3_lens: cdr3_lens, - seq_lens: seq_lens, - seq_del_lens: seq_del_lens, - seqss: seqss, - seqss_amino: seqss_amino, - chain_descrip: chain_descrip, + left, + uids, + vids, + vpids, + dids, + jids, + cids, + fr1_starts, + fr2_starts, + fr3_starts, + cdr1_starts, + cdr2_starts, + cdr3_starts, + cdr3_lens, + seq_lens, + seq_del_lens, + seqss, + seqss_amino, + chain_descrip, mat: Vec::>>::new(), - cvars: cvars, + cvars, } } @@ -290,31 +515,27 @@ pub fn define_column_info( pub fn add_header_text( ctl: &EncloneControl, - exacts: &Vec, - exact_clonotypes: &Vec, - rord: &Vec, - mat: &Vec>>, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + rord: &[usize], + mat: &[Vec>], mut mlog: &mut Vec, ) { let nexacts = exacts.len(); let cols = mat.len(); - for cx in 0..cols { + for (cx, mcx) in mat.iter().take(cols).enumerate() { let (mut vref, mut jref) = (Vec::::new(), Vec::::new()); - for u in 0..nexacts { - let m = mat[cx][u]; - if m.is_some() { - let m = m.unwrap(); - vref = exact_clonotypes[exacts[u]].share[m].vs.to_ascii_vec(); - jref = exact_clonotypes[exacts[u]].share[m].js.to_ascii_vec(); + for (&m, &e) in mcx.iter().zip(exacts.iter()).take(nexacts) { + if let Some(m) = m { + vref = exact_clonotypes[e].share[m].vs.to_ascii_vec(); + jref = exact_clonotypes[e].share[m].js.to_ascii_vec(); } } let mut seqs = Vec::>::new(); let mut full_seqs = Vec::>::new(); for u in 0..nexacts { let ex = &exact_clonotypes[exacts[rord[u]]]; - let m = mat[cx][rord[u]]; - if m.is_some() { - let m = m.unwrap(); + if let Some(m) = mat[cx][rord[u]] { seqs.push(ex.share[m].seq_del.clone()); full_seqs.push(ex.share[m].full_seq.clone()); } else { @@ 
-354,17 +575,20 @@ pub fn add_header_text( pub fn insert_reference_rows( ctl: &EncloneControl, rsi: &ColInfo, - show_aa: &Vec>, + show_aa: &[Vec], + field_types: &[Vec], refdata: &RefData, - dref: &Vec, - row1: &Vec, + dref: &[DonorReferenceItem], + row1: &[String], drows: &mut Vec>, rows: &mut Vec>, - exacts: &Vec, - exact_clonotypes: &Vec, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + peer_groups: &[Vec<(usize, u8, u32)>], + cdr3_con: &[Vec], ) { let cols = rsi.seq_del_lens.len(); - if drows.len() >= 1 { + if !drows.is_empty() { for pass in 1..=2 { let mut row = Vec::::new(); if pass == 1 { @@ -379,10 +603,7 @@ pub fn insert_reference_rows( let mut refseq = Vec::::new(); let mut vlen: usize; let vseq: Vec; - if pass == 1 { - vlen = refdata.refs[rsi.vids[cz]].len(); - vseq = refdata.refs[rsi.vids[cz]].to_ascii_vec(); - } else if rsi.vpids[cz].is_none() { + if pass == 1 || rsi.vpids[cz].is_none() { vlen = refdata.refs[rsi.vids[cz]].len(); vseq = refdata.refs[rsi.vids[cz]].to_ascii_vec(); } else { @@ -396,26 +617,29 @@ pub fn insert_reference_rows( let mut gap = rsi.seq_del_lens[cz] as isize - vlen as isize - jlen as isize; if gap < -2 * (trim as isize) { - let mut bcs = Vec::::new(); - for u in 0..exacts.len() { - let ex = &exact_clonotypes[exacts[u]]; - for i in 0..ex.clones.len() { - bcs.push(ex.clones[i][0].barcode.clone()); - } - } - bcs.sort(); - eprintln!("\ncz = {}", cz); - eprintln!("pass = {}", pass); - eprintln!("seq_del.len() = {}", rsi.seq_del_lens[cz]); - eprintln!("vlen = {}", vlen); - eprintln!("jlen = {}", jlen); - eprintln!("gap = seq_del.len() - vlen - jlen"); - panic!( - "Something is wrong because gap is {}, which is negative.\n\ - This is happening for the clonotype with these barcodes:\n{}.", - gap, - bcs.iter().format(",") - ); + // We have removed the original assert error and assigned a non-negative value + // to the gap to prevent bugs in this part of the code. 
+ gap = vlen as isize + jlen as isize; + // let mut bcs = Vec::::new(); + // for u in 0..exacts.len() { + // let ex = &exact_clonotypes[exacts[u]]; + // for i in 0..ex.clones.len() { + // bcs.push(ex.clones[i][0].barcode.clone()); + // } + // } + // bcs.sort(); + // eprintln!("\ncz = {}", cz); + // eprintln!("pass = {}", pass); + // eprintln!("seq_del.len() = {}", rsi.seq_del_lens[cz]); + // eprintln!("vlen = {}", vlen); + // eprintln!("jlen = {}", jlen); + // eprintln!("gap = seq_del.len() - vlen - jlen"); + // panic!( + // "Something is wrong because gap is {}, which is negative.\n\ + // This is happening for the clonotype with these barcodes:\n{}.", + // gap, + // bcs.iter().format(",") + // ); } if gap < 0 { @@ -429,41 +653,42 @@ pub fn insert_reference_rows( trim -= ptrim as usize; } + refseq.extend(&vseq[..vlen]); let gap = gap as usize; - for j in 0..vlen { - refseq.push(vseq[j]); - } - for _ in 0..gap { - refseq.push(b'-'); - } - for j in 0..jlen { - refseq.push(jseq[j + trim]); - } + refseq.resize(refseq.len() + gap, b'-'); + refseq.extend(&jseq[trim..trim + jlen]); let mut refx = String::new(); - let cs = rsi.cdr3_starts[cz] / 3; - let n = rsi.cdr3_lens[cz]; + let mut last_color = "black".to_string(); for k in 0..show_aa[cz].len() { let p = show_aa[cz][k]; - if k > 0 && p == cs { + if k > 0 + && field_types[cz][k] != field_types[cz][k - 1] + && !ctl.gen_opt.nospaces + { refx += " "; } if 3 * p + 3 > refseq.len() || refseq[3 * p..3 * p + 3].contains(&b'-') { refx += "◦"; } else { - let mut log = Vec::::new(); - let aa = codon_to_aa(&refseq[3 * p..3 * p + 3]); - if ctl.gen_opt.color == "codon".to_string() { - emit_codon_color_escape(&refseq[3 * p..3 * p + 3], &mut log); - log.push(aa); - emit_end_escape(&mut log); - } else { - color_by_property(&vec![aa], &mut log); - } + let x = &peer_groups[rsi.vids[cz]]; + let last = k == show_aa[cz].len() - 1; + let log = color_codon( + ctl, + &refseq, + &Vec::new(), + x, + cz, + 0, + p, + 0, + &mut last_color, + last, 
+ cdr3_con, + exacts, + exact_clonotypes, + ); refx += strme(&log); } - if k < show_aa[cz].len() - 1 && p == cs + n - 1 { - refx += " "; - } } row.push(refx); for _ in 1..rsi.cvars[cz].len() { @@ -477,194 +702,73 @@ pub fn insert_reference_rows( // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -pub fn build_table_stuff( +// Process COMPLETE. + +pub fn process_complete( ctl: &EncloneControl, - exacts: &Vec, - exact_clonotypes: &Vec, - rsi: &ColInfo, - vars: &Vec>, - show_aa: &Vec>, - row1: &mut Vec, - justify: &mut Vec, - drows: &mut Vec>, - rows: &mut Vec>, - lvars: &Vec, + nexacts: usize, + bads: &mut [bool], + mat: &[Vec>], ) { - // Build lead header row and justification to match. - - let cols = rsi.vids.len(); - let nexacts = exacts.len(); - if !ctl.clono_print_opt.bu { - row1.push("#".to_string()); - } else { - row1.push("# barcode".to_string()); - } - justify.push(b'l'); - for i in 0..lvars.len() { - let mut x = lvars[i].to_string(); - if x.contains(':') { - x = x.before(":").to_string(); - } - row1.push(x.clone()); - justify.push(justification(&x)); - } - - // Insert main chain row. Then insert chain info row if we're using CHAIN_SPLIT. 
- - let mut row = vec!["".to_string(); row1.len()]; - for j in 0..cols { - if rsi.chain_descrip[j].contains(&"IGH".to_string()) - || rsi.chain_descrip[j].contains(&"TRB".to_string()) - { - row.push(bold(&format!("CHAIN {}", j + 1))); - } else { - row.push(format!("CHAIN {}", j + 1)); - } - for _ in 1..rsi.cvars[j].len() { - row.push("\\ext".to_string()); - } - } - rows.push(row); - let mut row = vec!["".to_string(); row1.len()]; - for j in 0..cols { - if rsi.chain_descrip[j].contains(&"IGH".to_string()) - || rsi.chain_descrip[j].contains(&"TRB".to_string()) - { - row.push(bold(&format!("{}", rsi.chain_descrip[j]))); - } else { - row.push(format!("{}", rsi.chain_descrip[j])); + let cols = mat.len(); + if ctl.gen_opt.complete { + let mut used = vec![false; cols]; + for (u, &b) in bads.iter().take(nexacts).enumerate() { + if !b { + for (used, m) in used.iter_mut().zip(mat.iter()).take(cols) { + if m[u].is_some() { + *used = true; + } + } + } } - for _ in 1..rsi.cvars[j].len() { - row.push("\\ext".to_string()); + for (&used, mat) in used.iter().zip(mat.iter()).take(cols) { + if used { + for (b, m) in bads.iter_mut().take(nexacts).zip(mat.iter()) { + if m.is_none() { + *b = true; + } + } + } } } - rows.push(row); +} - // Insert divider row (horizontal line across the chains). +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - let mut row = vec!["".to_string(); lvars.len() + 1]; - let mut ncall = 0; - for j in 0..cols { - ncall += rsi.cvars[j].len(); +// Identify certain extra parseable variables. These arise from parameterizable cvars. 
+ +pub fn get_extra_parseables<'a>(ctl: &'a EncloneControl, pcols_sort: &'a [String]) -> Vec<&'a str> { + let mut extra_parseables = Vec::<&str>::new(); + let mut exclusions = ctl + .clono_print_opt + .cvars + .iter() + .map(String::as_str) + .collect::>(); + for v in CVARS_ALLOWED { + exclusions.push(v); } - row.append(&mut vec!["\\hline".to_string(); ncall]); - rows.push(row); - - // Insert position rows. - - *drows = insert_position_rows(&rsi, &show_aa, &vars, &row1); - let mut drows2 = drows.clone(); - rows.append(&mut drows2); - - // Insert main per-chain header row. - - let mut row = vec!["".to_string(); row1.len()]; - for cx in 0..cols { - let show = &show_aa[cx]; - for j in 0..rsi.cvars[cx].len() { - if rsi.cvars[cx][j] != "amino".to_string() { - if drows.is_empty() { - row.push(rsi.cvars[cx][j].to_string()); - } else { - row.push("".to_string()); - } - } else { - for u in 0..nexacts { - let m = rsi.mat[cx][u]; - if m.is_some() { - let m = m.unwrap(); - let mut s = String::new(); - let amino = &ctl.clono_print_opt.amino; - let printing_cdr1 = - amino.contains(&"cdr1".to_string()) && rsi.cdr1_starts[cx].is_some(); - let printing_cdr2 = - amino.contains(&"cdr2".to_string()) && rsi.cdr2_starts[cx].is_some(); - let printing_cdr3 = amino.contains(&"cdr3".to_string()); - if printing_cdr1 { - let cs1 = rsi.cdr1_starts[cx].unwrap() / 3; - let lead = show.iter().position(|x| *x == cs1).unwrap(); - s += &" ".repeat(lead); - if lead > 0 { - s += " "; - } - let n = exact_clonotypes[exacts[u]].share[m].cdr1_aa.len(); - if n >= 4 { - let left = (n - 3) / 2; - let right = n - left - 4; - s += &"═".repeat(left); - s += "CDR1"; - s += &"═".repeat(right); - } else if n == 3 { - s += "CR1"; - } else if n == 2 { - s += "C1"; - } else if n == 1 { - s += "1"; - } - } - if printing_cdr2 { - let lead; - let cs2 = rsi.cdr2_starts[cx].unwrap() / 3; - let pos2 = show.iter().position(|x| *x == cs2).unwrap(); - if !printing_cdr1 { - lead = pos2; - } else { - let cs1 = 
rsi.cdr1_starts[cx].unwrap() / 3; - let pos1 = show.iter().position(|x| *x == cs1).unwrap(); - lead = pos2 - pos1; - } - s += &" ".repeat(lead); - if lead > 0 { - s += " "; - } - let n = exact_clonotypes[exacts[u]].share[m].cdr2_aa.len(); - if n >= 4 { - let left = (n - 3) / 2; - let right = n - left - 4; - s += &"═".repeat(left); - s += "CDR2"; - s += &"═".repeat(right); - } else if n == 3 { - s += "CR2"; - } else if n == 2 { - s += "C2"; - } else if n == 1 { - s += "2"; - } - } - - if printing_cdr3 { - let lead; - let cs3 = rsi.cdr3_starts[cx] / 3; - let pos3 = show.iter().position(|x| *x == cs3).unwrap(); - if !printing_cdr1 && !printing_cdr2 { - lead = pos3; - } else if printing_cdr2 { - let cs2 = rsi.cdr2_starts[cx].unwrap() / 3; - let pos2 = show.iter().position(|x| *x == cs2).unwrap(); - lead = pos3 - pos2; - } else { - let cs1 = rsi.cdr1_starts[cx].unwrap() / 3; - let pos1 = show.iter().position(|x| *x == cs1).unwrap(); - lead = pos3 - pos1; - } - s += &" ".repeat(lead); - if lead > 0 { - s += " "; - } - let n = exact_clonotypes[exacts[u]].share[m].cdr3_aa.len(); - let left = (n - 3) / 2; - let right = n - left - 4; - s += &"═".repeat(left); - s += "CDR3"; - s += &"═".repeat(right); - } - row.push(s); - break; - } - } + for v in CVARS_ALLOWED_PCELL { + exclusions.push(v); + } + unique_sort(&mut exclusions); + for x in pcols_sort.iter() { + let chars = x.char_indices().collect::>(); + let mut trim = 0; + for c in chars.iter().rev() { + if !c.1.is_ascii_digit() { + break; + } + trim += 1; + } + if trim > 0 { + let v = &x[..chars[chars.len() - trim].0]; + if !bin_member(&exclusions, &v) { + extra_parseables.push(v); } } } - rows.push(row); + unique_sort(&mut extra_parseables); + extra_parseables } diff --git a/enclone_print/src/print_utils4.rs b/enclone_print/src/print_utils4.rs index b99aaf1fe..d1aece781 100644 --- a/enclone_print/src/print_utils4.rs +++ b/enclone_print/src/print_utils4.rs @@ -1,168 +1,51 @@ -// Copyright (c) 2020 10X Genomics, Inc. 
All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. -use amino::*; -use debruijn::dna_string::*; -use enclone_core::defs::*; -use enclone_proto::types::*; +use amino::codon_to_aa; +use enclone_core::{ + barcode_fate::BarcodeFate, + defs::{ColInfo, EncloneControl, ExactClonotype, GexInfo}, +}; +use enclone_proto::types::DonorReferenceItem; use equiv::EquivRel; use itertools::Itertools; -use std::cmp::max; use std::collections::HashMap; -use string_utils::*; -use vdj_ann::refx::*; -use vector_utils::*; +use string_utils::TextUtils; +use vdj_ann::refx::RefData; +use vector_utils::{bin_member, bin_position, bin_position1_2, unique_sort}; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -// Note confusing notation. The object cdr3 contains pairs (String,usize) consisting of -// the cdr3_aa and the length of seq_del. - -pub fn define_mat( +pub fn get_gex_matrix_entry( ctl: &EncloneControl, - exact_clonotypes: &Vec, - cdr3s: &Vec>, - js: &Vec, - od: &Vec<(Vec, usize, i32)>, - info: &Vec, -) -> Vec>> { - // Form the flattened list of all CDR3_AAs. - - let nexacts = cdr3s.len(); - let mut all_cdr3s = Vec::<(Vec, usize)>::new(); - for j in 0..nexacts { - for k in 0..cdr3s[j].len() { - all_cdr3s.push((cdr3s[j][k].0.as_bytes().to_vec(), cdr3s[j][k].1)); - } - } - - // Sort the CDR3s. - - unique_sort(&mut all_cdr3s); - - // Form an equivalence relation on the CDR3_AAs, requiring that they are "close enough": - // 1. They have the same length and differ at no more than 4 positions. - // 2. Each has a V..J sequence such that the two differ by no more than 50 positions. 
- - let mut ec: EquivRel = EquivRel::new(all_cdr3s.len() as i32); - for m1 in 0..all_cdr3s.len() { - for m2 in m1 + 1..all_cdr3s.len() { - let (x1, x2) = (&all_cdr3s[m1].0, &all_cdr3s[m2].0); - let (y1, y2) = (all_cdr3s[m1].1, all_cdr3s[m2].1); - if x1.len() == x2.len() && y1 == y2 { - let mut diffs = 0; - for u in 0..x1.len() { - if x1[u] != x2[u] { - diffs += 1; - } - } - if diffs <= 4 { - 'outer: for l1 in 0..od.len() { - let y1: &CloneInfo = &info[od[l1].2 as usize]; - for u1 in 0..y1.cdr3_aa.len() { - if y1.cdr3_aa[u1] == strme(&x1).after(":") { - for l2 in 0..od.len() { - let y2: &CloneInfo = &info[od[l2].2 as usize]; - for u2 in 0..y2.cdr3_aa.len() { - if y2.cdr3_aa[u2] == strme(&x2).after(":") { - if y1.tigs[u1].len() == y2.tigs[u2].len() { - // Could be we're spending a lot of time - // finding diffs. - - let mut vj_diffs = 0; - if !y1.has_del[u1] && !y2.has_del[u2] { - vj_diffs = ndiffs(&y1.tigsp[u1], &y2.tigsp[u2]); - } else { - for j in 0..y1.tigs[u1].len() { - if y1.tigs[u1][j] != y2.tigs[u2][j] { - vj_diffs += 1; - } - } - } - if vj_diffs <= ctl.heur.max_diffs { - ec.join(m1 as i32, m2 as i32); - break 'outer; - } - } - } - } - } - } - } - } - } + gex_info: &GexInfo, + fid: usize, + d_all: &[Vec], + ind_all: &[Vec], + li: usize, + l: usize, + p: usize, + y: &str, +) -> f64 { + let mut raw_count = 0 as f64; + if gex_info.gex_matrices[li].initialized() { + raw_count = gex_info.gex_matrices[li].value(p, fid) as f64; + } else { + for j in 0..d_all[l].len() { + if ind_all[l][j] == fid as u32 { + raw_count = d_all[l][j] as f64; + break; } } } - - // Create rmap, that sends - // (index into exact subclonotypes for this clonotype, - // index into chains for one of these exact subclonotypes) - // to an index into the orbit reps for the CDR3s. 
- - let mut r = Vec::::new(); - ec.orbit_reps(&mut r); - let mut rpos = HashMap::<(usize, usize), usize>::new(); - for u in 0..nexacts { - let x = &cdr3s[u]; - for (iy, y) in x.iter().enumerate() { - let p = bin_position(&all_cdr3s, &(y.0.as_bytes().to_vec(), y.1)); - let c = ec.class_id(p); - let q = bin_position(&r, &c) as usize; - rpos.insert((u, iy), q); - } + let mult = if y.ends_with("_g") { + gex_info.gex_mults[li] + } else { + gex_info.fb_mults[li] + }; + if !ctl.gen_opt.full_counts { + raw_count *= mult; } - - // Find the maximum multiplicity of each orbit, and the number of columns. - - let mut mm = vec![0; r.len()]; - for (u, x) in cdr3s.iter().enumerate() { - let mut mm0 = vec![0; r.len()]; - for iy in 0..x.len() { - let q = rpos[&(u, iy)]; - mm0[q] += 1; - } - for i in 0..r.len() { - mm[i] = max(mm[i], mm0[i]); - } - } - let cols = mm.iter().sum(); - - // Define a matrix mat[col][ex] which is the column of the exact subclonotype ex - // corresponding to the given column col of the clonotype, which may or may not be - // defined. - // ◼ This should be propagated so we don't compute something equivalent - // over and over. 
- - let mut mat = vec![vec![None; nexacts]; cols]; - for cx in 0..cols { - // for every column - 'exact: for u in 0..nexacts { - // for every exact subclonotype - let clonotype_id = od[js[u]].1; - let ex = &exact_clonotypes[clonotype_id]; - let x = &cdr3s[u]; - let mut mm0 = vec![0; r.len()]; - // for every chain in the exact subclonotype: - for (iy, y) in x.iter().enumerate() { - let q = rpos[&(u, iy)]; - let mut col = mm0[q]; - for j in 0..q { - col += mm[j]; - } - mm0[q] += 1; - if col != cx { - continue; - } - for m in 0..ex.share.len() { - if ex.share[m].cdr3_aa == y.0.after(":") && ex.share[m].seq_del.len() == y.1 { - mat[cx][u] = Some(m); - continue 'exact; - } - } - } - } - } - mat + raw_count } // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ @@ -172,12 +55,12 @@ pub fn define_mat( pub fn build_show_aa( ctl: &EncloneControl, rsi: &ColInfo, - vars_amino: &Vec>, - shares_amino: &Vec>, + vars_amino: &[Vec], + shares_amino: &[Vec], refdata: &RefData, - dref: &Vec, - exacts: &Vec, - exact_clonotypes: &Vec, + dref: &[DonorReferenceItem], + _exacts: &[usize], + _exact_clonotypes: &[ExactClonotype], ) -> Vec> { let cols = rsi.vids.len(); let mut show_aa = vec![Vec::::new(); cols]; @@ -190,12 +73,62 @@ pub fn build_show_aa( } } } + if ctl.clono_print_opt.amino.contains(&"cdr1".to_string()) + && rsi.cdr1_starts[cx].is_some() + && rsi.fr2_starts[cx].is_some() + && rsi.cdr1_starts[cx].unwrap() <= rsi.fr2_starts[cx].unwrap() + { + for j in (rsi.cdr1_starts[cx].unwrap()..rsi.fr2_starts[cx].unwrap()).step_by(3) { + show_aa[cx].push(j / 3); + } + } + if ctl.clono_print_opt.amino.contains(&"cdr2".to_string()) + && rsi.cdr2_starts[cx].is_some() + && rsi.fr3_starts[cx].is_some() + && rsi.cdr2_starts[cx].unwrap() <= rsi.fr3_starts[cx].unwrap() + { + for j in (rsi.cdr2_starts[cx].unwrap()..rsi.fr3_starts[cx].unwrap()).step_by(3) { + show_aa[cx].push(j / 3); + } + } if ctl.clono_print_opt.amino.contains(&"cdr3".to_string()) { 
for j in 0..rsi.cdr3_lens[cx] { let p = rsi.cdr3_starts[cx] / 3 + j; show_aa[cx].push(p); } } + if ctl.clono_print_opt.amino.contains(&"fwr1".to_string()) + && rsi.cdr1_starts[cx].is_some() + && rsi.fr1_starts[cx] <= rsi.cdr1_starts[cx].unwrap() + { + for j in (rsi.fr1_starts[cx]..rsi.cdr1_starts[cx].unwrap()).step_by(3) { + show_aa[cx].push(j / 3); + } + } + if ctl.clono_print_opt.amino.contains(&"fwr2".to_string()) + && rsi.fr2_starts[cx].is_some() + && rsi.cdr2_starts[cx].is_some() + && rsi.fr2_starts[cx].unwrap() <= rsi.cdr2_starts[cx].unwrap() + { + for j in (rsi.fr2_starts[cx].unwrap()..rsi.cdr2_starts[cx].unwrap()).step_by(3) { + show_aa[cx].push(j / 3); + } + } + if ctl.clono_print_opt.amino.contains(&"fwr3".to_string()) + && rsi.fr3_starts[cx].is_some() + && rsi.fr3_starts[cx].unwrap() <= rsi.cdr3_starts[cx] + { + for j in (rsi.fr3_starts[cx].unwrap()..rsi.cdr3_starts[cx]).step_by(3) { + show_aa[cx].push(j / 3); + } + } + if ctl.clono_print_opt.amino.contains(&"fwr4".to_string()) { + for j in + (rsi.cdr3_starts[cx] + 3 * rsi.cdr3_lens[cx]..rsi.seq_del_lens[cx] - 1).step_by(3) + { + show_aa[cx].push(j / 3); + } + } if ctl.clono_print_opt.amino.contains(&"var".to_string()) { for j in 0..vars_amino[cx].len() { let p = vars_amino[cx][j]; @@ -211,15 +144,18 @@ pub fn build_show_aa( if ctl.clono_print_opt.amino.contains(&"donor".to_string()) { let vseq1 = refdata.refs[rsi.vids[cx]].to_ascii_vec(); let jseq1 = refdata.refs[rsi.jids[cx]].to_ascii_vec(); - let vseq2: Vec; - if rsi.vpids[cx].is_some() { - vseq2 = dref[rsi.vpids[cx].unwrap()].nt_sequence.clone(); + let vseq2 = if let Some(vpid) = rsi.vpids[cx] { + &dref[vpid].nt_sequence } else { - vseq2 = vseq1.clone(); - } + &vseq1 + }; let jseq2 = &jseq1; let vlen = vseq2.len() - ctl.heur.ref_v_trim; let jlen = jseq2.len() - ctl.heur.ref_j_trim; + // This test must be here for a reason, but we encountered examples where it was + // triggered, and there was nothing obviously wrong. 
In the event that an internal + // error is encountered elsewhere in the code, we might wish to turn this back on. + /* let gap = rsi.seq_lens[cx] as isize - vlen as isize - jlen as isize; if gap < 0 { let mut bcs = Vec::::new(); @@ -232,11 +168,16 @@ pub fn build_show_aa( bcs.sort(); panic!( "Something is wrong because gap is {}, which is negative.\n\ - This is happening for the clonotype with these barcodes:\n{}.", + This is happening for chain {} of {} of the clonotype with \ + these barcodes:\n{}\nand with first V..J sequence\n{}.", gap, - bcs.iter().format(",") + cx + 1, + cols, + bcs.iter().format(","), + strme(&exact_clonotypes[exacts[0]].share[cx].seq) ); } + */ for j in 0..vlen { if j < vseq1.len() && vseq1[j] != vseq2[j] { show_aa[cx].push(j / 3); @@ -252,17 +193,18 @@ pub fn build_show_aa( if ctl.clono_print_opt.amino.contains(&"donorn".to_string()) { let vseq1 = refdata.refs[rsi.vids[cx]].to_ascii_vec(); let jseq1 = refdata.refs[rsi.jids[cx]].to_ascii_vec(); - let vseq2: Vec; - if rsi.vpids[cx].is_some() { - vseq2 = dref[rsi.vpids[cx].unwrap()].nt_sequence.clone(); + let vseq2 = if let Some(vpid) = rsi.vpids[cx] { + &dref[vpid].nt_sequence } else { - vseq2 = vseq1.clone(); - } + &vseq1 + }; let jseq2 = &jseq1; let vlen = vseq2.len() - ctl.heur.ref_v_trim; let jlen = jseq2.len() - ctl.heur.ref_j_trim; + /* let gap = rsi.seq_lens[cx] as isize - vlen as isize - jlen as isize; assert!(gap >= 0); + */ for j in 0..vlen { if j < vseq1.len() && vseq1[j] != vseq2[j] { let n = 3 * (j / 3); @@ -300,10 +242,661 @@ pub fn build_show_aa( if !show_aa[cx].is_empty() { let p = show_aa[cx][show_aa[cx].len() - 1]; - if 3 * p + 3 > rsi.seq_lens[cx] { + if 3 * p + 3 > rsi.seq_del_lens[cx] { show_aa[cx].pop(); } } } show_aa } + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn compute_some_stats( + ctl: &EncloneControl, + lvars: &[String], + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + gex_info: 
&GexInfo, + vdj_cells: &[Vec], + n_vdj_gex: &[usize], + cred: &mut Vec>, + pe: &mut Vec>, + ppe: &mut Vec>, + npe: &mut Vec>, +) { + let nexacts = exacts.len(); + + // Compute "cred" stats (credibility/# of neighboring cells that are also B cells). + + *cred = vec![Vec::::new(); lvars.len()]; + for (lvar, cred) in lvars.iter().zip(cred.iter_mut()) { + if lvar == "cred" { + for &clonotype_id in exacts.iter().take(nexacts) { + let ex = &exact_clonotypes[clonotype_id]; + for clone in &ex.clones { + let bc = &clone[0].barcode; + let li = clone[0].dataset_index; + if gex_info.pca[li].contains_key(&bc.clone()) { + let mut creds = 0; + let mut z = Vec::<(f64, String)>::new(); + let x = &gex_info.pca[li][&bc.clone()]; + for y in gex_info.pca[li].iter() { + let dist2 = x + .iter() + .zip(y.1.iter()) + .map(|(&xm, &ym)| (ym - xm) * (ym - xm)) + .sum::(); + z.push((dist2, y.0.clone())); + } + z.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let top = n_vdj_gex[li]; + for zi in z.iter().take(top) { + if bin_member(&vdj_cells[li], &zi.1) { + creds += 1; + } + } + let pc = 100.0 * creds as f64 / top as f64; + cred.push(format!("{pc:.1}")); + } else { + cred.push("".to_string()); + } + } + } + } + } + + // Compute pe (PCA distance). 
+ + *pe = vec![Vec::::new(); lvars.len()]; + for k in 0..lvars.len() { + if lvars[k].starts_with("pe") { + let n = lvars[k].after("pe").force_usize(); + let mut bcs = Vec::::new(); + let mut lis = Vec::::new(); + let mut count = 0; + let mut to_index = Vec::::new(); + for &clonotype_id in exacts.iter().take(nexacts) { + let ex = &exact_clonotypes[clonotype_id]; + for clone in &ex.clones { + let bc = &clone[0].barcode; + let li = clone[0].dataset_index; + if gex_info.pca[li].contains_key(&bc.clone()) { + bcs.push(bc.to_string()); + lis.push(li); + to_index.push(count); + } + count += 1; + } + } + let mut e: EquivRel = EquivRel::new(bcs.len() as i32); + let mut mat = vec![Vec::::new(); bcs.len()]; + for i in 0..bcs.len() { + mat[i] = gex_info.pca[lis[i]][&bcs[i].clone()].clone(); + } + for i1 in 0..bcs.len() { + for i2 in i1 + 1..bcs.len() { + if e.class_id(i1 as i32) != e.class_id(i2 as i32) { + let mut d = 0.0; + for j in 0..mat[i1].len() { + d += (mat[i1][j] - mat[i2][j]) * (mat[i1][j] - mat[i2][j]); + } + d = d.sqrt(); + if d <= n as f64 { + e.join(i1 as i32, i2 as i32); + } + } + } + } + pe[k] = vec![String::new(); count]; + let mut ids = Vec::::new(); + for i in 0..bcs.len() { + ids.push(e.class_id(i as i32)); + } + unique_sort(&mut ids); + let mut reps = Vec::::new(); + e.orbit_reps(&mut reps); + reps.sort_unstable(); + for (i, idx) in to_index.into_iter().take(bcs.len()).enumerate() { + pe[k][idx] = format!("{}", bin_position(&ids, &e.class_id(i as i32))); + } + } + } + + // Compute ppe (PCA distance). 
+ + *ppe = vec![Vec::::new(); lvars.len()]; + for k in 0..lvars.len() { + if lvars[k].starts_with("ppe") { + let n = lvars[k].after("ppe").force_usize(); + let mut bcs = Vec::<&str>::new(); + let mut lis = Vec::::new(); + let mut count = 0; + let mut to_index = Vec::::new(); + for &clonotype_id in exacts.iter().take(nexacts) { + let ex = &exact_clonotypes[clonotype_id]; + for clone in &ex.clones { + let bc = &clone[0].barcode; + let li = clone[0].dataset_index; + if gex_info.pca[li].contains_key(&bc.clone()) { + bcs.push(bc.as_str()); + lis.push(li); + to_index.push(count); + } + count += 1; + } + } + let mat = bcs + .iter() + .zip(lis.iter()) + .map(|(bc, &li)| gex_info.pca[li][&bc.to_string()].clone()) + .collect::>(); + let mut matg = Vec::>::new(); + for li in 0..ctl.origin_info.n() { + for i in gex_info.pca[li].iter() { + matg.push(i.1.to_vec()); + } + } + let mut x = vec![0; bcs.len()]; + for (m1, x) in mat.iter().zip(x.iter_mut()) { + for m2 in &matg { + let d = m1 + .iter() + .zip(m2.iter()) + .map(|(&m1, &m2)| (m1 - m2) * (m1 - m2)) + .sum::() + .sqrt(); + if d <= n as f64 { + *x += 1; + } + } + } + let mut y = vec![0; bcs.len()]; + for i1 in 0..mat.len() { + for i2 in 0..mat.len() { + let m1 = &mat[i1]; + let m2 = &mat[i2]; + let d = m1 + .iter() + .zip(m2.iter()) + .map(|(&m1, &m2)| (m1 - m2) * (m1 - m2)) + .sum::() + .sqrt(); + if d <= n as f64 { + y[i1] += 1; + } + } + } + ppe[k] = vec![String::new(); count]; + for i in 0..bcs.len() { + ppe[k][to_index[i]] = format!("{:.1}", 100.0 * y[i] as f64 / x[i] as f64); + } + } + } + + // Compute npe (PCA distance). 
+ + *npe = vec![Vec::::new(); lvars.len()]; + for k in 0..lvars.len() { + if lvars[k].starts_with("npe") { + let n = lvars[k].after("npe").force_usize(); + let mut bcs = Vec::<&str>::new(); + let mut lis = Vec::::new(); + let mut count = 0; + let mut to_index = Vec::::new(); + for &clonotype_id in exacts.iter().take(nexacts) { + let ex = &exact_clonotypes[clonotype_id]; + for clone in &ex.clones { + let bc = &clone[0].barcode; + let li = clone[0].dataset_index; + if gex_info.pca[li].contains_key(&bc.clone()) { + bcs.push(bc.as_str()); + lis.push(li); + to_index.push(count); + } + count += 1; + } + } + let mat = bcs + .iter() + .zip(lis.iter()) + .map(|(&bc, &li)| gex_info.pca[li][&bc.to_string()].clone()) + .collect::>(); + let y = mat + .iter() + .map(|m1| { + mat.iter() + .filter(|m2| { + m1.iter() + .zip(m2.iter()) + .map(|(&m1, &m2)| (m1 - m2) * (m1 - m2)) + .sum::() + .sqrt() + <= n as f64 + }) + .count() + }) + .collect::>(); + npe[k] = vec![String::new(); count]; + for i in 0..bcs.len() { + npe[k][to_index[i]] = format!("{}", y[i]); + } + } + } +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn compute_bu( + u: usize, + cell_count: usize, + exacts: &[usize], + lvars: &[String], + ctl: &EncloneControl, + bli: &[(String, usize, usize)], + ex: &ExactClonotype, + exact_clonotypes: &[ExactClonotype], + row: &mut [String], + subrows: &mut Vec>, + varmat: &[Vec>], + have_gex: bool, + gex_info: &GexInfo, + rsi: &ColInfo, + sr: &mut Vec<(Vec, Vec>, Vec>, usize)>, + fate: &[HashMap], + nd_fields: &[String], + alt_bcs: &[String], + cred: &[Vec], + pe: &[Vec], + ppe: &[Vec], + npe: &[Vec], + d_all: &[Vec], + ind_all: &[Vec], + mat: &[Vec>], + these_stats: &[(String, Vec)], + refdata: &RefData, +) { + // Very bad computation because of embedded binary search. 
+ + let cols = mat.len(); + *subrows = Vec::>::new(); + if ctl.clono_print_opt.bu { + for bcl in bli.iter() { + let mut row = Vec::::new(); + let bc = &bcl.0; + let li = bcl.1; + let di = ex.clones[bcl.2][0].dataset_index; + row.push(format!("$ {bc}")); + let ex = &exact_clonotypes[exacts[u]]; + for (k, var) in lvars.iter().enumerate() { + let p = bin_position1_2(these_stats, var); + let nr = row.len(); + let mut filled = false; + for l in 0..ctl.origin_info.n() { + if var.starts_with("n_") && var[2..] == ctl.origin_info.dataset_id[l] { + let n = if di == l { "1" } else { "0" }; + row.push(n.to_string()); + filled = true; + } + } + + // Not sure it makes sense to do the below for just var_def variables. + // Why not all? + + let mut in_var_def = false; + for i in 0..ctl.gen_opt.var_def.len() { + if ctl.gen_opt.var_def[i].0 == *var { + in_var_def = true; + } + } + if var == "nbc" { + in_var_def = true; + } + if in_var_def && p >= 0 { + let stats_me = &these_stats[p as usize].1; + row.push(stats_me[bcl.2].clone()); + filled = true; + } + + // Proceed. + + if filled { + + // Many of the variables (presumably a lot) should be computed by just + // looking up their values in these_stats. This should allow elimination of a + // lot of the code below. We give one example here. 
+ } else if p >= 0 + && var.starts_with("fb") + && var.after("fb").ends_with("_n") + && var.after("fb").rev_before("_n").parse::().is_ok() + && var.after("fb").rev_before("_n").force_usize() >= 1 + { + let stats_me = &these_stats[p as usize].1; + row.push(stats_me[bcl.2].clone()); + } else if var == "n_b" { + let mut n = 0; + let li = ex.clones[bcl.2][0].dataset_index; + if gex_info.cell_type[li].contains_key(bc) + && gex_info.cell_type[li][&bc.clone()].starts_with('B') + { + n = 1; + } + row.push(format!("{n}")); + } else if var == "filter" { + let mut f = String::new(); + if fate[li].contains_key(bc) { + f = fate[li][bc].label().to_string(); + } + row.push(f); + } else if var == "n_other" { + let mut n = 0; + let di = ex.clones[bcl.2][0].dataset_index; + let f = format!("n_{}", ctl.origin_info.dataset_id[di]); + let found = nd_fields.iter().any(|ff| *ff == f); + if !found { + n = 1; + } + row.push(format!("{n}")); + } else if var == "sec" { + let mut n = 0; + if ctl.origin_info.secmem[li].contains_key(bc) { + n = ctl.origin_info.secmem[li][bc].0; + } + row.push(format!("{n}")); + } else if var == "mem" { + let mut n = 0; + if ctl.origin_info.secmem[li].contains_key(bc) { + n = ctl.origin_info.secmem[li][bc].1; + } + row.push(format!("{n}")); + } else if bin_member(alt_bcs, var) { + let mut val = String::new(); + let alt = &ctl.origin_info.alt_bc_fields[li]; + for aj in alt { + if aj.0 == *var && aj.1.contains_key(bc) { + val = aj.1[bc].clone(); + } + } + row.push(val); + } else if var == "datasets" { + row.push(ctl.origin_info.dataset_id[li].clone()); + } else if var == "origins" { + row.push(ctl.origin_info.origin_id[li].clone()); + } else if var == "donors" { + row.push(ctl.origin_info.donor_id[li].clone()); + } else if var == "clust" && have_gex { + let mut cid = 0; + if gex_info.cluster[li].contains_key(bc) { + cid = gex_info.cluster[li][bc]; + } + row.push(format!("{cid}")); + } else if var.starts_with("pe") && have_gex { + row.push(pe[k][cell_count + 
bcl.2].to_string()); + } else if var.starts_with("npe") && have_gex { + row.push(npe[k][cell_count + bcl.2].to_string()); + } else if var.starts_with("ppe") && have_gex { + row.push(ppe[k][cell_count + bcl.2].to_string()); + } else if var == "cred" && have_gex { + row.push(cred[k][cell_count + bcl.2].to_string()); + } else if var == "type" && have_gex { + let cell_type = if gex_info.cell_type[li].contains_key(bc) { + gex_info.cell_type[li][bc].clone() + } else { + String::default() + }; + row.push(cell_type); + } else if var == "n_gex" && have_gex { + let mut n_gex = 0; + if bin_member(&gex_info.gex_cell_barcodes[li], bc) { + n_gex = 1; + } + row.push(format!("{n_gex}")); + } else if var == "mark" { + let mut mark = String::new(); + if ex.clones[bcl.2][0].marked { + mark = "x".to_string(); + } + row.push(mark); + } else if var == "entropy" && have_gex { + // NOTE DUPLICATION WITH CODE BELOW. + let mut gex_count = 0; + let p = bin_position(&gex_info.gex_barcodes[li], bc); + if p >= 0 { + let mut raw_count = 0; + if gex_info.gex_matrices[li].initialized() { + let row = gex_info.gex_matrices[li].row(p as usize); + for (f, n) in row { + if gex_info.is_gex[li][f] { + raw_count += n; + } + } + } else { + let l = bcl.2; + for j in 0..d_all[l].len() { + if gex_info.is_gex[li][ind_all[l][j] as usize] { + raw_count += d_all[l][j] as usize; + } + } + } + gex_count = raw_count; + } + let mut entropy = 0.0; + if p >= 0 { + if gex_info.gex_matrices[li].initialized() { + let row = gex_info.gex_matrices[li].row(p as usize); + for (f, n) in row { + if gex_info.is_gex[li][f] { + let q = n as f64 / gex_count as f64; + entropy -= q * q.log2(); + } + } + } else { + let l = bcl.2; + for j in 0..d_all[l].len() { + if gex_info.is_gex[li][ind_all[l][j] as usize] { + let n = d_all[l][j] as usize; + let q = n as f64 / gex_count as f64; + entropy -= q * q.log2(); + } + } + } + } + row.push(format!("{entropy:.2}")); + } else if have_gex { + // this calc isn't needed except in _% case below + 
// TODO: ELIMINATE UNNEEDED CALC + let mut gex_count = 0.0; + let p = bin_position(&gex_info.gex_barcodes[li], bc); + if p >= 0 { + let mut raw_count = 0 as f64; + if gex_info.gex_matrices[li].initialized() { + let row = gex_info.gex_matrices[li].row(p as usize); + for (f, n) in row { + if gex_info.is_gex[li][f] { + raw_count += n as f64; + } + } + } else { + let l = bcl.2; + for j in 0..d_all[l].len() { + if gex_info.is_gex[li][ind_all[l][j] as usize] { + raw_count += d_all[l][j] as f64; + } + } + } + if !ctl.gen_opt.full_counts { + gex_count = raw_count * gex_info.gex_mults[li]; + } else { + gex_count = raw_count; + } + } + if var == "gex" { + row.push(format!("{}", gex_count.round())); + } else { + let mut y = var.as_str(); + if y.contains(':') { + y = y.after(":"); + } + let y0 = y; + let suffixes = ["_min", "_max", "_μ", "_Σ", "_cell", "_%"]; + for &s in suffixes.iter() { + if y.ends_with(s) { + y = y.rev_before(s); + break; + } + } + let p = bin_position(&gex_info.gex_barcodes[li], bc); + let mut computed = false; + let mut count = 0.0; + let l = bcl.2; + if p >= 0 { + let ux = ctl.clono_print_opt.regex_match[li] + .get(&y.to_string()) + .cloned() + .unwrap_or_default(); + if !ux.is_empty() { + computed = true; + for fid in ux.iter() { + let counti = get_gex_matrix_entry( + ctl, gex_info, *fid, d_all, ind_all, li, l, p as usize, y, + ); + count += counti; + } + } else if let Some(&fid) = gex_info.feature_id[li].get(&y.to_string()) { + computed = true; + count = get_gex_matrix_entry( + ctl, gex_info, fid, d_all, ind_all, li, l, p as usize, y, + ); + } + } + if computed { + // note unneeded calculation above in certain cases + // TODO: ELIMINATE! 
+ if y0.ends_with("_min") + || y0.ends_with("_max") + || y0.ends_with("_μ") + || y0.ends_with("_Σ") + { + } else if y0.ends_with("_%") { + row.push(format!("{:.2}", (100.0 * count) / gex_count)); + } else { + row.push(format!("{}", count.round())); + } + } + } + } + if row.len() == nr { + row.push("".to_string()); + } + } + let mut ncall = 0; + for k in 0..cols { + ncall += rsi.cvars[k].len(); + } + let mut cx = vec!["".to_string(); ncall]; + let mut cp = 0; + for (m, cvars) in mat.iter().take(cols).zip(rsi.cvars.iter()) { + if let Some(m) = m[u] { + for (cvar, cxp) in cvars.iter().zip(cx.iter_mut().skip(cp)) { + if cvar == "v_name_orig" { + let v = &refdata.name[ex.clones[bcl.2][m].v_ref_id]; + *cxp = v.to_string(); + } else if cvar == "u" { + let numi = ex.clones[bcl.2][m].umi_count; + *cxp = format!("{numi}"); + } else if cvar == "r" { + let r = ex.clones[bcl.2][m].read_count; + *cxp = format!("{r}"); + } else if cvar == "nval" { + let mut n = 0; + if ex.clones[bcl.2][m].validated_umis.is_some() { + n = ex.clones[bcl.2][m].validated_umis.as_ref().unwrap().len(); + } + *cxp = format!("{n}"); + } else if cvar == "nnval" { + let mut n = 0; + if ex.clones[bcl.2][m].non_validated_umis.is_some() { + n = ex.clones[bcl.2][m] + .non_validated_umis + .as_ref() + .unwrap() + .len(); + } + *cxp = format!("{n}"); + } else if cvar == "nival" { + let mut n = 0; + if ex.clones[bcl.2][m].invalidated_umis.is_some() { + n = ex.clones[bcl.2][m].invalidated_umis.as_ref().unwrap().len(); + } + *cxp = format!("{n}"); + } else if cvar == "valumis" { + let mut n = Vec::::new(); + if ex.clones[bcl.2][m].validated_umis.is_some() { + n = ex.clones[bcl.2][m].non_validated_umis.clone().unwrap(); + } + *cxp = format!("{}", n.iter().format(",")); + } else if cvar == "valbcumis" { + let mut n = Vec::::new(); + if ex.clones[bcl.2][m].validated_umis.is_some() { + n = ex.clones[bcl.2][m] + .validated_umis + .as_ref() + .unwrap() + .iter() + .map(|ni| { + format!("{}{ni}", 
ex.clones[bcl.2][m].barcode.before("-")) + }) + .collect(); + } + *cxp = format!("{}", n.iter().format(",")); + } else if cvar == "nvalumis" { + let mut n = Vec::::new(); + if ex.clones[bcl.2][m].non_validated_umis.is_some() { + n = ex.clones[bcl.2][m].non_validated_umis.clone().unwrap(); + } + *cxp = format!("{}", n.iter().format(",")); + } else if cvar == "nvalbcumis" { + let mut n = Vec::::new(); + if ex.clones[bcl.2][m].non_validated_umis.is_some() { + n = ex.clones[bcl.2][m] + .non_validated_umis + .as_ref() + .unwrap() + .iter() + .map(|ni| { + format!("{}{ni}", ex.clones[bcl.2][m].barcode.before("-")) + }) + .collect(); + } + *cxp = format!("{}", n.iter().format(",")); + } else if cvar == "ivalumis" { + let mut n = Vec::::new(); + if ex.clones[bcl.2][m].invalidated_umis.is_some() { + n = ex.clones[bcl.2][m].invalidated_umis.clone().unwrap(); + } + *cxp = format!("{}", n.iter().format(",")); + } else if cvar == "ivalbcumis" { + let mut n = Vec::::new(); + if ex.clones[bcl.2][m].invalidated_umis.is_some() { + n = ex.clones[bcl.2][m] + .invalidated_umis + .as_ref() + .unwrap() + .iter() + .map(|ni| { + format!("{}{ni}", ex.clones[bcl.2][m].barcode.before("-")) + }) + .collect::>(); + } + *cxp = format!("{}", n.iter().format(",")); + } + } + } + cp += cvars.len(); + } + row.append(&mut cx); + subrows.push(row); + } + } + sr.push((row.to_vec(), subrows.to_vec(), varmat[u].clone(), u)); +} diff --git a/enclone_print/src/print_utils5.rs b/enclone_print/src/print_utils5.rs index 8f94f273b..2759a08e2 100644 --- a/enclone_print/src/print_utils5.rs +++ b/enclone_print/src/print_utils5.rs @@ -1,12 +1,16 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
-use enclone_core::defs::*; -use enclone_proto::types::*; -use itertools::*; -use std::cmp::max; +use crate::print_utils1::aa_classes; +use amino::codon_to_aa; +use ansi_escape::emit_end_escape; +use enclone_core::defs::{ColInfo, EncloneControl, ExactClonotype}; +use enclone_core::print_tools::emit_codon_color_escape; +use enclone_proto::types::DonorReferenceItem; +use itertools::Itertools; use std::collections::HashMap; -use vdj_ann::refx::*; -use vector_utils::*; +use string_utils::{strme, TextUtils}; +use vdj_ann::refx::RefData; +use vector_utils::{bin_member, meet_size, unique_sort, VecUtils}; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ @@ -15,64 +19,88 @@ use vector_utils::*; pub fn vars_and_shares( pass: usize, ctl: &EncloneControl, - exacts: &Vec, - exact_clonotypes: &Vec, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], rsi: &ColInfo, refdata: &RefData, - dref: &Vec, + dref: &[DonorReferenceItem], vars: &mut Vec>, vars_amino: &mut Vec>, shares_amino: &mut Vec>, - out_data: &mut Vec>, + ref_diff_pos: &mut Vec>>, + out_data: &mut [HashMap], ) { // Copied stuff. let pcols_sort = &ctl.parseable_opt.pcols_sort; let nexacts = exacts.len(); let cols = rsi.vids.len(); + *ref_diff_pos = vec![vec![Vec::::new(); nexacts]; cols]; // Go through the columns. - for cx in 0..cols { + for (cx, ((mat, (&vid, &vpid)), (ref_diff, (seqss, seqss_amino)))) in rsi + .mat + .iter() + .zip(rsi.vids.iter().zip(rsi.vpids.iter())) + .zip( + ref_diff_pos + .iter_mut() + .zip(rsi.seqss.iter().zip(rsi.seqss_amino.iter())), + ) + .take(cols) + .enumerate() + { // go through each column let (mut vref, mut jref) = (Vec::::new(), Vec::::new()); let mut vseq2 = Vec::::new(); - for u in 0..nexacts { - let m = rsi.mat[cx][u]; - if m.is_some() { - let m = m.unwrap(); + for (&exact, &m) in exacts.iter().zip(mat.iter()) { + if let Some(m) = m { // Reference assigned multiple times here, is wrong. 
// Also where using allelized reference, need to explain. // vref is supposed to be the donor reference, but seems like it isn't - vref = exact_clonotypes[exacts[u]].share[m].vs.to_ascii_vec(); - jref = exact_clonotypes[exacts[u]].share[m].js.to_ascii_vec(); + vref = exact_clonotypes[exact].share[m].vs.to_ascii_vec(); + jref = exact_clonotypes[exact].share[m].js.to_ascii_vec(); } - let vseq1 = refdata.refs[rsi.vids[cx]].to_ascii_vec(); - if rsi.vpids[cx].is_some() { - vseq2 = dref[rsi.vpids[cx].unwrap()].nt_sequence.clone(); + let vseq1 = refdata.refs[vid].to_ascii_vec(); + if let Some(vpid) = vpid { + vseq2 = dref[vpid].nt_sequence.clone(); } else { - vseq2 = vseq1.clone(); + vseq2 = vseq1; } } - let mut n = 0; - for z in 0..rsi.seqss[cx].len() { - n = max(n, rsi.seqss[cx][z].len()); + + for (&exact, (&m, ref_diff)) in exacts.iter().zip(mat.iter().zip(ref_diff.iter_mut())) { + if let Some(m) = m { + let seq = &exact_clonotypes[exact].share[m].seq_del_amino; + let n = seq.len(); + for (p, &b) in seq.iter().enumerate() { + if (p < vref.len() - ctl.heur.ref_v_trim && b != vref[p]) + || (p >= n - (jref.len() - ctl.heur.ref_j_trim) + && b != jref[jref.len() - (n - p)]) + { + ref_diff.push(p); + } + } + } } + + let n = seqss.iter().map(std::vec::Vec::len).max().unwrap_or(0); let (mut v, mut s) = (Vec::::new(), Vec::::new()); let (mut v_amino, mut s_amino) = (Vec::::new(), Vec::::new()); for p in 0..n { let mut bases = Vec::::new(); let mut bases_amino = Vec::::new(); - for s in 0..rsi.seqss[cx].len() { + for (seqss, seqss_amino) in seqss.iter().zip(seqss_amino.iter()) { // ◼ Hideous workaround for the problem that a productive pair // ◼ could have two contigs with identical CDR3_AA sequences. // (but also because we now have some null seq entries?) 
- if p >= rsi.seqss[cx][s].len() { + if p >= seqss.len() { // if pass == 2 { fwriteln!( &mut mlog, "DIFFERENT LENGTHS" ); } continue; } - bases.push(rsi.seqss[cx][s][p]); - bases_amino.push(rsi.seqss_amino[cx][s][p]); + bases.push(seqss[p]); + bases_amino.push(seqss_amino[p]); } unique_sort(&mut bases); unique_sort(&mut bases_amino); @@ -100,23 +128,20 @@ pub fn vars_and_shares( } } } - let mut va = Vec::::new(); - for x in v_amino.iter() { - va.push(*x / 3); - } + let mut va = v_amino.iter().map(|&x| x / 3).collect::>(); unique_sort(&mut va); - let mut sa = Vec::::new(); - for x in s_amino.iter() { - sa.push(*x / 3); - } + let mut sa = s_amino.iter().map(|&x| x / 3).collect::>(); unique_sort(&mut sa); - for u in 0..nexacts { + for u in out_data.iter_mut().take(nexacts) { macro_rules! speakc { ($u:expr, $col:expr, $var:expr, $val:expr) => { - if ctl.parseable_opt.pout.len() > 0 && $col + 1 <= ctl.parseable_opt.pchains { + if ctl.parseable_opt.pout.len() > 0 + && (ctl.parseable_opt.pchains == "max" + || cx < ctl.parseable_opt.pchains.force_usize()) + { let varc = format!("{}{}", $var, $col + 1); if pass == 2 && (pcols_sort.is_empty() || bin_member(&pcols_sort, &varc)) { - out_data[$u].insert(varc, $val); + $u.insert(varc, $val); } } }; @@ -156,13 +181,11 @@ pub fn vars_and_shares( pub fn delete_weaks( ctl: &EncloneControl, - exacts: &Vec, - mults: &Vec, // should eliminate - exact_clonotypes: &Vec, - total_cells: usize, - mat: &Vec>>, - vars: &Vec>, - bads: &mut Vec, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + mat: &[Vec>], + refdata: &RefData, + bads: &mut [bool], ) { // Mark for deletion exact subclonotypes that fail the MIN_CELLS_EXACT or MIN_CHAINS_EXACT // or CHAINS_EXACT tests. 
@@ -182,112 +205,301 @@ pub fn delete_weaks( } } - // Find and mark for deletion exact subclonotypes having a variant base in V..J that, - // accounting for all the cells in all the exact subclonotypes, never occurs as Q60 - // doesn't occur as Q40 twice, and disagrees with the reference. + // Mark for deletion exact subclonotypes, based on CONST_IGH and CONST_IGKL + // (see enclone help special). - let cols = mat.len(); - // (column, pos, base, qual, row) - if ctl.clono_filt_opt.qual_filter { - let mut vquals = Vec::<(usize, usize, u8, u8, usize)>::new(); + if ctl.clono_filt_opt.const_igh.is_some() { for u in 0..nexacts { - let clonotype_id = exacts[u]; - let ex = &exact_clonotypes[clonotype_id]; - for col in 0..cols { - let m = mat[col][u]; - if m.is_some() { - let m = m.unwrap(); - if ex.share[m].annv.len() > 1 { - continue; - } - let n = ex.share[m].seq_del.len(); - let vref = &exact_clonotypes[exacts[u]].share[m].vs.to_ascii_vec(); - let jref = &exact_clonotypes[exacts[u]].share[m].js.to_ascii_vec(); - for z in 0..vars[col].len() { - let p = vars[col][z]; - let b = ex.share[m].seq_del[p]; - let mut refdiff = false; - if p < vref.len() - ctl.heur.ref_v_trim && b != vref[p] { - refdiff = true; - } - if p >= n - (jref.len() - ctl.heur.ref_j_trim) - && b != jref[jref.len() - (n - p)] - { - refdiff = true; - } - if refdiff { - for j in 0..ex.clones.len() { - let qual = ex.clones[j][m].quals[p]; - vquals.push((col, p, b, qual, u)); - } - } + let mut ok = false; + let ex = &exact_clonotypes[exacts[u]]; + for m in 0..ex.share.len() { + if ex.share[m].left && ex.share[m].c_ref_id.is_some() { + let id = ex.share[m].c_ref_id.unwrap(); + if ctl + .clono_filt_opt + .const_igh + .as_ref() + .unwrap() + .is_match(&refdata.name[id]) + { + ok = true; } } } - } - vquals.sort(); - let mut j = 0; - while j < vquals.len() { - let mut k = j + 1; - while k < vquals.len() { - if vquals[k].0 != vquals[j].0 - || vquals[k].1 != vquals[j].1 - || vquals[k].2 != vquals[j].2 - { - break; - 
} - k += 1; + if !ok { + bads[u] = true; } - let mut q60 = false; - let mut q40 = 0; - for m in j..k { - if vquals[m].3 >= 60 { - q60 = true; - } else if vquals[m].3 >= 40 { - q40 += 1; + } + } + if ctl.clono_filt_opt.const_igkl.is_some() { + for u in 0..nexacts { + let mut ok = false; + let ex = &exact_clonotypes[exacts[u]]; + for m in 0..ex.share.len() { + if !ex.share[m].left && ex.share[m].c_ref_id.is_some() { + let id = ex.share[m].c_ref_id.unwrap(); + if ctl + .clono_filt_opt + .const_igkl + .as_ref() + .unwrap() + .is_match(&refdata.name[id]) + { + ok = true; + } } } - if !q60 && q40 < 2 { - let u = vquals[j].4; + if !ok { bads[u] = true; } - j = k; } } - // Based on the number of cells in each column, decide which exact subclonotypes - // look like junk. Preliminary heuristic. + // Remove onesies that do not have an exact match. - if cols > 2 && ctl.clono_filt_opt.weak_chains { - let mut ncells = vec![0; cols]; - let mut col_entries = vec![Vec::::new(); cols]; - for u in 0..nexacts { - for col in 0..cols { - let mid = mat[col][u]; - if mid.is_some() { - col_entries[col].push(u); - let clonotype_id = exacts[u]; - ncells[col] += exact_clonotypes[clonotype_id].clones.len(); + let cols = mat.len(); + if cols > 1 { + for u1 in 0..nexacts { + let ex1 = &exact_clonotypes[exacts[u1]]; + if ex1.share.len() == 1 && !bads[u1] { + let mut perf = false; + 'u2: for u2 in 0..nexacts { + let ex2 = &exact_clonotypes[exacts[u2]]; + if ex2.share.len() > 1 && !bads[u2] { + for i in 0..ex2.share.len() { + if ex1.share[0].seq == ex2.share[i].seq { + perf = true; + break 'u2; + } + } + } + } + if !perf { + bads[u1] = true; } } } - let total_cells: usize = mults.iter().sum(); - for j in 0..cols { - if ncells[j] <= 5 && 8 * ncells[j] < total_cells { - for d in col_entries[j].iter() { - bads[*d] = true; + } +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Build the diff row. This also has the variable names! 
+ +pub fn build_diff_row( + ctl: &EncloneControl, + rsi: &ColInfo, + rows: &mut Vec>, + drows: &mut Vec>, + row1: &[String], + nexacts: usize, + field_types: &[Vec], + show_aa: &[Vec], +) { + let mat = &rsi.mat; + let cols = mat.len(); + let diff_pos = rows.len(); + if !drows.is_empty() { + let mut row = row1.to_owned(); + for col in 0..cols { + for m in 0..rsi.cvars[col].len() { + if rsi.cvars[col][m] == *"amino" { + let mut xdots = String::new(); + for k in 0..show_aa[col].len() { + if k > 0 + && field_types[col][k] != field_types[col][k - 1] + && !ctl.gen_opt.nospaces + { + xdots.push(' '); + } + let p = show_aa[col][k]; + let q = 3 * p; + let leader = q < rsi.fr1_starts[col]; + let mut cdr = false; + if rsi.cdr1_starts[col].is_some() + && rsi.cdr2_starts[col].is_some() + && rsi.fr2_starts[col].is_some() + && rsi.fr3_starts[col].is_some() + && q >= rsi.cdr1_starts[col].unwrap() + && q < rsi.fr2_starts[col].unwrap() + { + cdr = true; + } + + if rsi.cdr2_starts[col].is_some() + && rsi.fr3_starts[col].is_some() + && q >= rsi.cdr2_starts[col].unwrap() + && q < rsi.fr3_starts[col].unwrap() + { + cdr = true; + } + if q >= rsi.cdr3_starts[col] + && q < rsi.cdr3_starts[col] + 3 * rsi.cdr3_lens[col] + { + cdr = true; + } + let mut codons = Vec::>::new(); + for u in 0..nexacts { + if mat[col][u].is_some() { + let seq_amino = rsi.seqss_amino[col][u].clone(); + if 3 * p + 3 <= seq_amino.len() { + codons.push(seq_amino[3 * p..3 * p + 3].to_vec()); + } + } + } + unique_sort(&mut codons); + if codons.len() > 1 { + if cdr { + if ctl.gen_opt.diff_style == *"C1" { + xdots.push('C'); + } else if ctl.gen_opt.diff_style == *"C2" { + xdots.push(''); + xdots.push('['); + xdots.push('0'); + xdots.push('1'); + xdots.push('m'); + xdots.push(''); + xdots.push('['); + xdots.push('3'); + xdots.push('1'); + xdots.push('m'); + xdots.push('◼'); + xdots.push(''); + xdots.push('['); + xdots.push('0'); + xdots.push('1'); + xdots.push('m'); + xdots.push(''); + xdots.push('['); + xdots.push('3'); + 
xdots.push('0'); + xdots.push('m'); + } else { + xdots.push('x'); + } + } else if !leader { + if ctl.gen_opt.diff_style == *"C1" { + xdots.push('F'); + } else if ctl.gen_opt.diff_style == *"C2" { + xdots.push('▮'); + } else { + xdots.push('x'); + } + } else if ctl.gen_opt.diff_style == *"C1" { + xdots.push('L'); + } else if ctl.gen_opt.diff_style == *"C2" { + xdots.push('▮'); + } else { + xdots.push('x'); + } + } else { + xdots.push('.'); + } + } + row.push(xdots); + } else { + let mut v = rsi.cvars[col][m].clone(); + if v.contains(':') { + v = v.before(":").to_string(); + } + row.push(v); } } + for r in row.iter_mut() { + *r = format!("{}", *r); + } } + rows.push(row); + } else { + rows[diff_pos - 1][..row1.len()].clone_from_slice(row1); } +} - // Delete onesie clonotypes having one exact clonotype that's a light chain and very - // low frequency. +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - if cols == 1 && nexacts == 1 && ctl.clono_filt_opt.weak_onesies { - let ex = &exact_clonotypes[exacts[0]]; - if !ex.share[0].left && ex.ncells() > 1 && ex.ncells() * 1000 < total_cells { - bads[0] = true; +pub fn insert_consensus_row( + ctl: &EncloneControl, + rsi: &ColInfo, + nexacts: usize, + field_types: &[Vec], + show_aa: &[Vec], + row1: &[String], + rows: &mut Vec>, +) { + let mat = &rsi.mat; + if ctl.clono_print_opt.conx || ctl.clono_print_opt.conp { + let style = if ctl.clono_print_opt.conx { "x" } else { "p" }; + let mut row = vec!["consensus".to_string()]; + for _ in 1..row1.len() { + row.push("\\ext".to_string()); + } + let classes = aa_classes(); + for col in 0..rsi.mat.len() { + for m in 0..rsi.cvars[col].len() { + if rsi.cvars[col][m] == *"amino" { + let mut xdots = String::new(); + for k in 0..show_aa[col].len() { + if k > 0 + && field_types[col][k] != field_types[col][k - 1] + && !ctl.gen_opt.nospaces + { + xdots.push(' '); + } + let p = show_aa[col][k]; + let mut codons = Vec::>::new(); + for u in 
0..nexacts { + if mat[col][u].is_some() { + let seq_amino = rsi.seqss_amino[col][u].clone(); + if 3 * p + 3 <= seq_amino.len() { + codons.push(seq_amino[3 * p..3 * p + 3].to_vec()); + } + } + } + unique_sort(&mut codons); + let mut gap = false; + for x in codons.iter() { + if x.contains(&b'-') { + gap = true; + } + } + if codons.solo() && gap { + xdots += "g"; + } else if codons.solo() { + let codon = &codons[0]; + let aa = codon_to_aa(codon); + let mut log = Vec::::new(); + emit_codon_color_escape(codon, &mut log); + log.push(aa); + emit_end_escape(&mut log); + xdots += strme(&log); + } else if gap { + xdots += "X"; + } else { + let mut aas = Vec::::new(); + for x in codons.iter() { + aas.push(codon_to_aa(x)); + } + unique_sort(&mut aas); + if aas.solo() { + xdots.push(aas[0] as char); + } else if style == "x" { + xdots += "X"; + } else { + for m in classes.iter() { + if meet_size(&aas, m.1) == aas.len() { + xdots.push(m.0); + break; + } + } + } + } + } + row.push(xdots); + } else { + row.push("".to_string()); + } + } } + rows.push(row); } } diff --git a/enclone_print/src/proc_cvar_auto.rs b/enclone_print/src/proc_cvar_auto.rs new file mode 100644 index 000000000..6cabbb453 --- /dev/null +++ b/enclone_print/src/proc_cvar_auto.rs @@ -0,0 +1,1565 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. +// This file is auto-generated by the crate enclone_vars, please do not edit. 
+ +use crate::print_utils1::{ + cdr3_aa_con, get_cdr1, get_cdr2, get_cdr3, get_fwr1, get_fwr2, get_fwr3, + test_internal_error_seq, +}; +use crate::print_utils3::comp_edit; +use amino::{aa_seq, codon_to_aa}; +use enclone_core::align_to_vdj_ref::{align_to_vdj_ref, cigar}; +use enclone_core::defs::{AlleleData, ColInfo, EncloneControl, ExactClonotype, POUT_SEP}; +use enclone_core::median::rounded_median; +use enclone_core::opt_d::opt_d; +use enclone_proto::types::DonorReferenceItem; +use itertools::Itertools; +use stats_utils::percent_ratio; +use std::cmp::min; +use std::collections::HashMap; +use std::fmt::Write as _; +use string_utils::{stringme, TextUtils}; +use vdj_ann::refx::RefData; +use vector_utils::{bin_member, next_diff12_4, unique_sort}; + +pub fn proc_cvar_auto( + j: usize, + pass: usize, + var: &str, + ex: &ExactClonotype, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + mid: usize, + col: usize, + u: usize, + rsi: &ColInfo, + refdata: &RefData, + dref: &[DonorReferenceItem], + ctl: &EncloneControl, + extra_args: &[String], + pcols_sort: &[String], + cx: &mut [Vec], + varmat: &[Vec>], + out_data: &mut [HashMap], + stats: &mut Vec<(String, Vec)>, + allele_data: &AlleleData, +) -> Result { + let mut vname = var; + if var.contains(':') { + vname = var.after(":"); + } + let cvars = &ctl.clono_print_opt.cvars; + let mut abbrc = format!("{var}{}", col + 1); + if var.contains(':') { + abbrc = var.before(":").to_string(); + } + let val = if false { + (String::new(), Vec::::new(), String::new()) + } else if vname == "aa%" { + let xm = &ex.share[mid]; + let mut diffs = 0; + let mut denom = 0; + let aa_seq = &xm.aa_mod_indel; + let mut vref = refdata.refs[xm.v_ref_id].to_ascii_vec(); + if xm.v_ref_id_donor_alt_id.is_some() { + vref = dref[xm.v_ref_id_donor.unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[xm.j_ref_id].to_ascii_vec(); + let z = 3 * aa_seq.len() + 1; + for p in 0..aa_seq.len() { + if aa_seq[p] == b'-' { + diffs += 1; + denom += 
1; + continue; + } + if 3 * p + 3 <= vref.len() - ctl.heur.ref_v_trim { + denom += 1; + if aa_seq[p] != codon_to_aa(&vref[3 * p..3 * p + 3]) { + diffs += 1; + } + } + if 3 * p > z - (jref.len() - ctl.heur.ref_j_trim) + 3 { + denom += 1; + if aa_seq[p] + != codon_to_aa(&jref[jref.len() - (z - 3 * p)..jref.len() - (z - 3 * p) + 3]) + { + diffs += 1; + } + } + } + + ( + format!("{:.1}", percent_ratio(denom - diffs, denom)), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "allele" { + let mut allele = 0; + if ex.share[mid].v_ref_id_donor_alt_id.is_some() { + allele = ex.share[mid].v_ref_id_donor_alt_id.unwrap() + 1; + } + + (format!("{allele}"), Vec::new(), "clono".to_string()) + } else if vname == "allele_d" { + let mut refs = Vec::>::new(); + let alt_refs = &allele_data.alt_refs; + refs.push(refdata.refs[ex.share[mid].v_ref_id].to_ascii_vec()); + for ai in alt_refs { + // The following does not work correctly if an exact subclonotype contains cells + // from more than one donor. But that is extremely rare. 
+ if ex.clones[0][0].donor_index.is_some() + && ai.0 == ex.clones[0][0].donor_index.unwrap() + && ai.1 == ex.share[mid].v_ref_id + { + refs.push(ai.2.to_ascii_vec()); + } + } + let m = refs.iter().map(Vec::len).min().unwrap(); + let mut ps = Vec::::new(); + let mut variant = Vec::>::new(); + for p in 0..m { + let bases = refs.iter().map(|r| r[p]).collect::>(); + let mut bases_sorted = bases.clone(); + unique_sort(&mut bases_sorted); + if bases_sorted.len() > 1 { + ps.push(p); + variant.push(bases); + } + } + let mut xs = Vec::::new(); + for i in 0..refs.len() { + let mut x = String::with_capacity(ps.len()); + for vj in variant.iter().take(ps.len()) { + x.push(vj[i] as char); + } + xs.push(x); + } + let mut me = String::new(); + for &psj in &ps { + let base = ex.share[mid].seq_del_amino[psj]; + me.push(base as char); + } + let mut details = String::new(); + if !ps.is_empty() { + details = format!("{me} : {}", xs.iter().format(",")); + } + + (details, Vec::new(), "clono".to_string()) + } else if vname == "cdiff" { + let cstart = ex.share[mid].j_stop; + let clen = ex.share[mid].full_seq.len() - cstart; + let cid = ex.share[mid].c_ref_id; + let mut cdiff = String::new(); + let mut ndiffs = 0; + if let Some(cid) = cid { + let r = &refdata.refs[cid]; + let mut extra = 0; + if clen > r.len() { + extra = clen - r.len(); + } + for i in 0..min(clen, r.len()) { + let tb = ex.share[mid].full_seq[cstart + i]; + let rb = r.to_ascii_vec()[i]; + if tb != rb { + ndiffs += 1; + if ndiffs <= 5 { + write!(cdiff, "{i}{}", tb as char).unwrap(); + } + } + } + if ndiffs > 5 { + cdiff += "..."; + } + if extra > 0 { + write!(cdiff, "+{extra}").unwrap(); + } + } else if clen > 0 { + cdiff = format!("+{clen}"); + } + + (cdiff, Vec::new(), "exact".to_string()) + } else if vname == "cdr3_aa_conp" { + ( + cdr3_aa_con("p", col, exacts, exact_clonotypes, rsi), + Vec::new(), + "clono".to_string(), + ) + } else if vname == "cdr3_aa_conx" { + ( + cdr3_aa_con("x", col, exacts, exact_clonotypes, rsi), 
+ Vec::new(), + "clono".to_string(), + ) + } else if vname == "cdr3_start" { + ( + ex.share[mid].cdr3_start.to_string(), + Vec::new(), + "exact".to_string(), + ) + } else if vname.starts_with("cdr") + && vname.ends_with("_aa_ref") + && vname.between2("cdr", "_aa_ref").parse::().is_ok() + && vname.between2("cdr", "_aa_ref").force_i64() >= 1 + && vname.between2("cdr", "_aa_ref").force_i64() <= 2 + { + let arg1 = vname.between2("cdr", "_aa_ref").force_i64(); + let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + if arg1 == 1 { + if x.cdr1_start.is_some() + && x.fr2_start.is_some() + && x.cdr1_start.unwrap() <= x.fr2_start.unwrap() + { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.cdr1_start.unwrap()..x.fr2_start.unwrap()] + .to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + } else if x.cdr2_start.is_some() + && x.fr3_start.is_some() + && x.cdr2_start.unwrap() <= x.fr3_start.unwrap() + { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.cdr2_start.unwrap()..x.fr3_start.unwrap()] + .to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + + (y, Vec::new(), "clono".to_string()) + } else if vname.starts_with("cdr") + && vname.ends_with("_dna_ref") + && vname.between2("cdr", "_dna_ref").parse::().is_ok() + && vname.between2("cdr", "_dna_ref").force_i64() >= 1 + && vname.between2("cdr", "_dna_ref").force_i64() <= 2 + { + let arg1 = vname.between2("cdr", "_dna_ref").force_i64(); + let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + if arg1 == 1 { + if x.cdr1_start.is_some() + && x.fr2_start.is_some() + && x.cdr1_start.unwrap() <= x.fr2_start.unwrap() + { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.cdr1_start.unwrap()..x.fr2_start.unwrap()] + .to_vec(); + y = stringme(&dna); + } + } else if x.cdr2_start.is_some() + && x.fr3_start.is_some() + && x.cdr2_start.unwrap() <= x.fr3_start.unwrap() + { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.cdr2_start.unwrap()..x.fr3_start.unwrap()] + .to_vec(); + y = stringme(&dna); + } + 
+ (y, Vec::new(), "clono".to_string()) + } else if vname.starts_with("cdr") + && vname.ends_with("_aa") + && vname.between2("cdr", "_aa").parse::().is_ok() + && vname.between2("cdr", "_aa").force_i64() >= 1 + && vname.between2("cdr", "_aa").force_i64() <= 3 + { + let arg1 = vname.between2("cdr", "_aa").force_i64(); + let x = &ex.share[mid]; + let y = if arg1 == 1 { + get_cdr1(x, 0, 0).map(|c| stringme(&aa_seq(c.as_bytes(), 0))) + } else if arg1 == 2 { + get_cdr2(x, 0, 0).map(|c| stringme(&aa_seq(c.as_bytes(), 0))) + } else { + Some(x.cdr3_aa.clone()) + } + .unwrap_or_else(|| "unknown".to_string()); + + (y, Vec::new(), "exact".to_string()) + } else if vname.starts_with("cdr") + && vname.ends_with("_aa_north") + && vname.between2("cdr", "_aa_north").parse::().is_ok() + && vname.between2("cdr", "_aa_north").force_i64() >= 1 + && vname.between2("cdr", "_aa_north").force_i64() <= 3 + { + let arg1 = vname.between2("cdr", "_aa_north").force_i64(); + let x = &ex.share[mid]; + let c = if arg1 == 1 { + let (left, right) = if x.left { (3, 3) } else { (0, 0) }; + get_cdr1(x, left, right) + } else if arg1 == 2 { + let (left, right) = if x.left { (2, 3) } else { (1, 0) }; + get_cdr2(x, left, right) + } else { + get_cdr3(x, -1, -1) + }; + let y = if let Some(c) = c { + stringme(&aa_seq(c.as_bytes(), 0)) + } else { + "unknown".to_string() + }; + + (y, Vec::new(), "exact".to_string()) + } else if vname.starts_with("cdr") + && vname.after("cdr").contains("_aa_") + && vname.after("cdr").after("_aa_").contains('_') + && vname + .after("cdr") + .after("_aa_") + .after("_") + .ends_with("_ext") + && vname.between("cdr", "_aa_").parse::().is_ok() + && vname.between("cdr", "_aa_").force_i64() >= 1 + && vname.between("cdr", "_aa_").force_i64() <= 3 + && vname + .after("cdr") + .after("_aa_") + .between("_", "_ext") + .parse::() + .is_ok() + { + let arg1 = vname.between("cdr", "_aa_").force_i64(); + let arg2 = vname.after("cdr").between("_aa_", "_").force_i64(); + let arg3 = vname + 
.after("cdr") + .after("_aa_") + .between("_", "_ext") + .force_i64(); + let (left, right) = (arg2 * 3, arg3 * 3); + let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let mut dna = Vec::::new(); + if arg1 == 1 { + if x.cdr1_start.is_some() + && x.fr2_start.is_some() + && x.cdr1_start.unwrap() <= x.fr2_start.unwrap() + && x.cdr1_start.unwrap() as i64 - left >= 0 + && x.cdr1_start.unwrap() as i64 - left < x.seq_del_amino.len() as i64 + && x.fr2_start.unwrap() as i64 + right > 0 + && x.fr2_start.unwrap() as i64 + right <= x.seq_del_amino.len() as i64 + { + for p in x.cdr1_start.unwrap() as i64 - left..x.fr2_start.unwrap() as i64 + right { + let p = p as usize; + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa)?; + y = stringme(&aa_seq(&dna, 0)); + } + } else if arg1 == 2 { + if x.cdr2_start.is_some() + && x.fr3_start.is_some() + && x.cdr2_start.unwrap() <= x.fr3_start.unwrap() + && x.cdr2_start.unwrap() as i64 - left >= 0 + && x.cdr2_start.unwrap() as i64 - left < x.seq_del_amino.len() as i64 + && x.fr3_start.unwrap() as i64 + right > 0 + && x.fr3_start.unwrap() as i64 + right <= x.seq_del_amino.len() as i64 + { + for p in x.cdr2_start.unwrap() as i64 - left..x.fr3_start.unwrap() as i64 + right { + let p = p as usize; + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa)?; + y = stringme(&aa_seq(&dna, 0)); + } + } else if x.cdr3_start as i64 - left >= 0 + && x.cdr3_start as i64 - left < x.seq_del_amino.len() as i64 + && x.cdr3_start as i64 + 3 * x.cdr3_aa.len() as i64 + right > 0 + && x.cdr3_start as i64 + 3 * x.cdr3_aa.len() as i64 + right + <= x.seq_del_amino.len() as i64 + { + for p 
in + x.cdr3_start as i64 - left..x.cdr3_start as i64 + 3 * x.cdr3_aa.len() as i64 + right + { + let p = p as usize; + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa)?; + y = stringme(&aa_seq(&dna, 0)); + } + + (y, Vec::new(), "exact".to_string()) + } else if vname.starts_with("cdr") + && vname.ends_with("_dna") + && vname.between2("cdr", "_dna").parse::().is_ok() + && vname.between2("cdr", "_dna").force_i64() >= 1 + && vname.between2("cdr", "_dna").force_i64() <= 3 + { + let arg1 = vname.between2("cdr", "_dna").force_i64(); + let x = &ex.share[mid]; + let y = if arg1 == 1 { + get_cdr1(x, 0, 0) + } else if arg1 == 2 { + get_cdr2(x, 0, 0) + } else { + Some(x.cdr3_dna.clone()) + } + .unwrap_or_else(|| "unknown".to_string()); + + (y, Vec::new(), "exact".to_string()) + } else if vname.starts_with("cdr") + && vname.ends_with("_len") + && vname.between2("cdr", "_len").parse::().is_ok() + && vname.between2("cdr", "_len").force_i64() >= 1 + && vname.between2("cdr", "_len").force_i64() <= 3 + { + let arg1 = vname.between2("cdr", "_len").force_i64(); + let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let c; + if arg1 == 1 { + c = get_cdr1(x, 0, 0); + } else if arg1 == 2 { + c = get_cdr2(x, 0, 0); + } else { + c = Some(x.cdr3_dna.clone()); + } + if c.is_some() { + y = format!("{}", c.unwrap().len() / 3); + } + + (y, Vec::new(), "exact".to_string()) + } else if vname == "cigar" { + let vref = refdata.refs[rsi.vids[col]].to_ascii_vec(); + let mut dref = Vec::::new(); + if rsi.dids[col].is_some() { + dref = refdata.refs[rsi.dids[col].unwrap()].to_ascii_vec(); + } + let d2ref = Vec::::new(); + let jref = refdata.refs[rsi.jids[col]].to_ascii_vec(); + let td = &ex.share[mid]; + let tig = &td.seq; + let ops = align_to_vdj_ref( + tig, + &vref, + &dref, + &d2ref, + &jref, + "", // 
drefname + ex.share[mid].left, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + ) + .0; + + ( + cigar(&ops, 0, tig.len(), tig.len()), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "clen" { + ( + format!("{}", ex.share[mid].full_seq.len() - ex.share[mid].j_stop), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "comp" { + let (comp, _edit) = comp_edit(ex, mid, col, refdata, dref, rsi); + + (format!("{comp}"), Vec::new(), "exact".to_string()) + } else if vname == "const" { + let mut constx = vec![if let Some(cid) = ex.share[mid].c_ref_id { + refdata.name[cid].clone() + } else { + "?".to_string() + }]; + unique_sort(&mut constx); + // This is overcomplicated because there is now at most one + // const entry per exact subclonotype. + + ( + format!("{}", constx.iter().format(",")), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "const_id" { + let const_id = if let Some(c_ref_id) = ex.share[mid].c_ref_id { + format!("{}", refdata.id[c_ref_id]) + } else { + String::new() + }; + + (const_id, Vec::new(), "exact".to_string()) + } else if vname == "d1_name" { + let mut opt_name = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col], + ); + let mut opt = Vec::new(); + if !ds.is_empty() { + opt = ds[0].clone(); + } + if opt.is_empty() { + opt_name = "none".to_string(); + } else { + for (i, o) in opt.into_iter().enumerate() { + if i > 0 { + opt_name += ":"; + } + opt_name += 
&refdata.name[o]; + } + } + } + + (opt_name, Vec::new(), "exact".to_string()) + } else if vname == "d1_score" { + let mut score = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col], + ); + let mut delta = 0.0; + if scores.len() > 1 { + delta = scores[0] - scores[1]; + } + score = format!("{delta:.1}") + } + + (score, Vec::new(), "exact".to_string()) + } else if vname == "d2_name" { + let mut opt2_name = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col], + ); + let mut opt2 = Vec::new(); + if ds.len() > 1 { + opt2 = ds[1].clone(); + } + if opt2.is_empty() { + opt2_name = "none".to_string(); + } else { + for (i, o) in opt2.into_iter().enumerate() { + if i > 0 { + opt2_name += ":"; + } + opt2_name += &refdata.name[o]; + } + } + } + + (opt2_name, Vec::new(), "exact".to_string()) + } else if vname == "d2_score" { + let mut scorex = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + 
&ex.share[mid].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col], + ); + let mut score = 0.0; + if scores.len() > 1 { + score = scores[1]; + } + scorex = format!("{score:.1}") + } + + (scorex, Vec::new(), "exact".to_string()) + } else if vname == "d_delta" { + let mut del = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col], + ); + let mut delta = 0.0; + if scores.len() > 1 { + delta = scores[0] - scores[1]; + } + del = format!("{delta:.1}") + } + + (del, Vec::new(), "exact".to_string()) + } else if vname == "d_donor" { + let vid = ex.share[mid].v_ref_id; + let mut vref = refdata.refs[vid].to_ascii_vec(); + if rsi.vpids[col].is_some() { + vref = dref[rsi.vpids[col].unwrap()].nt_sequence.clone(); + } + let jid = ex.share[mid].j_ref_id; + let jref = &refdata.refs[jid].to_ascii_vec(); + let tig = &ex.share[mid].seq_del; + let n = tig.len(); + let mut diffs = 0; + for p in 0..n { + if tig[p] == b'-' { + continue; + } + if p < vref.len() - ctl.heur.ref_v_trim && tig[p] != vref[p] + || p >= n - (jref.len() - ctl.heur.ref_j_trim) + && tig[p] != jref[jref.len() - (n - p)] + { + diffs += 1; + } + } + + (format!("{diffs}"), Vec::new(), "exact".to_string()) + } else if vname == "d_frame" { + let mut d_frame = String::new(); + if ex.share[mid].d_start.is_some() { + d_frame = format!( + "{}", + (ex.share[mid].d_start.unwrap() - ex.share[mid].v_start) % 3 + 
); + } + + (d_frame, Vec::new(), "exact".to_string()) + } else if vname == "d_id" { + let did = if rsi.dids[col].is_some() { + format!("{}", refdata.id[rsi.dids[col].unwrap()]) + } else { + String::new() + }; + + (did, Vec::new(), "clono".to_string()) + } else if vname == "d_name" { + let dname = if rsi.dids[col].is_some() { + refdata.name[rsi.dids[col].unwrap()].clone() + } else { + String::new() + }; + + (dname, Vec::new(), "clono".to_string()) + } else if vname == "d_start" { + let mut d_start = String::new(); + if ex.share[mid].d_start.is_some() { + d_start = format!("{}", ex.share[mid].d_start.unwrap()); + } + + (d_start, Vec::new(), "exact".to_string()) + } else if vname == "d_univ" { + let vid = ex.share[mid].v_ref_id; + let vref = &refdata.refs[vid].to_ascii_vec(); + let jid = ex.share[mid].j_ref_id; + let jref = &refdata.refs[jid].to_ascii_vec(); + let tig = &ex.share[mid].seq_del; + let n = tig.len(); + let mut diffs = 0; + for p in 0..n { + if tig[p] == b'-' { + continue; + } + if p < vref.len() - ctl.heur.ref_v_trim && tig[p] != vref[p] + || p >= n - (jref.len() - ctl.heur.ref_j_trim) + && tig[p] != jref[jref.len() - (n - p)] + { + diffs += 1; + } + } + + (format!("{diffs}"), Vec::new(), "exact".to_string()) + } else if vname == "d_Δ" { + let mut del = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, + dref, + &mut scores, + &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col], + ); + let mut delta = 0.0; + if scores.len() > 1 { + delta = scores[0] - scores[1]; + } + del = format!("{delta:.1}") + } + + (del, Vec::new(), "exact".to_string()) + } else if vname == "dna%" { + let xm = 
&ex.share[mid]; + let mut diffs = 0; + let mut denom = 0; + let seq = &xm.seq_del_amino; + let mut vref = refdata.refs[xm.v_ref_id].to_ascii_vec(); + if xm.v_ref_id_donor_alt_id.is_some() { + vref = dref[xm.v_ref_id_donor.unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[xm.j_ref_id].to_ascii_vec(); + let z = seq.len(); + for p in 0..z { + let b = seq[p]; + if b == b'-' { + diffs += 1; + denom += 1; + continue; + } + if p < vref.len() - ctl.heur.ref_v_trim { + denom += 1; + if b != vref[p] { + diffs += 1; + } + } + if p >= z - (jref.len() - ctl.heur.ref_j_trim) { + denom += 1; + if b != jref[jref.len() - (z - p)] { + diffs += 1; + } + } + } + + ( + format!("{:.1}", percent_ratio(denom - diffs, denom)), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "edit" { + let (_comp, edit) = comp_edit(ex, mid, col, refdata, dref, rsi); + + (edit, Vec::new(), "exact".to_string()) + } else if vname.starts_with("fwr") + && vname.ends_with("_aa") + && vname.between2("fwr", "_aa").parse::().is_ok() + && vname.between2("fwr", "_aa").force_i64() >= 1 + && vname.between2("fwr", "_aa").force_i64() <= 4 + { + let arg1 = vname.between2("fwr", "_aa").force_i64(); + let x = &ex.share[mid]; + let c = if arg1 == 1 { + get_fwr1(x) + } else if arg1 == 2 { + get_fwr2(x) + } else if arg1 == 3 { + get_fwr3(x) + } else { + let x = &ex.share[mid]; + let start = rsi.cdr3_starts[col] + 3 * rsi.cdr3_lens[col]; + let stop = rsi.seq_del_lens[col]; + let dna = &x.seq_del_amino[start..stop]; + Some(stringme(dna)) + }; + let y = if let Some(c) = c { + stringme(&aa_seq(c.as_bytes(), 0)) + } else { + "unknown".to_string() + }; + + (y, Vec::new(), "exact".to_string()) + } else if vname.starts_with("fwr") + && vname.ends_with("_aa_ref") + && vname.between2("fwr", "_aa_ref").parse::().is_ok() + && vname.between2("fwr", "_aa_ref").force_i64() >= 1 + && vname.between2("fwr", "_aa_ref").force_i64() <= 4 + { + let arg1 = vname.between2("fwr", "_aa_ref").force_i64(); + let x = 
&ex.share[mid]; + let mut y = "unknown".to_string(); + if arg1 == 1 { + if x.cdr1_start.is_some() && x.fr1_start <= x.cdr1_start.unwrap() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.fr1_start..x.cdr1_start.unwrap()] + .to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + } else if arg1 == 2 { + if x.fr2_start.unwrap() <= x.cdr2_start.unwrap() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.fr2_start.unwrap()..x.cdr2_start.unwrap()] + .to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + } else if arg1 == 3 { + if x.fr3_start.is_some() && x.fr3_start.unwrap() <= x.cdr3_start - x.ins_len() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec(); + if x.cdr3_start <= dna.len() { + let dna = dna[x.fr3_start.unwrap()..x.cdr3_start - x.ins_len()].to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + } + } else { + let heavy = refdata.rtype[x.j_ref_id] == 0; + let aa_len = if heavy { 10 } else { 9 }; + let dna = refdata.refs[x.j_ref_id].to_ascii_vec(); + let dna = &dna[dna.len() - 1 - 3 * aa_len..dna.len() - 1]; + y = stringme(&aa_seq(dna, 0)); + } + + (y, Vec::new(), "clono".to_string()) + } else if vname.starts_with("fwr") + && vname.ends_with("_dna") + && vname.between2("fwr", "_dna").parse::().is_ok() + && vname.between2("fwr", "_dna").force_i64() >= 1 + && vname.between2("fwr", "_dna").force_i64() <= 4 + { + let arg1 = vname.between2("fwr", "_dna").force_i64(); + let x = &ex.share[mid]; + let c = if arg1 == 1 { + get_fwr1(x) + } else if arg1 == 2 { + get_fwr2(x) + } else if arg1 == 3 { + get_fwr3(x) + } else { + let start = rsi.cdr3_starts[col] + 3 * rsi.cdr3_lens[col]; + let stop = rsi.seq_del_lens[col]; + let dna = &x.seq_del_amino[start..stop]; + Some(stringme(dna)) + }; + let y = if let Some(c) = c { + c + } else { + "unknown".to_string() + }; + + (y, Vec::new(), "exact".to_string()) + } else if vname.starts_with("fwr") + && vname.ends_with("_dna_ref") + && vname.between2("fwr", "_dna_ref").parse::().is_ok() + && vname.between2("fwr", 
"_dna_ref").force_i64() >= 1 + && vname.between2("fwr", "_dna_ref").force_i64() <= 4 + { + let arg1 = vname.between2("fwr", "_dna_ref").force_i64(); + let x = &ex.share[mid]; + let y = if arg1 == 1 { + x.cdr1_start.and_then(|cdr1_start| { + if x.fr1_start <= cdr1_start { + let dna = &refdata.refs[x.v_ref_id].to_ascii_vec()[x.fr1_start..cdr1_start]; + Some(stringme(dna)) + } else { + None + } + }) + } else if arg1 == 2 { + if x.fr2_start.unwrap() <= x.cdr2_start.unwrap() { + let dna = &refdata.refs[x.v_ref_id].to_ascii_vec() + [x.fr2_start.unwrap()..x.cdr2_start.unwrap()]; + Some(stringme(dna)) + } else { + None + } + } else if arg1 == 3 { + x.fr3_start.and_then(|fr3_start| { + if fr3_start <= x.cdr3_start - x.ins_len() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec(); + if x.cdr3_start <= dna.len() { + let dna = &dna[x.fr3_start.unwrap()..x.cdr3_start - x.ins_len()]; + Some(stringme(dna)) + } else { + None + } + } else { + None + } + }) + } else { + let heavy = refdata.rtype[x.j_ref_id] == 0; + let aa_len = if heavy { 10 } else { 9 }; + let dna = refdata.refs[x.j_ref_id].to_ascii_vec(); + Some(stringme(&dna[dna.len() - 1 - 3 * aa_len..dna.len() - 1])) + } + .unwrap_or_else(|| "unknown".to_string()); + + (y, Vec::new(), "clono".to_string()) + } else if vname.starts_with("fwr") + && vname.ends_with("_len") + && vname.between2("fwr", "_len").parse::().is_ok() + && vname.between2("fwr", "_len").force_i64() >= 1 + && vname.between2("fwr", "_len").force_i64() <= 4 + { + let arg1 = vname.between2("fwr", "_len").force_i64(); + let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let c; + if arg1 == 1 { + c = get_fwr1(x); + } else if arg1 == 2 { + c = get_fwr2(x); + } else if arg1 == 3 { + c = get_fwr3(x); + } else { + let x = &ex.share[mid]; + let start = rsi.cdr3_starts[col] + 3 * rsi.cdr3_lens[col]; + let stop = rsi.seq_del_lens[col]; + let dna = &x.seq_del_amino[start..stop]; + c = Some(stringme(dna)); + } + if c.is_some() { + y = format!("{}", 
c.unwrap().len() / 3); + } + + (y, Vec::new(), "exact".to_string()) + } else if vname == "ivalbcumis" { + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = String::new(); + if ex.clones[k][mid].invalidated_umis.is_some() { + n = ex.clones[k][mid] + .invalidated_umis + .as_ref() + .unwrap() + .iter() + .map(|bc| format!("{}{bc}", ex.clones[k][mid].barcode.before("-"))) + .format(",") + .to_string(); + } + vals.push(n.to_string()); + } + + (String::new(), vals, "cell".to_string()) + } else if vname == "ivalumis" { + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let n = ex.clones[k][mid] + .invalidated_umis + .as_ref() + .map(|umi| umi.iter().format(",").to_string()) + .unwrap_or_default(); + vals.push(n.to_string()); + } + + (String::new(), vals, "cell".to_string()) + } else if vname == "j_id" { + ( + format!("{}", refdata.id[rsi.jids[col]]), + Vec::new(), + "clono".to_string(), + ) + } else if vname == "j_name" { + ( + refdata.name[rsi.jids[col]].clone(), + Vec::new(), + "clono".to_string(), + ) + } else if vname.starts_with("ndiff") + && vname.ends_with("vj") + && vname.between2("ndiff", "vj").parse::().is_ok() + && vname.between2("ndiff", "vj").force_i64() >= 1 + { + let arg1 = vname.between2("ndiff", "vj").force_i64(); + let mat = &rsi.mat; + let u0 = (arg1 - 1) as usize; + let nd = if u0 < exacts.len() && mat[col][u0].is_some() && mat[col][u].is_some() { + let m0 = mat[col][u0].unwrap(); + let m = mat[col][u].unwrap(); + let mut ndiff = 0; + let ex0 = &exact_clonotypes[exacts[u0]]; + let ex = &exact_clonotypes[exacts[u]]; + for p in 0..ex0.share[m0].seq_del.len() { + if ex0.share[m0].seq_del[p] != ex.share[m].seq_del[p] { + ndiff += 1; + } + } + format!("{ndiff}") + } else { + "_".to_string() + }; + + (nd, Vec::new(), "exact".to_string()) + } else if vname == "nival" { + let mut valsx = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = 0; + if ex.clones[k][mid].invalidated_umis.is_some() { + n = 
ex.clones[k][mid].invalidated_umis.as_ref().unwrap().len(); + } + valsx.push(format!("{n}")); + } + + (String::new(), valsx, "cell".to_string()) + } else if vname == "nnval" { + let mut valsx = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = 0; + if ex.clones[k][mid].non_validated_umis.is_some() { + n = ex.clones[k][mid].non_validated_umis.as_ref().unwrap().len(); + } + valsx.push(format!("{n}")); + } + + (String::new(), valsx, "cell".to_string()) + } else if vname == "notes" { + ( + ex.share[mid].vs_notesx.clone(), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "nval" { + let mut valsx = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = 0; + if ex.clones[k][mid].validated_umis.is_some() { + n = ex.clones[k][mid].validated_umis.as_ref().unwrap().len(); + } + valsx.push(format!("{n}")); + } + + (String::new(), valsx, "cell".to_string()) + } else if vname == "nvalbcumis" { + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let n = ex.clones[k][mid] + .non_validated_umis + .as_ref() + .map(|umi| { + umi.iter() + .map(|bc| format!("{}{bc}", ex.clones[k][mid].barcode.before("-"))) + .format(",") + .to_string() + }) + .unwrap_or_default(); + vals.push(n.to_string()); + } + + (String::new(), vals, "cell".to_string()) + } else if vname == "nvalumis" { + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = String::new(); + if ex.clones[k][mid].non_validated_umis.is_some() { + n = format!( + "{}", + ex.clones[k][mid] + .non_validated_umis + .as_ref() + .unwrap() + .iter() + .format(",") + ); + } + vals.push(n.to_string()); + } + + (String::new(), vals, "cell".to_string()) + } else if vname.starts_with('q') + && vname.ends_with('_') + && vname.between2("q", "_").parse::().is_ok() + && vname.between2("q", "_").force_i64() >= 0 + { + let arg1 = vname.between2("q", "_").force_i64(); + let mut val = String::new(); + if (arg1 as usize) < ex.share[mid].seq.len() { + let mut quals = Vec::::new(); + for j in 0..ex.clones.len() 
{ + quals.push(ex.clones[j][mid].quals[arg1 as usize]); + } + val = format!("{}", quals.iter().format(",")); + } + + (val, Vec::new(), "exact".to_string()) + } else if vname == "r" { + let mut nreads = Vec::::new(); + let mut nreads_sorted = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(format!("{}", ex.clones[j][mid].read_count)); + nreads_sorted.push(ex.clones[j][mid].read_count); + } + nreads_sorted.sort_unstable(); + + ( + format!("{}", rounded_median(&nreads_sorted)), + nreads, + "cell-exact".to_string(), + ) + } else if vname == "r_cell" { + let mut nreads = Vec::::new(); + let mut nreads_sorted = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(format!("{}", ex.clones[j][mid].read_count)); + nreads_sorted.push(ex.clones[j][mid].read_count); + } + nreads_sorted.sort_unstable(); + + let _exact = format!("{}", rounded_median(&nreads_sorted)); + (String::new(), nreads, "cell-exact".to_string()) + } else if vname == "r_max" { + let mut nreads = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + + ( + format!("{}", *nreads.iter().max().unwrap()), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "r_mean" { + let mut nreads = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + let rtot: usize = nreads.iter().sum(); + let r_mean = (rtot as f64 / nreads.len() as f64).round() as usize; + + (format!("{r_mean}"), Vec::new(), "exact".to_string()) + } else if vname == "r_min" { + let mut nreads = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + + ( + format!("{}", *nreads.iter().min().unwrap()), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "r_sum" || vname == "r_Σ" { + let mut nreads = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + let rtot: usize = nreads.iter().sum(); + + (format!("{rtot}"), Vec::new(), "exact".to_string()) + } 
else if vname == "r_μ" { + let mut nreads = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + let rtot: usize = nreads.iter().sum(); + let r_mean = (rtot as f64 / nreads.len() as f64).round() as usize; + + (format!("{r_mean}"), Vec::new(), "exact".to_string()) + } else if vname == "u" { + let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + let median_numis = rounded_median(&numis); + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + vals.push(format!("{}", ex.clones[k][mid].umi_count)); + } + + (format!("{median_numis}"), vals, "cell-exact".to_string()) + } else if vname == "u_cell" { + let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + let median_numis = rounded_median(&numis); + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + vals.push(format!("{}", ex.clones[k][mid].umi_count)); + } + + let _exact = format!("{median_numis}"); + (String::new(), vals, "cell-exact".to_string()) + } else if vname == "u_max" { + let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + + ( + format!("{}", numis.iter().max().unwrap()), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "u_mean" { + let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + let utot: usize = numis.iter().sum(); + let u_mean = (utot as f64 / numis.len() as f64).round() as usize; + + (format!("{u_mean}"), Vec::new(), "exact".to_string()) + } else if vname == "u_min" { + let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + + ( + format!("{}", numis.iter().min().unwrap()), + Vec::new(), + "exact".to_string(), + ) + } else if vname 
== "u_sum" || vname == "u_Σ" { + let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + let utot: usize = numis.iter().sum(); + + (format!("{utot}"), Vec::new(), "exact".to_string()) + } else if vname == "u_μ" { + let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + let utot: usize = numis.iter().sum(); + let u_mean = (utot as f64 / numis.len() as f64).round() as usize; + + (format!("{u_mean}"), Vec::new(), "exact".to_string()) + } else if vname == "udiff" { + let ulen = ex.share[mid].v_start; + let uid = ex.share[mid].u_ref_id; + let mut udiff = String::new(); + let mut ndiffs = 0; + if let Some(uid) = uid { + let r = &refdata.refs[uid]; + let mut extra = 0; + if ulen > r.len() { + extra = ulen - r.len(); + } + for i in 0..ulen { + let mut rpos = i; + if ulen < r.len() { + rpos += r.len() - ulen; + } else { + if i + r.len() < ulen { + continue; + } + rpos -= ulen - r.len(); + } + let tb = ex.share[mid].full_seq[i]; + let rb = r.to_ascii_vec()[rpos]; + if tb != rb { + ndiffs += 1; + if ndiffs <= 5 { + write!(udiff, "{rpos}{}", tb as char).unwrap(); + } + } + } + if ndiffs > 5 { + udiff += "..."; + } + if extra > 0 { + write!(udiff, "+{extra}").unwrap(); + } + } else if ulen > 0 { + udiff = format!("+{ulen}"); + } + + (udiff, Vec::new(), "exact".to_string()) + } else if vname == "ulen" { + ( + format!("{}", ex.share[mid].v_start), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "utr_id" { + let mut u = String::new(); + let uid = ex.share[mid].u_ref_id; + if let Some(uid) = uid { + u = format!("{}", refdata.id[uid]); + } + + (u, Vec::new(), "exact".to_string()) + } else if vname == "utr_name" { + let mut u = String::new(); + let uid = ex.share[mid].u_ref_id; + if let Some(uid) = uid { + u = refdata.name[uid].clone(); + } + + (u, Vec::new(), "exact".to_string()) + } else if vname == "v_id" { + ( + format!("{}", 
refdata.id[rsi.vids[col]]), + Vec::new(), + "clono".to_string(), + ) + } else if vname == "v_name" { + ( + refdata.name[rsi.vids[col]].clone(), + Vec::new(), + "clono".to_string(), + ) + } else if vname == "v_name_orig" { + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + vals.push(refdata.name[ex.clones[k][mid].v_ref_id].clone()); + } + let mut vals_uniq = vals.clone(); + unique_sort(&mut vals_uniq); + + ( + format!("{}", vals_uniq.iter().format(",")), + vals, + "cell-exact".to_string(), + ) + } else if vname == "v_name_orig_cell" { + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + vals.push(refdata.name[ex.clones[k][mid].v_ref_id].clone()); + } + let mut vals_uniq = vals.clone(); + unique_sort(&mut vals_uniq); + + let _exact = format!("{}", vals_uniq.iter().format(",")); + (String::new(), vals, "cell-exact".to_string()) + } else if vname == "v_start" { + ( + format!("{}", ex.share[mid].v_start), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "valbcumis" { + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let n = ex.clones[k][mid] + .validated_umis + .as_ref() + .map(|umi| { + umi.iter() + .map(|bc| format!("{}{bc}", ex.clones[k][mid].barcode.before("-"))) + .format(",") + .to_string() + }) + .unwrap_or_default(); + vals.push(n); + } + + (String::new(), vals, "cell".to_string()) + } else if vname == "valumis" { + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let n = ex.clones[k][mid] + .validated_umis + .as_ref() + .map(|umi| umi.iter().format(",").to_string()) + .unwrap_or_default(); + vals.push(n.to_string()); + } + + (String::new(), vals, "cell".to_string()) + } else if vname == "var" { + (stringme(&varmat[u][col]), Vec::new(), "exact".to_string()) + } else if vname == "vjlen" { + ( + format!("{}", ex.share[mid].j_stop - ex.share[mid].v_start), + Vec::new(), + "exact".to_string(), + ) + } else if vname == "white" { + let mut bch = vec![Vec::<(usize, String, usize, usize)>::new(); 2]; + for l in 
0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = &ex.clones[l][0].barcode; + let mut numi = 0; + for j in 0..ex.clones[l].len() { + numi += ex.clones[l][j].umi_count; + } + bch[0].push((li, bc[0..8].to_string(), numi, l)); + bch[1].push((li, bc[8..16].to_string(), numi, l)); + } + let mut junk = 0; + let mut bad = vec![false; ex.clones.len()]; + for l in 0..2 { + bch[l].sort(); + let mut m = 0; + while m < bch[l].len() { + let n = next_diff12_4(&bch[l], m as i32) as usize; + for u1 in m..n { + for u2 in m..n { + if bch[l][u1].2 >= 10 * bch[l][u2].2 { + bad[bch[l][u2].3] = true; + } + } + } + m = n; + } + } + for b in bad { + if b { + junk += 1; + } + } + let junk_rate = percent_ratio(junk, ex.clones.len()); + + (format!("{junk_rate:.1}"), Vec::new(), "exact".to_string()) + } else { + ( + "$UNDEFINED".to_string(), + Vec::::new(), + String::new(), + ) + }; + if val.0 == "$UNDEFINED" { + Ok(false) + } else { + let (exact, cell, _level) = &val; + let mut varc = format!("{var}{}", col + 1); + if !exact.is_empty() { + if j < rsi.cvars[col].len() && cvars.contains(&var.to_string()) { + cx[col][j] = exact.clone(); + } + if pass == 2 + && ((!ctl.parseable_opt.pout.is_empty() + && (ctl.parseable_opt.pchains == "max" + || col < ctl.parseable_opt.pchains.force_usize())) + || !extra_args.is_empty()) + { + abbrc = abbrc.replace("_Σ", "_sum"); + abbrc = abbrc.replace("_μ", "_mean"); + varc = varc.replace("_Σ", "_sum"); + varc = varc.replace("_μ", "_mean"); + + // Strip escape character sequences from exact. Can happen in notes, + // maybe other places. + + let val_clean = if exact.contains('') { + let mut val_clean = String::with_capacity(exact.len()); + let mut escaped = false; + for ch in exact.chars() { + if ch == '' { + escaped = true; + } + if escaped { + if ch == 'm' { + escaped = false; + } + continue; + } + val_clean.push(ch); + } + val_clean + } else { + exact.clone() + }; + + // Proceed. 
+ + // let varc = format!("{}{}", v, col + 1); + if pcols_sort.is_empty() + || bin_member(pcols_sort, &varc) + || bin_member(extra_args, &varc) + { + out_data[u].insert(abbrc.clone(), val_clean); + } + } + if val.1.is_empty() { + stats.push((abbrc, vec![exact.to_string(); ex.ncells()])); + } else { + stats.push((abbrc, cell.to_vec())); + } + } else if !cell.is_empty() + && pass == 2 + && ((ctl.parseable_opt.pchains == "max" + || col < ctl.parseable_opt.pchains.force_usize()) + || !extra_args.is_empty()) + && (pcols_sort.is_empty() || bin_member(pcols_sort, &varc)) + { + let vals = format!("{}", cell.iter().format(POUT_SEP)); + out_data[u].insert(abbrc, vals); + } + Ok(true) + } +} diff --git a/enclone_print/src/proc_lvar2.rs b/enclone_print/src/proc_lvar2.rs new file mode 100644 index 000000000..5a3510693 --- /dev/null +++ b/enclone_print/src/proc_lvar2.rs @@ -0,0 +1,209 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// This file contains the single function proc_lvar, +// plus a small helper function get_gex_matrix_entry. + +use crate::print_utils4::get_gex_matrix_entry; +use enclone_core::defs::{EncloneControl, ExactClonotype, GexInfo, POUT_SEP}; +use enclone_core::median::rounded_median; +use itertools::Itertools; +use std::collections::HashMap; +use string_utils::TextUtils; +use vector_utils::{bin_member, bin_position}; + +pub fn proc_lvar2( + i: usize, + x: &str, + pass: usize, + u: usize, + ctl: &EncloneControl, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + gex_info: &GexInfo, + row: &mut Vec, + out_data: &mut [HashMap], + d_all: &mut [Vec], + ind_all: &mut [Vec], + stats: &mut Vec<(String, Vec)>, + lvars: &[String], + _alt_bcs: &[&str], + gex_mean: f64, + gex_sum: f64, + gex_fcounts_unsorted: &[f64], + extra_args: &[String], +) -> bool { + let clonotype_id = exacts[u]; + let ex = &exact_clonotypes[clonotype_id]; + let verbose = ctl.gen_opt.row_fill_verbose; + + // Set up speak macro. + + macro_rules! 
speak { + ($u:expr, $var:expr, $val:expr) => { + if pass == 2 && (ctl.parseable_opt.pout.len() > 0 || extra_args.len() > 0) { + let mut v = $var.to_string(); + if ctl.parseable_opt.pcols.is_empty() + || bin_member(&ctl.parseable_opt.pcols_sortx, &v) + || bin_member(&extra_args, &v) + { + v = v.replace("_Σ", "_sum"); + v = v.replace("_μ", "_mean"); + out_data[$u].insert(v, $val); + } + } + }; + } + + // Set up lead variable macros. This is the mechanism for generating + // both human-readable and parseable output for lead variables. + + macro_rules! lvar { + ($i: expr, $var:expr, $val:expr) => { + if verbose { + eprint!("lvar {} ==> {}; ", $var, $val); + eprintln!("$i = {}, lvars.len() = {}", $i, lvars.len()); + } + if $i < lvars.len() { + row.push($val) + } + if pass == 2 { + speak!(u, $var.to_string(), $val); + } + }; + } + macro_rules! lvar_stats1 { + ($i: expr, $var:expr, $val:expr) => { + if verbose { + eprint!("lvar {} ==> {}; ", $var, $val); + eprintln!("$i = {}, lvars.len() = {}", $i, lvars.len()); + } + if $i < lvars.len() { + row.push($val) + } + if pass == 2 { + speak!(u, $var.to_string(), $val); + } + stats.push(($var.to_string(), vec![$val; ex.ncells()])); + }; + } + + // Proceed. 
+ + let (mut counts_sub, mut fcounts_sub) = (Vec::::new(), Vec::::new()); + let xorig = x; + let (mut x, mut y) = (x, x); + if x.contains(':') { + x = x.before(":"); + } + if y.contains(':') { + y = y.after(":"); + } + let y0 = y; + for _ in 1..=2 { + let suffixes = ["_min", "_max", "_μ", "_Σ", "_cell", "_%"]; + for s in suffixes.iter() { + if y.ends_with(s) { + y = y.rev_before(s); + break; + } + } + } + let mut computed = false; + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = ex.clones[l][0].barcode.clone(); + let ux = ctl.clono_print_opt.regex_match[li] + .get(&y.to_string()) + .cloned() + .unwrap_or_default(); + if !ux.is_empty() { + let p = bin_position(&gex_info.gex_barcodes[li], &bc); + if p >= 0 { + computed = true; + let mut raw_count = 0.0; + for fid in ux.iter() { + let raw_counti = get_gex_matrix_entry( + ctl, gex_info, *fid, d_all, ind_all, li, l, p as usize, y, + ); + raw_count += raw_counti; + } + counts_sub.push(raw_count.round() as usize); + fcounts_sub.push(raw_count); + } + } else if gex_info.feature_id[li].contains_key(&y.to_string()) { + computed = true; + let p = bin_position(&gex_info.gex_barcodes[li], &bc); + if p >= 0 { + let fid = gex_info.feature_id[li][&y.to_string()]; + let raw_count = + get_gex_matrix_entry(ctl, gex_info, fid, d_all, ind_all, li, l, p as usize, y); + counts_sub.push(raw_count.round() as usize); + fcounts_sub.push(raw_count); + } + } else { + counts_sub.push(0); + fcounts_sub.push(0.0); + } + } + if computed { + let mut f = Vec::::new(); + for x in fcounts_sub.iter() { + f.push(format!("{x}")); + } + if !y0.ends_with("_%") { + stats.push((x.to_string(), f)); + } else { + let mut f = Vec::::new(); + for &fc in &fcounts_sub { + let mut x = 0.0; + if gex_mean > 0.0 { + x = 100.0 * fc / gex_mean; + } + f.push(format!("{x}")); + } + stats.push((x.to_string(), f)); + } + let mut counts_sub_sorted = counts_sub.clone(); + counts_sub_sorted.sort_unstable(); + let sum = 
fcounts_sub.iter().sum::(); + let mean = sum / counts_sub.len() as f64; + + if xorig.ends_with("_%_cell") { + if pass == 2 { + let mut c = Vec::::new(); + for j in 0..counts_sub.len() { + c.push(format!( + "{:.2}", + 100.0 * counts_sub[j] as f64 / gex_fcounts_unsorted[j] + )); + } + let val = format!("{}", c.iter().format(POUT_SEP)); + speak!(u, x, val); + } + } else if xorig.ends_with("_cell") { + if pass == 2 { + let val = format!("{}", counts_sub.iter().format(POUT_SEP)); + speak!(u, x, val); + } + } else if y0.ends_with("_min") { + lvar![i, x, format!("{}", counts_sub_sorted[0])]; + } else if y0.ends_with("_max") { + lvar![i, x, format!("{}", counts_sub_sorted[counts_sub.len() - 1])]; + } else if y0.ends_with("_μ") { + lvar![i, x, format!("{}", mean.round())]; + } else if y0.ends_with("_Σ") { + lvar![i, x, format!("{}", sum.round())]; + } else if y0.ends_with("_%") { + lvar![i, x, format!("{:.2}", (100.0 * sum) / gex_sum)]; + } else { + let mut median = 0; + if !counts_sub_sorted.is_empty() { + median = rounded_median(&counts_sub_sorted); + } + lvar![i, x, format!("{median}")]; + } + } else if i < lvars.len() { + lvar_stats1![i, x, "".to_string()]; + } + true +} diff --git a/enclone_print/src/proc_lvar_auto.rs b/enclone_print/src/proc_lvar_auto.rs new file mode 100644 index 000000000..deb734757 --- /dev/null +++ b/enclone_print/src/proc_lvar_auto.rs @@ -0,0 +1,1682 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. +// This file is auto-generated by the crate enclone_vars, please do not edit. 
+ +use amino::{aa_seq, codon_to_aa}; +use enclone_core::barcode_fate::BarcodeFate; +use enclone_core::defs::{ColInfo, EncloneControl, ExactClonotype, GexInfo, POUT_SEP}; +use enclone_core::median::{median_f64, rounded_median}; +use enclone_proto::types::DonorReferenceItem; +use hdf5::Reader; +use itertools::Itertools; +use ndarray::s; +use regex::Regex; +use std::cmp::{max, min}; +use std::collections::HashMap; +use string_utils::{abbrev_list, strme, TextUtils}; +use vdj_ann::refx::RefData; +use vector_utils::{bin_member, bin_position, unique_sort}; + +pub fn proc_lvar_auto( + i: usize, + pass: usize, + var: &str, + exacts: &[usize], + exact_clonotypes: &[ExactClonotype], + u: usize, + rsi: &ColInfo, + refdata: &RefData, + ctl: &EncloneControl, + extra_args: &[String], + out_data: &mut [HashMap], + stats: &mut Vec<(String, Vec)>, + lvars: &[String], + row: &mut Vec, + fate: &[HashMap], + dref: &[DonorReferenceItem], + varmat: &[Vec>], + fp: &[Vec], + n_vdj_gex: &[usize], + vdj_cells: &[Vec], + gex_info: &GexInfo, + groups: &HashMap>, + mults: &[usize], + nd_fields: &[String], + gex_counts_unsorted: &[usize], + gex_fcounts_unsorted: &[f64], + n_gexs: &[usize], + d_readers: &[Option], + ind_readers: &[Option], + h5_data: &[(usize, Vec, Vec)], + alt_bcs: &[&str], +) -> Result { + let clonotype_id = exacts[u]; + let ex = &exact_clonotypes[clonotype_id]; + let mat = &rsi.mat; + let cols = varmat[0].len(); + let verbose = ctl.gen_opt.row_fill_verbose; + let (abbr, vname) = var.split_once(':').unwrap_or((var, var)); + + macro_rules! 
speak { + ($u:expr, $var:expr, $val:expr) => { + if pass == 2 && (!ctl.parseable_opt.pout.is_empty() || !extra_args.is_empty()) { + let mut v = $var.to_string(); + v = v.replace("_Σ", "_sum"); + v = v.replace("_μ", "_mean"); + if ctl.parseable_opt.pcols.is_empty() + || bin_member(&ctl.parseable_opt.pcols_sortx, &v) + || bin_member(&extra_args, &v) + { + out_data[$u].insert(v, $val); + } + } + }; + } + + let val = if false { + (String::new(), Vec::::new(), "") + } else if bin_member(alt_bcs, &var) { + let mut r = Vec::::new(); + for clone in &ex.clones { + let li = clone[0].dataset_index; + let bc = clone[0].barcode.clone(); + let mut val = String::new(); + let alt = &ctl.origin_info.alt_bc_fields[li]; + for aj in alt { + if aj.0 == *var { + if let Some(v) = aj.1.get(&bc) { + val = v.clone(); + } + } + } + r.push(val); + } + + (String::new(), r, "cell") + } else if bin_member(&ctl.gen_opt.info_fields, &var.to_string()) { + let mut val = String::new(); + for q in 0..ctl.gen_opt.info_fields.len() { + if *var == ctl.gen_opt.info_fields[q] + && ex.share.len() == 2 + && ex.share[0].left != ex.share[1].left + { + let mut tag = String::new(); + for j in 0..ex.share.len() { + if ex.share[j].left { + tag += strme(&ex.share[j].seq); + } + } + tag += "_"; + for j in 0..ex.share.len() { + if !ex.share[j].left { + tag += strme(&ex.share[j].seq); + } + } + if ctl.gen_opt.info_data.contains_key(&tag) { + val = ctl.gen_opt.info_data[&tag][q].clone(); + } + } + } + + (val, Vec::new(), "exact") + } else if vname == "clonotype_ncells" { + let mut n = 0; + for u in exacts.iter() { + n += exact_clonotypes[*u].ncells(); + } + + (format!("{n}"), Vec::new(), "clono") + } else if vname == "clust" { + let mut clust = Vec::::new(); + for j in 0..ex.clones.len() { + let mut cid = 0; + let bc = &ex.clones[j][0].barcode; + let li = ex.clones[j][0].dataset_index; + if gex_info.cluster[li].contains_key(&bc.clone()) { + cid = gex_info.cluster[li][&bc.clone()]; + } + clust.push(cid); + } + let mut 
clustf = Vec::::new(); + for x in clust.iter() { + clustf.push(format!("{x}")); + } + clust.sort_unstable(); + + (abbrev_list(&clust), clustf, "cell-exect") + } else if vname.starts_with("count_") + && vname.after("count_").ends_with("") + && !vname.between2("count_", "").contains('_') + && Regex::new(vname.between2("count_", "")).is_ok() + { + let reg = Regex::new(vname.between2("count_", "")).unwrap(); + let mut n = 0; + for j in 0..ex.share.len() { + let aa = aa_seq(&ex.share[j].seq, 0); // seems inefficient + n += reg.find_iter(strme(&aa)).count(); + } + + ( + format!("{n}"), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname.starts_with("count_") + && vname.after("count_").ends_with("_cell") + && !vname.between2("count_", "_cell").contains('_') + && Regex::new(vname.between2("count_", "_cell")).is_ok() + { + let reg = Regex::new(vname.between2("count_", "_cell")).unwrap(); + let mut n = 0; + for j in 0..ex.share.len() { + let aa = aa_seq(&ex.share[j].seq, 0); // seems inefficient + n += reg.find_iter(strme(&aa)).count(); + } + + let _exact = format!("{n}"); + ( + String::new(), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname.starts_with("count_cdr_") + && vname.after("count_cdr_").ends_with("") + && !vname.between2("count_cdr_", "").contains('_') + && Regex::new(vname.between2("count_cdr_", "")).is_ok() + { + let reg = Regex::new(vname.between2("count_cdr_", "")).unwrap(); + let mut n = 0; + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() && ex.share[j].fr2_start.is_some() { + let cdr1 = ex.share[j].cdr1_start.unwrap(); + let fwr2 = ex.share[j].fr2_start.unwrap(); + if cdr1 < fwr2 { + let aa = aa_seq(&ex.share[j].seq[cdr1..fwr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + if ex.share[j].cdr2_start.is_some() && ex.share[j].fr3_start.is_some() { + let cdr2 = ex.share[j].cdr2_start.unwrap(); + let fwr3 = ex.share[j].fr3_start.unwrap(); + if cdr2 < fwr3 { + let aa = 
aa_seq(&ex.share[j].seq[cdr2..fwr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + let cdr3 = ex.share[j].cdr3_start; + let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0); + n += reg.find_iter(strme(&aa)).count(); + } + + ( + format!("{n}"), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname.starts_with("count_cdr_") + && vname.after("count_cdr_").ends_with("_cell") + && !vname.between2("count_cdr_", "_cell").contains('_') + && Regex::new(vname.between2("count_cdr_", "_cell")).is_ok() + { + let reg = Regex::new(vname.between2("count_cdr_", "_cell")).unwrap(); + let mut n = 0; + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() && ex.share[j].fr2_start.is_some() { + let cdr1 = ex.share[j].cdr1_start.unwrap(); + let fwr2 = ex.share[j].fr2_start.unwrap(); + if cdr1 < fwr2 { + let aa = aa_seq(&ex.share[j].seq[cdr1..fwr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + if ex.share[j].cdr2_start.is_some() && ex.share[j].fr3_start.is_some() { + let cdr2 = ex.share[j].cdr2_start.unwrap(); + let fwr3 = ex.share[j].fr3_start.unwrap(); + if cdr2 < fwr3 { + let aa = aa_seq(&ex.share[j].seq[cdr2..fwr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + let cdr3 = ex.share[j].cdr3_start; + let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0); + n += reg.find_iter(strme(&aa)).count(); + } + + let _exact = format!("{n}"); + ( + String::new(), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname.starts_with("count_cdr") + && vname.ends_with("") + && vname.between2("count_cdr", "").contains('_') + && vname.between("count_cdr", "_").parse::().is_ok() + && vname.between("count_cdr", "_").force_i64() >= 1 + && vname.between("count_cdr", "_").force_i64() <= 3 + && !vname.after("count_cdr").between2("_", "").contains('_') + && Regex::new(vname.between2("_", "")).is_ok() + { + let arg1 = 
vname.between("count_cdr", "_").force_i64(); + let reg = Regex::new(vname.after("count_cdr").between2("_", "")).unwrap(); + let mut n = 0; + if arg1 == 1 { + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() && ex.share[j].fr2_start.is_some() { + let cdr1 = ex.share[j].cdr1_start.unwrap(); + let fwr2 = ex.share[j].fr2_start.unwrap(); + if cdr1 < fwr2 { + let aa = aa_seq(&ex.share[j].seq[cdr1..fwr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else if arg1 == 2 { + for j in 0..ex.share.len() { + if ex.share[j].cdr2_start.is_some() && ex.share[j].fr3_start.is_some() { + let cdr2 = ex.share[j].cdr2_start.unwrap(); + let fwr3 = ex.share[j].fr3_start.unwrap(); + if cdr2 < fwr3 { + let aa = aa_seq(&ex.share[j].seq[cdr2..fwr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else { + for j in 0..ex.share.len() { + let cdr3 = ex.share[j].cdr3_start; + let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + + ( + format!("{n}"), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname.starts_with("count_cdr") + && vname.ends_with("_cell") + && vname.between2("count_cdr", "_cell").contains('_') + && vname.between("count_cdr", "_").parse::().is_ok() + && vname.between("count_cdr", "_").force_i64() >= 1 + && vname.between("count_cdr", "_").force_i64() <= 3 + && !vname + .after("count_cdr") + .between2("_", "_cell") + .contains('_') + && Regex::new(vname.between2("_", "_cell")).is_ok() + { + let arg1 = vname.between("count_cdr", "_").force_i64(); + let reg = Regex::new(vname.after("count_cdr").between2("_", "_cell")).unwrap(); + let mut n = 0; + if arg1 == 1 { + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() && ex.share[j].fr2_start.is_some() { + let cdr1 = ex.share[j].cdr1_start.unwrap(); + let fwr2 = ex.share[j].fr2_start.unwrap(); + if cdr1 < fwr2 { + let aa = 
aa_seq(&ex.share[j].seq[cdr1..fwr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else if arg1 == 2 { + for j in 0..ex.share.len() { + if ex.share[j].cdr2_start.is_some() && ex.share[j].fr3_start.is_some() { + let cdr2 = ex.share[j].cdr2_start.unwrap(); + let fwr3 = ex.share[j].fr3_start.unwrap(); + if cdr2 < fwr3 { + let aa = aa_seq(&ex.share[j].seq[cdr2..fwr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else { + for j in 0..ex.share.len() { + let cdr3 = ex.share[j].cdr3_start; + let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + + let _exact = format!("{n}"); + ( + String::new(), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname.starts_with("count_fwr_") + && vname.after("count_fwr_").ends_with("") + && !vname.between2("count_fwr_", "").contains('_') + && Regex::new(vname.between2("count_fwr_", "")).is_ok() + { + let reg = Regex::new(vname.between2("count_fwr_", "")).unwrap(); + let mut n = 0; + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() { + let fwr1 = ex.share[j].fr1_start; + let cdr1 = ex.share[j].cdr1_start.unwrap(); + if fwr1 < cdr1 { + let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + if ex.share[j].fr2_start.is_some() && ex.share[j].cdr2_start.is_some() { + let fwr2 = ex.share[j].fr2_start.unwrap(); + let cdr2 = ex.share[j].cdr2_start.unwrap(); + if fwr2 < cdr2 { + let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + if ex.share[j].fr3_start.is_some() { + let fwr3 = ex.share[j].fr3_start.unwrap(); + let cdr3 = ex.share[j].cdr3_start; + if fwr3 < cdr3 { + let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[fwr4..], 0); + n += 
reg.find_iter(strme(&aa)).count(); + } + + ( + format!("{n}"), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname.starts_with("count_fwr_") + && vname.after("count_fwr_").ends_with("_cell") + && !vname.between2("count_fwr_", "_cell").contains('_') + && Regex::new(vname.between2("count_fwr_", "_cell")).is_ok() + { + let reg = Regex::new(vname.between2("count_fwr_", "_cell")).unwrap(); + let mut n = 0; + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() { + let fwr1 = ex.share[j].fr1_start; + let cdr1 = ex.share[j].cdr1_start.unwrap(); + if fwr1 < cdr1 { + let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + if ex.share[j].fr2_start.is_some() && ex.share[j].cdr2_start.is_some() { + let fwr2 = ex.share[j].fr2_start.unwrap(); + let cdr2 = ex.share[j].cdr2_start.unwrap(); + if fwr2 < cdr2 { + let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + if ex.share[j].fr3_start.is_some() { + let fwr3 = ex.share[j].fr3_start.unwrap(); + let cdr3 = ex.share[j].cdr3_start; + if fwr3 < cdr3 { + let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[fwr4..], 0); + n += reg.find_iter(strme(&aa)).count(); + } + + let _exact = format!("{n}"); + ( + String::new(), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname.starts_with("count_fwr") + && vname.ends_with("") + && vname.between2("count_fwr", "").contains('_') + && vname.between("count_fwr", "_").parse::().is_ok() + && vname.between("count_fwr", "_").force_i64() >= 1 + && vname.between("count_fwr", "_").force_i64() <= 4 + && !vname.after("count_fwr").between2("_", "").contains('_') + && Regex::new(vname.between2("_", "")).is_ok() + { + let arg1 = vname.between("count_fwr", "_").force_i64(); + let reg = 
Regex::new(vname.after("count_fwr").between2("_", "")).unwrap(); + let mut n = 0; + if arg1 == 1 { + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() { + let fwr1 = ex.share[j].fr1_start; + let cdr1 = ex.share[j].cdr1_start.unwrap(); + if fwr1 < cdr1 { + let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else if arg1 == 2 { + for j in 0..ex.share.len() { + if ex.share[j].fr2_start.is_some() && ex.share[j].cdr2_start.is_some() { + let fwr2 = ex.share[j].fr2_start.unwrap(); + let cdr2 = ex.share[j].cdr2_start.unwrap(); + if fwr2 < cdr2 { + let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else if arg1 == 3 { + for j in 0..ex.share.len() { + if ex.share[j].fr3_start.is_some() { + let fwr3 = ex.share[j].fr3_start.unwrap(); + let cdr3 = ex.share[j].cdr3_start; + if fwr3 < cdr3 { + let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else { + for j in 0..ex.share.len() { + let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[fwr4..], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + + ( + format!("{n}"), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname.starts_with("count_fwr") + && vname.ends_with("_cell") + && vname.between2("count_fwr", "_cell").contains('_') + && vname.between("count_fwr", "_").parse::().is_ok() + && vname.between("count_fwr", "_").force_i64() >= 1 + && vname.between("count_fwr", "_").force_i64() <= 4 + && !vname + .after("count_fwr") + .between2("_", "_cell") + .contains('_') + && Regex::new(vname.between2("_", "_cell")).is_ok() + { + let arg1 = vname.between("count_fwr", "_").force_i64(); + let reg = Regex::new(vname.after("count_fwr").between2("_", "_cell")).unwrap(); + let mut n = 0; + if arg1 == 1 { + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() { + let fwr1 = 
ex.share[j].fr1_start; + let cdr1 = ex.share[j].cdr1_start.unwrap(); + if fwr1 < cdr1 { + let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else if arg1 == 2 { + for j in 0..ex.share.len() { + if ex.share[j].fr2_start.is_some() && ex.share[j].cdr2_start.is_some() { + let fwr2 = ex.share[j].fr2_start.unwrap(); + let cdr2 = ex.share[j].cdr2_start.unwrap(); + if fwr2 < cdr2 { + let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else if arg1 == 3 { + for j in 0..ex.share.len() { + if ex.share[j].fr3_start.is_some() { + let fwr3 = ex.share[j].fr3_start.unwrap(); + let cdr3 = ex.share[j].cdr3_start; + if fwr3 < cdr3 { + let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else { + for j in 0..ex.share.len() { + let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[fwr4..], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + + let _exact = format!("{n}"); + ( + String::new(), + vec![format!("{n}"); ex.ncells()], + "cell-exact", + ) + } else if vname == "cred" { + let mut credsx = Vec::::new(); + for l in 0..ex.clones.len() { + let bc = &ex.clones[l][0].barcode; + let li = ex.clones[l][0].dataset_index; + if gex_info.pca[li].contains_key(&bc.clone()) { + let mut creds = 0; + let mut z = Vec::<(f64, &str)>::new(); + let x = &gex_info.pca[li][&bc.clone()]; + for y in gex_info.pca[li].iter() { + let dist2 = + y.1.iter() + .zip(x.iter()) + .map(|(&y, &x)| (y - x) * (y - x)) + .sum::(); + z.push((dist2, y.0.as_str())); + } + z.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let top = n_vdj_gex[li]; + for &zi in &z[..top] { + if bin_member(&vdj_cells[li], &zi.1.to_string()) { + creds += 1; + } + } + let pc = 100.0 * creds as f64 / top as f64; + credsx.push(pc); + } else { + credsx.push(0.0); + } + } + let credsx_unsorted = credsx.clone(); + credsx.sort_by(|a, b| 
a.partial_cmp(b).unwrap()); + let r = credsx_unsorted + .into_iter() + .map(|c| format!("{c:.1}")) + .collect(); + + (format!("{:.1}", median_f64(&credsx)), r, "cell-exact") + } else if vname == "cred_cell" { + let mut credsx = Vec::::new(); + for l in 0..ex.clones.len() { + let bc = &ex.clones[l][0].barcode; + let li = ex.clones[l][0].dataset_index; + if gex_info.pca[li].contains_key(&bc.clone()) { + let mut creds = 0; + let mut z = Vec::<(f64, &str)>::new(); + let x = &gex_info.pca[li][&bc.clone()]; + for y in gex_info.pca[li].iter() { + let dist2 = + y.1.iter() + .zip(x.iter()) + .map(|(&y, &x)| (y - x) * (y - x)) + .sum::(); + z.push((dist2, y.0.as_str())); + } + z.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let top = n_vdj_gex[li]; + for &zi in &z[..top] { + if bin_member(&vdj_cells[li], &zi.1.to_string()) { + creds += 1; + } + } + let pc = 100.0 * creds as f64 / top as f64; + credsx.push(pc); + } else { + credsx.push(0.0); + } + } + let credsx_unsorted = credsx.clone(); + credsx.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let r = credsx_unsorted + .into_iter() + .map(|c| format!("{c:.1}")) + .collect(); + + let _exact = format!("{:.1}", median_f64(&credsx)); + (String::new(), r, "cell-exact") + } else if vname == "datasets" { + let mut datasets = Vec::::new(); + for j in 0..ex.clones.len() { + datasets.push(ctl.origin_info.dataset_id[ex.clones[j][0].dataset_index].clone()); + } + let mut datasets_unique = datasets.clone(); + unique_sort(&mut datasets_unique); + + ( + format!("{}", datasets_unique.iter().format(",")), + datasets, + "cell-exact", + ) + } else if vname == "datasets_cell" { + let mut datasets = Vec::::new(); + for j in 0..ex.clones.len() { + datasets.push(ctl.origin_info.dataset_id[ex.clones[j][0].dataset_index].clone()); + } + let mut datasets_unique = datasets.clone(); + unique_sort(&mut datasets_unique); + + let _exact = format!("{}", datasets_unique.iter().format(",")); + (String::new(), datasets, "cell-exact") + } else if vname == "donors" 
{ + let mut donors = Vec::::new(); + for j in 0..ex.clones.len() { + if ex.clones[j][0].donor_index.is_some() { + donors + .push(ctl.origin_info.donor_list[ex.clones[j][0].donor_index.unwrap()].clone()); + } else { + donors.push("?".to_string()); + } + } + let donors_unsorted = donors.clone(); + unique_sort(&mut donors); + + ( + format!("{}", donors.iter().format(",")), + donors_unsorted, + "cell-exact", + ) + } else if vname == "donors_cell" { + let mut donors = Vec::::new(); + for j in 0..ex.clones.len() { + if ex.clones[j][0].donor_index.is_some() { + donors + .push(ctl.origin_info.donor_list[ex.clones[j][0].donor_index.unwrap()].clone()); + } else { + donors.push("?".to_string()); + } + } + let donors_unsorted = donors.clone(); + unique_sort(&mut donors); + + let _exact = format!("{}", donors.iter().format(",")); + (String::new(), donors_unsorted, "cell-exact") + } else if vname == "dref" { + let mut diffs = 0; + for m in 0..cols { + if mat[m][u].is_some() { + let r = mat[m][u].unwrap(); + let seq = &ex.share[r].seq_del_amino; + let mut vref = refdata.refs[rsi.vids[m]].to_ascii_vec(); + if rsi.vpids[m].is_some() { + vref = dref[rsi.vpids[m].unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[rsi.jids[m]].to_ascii_vec(); + let z = seq.len(); + for p in 0..z { + let b = seq[p]; + if p < vref.len() - ctl.heur.ref_v_trim && b != vref[p] { + diffs += 1; + } + if p >= z - (jref.len() - ctl.heur.ref_j_trim) + && b != jref[jref.len() - (z - p)] + { + diffs += 1; + } + } + } + } + + (format!("{diffs}"), Vec::new(), "exact") + } else if vname == "dref_aa" { + let mut diffs = 0; + for m in 0..cols { + if mat[m][u].is_some() { + let r = mat[m][u].unwrap(); + let aa_seq = &ex.share[r].aa_mod_indel; + let mut vref = refdata.refs[rsi.vids[m]].to_ascii_vec(); + if rsi.vpids[m].is_some() { + vref = dref[rsi.vpids[m].unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[rsi.jids[m]].to_ascii_vec(); + let z = 3 * aa_seq.len() + 1; + for p in 0..aa_seq.len() { + if 
aa_seq[p] == b'-' { + diffs += 1; + continue; + } + if 3 * p + 3 <= vref.len() - ctl.heur.ref_v_trim + && aa_seq[p] != codon_to_aa(&vref[3 * p..3 * p + 3]) + { + diffs += 1; + } + if 3 * p > z - (jref.len() - ctl.heur.ref_j_trim) + 3 + && aa_seq[p] + != codon_to_aa( + &jref[jref.len() - (z - 3 * p)..jref.len() - (z - 3 * p) + 3], + ) + { + diffs += 1; + } + } + } + } + + (format!("{diffs}"), Vec::new(), "exact") + } else if vname == "dref_max" { + let mut mx = 0; + for m in 0..cols { + let mut diffs = 0; + if mat[m][u].is_some() { + let r = mat[m][u].unwrap(); + let seq = &ex.share[r].seq_del_amino; + let mut vref = refdata.refs[rsi.vids[m]].to_ascii_vec(); + if rsi.vpids[m].is_some() { + vref = dref[rsi.vpids[m].unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[rsi.jids[m]].to_ascii_vec(); + let z = seq.len(); + for p in 0..z { + let b = seq[p]; + if p < vref.len() - ctl.heur.ref_v_trim && b != vref[p] { + diffs += 1; + } + if p >= z - (jref.len() - ctl.heur.ref_j_trim) + && b != jref[jref.len() - (z - p)] + { + diffs += 1; + } + } + mx = std::cmp::max(mx, diffs); + } + } + + (format!("{mx}"), Vec::new(), "exact") + } else if vname == "entropy" { + let mut total_counts = Vec::::new(); + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = ex.clones[l][0].barcode.clone(); + if !gex_info.gex_barcodes.is_empty() { + let p = bin_position(&gex_info.gex_barcodes[li], &bc); + if p >= 0 { + let mut raw_count = 0; + if gex_info.gex_matrices[li].initialized() { + let row = gex_info.gex_matrices[li].row(p as usize); + for (f, n) in row { + if gex_info.is_gex[li][f] { + raw_count += n; + } + } + } else { + let z1 = gex_info.h5_indptr[li][p as usize] as usize; + let z2 = gex_info.h5_indptr[li][p as usize + 1] as usize; // is p+1 OK?? 
+ let d: Vec; + let ind: Vec; + if ctl.gen_opt.h5_pre { + d = h5_data[li].1[z1..z2].to_vec(); + ind = h5_data[li].2[z1..z2].to_vec(); + } else { + d = d_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + ind = ind_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + } + for j in 0..d.len() { + if gex_info.is_gex[li][ind[j] as usize] { + raw_count += d[j] as usize; + } + } + } + total_counts.push(raw_count); + } + } + } + let mut entropies = Vec::::new(); + for (clone, tc) in ex.clones.iter().zip(total_counts.into_iter()) { + let li = clone[0].dataset_index; + let bc = clone[0].barcode.as_str(); + if !gex_info.gex_barcodes.is_empty() { + let mut entropy = 0.0; + let p = bin_position(&gex_info.gex_barcodes[li], &bc.to_string()); + if p >= 0 { + if gex_info.gex_matrices[li].initialized() { + let row = gex_info.gex_matrices[li].row(p as usize); + for (f, n) in row { + if gex_info.is_gex[li][f] { + let q = n as f64 / tc as f64; + entropy -= q * q.log2(); + } + } + } else { + let z1 = gex_info.h5_indptr[li][p as usize] as usize; + let z2 = gex_info.h5_indptr[li][p as usize + 1] as usize; // is p+1 OK?? 
+ let d: Vec; + let ind: Vec; + if ctl.gen_opt.h5_pre { + d = h5_data[li].1[z1..z2].to_vec(); + ind = h5_data[li].2[z1..z2].to_vec(); + } else { + d = d_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + ind = ind_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + } + for j in 0..d.len() { + if gex_info.is_gex[li][ind[j] as usize] { + let n = d[j] as usize; + let q = n as f64 / tc as f64; + entropy -= q * q.log2(); + } + } + } + } + entropies.push(entropy); + } + } + let entropies_unsorted = entropies.clone(); + entropies.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mut entropy = 0.0; + if !entropies.is_empty() { + entropy = median_f64(&entropies); + } + let mut e = Vec::::new(); + for x in entropies_unsorted.iter() { + e.push(format!("{x:.2}")); + } + + (format!("{entropy:.2}"), e, "cell-exact") + } else if vname == "entropy_cell" { + let mut total_counts = Vec::::new(); + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = ex.clones[l][0].barcode.clone(); + if !gex_info.gex_barcodes.is_empty() { + let p = bin_position(&gex_info.gex_barcodes[li], &bc); + if p >= 0 { + let mut raw_count = 0; + if gex_info.gex_matrices[li].initialized() { + let row = gex_info.gex_matrices[li].row(p as usize); + for (f, n) in row { + if gex_info.is_gex[li][f] { + raw_count += n; + } + } + } else { + let z1 = gex_info.h5_indptr[li][p as usize] as usize; + let z2 = gex_info.h5_indptr[li][p as usize + 1] as usize; // is p+1 OK?? 
+ let d: Vec; + let ind: Vec; + if ctl.gen_opt.h5_pre { + d = h5_data[li].1[z1..z2].to_vec(); + ind = h5_data[li].2[z1..z2].to_vec(); + } else { + d = d_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + ind = ind_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + } + for j in 0..d.len() { + if gex_info.is_gex[li][ind[j] as usize] { + raw_count += d[j] as usize; + } + } + } + total_counts.push(raw_count); + } + } + } + let mut entropies = Vec::::new(); + for (l, clone) in ex.clones.iter().enumerate() { + let li = clone[0].dataset_index; + let bc = clone[0].barcode.as_str(); + if !gex_info.gex_barcodes.is_empty() { + let mut entropy = 0.0; + let p = bin_position(&gex_info.gex_barcodes[li], bc); + if p >= 0 { + if gex_info.gex_matrices[li].initialized() { + let row = gex_info.gex_matrices[li].row(p as usize); + for (f, n) in row { + if gex_info.is_gex[li][f] { + let q = n as f64 / total_counts[l] as f64; + entropy -= q * q.log2(); + } + } + } else { + let z1 = gex_info.h5_indptr[li][p as usize] as usize; + let z2 = gex_info.h5_indptr[li][p as usize + 1] as usize; // is p+1 OK?? 
+ let d: Vec; + let ind: Vec; + if ctl.gen_opt.h5_pre { + d = h5_data[li].1[z1..z2].to_vec(); + ind = h5_data[li].2[z1..z2].to_vec(); + } else { + d = d_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + ind = ind_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + } + for j in 0..d.len() { + if gex_info.is_gex[li][ind[j] as usize] { + let n = d[j] as usize; + let q = n as f64 / total_counts[l] as f64; + entropy -= q * q.log2(); + } + } + } + } + entropies.push(entropy); + } + } + let entropies_unsorted = entropies.clone(); + entropies.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mut entropy = 0.0; + if !entropies.is_empty() { + entropy = median_f64(&entropies); + } + let mut e = Vec::::new(); + for x in entropies_unsorted.iter() { + e.push(format!("{x:.2}")); + } + + let _exact = format!("{entropy:.2}"); + (String::new(), e, "cell-exact") + } else if vname == "far" { + let mut dist = -1_isize; + for i2 in 0..varmat.len() { + if i2 == u || fp[i2] != fp[u] { + continue; + } + let mut d = 0_isize; + for c in fp[u].iter() { + for j in 0..varmat[u][*c].len() { + if varmat[u][*c][j] != varmat[i2][*c][j] { + d += 1; + } + } + } + dist = max(dist, d); + } + let d = if dist == -1_isize { + String::new() + } else { + format!("{dist}") + }; + + (d, Vec::new(), "exact") + } else if vname.starts_with("fb") + && vname.ends_with("") + && vname.between2("fb", "").parse::().is_ok() + && vname.between2("fb", "").force_i64() >= 1 + { + let arg1 = vname.between2("fb", "").force_i64(); + let ncols = gex_info.fb_top_matrices[0].ncols(); + let n = (arg1 - 1) as usize; + let fb = if n < ncols { + gex_info.fb_top_matrices[0].col_label(n) + } else { + String::new() + }; + + ((*fb).to_string(), Vec::new(), "exact") + } else if vname.starts_with("fb") + && vname.ends_with("_n") + && vname.between2("fb", "_n").parse::().is_ok() + && vname.between2("fb", "_n").force_i64() >= 1 + { + let arg1 = vname.between2("fb", 
"_n").force_i64(); + let ncols = gex_info.fb_top_matrices[0].ncols(); + let n = (arg1 - 1) as usize; + let median; + let mut counts; + if n >= ncols { + median = 0; + counts = vec!["0".to_string(); ex.ncells()]; + } else { + counts = Vec::::new(); + let mut counts_sorted = Vec::::new(); + for l in 0..ex.clones.len() { + let bc = ex.clones[l][0].barcode.clone(); + let p = bin_position(&gex_info.fb_top_barcodes[0], &bc); + if p < 0 { + counts.push("0".to_string()); + counts_sorted.push(0); + } else { + let x = gex_info.fb_top_matrices[0].value(p as usize, n); + counts.push(format!("{x}")); + counts_sorted.push(x); + } + } + counts_sorted.sort_unstable(); + median = rounded_median(&counts_sorted); + } + + (format!("{median}"), counts, "cell-exact") + } else if vname.starts_with("fb") + && vname.ends_with("_n_cell") + && vname.between2("fb", "_n_cell").parse::().is_ok() + && vname.between2("fb", "_n_cell").force_i64() >= 1 + { + let arg1 = vname.between2("fb", "_n_cell").force_i64(); + let ncols = gex_info.fb_top_matrices[0].ncols(); + let n = (arg1 - 1) as usize; + let median; + let mut counts; + if n >= ncols { + median = 0; + counts = vec!["0".to_string(); ex.ncells()]; + } else { + counts = Vec::::new(); + let mut counts_sorted = Vec::::new(); + for l in 0..ex.clones.len() { + let bc = ex.clones[l][0].barcode.clone(); + let p = bin_position(&gex_info.fb_top_barcodes[0], &bc); + if p < 0 { + counts.push("0".to_string()); + counts_sorted.push(0); + } else { + let x = gex_info.fb_top_matrices[0].value(p as usize, n); + counts.push(format!("{x}")); + counts_sorted.push(x); + } + } + counts_sorted.sort_unstable(); + median = rounded_median(&counts_sorted); + } + + let _exact = format!("{median}"); + (String::new(), counts, "cell-exact") + } else if vname == "filter" { + let mut fates = Vec::::new(); + for j in 0..ex.clones.len() { + let mut f = ""; + let bc = &ex.clones[j][0].barcode; + let li = ex.clones[j][0].dataset_index; + if fate[li].contains_key(&bc.clone()) { + 
f = fate[li][bc].label(); + } + fates.push(f.to_string()); + } + + (String::new(), fates, "cell") + } else if vname == "gex" { + let mut f = Vec::::new(); + for x in gex_fcounts_unsorted.iter() { + f.push(format!("{}", *x)); + } + let mut counts = gex_counts_unsorted.to_owned(); + counts.sort_unstable(); + let gex_median = rounded_median(&counts); + + (format!("{gex_median}"), f, "cell-exact") + } else if vname == "gex_cell" { + let mut f = Vec::::new(); + for x in gex_fcounts_unsorted.iter() { + f.push(format!("{}", *x)); + } + let mut counts = gex_counts_unsorted.to_owned(); + counts.sort_unstable(); + let gex_median = rounded_median(&counts); + + let _exact = format!("{gex_median}"); + (String::new(), f, "cell-exact") + } else if vname == "gex_max" { + ( + format!("{}", gex_counts_unsorted.iter().max().unwrap()), + Vec::new(), + "exact", + ) + } else if vname == "gex_mean" { + let gex_sum = gex_fcounts_unsorted.iter().sum::(); + let gex_mean = gex_sum / gex_fcounts_unsorted.len() as f64; + + ( + format!("{}", gex_mean.round() as usize), + Vec::new(), + "exact", + ) + } else if vname == "gex_min" { + ( + format!("{}", gex_counts_unsorted.iter().min().unwrap()), + Vec::new(), + "exact", + ) + } else if vname == "gex_sum" || vname == "gex_Σ" { + let gex_sum = gex_fcounts_unsorted.iter().sum::(); + + (format!("{}", gex_sum.round() as usize), Vec::new(), "exact") + } else if vname == "gex_μ" { + let gex_sum = gex_fcounts_unsorted.iter().sum::(); + let gex_mean = gex_sum / gex_fcounts_unsorted.len() as f64; + + ( + format!("{}", gex_mean.round() as usize), + Vec::new(), + "exact", + ) + } else if vname.starts_with('g') + && vname.ends_with("") + && vname.between2("g", "").parse::().is_ok() + && vname.between2("g", "").force_i64() >= 0 + { + let arg1 = vname.between2("g", "").force_i64(); + let d = arg1 as usize; + let answer = if groups.contains_key(&d) { + format!("{}", groups[&d][u] + 1) + } else { + String::new() + }; + + (answer, Vec::new(), "exact") + } else if 
vname == "hcomp" { + let mut hcomp = String::new(); + if ex.share.len() == 2 { + for j in 0..ex.share.len() { + if ex.share[j].left { + hcomp = format!("{}", ex.share[j].jun.hcomp); + } + } + } + + (hcomp, Vec::new(), "exact") + } else if vname == "inkt" { + let mut s = String::new(); + let alpha_g = ex.share[0].inkt_alpha_chain_gene_match; + let alpha_j = ex.share[0].inkt_alpha_chain_junction_match; + let beta_g = ex.share[0].inkt_beta_chain_gene_match; + let beta_j = ex.share[0].inkt_beta_chain_junction_match; + if alpha_g || alpha_j { + s += "𝝰"; + if alpha_g { + s += "g"; + } + if alpha_j { + s += "j"; + } + } + if beta_g || beta_j { + s += "𝝱"; + if beta_g { + s += "g"; + } + if beta_j { + s += "j"; + } + } + + (s, Vec::new(), "exact") + } else if vname == "jun_ins" { + let mut jun_ins = String::new(); + if ex.share.len() == 2 { + for j in 0..ex.share.len() { + if ex.share[j].left { + jun_ins = format!("{}", ex.share[j].jun.jun_ins); + } + } + } + + (jun_ins, Vec::new(), "exact") + } else if vname == "jun_mat" { + let mut jun_mat = String::new(); + if ex.share.len() == 2 { + for j in 0..ex.share.len() { + if ex.share[j].left { + jun_mat = format!("{}", ex.share[j].jun.matches); + } + } + } + + (jun_mat, Vec::new(), "exact") + } else if vname == "jun_sub" { + let mut jun_mis = String::new(); + if ex.share.len() == 2 { + for j in 0..ex.share.len() { + if ex.share[j].left { + jun_mis = format!("{}", ex.share[j].jun.mismatches); + } + } + } + + (jun_mis, Vec::new(), "exact") + } else if vname == "mait" { + let mut s = String::new(); + let alpha_g = ex.share[0].mait_alpha_chain_gene_match; + let alpha_j = ex.share[0].mait_alpha_chain_junction_match; + let beta_g = ex.share[0].mait_beta_chain_gene_match; + let beta_j = ex.share[0].mait_beta_chain_junction_match; + if alpha_g || alpha_j { + s += "𝝰"; + if alpha_g { + s += "g"; + } + if alpha_j { + s += "j"; + } + } + if beta_g || beta_j { + s += "𝝱"; + if beta_g { + s += "g"; + } + if beta_j { + s += "j"; + } + } + + 
(s, Vec::new(), "exact") + } else if vname == "mark" { + let mut n = 0; + for j in 0..ex.clones.len() { + if ex.clones[j][0].marked { + n += 1; + } + } + + (format!("{n}"), Vec::new(), "exact") + } else if vname == "mem" { + let mut n = 0; + let mut y = Vec::::new(); + if ctl.gen_opt.using_secmem { + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = &ex.clones[l][0].barcode; + let mut count = 0; + if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { + count = ctl.origin_info.secmem[li][&bc.clone()].1; + n += count; + } + y.push(format!("{count}")); + } + } + + (format!("{n}"), y, "cell-exact") + } else if vname == "mem_cell" { + let mut n = 0; + let mut y = Vec::::new(); + if ctl.gen_opt.using_secmem { + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = &ex.clones[l][0].barcode; + let mut count = 0; + if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { + count = ctl.origin_info.secmem[li][&bc.clone()].1; + n += count; + } + y.push(format!("{count}")); + } + } + + let _exact = format!("{n}"); + (String::new(), y, "cell-exact") + } else if vname == "n" { + let counts = vec!["1.0".to_string(); mults[u]]; + + (format!("{}", mults[u]), counts, "cell-exact") + } else if vname == "n_cell" { + let counts = vec!["1.0".to_string(); mults[u]]; + + let _exact = format!("{}", mults[u]); + (String::new(), counts, "cell-exact") + } else if vname.starts_with("n_") + && vname.after("n_").ends_with("") + && (bin_member( + &ctl.origin_info.dataset_list, + &vname.between2("n_", "").to_string(), + ) || bin_member( + &ctl.origin_info.origin_list, + &vname.between2("n_", "").to_string(), + ) || bin_member( + &ctl.origin_info.donor_list, + &vname.between2("n_", "").to_string(), + ) || bin_member( + &ctl.origin_info.tag_list, + &vname.between2("n_", "").to_string(), + )) + { + let name = vname.between2("n_", ""); + let mut count = 0; + let mut counts = Vec::::new(); + for j in 0..ex.clones.len() { + let x = 
&ex.clones[j][0]; + if ctl.origin_info.dataset_id[x.dataset_index] == name + || x.origin_index.is_some() + && ctl.origin_info.origin_list[x.origin_index.unwrap()] == name + || x.donor_index.is_some() + && ctl.origin_info.donor_list[x.donor_index.unwrap()] == name + || x.tag_index.is_some() && ctl.origin_info.tag_list[x.tag_index.unwrap()] == name + { + count += 1; + counts.push("1.0".to_string()); + } + } + + (format!("{count}"), counts, "cell-exact") + } else if vname.starts_with("n_") + && vname.after("n_").ends_with("_cell") + && (bin_member( + &ctl.origin_info.dataset_list, + &vname.between2("n_", "_cell").to_string(), + ) || bin_member( + &ctl.origin_info.origin_list, + &vname.between2("n_", "_cell").to_string(), + ) || bin_member( + &ctl.origin_info.donor_list, + &vname.between2("n_", "_cell").to_string(), + ) || bin_member( + &ctl.origin_info.tag_list, + &vname.between2("n_", "_cell").to_string(), + )) + { + let name = vname.between2("n_", "_cell"); + let mut count = 0; + let mut counts = Vec::::new(); + for j in 0..ex.clones.len() { + let x = &ex.clones[j][0]; + if ctl.origin_info.dataset_id[x.dataset_index] == name + || x.origin_index.is_some() + && ctl.origin_info.origin_list[x.origin_index.unwrap()] == name + || x.donor_index.is_some() + && ctl.origin_info.donor_list[x.donor_index.unwrap()] == name + || x.tag_index.is_some() && ctl.origin_info.tag_list[x.tag_index.unwrap()] == name + { + count += 1; + counts.push("1.0".to_string()); + } + } + + let _exact = format!("{count}"); + (String::new(), counts, "cell-exact") + } else if vname == "n_b" { + let mut n_b = 0; + let mut ns = Vec::::new(); + for j in 0..ex.clones.len() { + let bc = &ex.clones[j][0].barcode; + let li = ex.clones[j][0].dataset_index; + if gex_info.cell_type[li].contains_key(&bc.clone()) { + if gex_info.cell_type[li][&bc.clone()].starts_with('B') { + n_b += 1; + ns.push("1.0".to_string()); + } else { + ns.push("0.0".to_string()); + } + } + } + + (format!("{n_b}"), ns, "cell-exact") + } 
else if vname == "n_b_cell" { + let mut n_b = 0; + let mut ns = Vec::::new(); + for j in 0..ex.clones.len() { + let bc = &ex.clones[j][0].barcode; + let li = ex.clones[j][0].dataset_index; + if gex_info.cell_type[li].contains_key(&bc.clone()) { + if gex_info.cell_type[li][&bc.clone()].starts_with('B') { + n_b += 1; + ns.push("1.0".to_string()); + } else { + ns.push("0.0".to_string()); + } + } + } + + let _exact = format!("{n_b}"); + (String::new(), ns, "cell-exact") + } else if vname == "n_gex" { + let mut n = Vec::::new(); + let mut n_gex = 0; + for x in n_gexs.iter() { + n.push(format!("{}", *x)); + n_gex += *x; + } + + (format!("{n_gex}"), n, "cell-exact") + } else if vname == "n_gex_cell" { + let mut n = Vec::::new(); + let mut n_gex = 0; + for x in n_gexs.iter() { + n.push(format!("{}", *x)); + n_gex += *x; + } + + let _exact = format!("{n_gex}"); + (String::new(), n, "cell-exact") + } else if vname == "n_other" { + let mut n = 0; + let mut ns = Vec::::new(); + for j in 0..ex.clones.len() { + let di = ex.clones[j][0].dataset_index; + let f = format!("n_{}", ctl.origin_info.dataset_id[di]); + let found = nd_fields.iter().any(|ff| *ff == f); + if !found { + n += 1; + ns.push("1.0".to_string()); + } else { + ns.push("0.0".to_string()); + } + } + + (format!("{n}"), ns, "cell-exact") + } else if vname == "n_other_cell" { + let mut n = 0; + let mut ns = Vec::::new(); + for j in 0..ex.clones.len() { + let di = ex.clones[j][0].dataset_index; + let f = format!("n_{}", ctl.origin_info.dataset_id[di]); + let found = nd_fields.iter().any(|ff| *ff == f); + if !found { + n += 1; + ns.push("1.0".to_string()); + } else { + ns.push("0.0".to_string()); + } + } + + let _exact = format!("{n}"); + (String::new(), ns, "cell-exact") + } else if vname == "nbc" { + let mut nbc = Vec::::new(); + for j in 0..ex.clones.len() { + let bc = ex.clones[j][0].barcode.before("-").as_bytes(); + let mut n = 0_u64; + for (k, &b) in bc.iter().enumerate() { + if k > 0 { + n *= 4; + } + if b == b'C' 
{ + n += 1; + } else if b == b'G' { + n += 2; + } else if b == b'T' { + n += 3; + } + } + nbc.push(format!("{n:010}")); + } + + (String::new(), nbc, "cell") + } else if vname == "nchains" { + (format!("{}", rsi.mat.len()), Vec::new(), "clono") + } else if vname == "nchains_present" { + ( + format!("{}", exact_clonotypes[exacts[u]].share.len()), + Vec::new(), + "exact", + ) + } else if vname == "near" { + let mut dist = 1_000_000; + for i2 in 0..varmat.len() { + if i2 == u || fp[i2] != fp[u] { + continue; + } + let mut d = 0; + for c in fp[u].iter() { + for j in 0..varmat[u][*c].len() { + if varmat[u][*c][j] != varmat[i2][*c][j] { + d += 1; + } + } + } + dist = min(dist, d); + } + let near = if dist == 1_000_000 { + String::new() + } else { + format!("{dist}") + }; + + (near, Vec::new(), "exact") + } else if vname == "npe" { + (String::new(), Vec::new(), "cell") + } else if vname == "origins" { + let mut origins = Vec::::new(); + for j in 0..ex.clones.len() { + if ex.clones[j][0].origin_index.is_some() { + origins.push( + ctl.origin_info.origin_list[ex.clones[j][0].origin_index.unwrap()].clone(), + ); + } else { + origins.push("?".to_string()); + } + } + let origins_unsorted = origins.clone(); + unique_sort(&mut origins); + + ( + format!("{}", origins.iter().format(",")), + origins_unsorted, + "cell-exact", + ) + } else if vname == "origins_cell" { + let mut origins = Vec::::new(); + for j in 0..ex.clones.len() { + if ex.clones[j][0].origin_index.is_some() { + origins.push( + ctl.origin_info.origin_list[ex.clones[j][0].origin_index.unwrap()].clone(), + ); + } else { + origins.push("?".to_string()); + } + } + let origins_unsorted = origins.clone(); + unique_sort(&mut origins); + + let _exact = format!("{}", origins.iter().format(",")); + (String::new(), origins_unsorted, "cell-exact") + } else if vname == "pe" || vname == "ppe" { + (String::new(), Vec::new(), "cell") + } else if vname == "sec" { + let mut n = 0; + let mut y = Vec::::new(); + if 
ctl.gen_opt.using_secmem { + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = &ex.clones[l][0].barcode; + let mut count = 0; + if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { + count = ctl.origin_info.secmem[li][&bc.clone()].0; + n += count; + } + y.push(format!("{count}")); + } + } + + (format!("{n}"), y, "cell-exact") + } else if vname == "sec_cell" { + let mut n = 0; + let mut y = Vec::::new(); + if ctl.gen_opt.using_secmem { + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = &ex.clones[l][0].barcode; + let mut count = 0; + if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { + count = ctl.origin_info.secmem[li][&bc.clone()].0; + n += count; + } + y.push(format!("{count}")); + } + } + + let _exact = format!("{n}"); + (String::new(), y, "cell-exact") + } else if vname == "type" { + let mut cell_types = Vec::::new(); + /* + for j in 0..ex.clones.len() { + let mut cell_type = "".to_string(); + let bc = &ex.clones[j][0].barcode; + let li = ex.clones[j][0].dataset_index; + if gex_info.cell_type[li].contains_key(&bc.clone()) { + cell_type = gex_info.cell_type[li][&bc.clone()].clone(); + } + cell_types.push(cell_type); + } + */ + cell_types.sort(); + + (abbrev_list(&cell_types), Vec::new(), "exact") + } else { + ("$UNDEFINED".to_string(), Vec::::new(), "") + }; + if val.0 == "$UNDEFINED" { + Ok(false) + } else { + let (exact, cell, level) = val; + if level == "cell" && !var.ends_with("_cell") { + if verbose { + eprint!("lvar {var} ==> {}; ", String::new()); + eprintln!("i = {i}, lvars.len() = {}", lvars.len()); + } + if i < lvars.len() { + row.push(String::new()) + } + if pass == 2 { + speak!(u, abbr, String::new()); + } + stats.push((abbr.to_string(), cell.clone())); + if pass == 2 { + speak!(u, abbr, format!("{}", cell.iter().format(POUT_SEP))); + } + } else if (!exact.is_empty() && !var.ends_with("_cell")) || cell.is_empty() { + if verbose { + eprint!("lvar {var} ==> {exact}; "); + 
eprintln!("i = {i}, lvars.len() = {}", lvars.len()); + } + if i < lvars.len() { + row.push(exact.clone()) + } + if pass == 2 { + speak!(u, abbr, exact.to_string()); + } + if cell.is_empty() { + stats.push((abbr.to_string(), vec![exact; ex.ncells()])); + } else { + stats.push((abbr.to_string(), cell.to_vec())); + } + } else if !cell.is_empty() { + if pass == 2 { + speak!(u, abbr, format!("{}", cell.iter().format(POUT_SEP))); + } + stats.push((abbr.to_string(), cell.to_vec())); + } + Ok(true) + } +} diff --git a/enclone_proto/Cargo.toml b/enclone_proto/Cargo.toml index f17a2e9b9..10330199f 100644 --- a/enclone_proto/Cargo.toml +++ b/enclone_proto/Cargo.toml @@ -1,29 +1,30 @@ [package] name = "enclone_proto" -version = "0.4.49" +version = "0.5.219" authors = ["""David Jaffe , Keri Dockter , + Lance Hepler , Shaun Jackman , Sreenath Krishnan , Meryl Lewis , + Alvin Liang , Patrick Marks , Wyatt McDonnell """] -edition = "2018" +edition = "2021" +license-file = "LICENSE.txt" publish = false # Please do not edit crate versions within this file. Instead edit the file master.toml # in the root of the enclone repo. [dependencies] -bio = "0.31.0" -bytes = "0.5.5" -byteorder = "1.3.2" -enclone_core = { path = "../enclone_core" } -failure = "0.1.5" -prost = "0.6.1" -serde = "1.0.90" -serde_derive = "1.0.102" - -[build-dependencies] -prost-build = "0.6.1" +bio_edit = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +byteorder = "1" +prost = { version = ">=0.9, <0.12", default_features = false, features = ["std", "prost-derive"] } +serde = "1" +thiserror = "1" +vdj_ann = { version = "0.4", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +[package.metadata.cargo-machete] +# Required for derive macro. 
+ignored = ["serde"] diff --git a/enclone_proto/LICENSE.txt b/enclone_proto/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/enclone_proto/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/enclone_proto/build.rs b/enclone_proto/build.rs deleted file mode 100644 index b350a8474..000000000 --- a/enclone_proto/build.rs +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// The purpose of this file is to auto generate `types.rs` from the `types.proto` file. - -use prost_build::Config; - -fn main() { - let mut config = Config::new(); - config.type_attribute(".", "#[derive(::serde::Serialize, ::serde::Deserialize)]"); - config.compile_protos(&["types.proto"], &["."]).unwrap(); -} diff --git a/enclone_proto/src/enclone.types.rs b/enclone_proto/src/enclone.types.rs new file mode 100644 index 000000000..ccf226cb2 --- /dev/null +++ b/enclone_proto/src/enclone.types.rs @@ -0,0 +1,516 @@ +/// Evidence that a given cell is iNKT or MAIT. Each ExactSubclonotype has one +/// instantiation of this structure for iNKT and one for MAIT. 
+#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct InvariantTCellAnnotation { + #[prost(bool, required, tag = "1")] + pub alpha_chain_gene_match: bool, + #[prost(bool, required, tag = "2")] + pub alpha_chain_junction_match: bool, + #[prost(bool, required, tag = "3")] + pub beta_chain_gene_match: bool, + #[prost(bool, required, tag = "4")] + pub beta_chain_junction_match: bool, +} +/// Representation of an alignment +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Alignment { + /// Start of the alignment in the reference + #[prost(uint32, required, tag = "1")] + pub ref_start: u32, + /// Cigar string + #[prost(string, required, tag = "2")] + pub cigar: ::prost::alloc::string::String, +} +/// Defines a chain within an exact subclonotype. +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExactSubClonotypeChain { + /// Nucleotide sequence of the chain. This will only contain ACGT alphabets + #[prost(bytes = "vec", required, tag = "1")] + pub nt_sequence: ::prost::alloc::vec::Vec, + /// Amino acid sequence from the start codon at the beginning of the V-REGION. + /// This can be inferred from the `nt_sequence` and `v_start`, but stored for + /// convenience + #[prost(bytes = "vec", required, tag = "2")] + pub aa_sequence: ::prost::alloc::vec::Vec, + /// Index of the start of the V-REGION in the `nt_sequence`. + #[prost(uint32, required, tag = "3")] + pub v_start: u32, + /// Index of the end of the J-REGION in the `nt_sequence` (exclusive). + #[prost(uint32, required, tag = "4")] + pub j_end: u32, + /// Index of the C-REGION of this chain in the universal reference. + /// TODO: Should we store UVDJ regions here for convenience? 
The reason why it + /// is not stored at this level is because all exact subclonotypes share the + /// same UVDJ regions. + #[prost(uint32, optional, tag = "5")] + pub c_region_idx: ::core::option::Option, + /// Index of the start of the CDR3 sequence in the `nt_sequence`. The start of + /// the CDR3 amino acid in the `aa_sequence` is `(cdr3_start - v_start)/3`. + #[prost(uint32, required, tag = "6")] + pub cdr3_start: u32, + /// Index of the end of the CDR3 sequence in the `nt_sequence` (exclusive). + /// The end of the CDR3 amino acid in the `aa_sequence` is + /// `(cdr3_end - v_start)/3`. + #[prost(uint32, required, tag = "7")] + pub cdr3_end: u32, + /// UMI counts of contigs associated with this exact subclonotype chain. The + /// number of elements in this vector is equal to the number of barcodes + /// associated with this exact subclonotype. + #[prost(uint32, repeated, packed = "false", tag = "8")] + pub umi_counts: ::prost::alloc::vec::Vec, + /// Read counts of contigs associated with this exact subclonotype chain. The + /// number of elements in this vector is equal to the number of barcodes + /// associated with this exact subclonotype. + #[prost(uint32, repeated, packed = "false", tag = "9")] + pub read_counts: ::prost::alloc::vec::Vec, + /// Names of contigs associated with this exact subclonotype chain. The number + /// of elements in this vector is equal to the number of barcodes associated + /// with this exact subclonotype. The contig name would be of the form + /// `{barcode}_contig_{id}`. + #[prost(string, repeated, tag = "10")] + pub contig_ids: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// Alignment of the `nt_sequence` to the nucleotide sequence of the clonotype + /// consensus of this chain. + /// TODO: Do we need amino acid alignment info? 
+ #[prost(message, required, tag = "11")] + pub clonotype_consensus_aln: Alignment, + /// Alignment of the `nt_sequence` to the nucleotide sequence of the + /// concatenated donor reference of this chain (defined elsewhere in this + /// file). + /// TODO: Default donor reference to universal reference? + #[prost(message, required, tag = "12")] + pub donor_reference_aln: Alignment, + /// Alignment of the `nt_sequence` to the nucleotide sequence of the + /// concatenated universal reference of this chain (defined elsewhere in this + /// file). + #[prost(message, required, tag = "13")] + pub universal_reference_aln: Alignment, + /// Index of the start of the FWR1 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "14")] + pub fwr1_start: ::core::option::Option, + /// Index of the start of the CDR1 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "15")] + pub cdr1_start: ::core::option::Option, + /// Index of the start of the FWR2 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "16")] + pub fwr2_start: ::core::option::Option, + /// Index of the start of the CDR2 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "17")] + pub cdr2_start: ::core::option::Option, + /// Index of the start of the FWR3 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "18")] + pub fwr3_start: ::core::option::Option, + /// Index of the end of the FWR4 sequence in the `nt_sequence` (exclusive). + #[prost(uint32, optional, tag = "19")] + pub fwr4_end: ::core::option::Option, + /// Nucleotide percent identity with the donor reference, outside junction region. + #[prost(float, required, tag = "20")] + pub dna_percent: f32, + /// Amino acid percent identity with the donor reference, outside junction region. + #[prost(float, required, tag = "21")] + pub aa_percent: f32, +} +/// The chains in a clonotype are ordered an hence they have a unique index. 
+/// An exact subclonotype within a clonotype might not have all the chains that +/// are present in the clonotype. This structure stores the exact subclonotype +/// chain along with the `index` of the corresponding chain in the parent +/// clonotype. +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExactSubClonotypeChainInfo { + /// The index of this chain in the parent clonotype + #[prost(uint32, required, tag = "1")] + pub index: u32, + #[prost(message, required, tag = "2")] + pub chain: ExactSubClonotypeChain, +} +/// Define an exact subclonotype. +/// +/// All the barcodes within an exact subclonotype have the same number of +/// productive chains, the same sequence from the start of the V-REGION to the +/// end of the J-REGION as well as the same C-REGION annotation for each chain. +/// TODO: Maybe mutations outside V-J? +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExactSubClonotype { + /// The chains in an exact subclonotype along with the index of the chain in + /// the parent clonotype + #[prost(message, repeated, tag = "1")] + pub chains: ::prost::alloc::vec::Vec, + /// List of cell barcodes in this exact subclonotype. The number of elements in + /// this list is equal to the number of elements in the `umi_counts` and + /// `contig_ids` vector in an `ExactSubClonotypeChain`. The barcodes have the + /// appropriate gem group as the suffix. + #[prost(string, repeated, tag = "2")] + pub cell_barcodes: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// For a T cell, optionally annotate the exact subclonotype as one or both of + /// iNKT/MAIT. This data structure stores the evidence we have about the + /// annotation. The evidence can be zero, and indeed this will be the case for + /// all B cells. 
+ #[prost(message, required, tag = "3")] + pub inkt_evidence: InvariantTCellAnnotation, + #[prost(message, required, tag = "4")] + pub mait_evidence: InvariantTCellAnnotation, +} +/// Define a clonotype chain +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ClonotypeChain { + /// The nucleotide sequence of this clonotype chain consensus + /// What we actually compute here is not the consensus across the clonotype + /// (whose biological meaning is questionable), but rather the sequence of the + /// first exact subclonotype which has an entry for the given chain. Over 99% + /// of the time, this will be the first exact subclonotype. + #[prost(bytes = "vec", required, tag = "1")] + pub nt_sequence: ::prost::alloc::vec::Vec, + /// Index of the 5' UTR region in the universal reference. The region in the + /// universal reference is guaranteed to be `Region::U` + #[prost(uint32, optional, tag = "2")] + pub u_idx: ::core::option::Option, + /// Index of the Variable region in the universal reference. The region in the + /// universal reference is guaranteed to be `Region::V` + #[prost(uint32, required, tag = "3")] + pub v_idx: u32, + /// Index of the Diversity region in the universal reference. The region in the + /// universal reference is guaranteed to be `Region::D`. D-REGION is not + /// present for light/alpha chains. Even for heavy/beta chains, this might be + /// `None` if there is ambiguity in the annotation. + #[prost(uint32, optional, tag = "4")] + pub d_idx: ::core::option::Option, + /// Index of the Joining region in the universal reference. The region in the + /// universal reference is guaranteed to be `Region::J` + #[prost(uint32, required, tag = "5")] + pub j_idx: u32, + /// Index of the Constant region in the universal reference. 
The region in the + /// universal reference is guaranteed to be `Region::C` + #[prost(uint32, optional, tag = "6")] + pub c_idx: ::core::option::Option, + /// Index of the Variable region in the donor reference. The region in the + /// donor reference is guaranteed to be `Region::V` and the `universal_idx` in + /// the donor reference item will be equal to the `v_idx` + #[prost(uint32, optional, tag = "7")] + pub donor_v_idx: ::core::option::Option, + /// / Index of the Joining region in the donor reference. The region in the + /// / donor reference is guaranteed to be `Region::J` and the `universal_idx` in + /// / the donor reference item will be equal to the `j_idx` + #[prost(uint32, optional, tag = "8")] + pub donor_j_idx: ::core::option::Option, + /// Concatenated universal reference = + /// `nt_sequence` of universal_reference\[u_idx\] if u_idx is not None + + /// `nt_sequence` of universal_reference\[v_idx\] + + /// `nt_sequence` of universal_reference\[d_idx\] if d_idx is not None + + /// `nt_sequence` of universal_reference\[j_idx\] + + /// `nt_sequence` of universal_reference\[c_idx\] if c_idx is not None. + #[prost(bytes = "vec", required, tag = "9")] + pub universal_reference: ::prost::alloc::vec::Vec, + /// Alignment of the `nt_sequence` to the nucleotide sequence of the + /// concatenated universal reference of this chain. + #[prost(message, required, tag = "10")] + pub universal_reference_aln: Alignment, + /// The concatenated donor reference is the same as the concatenated universal + /// reference, however with substitutions: + /// `nt_sequence` of donor_reference\[donor_v_idx\] if donor_v_idx is not + /// None + /// and + /// `nt_sequence` of donor_reference\[donor_j_idx\] if donor_j_idx is not + /// None. + #[prost(bytes = "vec", required, tag = "11")] + pub donor_reference: ::prost::alloc::vec::Vec, + /// Alignment of the `nt_sequence` to the nucleotide sequence of the + /// concatenated donor reference of this chain. 
+ #[prost(message, required, tag = "12")] + pub donor_reference_aln: Alignment, + /// Index of the start of the V-REGION in the `nt_sequence`. + #[prost(uint32, required, tag = "13")] + pub v_start: u32, + /// Index of the stop of the aligned part of the V-REGION in the `nt-sequence` + /// (exclusive). + #[prost(uint32, required, tag = "14")] + pub v_end: u32, + /// Index of the stop of the aligned part of the V-REGION in universal + /// reference V (exclusive). + #[prost(uint32, required, tag = "15")] + pub v_end_ref: u32, + /// Index of the start of the aligned part of the J-REGION in the + /// `nt_sequence`. + #[prost(uint32, required, tag = "16")] + pub j_start: u32, + /// Index of the start of the aligned part of the J-REGION in universal + /// reference J. + #[prost(uint32, required, tag = "17")] + pub j_start_ref: u32, + /// Index of the end of the J-REGION in the `nt_sequence` (exclusive). + #[prost(uint32, required, tag = "18")] + pub j_end: u32, + /// Index of the start of the CDR3 sequence in the `nt_sequence`. The start of + /// the CDR3 amino acid in the `aa_sequence` is `(cdr3_start - v_start)/3` + #[prost(uint32, required, tag = "19")] + pub cdr3_start: u32, + /// Index of the end of the CDR3 sequence in the `nt_sequence` (exclusive). The + /// end of the CDR3 amino acid in the `aa_sequence` is `(cdr3_end - v_start)/3` + #[prost(uint32, required, tag = "20")] + pub cdr3_end: u32, + /// IGH, IGK, IGL, TRA, TRB, TRD, TRG or conceivably something else. 
+ #[prost(string, required, tag = "21")] + pub chain_type: ::prost::alloc::string::String, + /// AA sequence of the clonotype chain consensus + #[prost(bytes = "vec", required, tag = "22")] + pub aa_sequence: ::prost::alloc::vec::Vec, + /// AA sequence of the concatenated universal reference starting from the V + /// regions + #[prost(bytes = "vec", required, tag = "23")] + pub aa_sequence_universal: ::prost::alloc::vec::Vec, + /// AA sequence of the concatenated donor reference starting from the V regions + #[prost(bytes = "vec", required, tag = "24")] + pub aa_sequence_donor: ::prost::alloc::vec::Vec, + /// Index of the start of the FWR1 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "25")] + pub fwr1_start: ::core::option::Option, + /// Index of the start of the CDR1 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "26")] + pub cdr1_start: ::core::option::Option, + /// Index of the start of the FWR2 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "27")] + pub fwr2_start: ::core::option::Option, + /// Index of the start of the CDR2 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "28")] + pub cdr2_start: ::core::option::Option, + /// Index of the start of the FWR3 sequence in the `nt_sequence`. + #[prost(uint32, optional, tag = "29")] + pub fwr3_start: ::core::option::Option, + /// Index of the end of the FWR4 sequence in the `nt_sequence` (exclusive). + #[prost(uint32, optional, tag = "30")] + pub fwr4_end: ::core::option::Option, +} +/// Definition of a clonotype. +/// +/// A clonotype is composed of a list of exact subclonotypes +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Clonotype { + /// The list of chains associated with this clonotype. The ordering of the + /// chains is important as this order is preserved in the list of chains + /// specified under each exact subclonotype. 
By convention heavy chain/beta + /// chain comes ahead of light chain/alpha chain. + /// TODO: What is the ordering when multiple chains of same kind are present? + #[prost(message, repeated, tag = "1")] + pub chains: ::prost::alloc::vec::Vec, + /// The list of exact subclonotypes in this clonotype ordered by the number of + /// cell barcodes in the exact subclonotype in descending order (TODO: Verify + /// sort order). The number of chains listed under each exact subclonotype will + /// be equal to the number of chains in this clonotype in the same order. + /// However, some of the exact subclonotype chains could be `None`. + #[prost(message, repeated, tag = "2")] + pub exact_clonotypes: ::prost::alloc::vec::Vec, + /// The total number of cell barcodes associated with this clonotype. This can + /// be inferred by summing up the number of barcodes within each exact + /// subclonotype, but it is stored here for convenience. + #[prost(uint32, required, tag = "3")] + pub frequency: u32, +} +/// A single donor reference sequence and metadata packaged in a convenient +/// struct. In the current version of enclone, the donor reference is only +/// inferred for the V-REGIONs. But we could extend it to J-REGIONs in the +/// future. +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UniversalReferenceItem { + /// A unique identifier for this reference sequence that traces back to the + /// reference fasta. 
Need not be ordered or continuous + #[prost(uint32, required, tag = "1")] + pub ref_idx: u32, + /// The display name of this gene which will be shown in Loupe (optional allele + /// information) + #[prost(string, required, tag = "2")] + pub display_name: ::prost::alloc::string::String, + /// One of the U/V/D/J/C regions + #[prost(enumeration = "Region", required, tag = "3")] + pub region: i32, + /// Nucleotide sequence associated with this reference item + #[prost(bytes = "vec", required, tag = "4")] + pub nt_sequence: ::prost::alloc::vec::Vec, +} +/// List of all universal reference sequences and metadata packaged in a +/// convenient struct. Currently just a list of items. Could add metadata like +/// species etc. +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UniversalReference { + /// UV(D)JC regions associated with a clonotype chain or an exact subclonotype + /// chain are stored as indices into this vector + #[prost(message, repeated, tag = "1")] + pub items: ::prost::alloc::vec::Vec, +} +/// A single donor reference sequence and metadata packaged in a convenient +/// struct. In the current version of enclone, the donor reference is only +/// inferred for the V-REGIONs. But we could extend it to J-REGIONs in the +/// future. +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DonorReferenceItem { + /// Index of the parent sequence in the universal reference + #[prost(uint32, required, tag = "1")] + pub universal_idx: u32, + /// Index of the donor associated with this reference. If there are no donors + /// specified, this will be `0` by default. 
+ #[prost(uint32, required, tag = "2")] + pub donor_idx: u32, + /// The display name of this gene which will be shown in Loupe (optional allele + /// information) + /// TODO: Should this be modified to explicitly point out the donor? e.g TRAV-1 + /// [Donor 0]? for now, like this: "TRAV-1, donor 1, alt allele 1", etc. + #[prost(string, required, tag = "3")] + pub display_name: ::prost::alloc::string::String, + /// Currently, the donor reference region will only be the V-REGION + #[prost(enumeration = "Region", required, tag = "4")] + pub region: i32, + /// The nucleotide sequence associated with this reference item + #[prost(bytes = "vec", required, tag = "5")] + pub nt_sequence: ::prost::alloc::vec::Vec, + /// Alignment of the `nt-sequence` with the nucleotide sequence of the + /// corresponding universal reference item. + #[prost(message, required, tag = "6")] + pub universal_aln: Alignment, +} +/// List of all donor reference sequences and metadata packaged in a convenient +/// struct. Currently just a list of items. Could add metadata like all donor +/// names etc. +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DonorReference { + /// All the entries in this reference + /// The donor V-REGION associated with a clonotype chain is stored as an index + /// to this vector. + #[prost(message, repeated, tag = "1")] + pub items: ::prost::alloc::vec::Vec, +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GemWellInfo { + #[prost(string, required, tag = "1")] + pub donor: ::prost::alloc::string::String, + #[prost(string, required, tag = "2")] + pub origin: ::prost::alloc::string::String, + #[prost(string, required, tag = "3")] + pub library_id: ::prost::alloc::string::String, + /// Data for all the additional columns. 
For convenience, we are storing it as + /// a map, rather than an array with the order defined by `additional_columns` + /// in Metadata + #[prost(map = "string, string", tag = "8")] + pub additional_data: + ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, +} +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Metadata { + /// The additional metadata columns specified by the user + #[prost(string, repeated, tag = "1")] + pub additional_columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// List of all the donors. The `donor_idx` in the `DonorReference` is an index + /// into this array. This will be empty for a single sample case + #[prost(string, repeated, tag = "2")] + pub donors: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// Key: Gem well + /// Value: Metadata for each gem well + /// This will be empty for a single sample case + #[prost(map = "uint32, message", tag = "3")] + pub per_gem_well_info: ::std::collections::HashMap, +} +/// Outputs from a single enclone run. +/// +/// This message itself is not written in the proto file, but the order of +/// messages follow the order of fields in this message +#[derive(::serde::Serialize, ::serde::Deserialize)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct EncloneOutputs { + #[prost(string, required, tag = "1")] + pub version: ::prost::alloc::string::String, + /// Metadata associated with this run. For single sample runs, there is no + /// useful metadata. In the context of aggr, this will be populated from the + /// user input + #[prost(message, required, tag = "10")] + pub metadata: Metadata, + /// The universal reference used. + #[prost(message, required, tag = "20")] + pub universal_reference: UniversalReference, + /// The inferred donor reference. 
+ #[prost(message, required, tag = "30")] + pub donor_reference: DonorReference, + /// The total number of clonotypes + #[prost(uint32, required, tag = "100")] + pub num_clonotypes: u32, + /// List of all clonotypes computed in this enclone run. Each clonotype is + /// stored as an individual message in order to enable streaming. + #[prost(message, repeated, tag = "110")] + pub clonotypes: ::prost::alloc::vec::Vec, +} +/// Various regions within a VDJ transcript +#[derive( + ::serde::Serialize, + ::serde::Deserialize, + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + ::prost::Enumeration, +)] +#[repr(i32)] +pub enum Region { + /// 5' untranslated region + U = 0, + /// Variable region + V = 1, + /// Diversity region + D = 2, + /// Joining region + J = 3, + /// Constant region + C = 4, +} +impl Region { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Region::U => "U", + Region::V => "V", + Region::D => "D", + Region::J => "J", + Region::C => "C", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "U" => Some(Self::U), + "V" => Some(Self::V), + "D" => Some(Self::D), + "J" => Some(Self::J), + "C" => Some(Self::C), + _ => None, + } + } +} diff --git a/enclone_proto/src/proto_io.rs b/enclone_proto/src/proto_io.rs index 1b66e39e8..2ff97c9b5 100644 --- a/enclone_proto/src/proto_io.rs +++ b/enclone_proto/src/proto_io.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. //! //! 
Code for reading and writing custom proto files @@ -21,7 +21,6 @@ use crate::types::{Clonotype, EncloneOutputs}; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; -use failure::{format_err, Error}; use prost::Message; use std::fs::File; use std::io::{BufReader, BufWriter, Read, Write}; @@ -29,6 +28,21 @@ use std::path::Path; const BUFFER_CAPACITY: usize = 1_000_000; +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Expected to get {expected} bytes from the reader. Got {got} bytes!")] + Truncated { expected: usize, got: usize }, + + #[error(transparent)] + Io(#[from] std::io::Error), + + #[error(transparent)] + ProtoDecode(#[from] prost::DecodeError), + + #[error(transparent)] + ProtoEncode(#[from] prost::EncodeError), +} + /// A helper struct to write a length delimited protobuf encoded message into the inner `writer`. pub struct ProtoWriter { // Buffer space for storing the encoded message @@ -88,11 +102,10 @@ impl ProtoReader { .read_to_end(&mut self.decode_buffer)?; // If we did not get num_bytes bytes, return an error if self.decode_buffer.len() != num_bytes { - return Err(format_err!( - "Expected to get {} bytes from the reader. Got {} bytes!", - num_bytes, - self.decode_buffer.len() - )); + return Err(Error::Truncated { + expected: num_bytes, + got: self.decode_buffer.len(), + }); } Ok(()) } @@ -166,9 +179,11 @@ pub fn write_proto(enclone_outputs: EncloneOutputs, path: impl AsRef) -> R Ok(()) } -/// A read that mirrors the write above. It is possible to stream through the -/// clonotypes instead of loading everything into memory. -pub fn read_proto(path: impl AsRef) -> Result { +/// A read that mirrors the write above. The fields until the list of clonotypes are read here. +/// The clonotypes are assigned an empty vector. 
+pub fn read_proto_until_clonotypes( + path: impl AsRef, +) -> Result<(EncloneOutputs, ProtoReader), Error> { let reader = BufReader::new(File::open(path)?); let mut proto_reader = ProtoReader::from_reader(reader); @@ -182,19 +197,30 @@ pub fn read_proto(path: impl AsRef) -> Result { let donor_reference = proto_reader.read_and_decode()?; // Number of clonotypes let num_clonotypes: u32 = proto_reader.read_and_decode()?; + + Ok(( + EncloneOutputs { + version, + metadata, + universal_reference, + donor_reference, + num_clonotypes, + clonotypes: Vec::new(), + }, + proto_reader, + )) +} + +/// A read that mirrors the write above. It is possible to stream through the +/// clonotypes instead of loading everything into memory. +pub fn read_proto(path: impl AsRef) -> Result { + let (mut output, mut proto_reader) = read_proto_until_clonotypes(path)?; let mut clonotypes = Vec::new(); - for _ in 0..num_clonotypes { + for _ in 0..output.num_clonotypes { clonotypes.push(proto_reader.read_and_decode()?); } - - Ok(EncloneOutputs { - version, - metadata, - universal_reference, - donor_reference, - num_clonotypes, - clonotypes, - }) + output.clonotypes = clonotypes; + Ok(output) } /// Iterator over clonotypes @@ -233,7 +259,7 @@ impl Iterator for ClonotypeIter { if self.index < self.num_clonotypes { let cl = match self.proto_reader.read_and_decode() { Ok(c) => c, - Err(e) => panic!("Failed to decode clonotype due to {}", e), + Err(e) => panic!("Failed to decode clonotype due to {e}"), }; self.index += 1; Some(cl) diff --git a/enclone_proto/src/types.rs b/enclone_proto/src/types.rs index 64b6c3166..a7780bd0f 100644 --- a/enclone_proto/src/types.rs +++ b/enclone_proto/src/types.rs @@ -1,14 +1,16 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. //! //! This crate defines the data structure that would represent the clonotypes //! computed by enclone. //! 
-include!(concat!(env!("OUT_DIR"), "/enclone.types.rs")); +use vdj_ann::annotate::Region as AnnRegion; -impl From<&bio::alignment::Alignment> for Alignment { - fn from(al: &bio::alignment::Alignment) -> Self { +include!("./enclone.types.rs"); + +impl From<&bio_edit::alignment::Alignment> for Alignment { + fn from(al: &bio_edit::alignment::Alignment) -> Self { Alignment { ref_start: al.ystart as u32, cigar: al.cigar(false), @@ -16,5 +18,173 @@ impl From<&bio::alignment::Alignment> for Alignment { } } -// TODO: Donor names -// TODO: Aggr metadata structure +fn make_ann_region( + start: Option, + end: Option, + nt_seq: &[u8], + v_start: u32, + aa_seq: &[u8], +) -> Option { + if start.is_some() && end.is_some() && start.unwrap() >= end.unwrap() { + return None; + } + let v = v_start as usize; + match (start, end) { + (Some(s), Some(e)) => { + let s = s as usize; + let e = e as usize; + Some(AnnRegion { + start: s, + stop: e, + nt_seq: std::str::from_utf8(&nt_seq[s..e]).unwrap().to_string(), + aa_seq: std::str::from_utf8(&aa_seq[(s - v) / 3..(e - v) / 3]) + .unwrap() + .to_string(), + }) + } + _ => None, + } +} + +impl ClonotypeChain { + pub fn fwr1_region(&self) -> Option { + make_ann_region( + self.fwr1_start, + self.cdr1_start, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn cdr1_region(&self) -> Option { + make_ann_region( + self.cdr1_start, + self.fwr2_start, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn fwr2_region(&self) -> Option { + make_ann_region( + self.fwr2_start, + self.cdr2_start, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn cdr2_region(&self) -> Option { + make_ann_region( + self.cdr2_start, + self.fwr3_start, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn fwr3_region(&self) -> Option { + make_ann_region( + self.fwr3_start, + Some(self.cdr3_start), + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn fwr4_region(&self) 
-> Option { + make_ann_region( + Some(self.cdr3_end), + self.fwr4_end, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + + pub fn cdr3_nt(&self) -> &[u8] { + &self.nt_sequence[self.cdr3_start as usize..self.cdr3_end as usize] + } + pub fn cdr3_nt_string(&self) -> String { + std::str::from_utf8(self.cdr3_nt()).unwrap().to_string() + } + pub fn cdr3_aa(&self) -> &[u8] { + let start = (self.cdr3_start - self.v_start) / 3; + let end = (self.cdr3_end - self.v_start) / 3; + &self.aa_sequence[start as usize..end as usize] + } + pub fn cdr3_aa_string(&self) -> String { + std::str::from_utf8(self.cdr3_aa()).unwrap().to_string() + } +} + +impl ExactSubClonotypeChain { + pub fn fwr1_region(&self) -> Option { + make_ann_region( + self.fwr1_start, + self.cdr1_start, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn cdr1_region(&self) -> Option { + make_ann_region( + self.cdr1_start, + self.fwr2_start, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn fwr2_region(&self) -> Option { + make_ann_region( + self.fwr2_start, + self.cdr2_start, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn cdr2_region(&self) -> Option { + make_ann_region( + self.cdr2_start, + self.fwr3_start, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn fwr3_region(&self) -> Option { + make_ann_region( + self.fwr3_start, + Some(self.cdr3_start), + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn fwr4_region(&self) -> Option { + make_ann_region( + Some(self.cdr3_end), + self.fwr4_end, + &self.nt_sequence, + self.v_start, + &self.aa_sequence, + ) + } + pub fn cdr3_nt(&self) -> &[u8] { + &self.nt_sequence[self.cdr3_start as usize..self.cdr3_end as usize] + } + pub fn cdr3_nt_string(&self) -> String { + std::str::from_utf8(self.cdr3_nt()).unwrap().to_string() + } + pub fn cdr3_aa(&self) -> &[u8] { + let start = (self.cdr3_start - self.v_start) / 3; + let end = (self.cdr3_end 
- self.v_start) / 3; + &self.aa_sequence[start as usize..end as usize] + } + pub fn cdr3_aa_string(&self) -> String { + std::str::from_utf8(self.cdr3_aa()).unwrap().to_string() + } +} diff --git a/enclone_ranger/Cargo.toml b/enclone_ranger/Cargo.toml new file mode 100644 index 000000000..292872fa1 --- /dev/null +++ b/enclone_ranger/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "enclone_ranger" +version = "0.5.219" +authors = ["""David Jaffe , + Nigel Delaney , + Keri Dockter , + Jessica Hamel , + Lance Hepler , + Shaun Jackman , + Sreenath Krishnan , + Meryl Lewis , + Alvin Liang , + Patrick Marks , + Wyatt McDonnell """] +edition = "2021" +license-file = "LICENSE.txt" +publish = false + +# Please do not edit crate versions within this file. Instead edit the file master.toml +# in the root of the enclone repo. + +[dependencies] +enclone_core = { path = "../enclone_core" } +enclone_print = { path = "../enclone_print" } +enclone_stuff = { path = "../enclone_stuff" } +enclone = { path = "../enclone" } +enclone_args = { path = "../enclone_args" } +rayon = "1" +string_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vdj_ann = { version = "0.4", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } + +[target.'cfg(not(windows))'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" +features = ["conda"] +default-features = false + +[target.'cfg(windows)'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" +default-features = false diff --git a/enclone_ranger/LICENSE.txt b/enclone_ranger/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/enclone_ranger/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/enclone_ranger/src/lib.rs b/enclone_ranger/src/lib.rs new file mode 100644 index 000000000..71faf4b83 --- /dev/null +++ 
b/enclone_ranger/src/lib.rs @@ -0,0 +1,8 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. + +pub mod main_enclone; +pub mod stop; + +use std::sync::atomic::AtomicBool; + +pub static USING_PAGER: AtomicBool = AtomicBool::new(false); diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs new file mode 100644 index 000000000..b2417e352 --- /dev/null +++ b/enclone_ranger/src/main_enclone.rs @@ -0,0 +1,160 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// This is a special entry point for cellranger, where we know that the arguments that could +// be passed are limited. The code here is simplified and could be further simplified. + +use self::refx::{make_vdj_ref_data_core, RefData}; +use crate::stop::main_enclone_stop_ranger; +use crate::USING_PAGER; +use enclone::innate::species; +use enclone_args::load_gex::get_gex_info; +use enclone_args::proc_args::proc_args; +use enclone_core::defs::EncloneControl; +use enclone_core::enclone_structs::EncloneSetup; +use enclone_stuff::start::main_enclone_start; +use std::sync::atomic::Ordering::SeqCst; +use std::{ + collections::HashMap, + fs::File, + io::{BufRead, BufReader}, + time::Instant, +}; +use string_utils::TextUtils; +use vdj_ann::refx; + +pub fn main_enclone_ranger(args: &[String]) -> Result<(), String> { + const REQUIRED_ARGS: [&str; 9] = [ + "CELLRANGER", + "DONOR_REF_FILE", + "FORCE_EXTERNAL", + "MAX_CORES", + "NOPAGER", + "NOPRINT", + "PRE", + "PROTO", + "REF", + ]; + const ALLOWED_ARGS: [&str; 16] = [ + "BCR", + "META", + "NOPRETTY", + "PROTO_METADATA", + "TCR", + "TCRGD", + "GAMMA_DELTA", + "FATE_FILE", + "NUMI", + "NUMI_RATIO", + "NGRAPH_FILTER", + "NWEAK_CHAINS", + "NFOURSIE_KILL", + "NDOUBLET", + "NSIG", + "SPLIT_MAX_CHAINS", + ]; + let mut found = [false; REQUIRED_ARGS.len()]; + for arg in args.iter().skip(1) { + let mut arg = arg.as_str(); + if arg.contains('=') { + arg = arg.before("="); + } + let mut ok = false; + for (f, &x) in 
found.iter_mut().zip(REQUIRED_ARGS.iter()) { + if arg == x { + ok = true; + *f = true; + } + } + ok = ok || ALLOWED_ARGS.contains(&arg); + if !ok { + panic!("Illegal argument {arg} passed to main_enclone_ranger."); + } + } + for (found, arg) in found.into_iter().zip(REQUIRED_ARGS.into_iter()) { + if !found { + panic!("Required argument {arg} not passed to main_enclone_ranger"); + } + } + let setup = main_enclone_setup_ranger(args)?; + let inter = main_enclone_start(setup)?; + main_enclone_stop_ranger(inter) +} + +pub fn main_enclone_setup_ranger(args: &[String]) -> Result { + let tall = Instant::now(); + + // Set up stuff, read args, etc. + + let mut ctl = EncloneControl::default(); + ctl.gen_opt.cellranger = true; + ctl.gen_opt.internal_run = false; + for arg in args.iter().skip(1) { + if arg.starts_with("PRE=") { + ctl.gen_opt.pre.clear(); + ctl.gen_opt + .pre + .extend(arg.after("PRE=").split(',').map(str::to_string)); + } + } + ctl.start_time = Some(tall); + ctl.gen_opt.cpu_all_start = 0; + ctl.gen_opt.cpu_this_start = 0; + ctl.gen_opt.nopager = true; + ctl.pretty = true; + ctl.gen_opt.h5 = true; + USING_PAGER.store(false, SeqCst); + proc_args(&mut ctl, args)?; + + // Get gene expression and feature barcode counts. + + let gex_info = get_gex_info(&mut ctl)?; + + // Determine the reference sequence that is to be used. + + let mut refx = String::new(); + let ann = "contig_annotations.json"; + let fx = File::open(&ctl.gen_opt.refname); + let f = BufReader::new(fx.unwrap()); + for line in f.lines() { + let s = line.unwrap(); + refx += &s; + refx += "\n"; + } + + // Build reference data. 
+ + let refx2 = &refx; + let mut refdata = RefData::new(); + let ext_refx = String::new(); + let (mut is_tcr, mut is_bcr) = (true, true); + if ctl.gen_opt.tcr { + is_bcr = false; + } + if ctl.gen_opt.bcr { + is_tcr = false; + } + make_vdj_ref_data_core(&mut refdata, refx2, &ext_refx, is_tcr, is_bcr, None); + let to_ref_index: HashMap = refdata + .id + .iter() + .take(refdata.refs.len()) + .enumerate() + .map(|(i, &id)| (id as usize, i)) + .collect(); + + // Determine if the species is human or mouse or unknown. + + ctl.gen_opt.species = species(&refdata).to_string(); + + // Return. + + Ok(EncloneSetup { + ctl, + refdata, + ann: ann.to_string(), + gex_info, + tall: Some(tall), + is_bcr, + to_ref_index, + }) +} diff --git a/enclone_ranger/src/stop.rs b/enclone_ranger/src/stop.rs new file mode 100644 index 000000000..2a909990e --- /dev/null +++ b/enclone_ranger/src/stop.rs @@ -0,0 +1,94 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +use enclone_core::defs::ColInfo; +use enclone_core::enclone_structs::EncloneIntermediates; +use enclone_print::print_clonotypes::print_clonotypes; +use hdf5::Reader; +use rayon::prelude::*; +use std::collections::HashMap; + +pub fn main_enclone_stop_ranger(mut inter: EncloneIntermediates) -> Result<(), String> { + // Unpack inputs. + + let to_bc = &inter.ex.to_bc; + let exact_clonotypes = &inter.ex.exact_clonotypes; + let raw_joins = &inter.ex.raw_joins; + let info = &inter.ex.info; + let orbits = &inter.ex.orbits; + let vdj_cells = &inter.ex.vdj_cells; + let refdata = &inter.setup.refdata; + let drefs = &inter.ex.drefs; + let gex_info = &inter.setup.gex_info; + let sr = &inter.ex.sr; + let fate = &mut inter.ex.fate; + let ctl = &inter.setup.ctl; + let is_bcr = inter.ex.is_bcr; + let allele_data = &inter.ex.allele_data; + + // Load the GEX and FB data. This is quite horrible: the code and computation are duplicated + // verbatim in fcell.rs. 
+ + let mut d_readers = Vec::>::new(); + let mut ind_readers = Vec::>::new(); + for li in 0..ctl.origin_info.n() { + if !ctl.origin_info.gex_path[li].is_empty() && !gex_info.gex_matrices[li].initialized() { + let x = gex_info.h5_data[li].as_ref(); + d_readers.push(Some(x.unwrap().as_reader())); + ind_readers.push(Some(gex_info.h5_indices[li].as_ref().unwrap().as_reader())); + } else { + d_readers.push(None); + ind_readers.push(None); + } + } + let mut h5_data = Vec::<(usize, Vec, Vec)>::new(); + for li in 0..ctl.origin_info.n() { + h5_data.push((li, Vec::new(), Vec::new())); + } + h5_data.par_iter_mut().for_each(|res| { + let li = res.0; + if !ctl.origin_info.gex_path[li].is_empty() + && !gex_info.gex_matrices[li].initialized() + && ctl.gen_opt.h5_pre + { + res.1 = d_readers[li].as_ref().unwrap().read_raw().unwrap(); + res.2 = ind_readers[li].as_ref().unwrap().read_raw().unwrap(); + } + }); + + // Find and print clonotypes. (But we don't actually print them here.) + + let mut pics = Vec::::new(); + let mut exacts = Vec::>::new(); // ugly reuse of name + let mut in_center = Vec::::new(); + let mut rsi = Vec::::new(); // ditto + let mut out_datas = Vec::>>::new(); + let mut tests = Vec::::new(); + let mut controls = Vec::::new(); + print_clonotypes( + is_bcr, + to_bc, + sr, + refdata, + drefs, + ctl, + exact_clonotypes, + info, + orbits, + raw_joins, + gex_info, + vdj_cells, + &d_readers, + &ind_readers, + &h5_data, + &mut pics, + &mut exacts, + &mut in_center, + &mut rsi, + &mut out_datas, + &mut tests, + &mut controls, + fate, + allele_data, + )?; + Ok(()) +} diff --git a/enclone_stuff/Cargo.toml b/enclone_stuff/Cargo.toml new file mode 100644 index 000000000..5d0be705a --- /dev/null +++ b/enclone_stuff/Cargo.toml @@ -0,0 +1,53 @@ +[package] +name = "enclone_stuff" +version = "0.5.219" +authors = ["""David Jaffe , + Nigel Delaney , + Keri Dockter , + Jessica Hamel , + Lance Hepler , + Shaun Jackman , + Sreenath Krishnan , + Meryl Lewis , + Alvin Liang , + Patrick 
Marks , + Wyatt McDonnell """] +edition = "2021" +license-file = "LICENSE.txt" +publish = false + +# Please do not edit crate versions within this file. Instead edit the file master.toml +# in the root of the enclone repo. + +[dependencies] +amino = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +debruijn = "0.3" +enclone_core = { path = "../enclone_core" } +enclone_print = { path = "../enclone_print" } +enclone_proto = { path = "../enclone_proto" } +enclone = { path = "../enclone" } +enclone_args = { path = "../enclone_args" } +equiv = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +evalexpr = ">=7, <12" +io_utils = { version = "0.3", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +itertools.workspace = true +ndarray = "0.15" +qd = { git = "https://github.com/Barandis/qd" } +rayon = "1" +regex = { version = "1", default-features = false, features = ["std", "perf"] } +stats_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +string_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +tables = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vdj_ann = { version = "0.4", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vector_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } + +[target.'cfg(not(windows))'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" +features = ["conda"] +default-features = false + +[target.'cfg(windows)'.dependencies.hdf5] +git = "https://github.com/10XGenomics/hdf5-rust.git" +branch = "conda_nov2021" +default-features = false diff --git a/enclone_stuff/LICENSE.txt b/enclone_stuff/LICENSE.txt new file mode 120000 index 000000000..4ab43736a 
--- /dev/null +++ b/enclone_stuff/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/enclone_stuff/src/analyze_dref.rs b/enclone_stuff/src/analyze_dref.rs new file mode 100644 index 000000000..d8f0b87dd --- /dev/null +++ b/enclone_stuff/src/analyze_dref.rs @@ -0,0 +1,344 @@ +// Copyright (c) 2022 10X Genomics, Inc. All rights reserved. + +// Analyze donor reference and exit. +// +// This displays tables, which are somewhat mangled unless there are at least four donors. + +use debruijn::dna_string::DnaString; +use enclone_core::defs::EncloneControl; +use itertools::Itertools; +use std::cmp::min; +use std::env; +use string_utils::TextUtils; +use tables::print_tabular_vbox; +use vdj_ann::refx::{make_vdj_ref_data_core, RefData}; +use vector_utils::{bin_position, erase_if, next_diff1_2, next_diff1_3, unique_sort}; + +pub fn analyze_donor_ref( + refdata: &RefData, + ctl: &EncloneControl, + alt_refs: &[(usize, usize, DnaString, usize, bool)], +) { + // Analyze donor reference. + + if !ctl.gen_opt.external_ref.is_empty() { + if ctl.gen_opt.echo { + let args: Vec = env::args().collect(); + println!("{}", args.iter().format(" ")); + } + let mut erefdata = RefData::new(); + let f = std::fs::read_to_string(&ctl.gen_opt.external_ref).unwrap(); + make_vdj_ref_data_core(&mut erefdata, &f, "", true, true, None); + let mut refs = Vec::<(String, String, Vec)>::new(); // {(gene, allele, seq)} + + // Store the external (IMGT) alleles. + + for i in 0..erefdata.refs.len() { + if erefdata.is_v(i) { + let allele = erefdata.rheaders_orig[i].between("*", " "); + refs.push(( + erefdata.name[i].clone(), + allele.to_string(), + erefdata.refs[i].to_ascii_vec(), + )); + } + } + + // Store the enclone reference alleles. 
+ + for i in 0..refdata.refs.len() { + if refdata.is_v(i) { + refs.push(( + refdata.name[i].clone(), + format!("uref{i}"), + refdata.refs[i].to_ascii_vec(), + )); + } + } + + // Store the donor reference alleles; + + for (i, ar) in alt_refs.iter().enumerate() { + let donor = ar.0; + let ref_id = ar.1; + let name = &refdata.name[ref_id]; + let alt_seq = &ar.2; + refs.push(( + name.clone(), + format!("dref{i}_{donor}"), + alt_seq.to_ascii_vec(), + )); + } + + // Sort the alleles and group by gene. + + refs.sort(); + let mut i = 0; + while i < refs.len() { + let j = next_diff1_3(&refs, i as i32) as usize; + let gene = &refs[i].0; + let mut alleles = Vec::<(&[u8], &str)>::new(); // (sequence, name) + let mut have_alt = false; + for r in &refs[i..j] { + if r.1.starts_with("dref") { + have_alt = true; + } + alleles.push((r.2.as_ref(), r.1.as_str())); + } + + // Delete reference alleles having very low count relative to others. + + let mut to_delete = vec![false; alleles.len()]; + let mut mm = 0; + for ak in &alleles { + if ak.1.starts_with("dref") { + let ii = ak.1.between("dref", "_").force_usize(); + mm = std::cmp::max(mm, alt_refs[ii].3); + } + } + for (ak, d) in alleles.iter().zip(to_delete.iter_mut()) { + if ak.1.starts_with("dref") { + let ii = ak.1.between("dref", "_").force_usize(); + if alt_refs[ii].4 && alt_refs[ii].3 * 10 < mm { + *d = true; + } + } + } + erase_if(&mut alleles, &to_delete); + + // Proceed. + + if have_alt { + // Truncate alleles so that they all have the same length. + + let mut m = 1000000; + for ar in &alleles { + m = min(m, ar.0.len()); + } + for ar in alleles.iter_mut() { + if ar.0.len() > m { + ar.0 = &ar.0[..m]; + } + } + + // Now alleles = all the alleles for one gene, and there is at least one + // donor reference allele. Combine identical alleles, and reorder. 
+ + alleles.sort(); + let mut allelesg = Vec::<(Vec<&str>, &[u8])>::new(); + let mut r = 0; + while r < alleles.len() { + let s = next_diff1_2(&alleles, r as i32) as usize; + let mut names = Vec::<&str>::new(); + for at in &alleles[r..s] { + names.push(at.1); + } + allelesg.push((names, alleles[r].0)); + r = s; + } + + // Find the positions at which the alleles differ, and make variant matrix. + + let mut dp = Vec::::new(); + for p in 0..m { + let mut bases = Vec::::new(); + for ar in &allelesg { + bases.push(ar.1[p]); + } + unique_sort(&mut bases); + if bases.len() > 1 { + dp.push(p); + } + } + let mut dm = vec![vec![0_u8; dp.len()]; allelesg.len()]; + for (ar, dmr) in allelesg.iter().zip(dm.iter_mut()) { + for (dmu, &dpu) in dmr.iter_mut().zip(dp.iter()) { + *dmu = ar.1[dpu]; + } + } + + // Make donor matrix. + + let ndonors = ctl.origin_info.donor_list.len(); + let mut dd = vec![vec![false; ndonors]; allelesg.len()]; + for r in 0..allelesg.len() { + for k in 0..allelesg[r].0.len() { + let n = &allelesg[r].0[k]; + if n.starts_with("dref") { + let d = n.after("_").force_usize(); + dd[r][d] = true; + } + } + } + + // Make IMGT matrix. + + let mut imgts = Vec::<&str>::new(); + for ar in &allelesg { + for n in ar.0.iter() { + if !n.starts_with('d') && !n.starts_with('u') { + imgts.push(n); + } + } + } + unique_sort(&mut imgts); + let nimgt = imgts.len(); + let mut im = vec![vec![false; nimgt]; allelesg.len()]; + for ar in &allelesg { + for n in &ar.0 { + let p = bin_position(&imgts, n); + if p >= 0 { + im[r][p as usize] = true; + } + } + } + + // Make table, if it won't be too wide. 
+ + let mut log = String::new(); + if dp.len() <= 20 { + let mut rows = vec![ + { + let mut row = vec!["allele".to_string(), "donor".to_string()]; + for _ in 0..ndonors - 1 { + row.push("\\ext".to_string()); + } + if nimgt > 0 { + row.push("IMGT".to_string()); + for _ in 0..nimgt - 1 { + row.push("\\ext".to_string()); + } + } + if !dp.is_empty() { + row.push("position".to_string()); + for _ in 0..dp.len() - 1 { + row.push("\\ext".to_string()); + } + } + row + }, + { + let mut row = vec!["".to_string()]; + row.append(&mut vec![ + "\\hline".to_string(); + ndonors + nimgt + dp.len() + ]); + row + }, + { + let mut row = vec!["".to_string()]; + for d in 0..ndonors { + row.push(format!("{}", d + 1)); + } + for im in imgts { + row.push(im.to_string()); + } + for &u in &dp { + row.push(u.to_string()); + } + row + }, + vec!["\\hline".to_string(); ndonors + nimgt + dp.len() + 1], + ]; + for (r, alleleg) in allelesg.into_iter().enumerate() { + let mut row = Vec::::new(); + let allele_name = (b'A' + r as u8) as char; + let mut an = String::new(); + an.push(allele_name); + for n in alleleg.0.iter() { + if n.starts_with("uref") { + an.push('*'); + break; + } + } + row.push(an); + for d in 0..ndonors { + if dd[r][d] { + row.push("▓".to_string()); + } else { + row.push(" ".to_string()); + } + } + for k in 0..nimgt { + if im[r][k] { + row.push("▓".to_string()); + } else { + row.push(" ".to_string()); + } + } + for &u in &dp { + row.push((alleleg.1[u] as char).to_string()); + } + rows.push(row); + } + let mut just = b"l|".to_vec(); + just.extend(vec![b'l'; ndonors]); + if nimgt > 0 { + just.push(b'|'); + just.extend(vec![b'l'; nimgt]); + } + if !dp.is_empty() { + just.push(b'|'); + just.extend(vec![b'l'; dp.len()]); + } + print_tabular_vbox(&mut log, &rows, 1, &just, false, false); + } + + // Print. 
+ + println!("\nworking on {gene}, have {} seqs", alleles.len()); + println!( + "alleles differ at {} positions = {}", + dp.len(), + dp.iter().format(",") + ); + if !log.is_empty() { + log.truncate(log.len() - 1); + println!("\n{log}"); + println!("* = a universal reference\n"); + } + for m1 in 0..alleles.len() { + for m2 in m1 + 1..alleles.len() { + let a1 = &alleles[m1]; + let a2 = &alleles[m2]; + let mut diffs = 0; + for p in 0..min(a1.0.len(), a2.0.len()) { + if a1.0[p] != a2.0[p] { + diffs += 1; + } + } + println!( + "{} = {} vs {} = {} ==> {} diffs", + m1 + 1, + a1.1, + m2 + 1, + a2.1, + diffs + ); + } + } + for a1 in &alleles { + let mut best = 1_000_000; + if !a1.1.starts_with("dref") { + continue; + } + for a2 in &alleles { + if a2.1.starts_with("dref") { + continue; + } + let mut diffs = 0; + for p in 0..min(a1.0.len(), a2.0.len()) { + if a1.0[p] != a2.0[p] { + diffs += 1; + } + } + best = min(best, diffs); + } + println!("{} is distance {best} from a reference", a1.1); + } + } + i = j; + } + std::process::exit(0); + } +} diff --git a/enclone_stuff/src/disintegrate.rs b/enclone_stuff/src/disintegrate.rs new file mode 100644 index 000000000..ce0a24315 --- /dev/null +++ b/enclone_stuff/src/disintegrate.rs @@ -0,0 +1,124 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. +// +// If NWEAK_ONESIES is not specified, disintegrate certain onesie clonotypes into single cell +// clonotypes. This requires editing of exact_clonotypes, info, eq, join_info and raw_joins. 
+ +use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype}; +use equiv::EquivRel; +use std::collections::HashMap; +use std::time::Instant; +use vector_utils::unique_sort; + +pub fn disintegrate_onesies( + ctl: &EncloneControl, + disintegrated: &mut Vec, + eq: &mut EquivRel, + exact_clonotypes: &mut Vec, + info: &mut Vec, + join_info: &mut Vec<(usize, usize, bool, Vec)>, + raw_joins: &mut Vec<(i32, i32)>, +) { + if ctl.clono_filt_opt_def.weak_onesies { + let t = Instant::now(); + let ncells_total = exact_clonotypes + .iter() + .map(enclone_core::defs::ExactClonotype::ncells) + .sum(); + let mut to_info = HashMap::::new(); + let mut exacts2 = Vec::::new(); + for (i, inf) in info.iter().enumerate() { + to_info.insert(inf.clonotype_index, i); + } + let mut to_exact_new = Vec::>::new(); + for i in 0..exact_clonotypes.len() { + let ex = &exact_clonotypes[i]; + let mut enew = Vec::::new(); + if ex.share.len() == 1 + && ex.ncells() > 1 + && ex.ncells() * 1000 < ncells_total + && to_info.contains_key(&i) + && eq.orbit_size(to_info[&i] as i32) == 1 + { + for j in 0..ex.clones.len() { + enew.push(exacts2.len()); + exacts2.push(ExactClonotype { + share: ex.share.clone(), + clones: vec![ex.clones[j].clone()], + }); + disintegrated.push(true); + } + } else { + enew.push(exacts2.len()); + exacts2.push(exact_clonotypes[i].clone()); + disintegrated.push(false); + } + to_exact_new.push(enew); + } + let mut join_info2 = Vec::new(); + for ji in join_info.iter() { + let (u1, u2) = (ji.0, ji.1); + for v1 in to_exact_new[u1].iter() { + join_info2.reserve(to_exact_new[u2].len()); + for v2 in to_exact_new[u2].iter() { + let mut x = ji.clone(); + x.0 = *v1; + x.1 = *v2; + join_info2.push(x); + } + } + } + ctl.perf_stats(&t, "disintegrating onesies 1"); + let t = Instant::now(); + *join_info = join_info2; + *exact_clonotypes = exacts2; + let mut info2 = Vec::::new(); + let mut to_info2 = Vec::>::new(); + for i in 0..info.len() { + let j = info[i].clonotype_index; + let mut x = 
Vec::::new(); + for k in 0..to_exact_new[j].len() { + info[i].clonotype_index = to_exact_new[j][k]; + info[i].clonotype_id = to_exact_new[j][k]; + let mut origins = Vec::::new(); + let ex = &exact_clonotypes[info[i].clonotype_index]; + for i in 0..ex.clones.len() { + origins.push(ex.clones[i][0].dataset_index); + } + unique_sort(&mut origins); + info[i].origin = origins; + x.push(info2.len()); + info2.push(info[i].clone()); + } + to_info2.push(x); + } + ctl.perf_stats(&t, "disintegrating onesies 2"); + let t = Instant::now(); + *info = info2; + let mut raw_joins2 = Vec::<(i32, i32)>::new(); + for i in 0..raw_joins.len() { + let (j1, j2) = ( + &to_info2[raw_joins[i].0 as usize], + &to_info2[raw_joins[i].1 as usize], + ); + raw_joins2.push((j1[0] as i32, j2[0] as i32)); + } + *raw_joins = raw_joins2; + let mut reps = Vec::::new(); + eq.orbit_reps(&mut reps); + let mut eq2 = EquivRel::new(info.len() as i32); + for rep in reps { + let mut o = Vec::::new(); + eq.orbit(rep, &mut o); + if o.len() > 1 { + for (&o1, &o2) in o.iter().zip(o.iter().skip(1)) { + eq2.join( + to_info2[o1 as usize][0] as i32, + to_info2[o2 as usize][0] as i32, + ); + } + } + } + *eq = eq2; + ctl.perf_stats(&t, "disintegrating onesies 3"); + } +} diff --git a/enclone_stuff/src/doublets.rs b/enclone_stuff/src/doublets.rs new file mode 100644 index 000000000..10dd3671b --- /dev/null +++ b/enclone_stuff/src/doublets.rs @@ -0,0 +1,253 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Delete exact subclonotypes that appear to represent doublets. +// +// THIS FILTER DOESN'T PROPERLY TRACK FATE. 
+ +use enclone_core::{ + barcode_fate::BarcodeFate, + defs::{CloneInfo, EncloneControl, ExactClonotype}, +}; +use enclone_print::define_mat::{define_mat, setup_define_mat}; +use enclone_proto::types::DonorReferenceItem; +use itertools::Itertools; +use qd::Double; +use rayon::prelude::*; +use std::collections::HashMap; +use std::time::Instant; +use vdj_ann::refx::RefData; +use vector_utils::{bin_member, erase_if, next_diff, next_diff1_2, sort_sync2}; + +pub fn delete_doublets( + orbits: &mut Vec>, + is_bcr: bool, + to_bc: &HashMap<(usize, usize), Vec>, + sr: &[Vec], + ctl: &EncloneControl, + exact_clonotypes: &[ExactClonotype], + info: &[CloneInfo], + raw_joins: &[Vec], + refdata: &RefData, + dref: &[DonorReferenceItem], + fate: &mut [HashMap], +) { + if ctl.clono_filt_opt_def.doublet { + let t = Instant::now(); + // Define pure subclonotypes. To do this we break each clonotype up by chain signature. + // Note duplication of code with print_clonotypes.rs. And this is doing some + // superfluous compute. 
+ + let mut results = Vec::<(usize, Vec>)>::new(); + for i in 0..orbits.len() { + results.push((i, Vec::new())); + } + let mut pures = Vec::>::new(); + ctl.perf_stats(&t, "doublet filtering setup"); + let t = Instant::now(); + results.par_iter_mut().for_each(|res| { + let i = res.0; + let o = orbits[i].clone(); + let (od, mut exacts) = setup_define_mat(&o, info); + let mat = define_mat( + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + &exacts, + &od, + info, + raw_joins, + refdata, + dref, + ); + let nexacts = mat[0].len(); + let mut priority = Vec::>::new(); + for u in 0..nexacts { + let mut typex = vec![false; mat.len()]; + for col in 0..mat.len() { + if mat[col][u].is_some() { + typex[col] = true; + } + } + priority.push(typex.clone()); + } + sort_sync2(&mut priority, &mut exacts); + let mut j = 0; + while j < priority.len() { + let k = next_diff(&priority, j); + let mut p = Vec::::new(); + for &e in &exacts[j..k] { + p.push(e); + } + res.1.push(p); + j = k; + } + }); + for mut r in results { + pures.append(&mut r.1); + } + + // Define the number of cells in each pure subclonotype. + + let mut npure = vec![0; pures.len()]; + for j in 0..pures.len() { + for id in pures[j].iter() { + npure[j] += exact_clonotypes[*id].ncells(); + } + } + + // Find the pairs of pure subclonotypes that share identical CDR3 sequences. 
+ + ctl.perf_stats(&t, "doublet filtering main"); + let t = Instant::now(); + let mut shares = Vec::<(usize, usize)>::new(); + { + let mut content = Vec::<(&str, usize)>::new(); + for (j, pure) in pures.iter().enumerate() { + for &id in pure { + let ex = &exact_clonotypes[id]; + for s in &ex.share { + content.push((s.cdr3_dna.as_str(), j)); + } + } + } + content.par_sort(); + content.dedup(); + ctl.perf_stats(&t, "doublet filtering shares setup"); + let t = Instant::now(); + let mut j = 0; + while j < content.len() { + let k = next_diff1_2(&content, j as i32) as usize; + for l1 in j..k { + for l2 in l1 + 1..k { + shares.push((content[l1].1, content[l2].1)); + shares.push((content[l2].1, content[l1].1)); + } + } + j = k; + } + shares.par_sort(); + shares.dedup(); + ctl.perf_stats(&t, "doublet filtering shares"); + } + + // Find triples of pure subclonotypes in which the first two have no share, but both + // of the first two share with the third. + + let t = Instant::now(); + const MIN_MULT_DOUBLET: usize = 5; + let mut trips = Vec::<(usize, usize, usize)>::new(); + { + let mut us = Vec::::new(); + let mut vs = Vec::>::new(); + let mut j = 0; + while j < shares.len() { + // not using next_diff1_2 here because of i32 overflow issue + let mut k = j + 1; + loop { + if k == shares.len() || shares[k].0 != shares[j].0 { + break; + } + k += 1; + } + let u = shares[j].0; + us.push(u); + let mut x = Vec::::new(); + for v in &shares[j..k] { + let v = v.1; + if MIN_MULT_DOUBLET * npure[u] <= npure[v] { + x.push(v); + } + } + vs.push(x); + j = k; + } + let mut results = Vec::<(usize, Vec<(usize, usize, usize)>)>::new(); + for i in 0..us.len() { + results.push((i, Vec::new())); + } + results.par_iter_mut().for_each(|res| { + let i = res.0; + let u = us[i]; + let vs = &vs[i]; + for l1 in 0..vs.len() { + for l2 in l1 + 1..vs.len() { + let v1 = vs[l1]; + let v2 = vs[l2]; + if !bin_member(&shares, &(v1, v2)) { + res.1.push((v1, v2, u)); + } + } + } + }); + for mut r in results { + 
trips.append(&mut r.1); + } + } + ctl.perf_stats(&t, "doublet filtering trips"); + + // Delete some of the third members of the triples. + + let t = Instant::now(); + let mut to_delete = vec![false; exact_clonotypes.len()]; + for (v1, v2, v0) in trips { + let verbose = false; + if verbose { + println!("\n{v0}, {v1}, {v2}"); + println!("DELETING"); + for (u, m) in pures[v0].iter().enumerate() { + let ex = &exact_clonotypes[*m]; + let mut cdrs = Vec::::new(); + for k in 0..ex.share.len() { + cdrs.push(ex.share[k].cdr3_aa.clone()); + } + println!("[{}] {}", u + 1, cdrs.iter().format(",")); + } + println!("USING"); + for (u, m) in pures[v1].iter().enumerate() { + let ex = &exact_clonotypes[*m]; + let mut cdrs = Vec::::new(); + for k in 0..ex.share.len() { + cdrs.push(ex.share[k].cdr3_aa.clone()); + } + println!("[{}] {}", u + 1, cdrs.iter().format(",")); + } + println!("AND"); + for (u, m) in pures[v2].iter().enumerate() { + let ex = &exact_clonotypes[*m]; + let mut cdrs = Vec::::new(); + for k in 0..ex.share.len() { + cdrs.push(ex.share[k].cdr3_aa.clone()); + } + println!("[{}] {}", u + 1, cdrs.iter().format(",")); + } + } + for m in pures[v0].iter() { + to_delete[*m] = true; + } + } + let mut orbits2 = Vec::>::new(); + for o in orbits.iter() { + let mut o = o.clone(); + let mut del2 = vec![false; o.len()]; + for j in 0..o.len() { + let id = info[o[j] as usize].clonotype_index; + if to_delete[id] { + del2[j] = true; + let x: &CloneInfo = &info[o[j] as usize]; + let ex = &exact_clonotypes[x.clonotype_index]; + for k in 0..ex.ncells() { + let li = ex.clones[k][0].dataset_index; + let bc = &ex.clones[k][0].barcode; + fate[li].insert(bc.clone(), BarcodeFate::Doublet); + } + } + } + erase_if(&mut o, &del2); + orbits2.push(o); + } + *orbits = orbits2; + ctl.perf_stats(&t, "doublet filtering tail"); + } +} diff --git a/enclone_stuff/src/fcell.rs b/enclone_stuff/src/fcell.rs new file mode 100644 index 000000000..48a1a3e0c --- /dev/null +++ b/enclone_stuff/src/fcell.rs @@ -0,0 
+1,221 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Filter using constraints imposed by FCELL. + +use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype, GexInfo}; +use enclone_print::print_utils4::get_gex_matrix_entry; +use evalexpr::{ContextWithMutableVariables, HashMapContext}; +use hdf5::Reader; +use io_utils::{dir_list, path_exists}; +use ndarray::s; +use rayon::prelude::*; +use std::env; +use std::thread; +use std::time; +use std::time::Instant; +use vector_utils::{bin_position, erase_if}; + +pub fn filter_by_fcell( + ctl: &EncloneControl, + orbits: &mut Vec>, + info: &[CloneInfo], + exact_clonotypes: &mut [ExactClonotype], + gex_info: &GexInfo, +) -> Result<(), String> { + if !ctl.clono_filt_opt_def.fcell.is_empty() { + // Load the GEX and FB data. This is quite horrible: the code and computation are + // duplicated verbatim in stop.rs. + + let tdi = Instant::now(); + let mut d_readers = Vec::>::new(); + let mut ind_readers = Vec::>::new(); + for li in 0..ctl.origin_info.n() { + if !ctl.origin_info.gex_path[li].is_empty() && !gex_info.gex_matrices[li].initialized() + { + let x = gex_info.h5_data[li].as_ref(); + if x.is_none() { + // THIS FAILS SPORADICALLY, OBSERVED MULTIPLE TIMES, + // CAUSING PUSH TO D_READERS BELOW TO FAIL. + eprintln!("\nWeird, gex_info.h5_data[li].as_ref() is None."); + eprintln!("Path = {}.", ctl.origin_info.gex_path[li]); + let current = env::current_dir().unwrap(); + println!( + "The current working directory is {}", + current.canonicalize().unwrap().display() + ); + if path_exists(&ctl.origin_info.gex_path[li]) { + eprintln!( + "The directory that is supposed to contain \ + raw_feature_bc_matrix.h5 exists." + ); + let list = dir_list(&ctl.origin_info.gex_path[li]); + eprintln!( + "This directory is {} and its contents are:", + ctl.origin_info.gex_path[li] + ); + for (i, li) in list.into_iter().enumerate() { + eprintln!("{}. 
{li}", i + 1); + } + let h5_path = + format!("{}/raw_feature_bc_matrix.h5", ctl.origin_info.gex_path[li]); + eprintln!("H5 path = {h5_path}."); + if !path_exists(&h5_path) { + let mut msg = format!("H5 path {h5_path} does not exist.\n"); + msg += "Retrying a few times to see if it appears.\n"; + for _ in 0..5 { + msg += "Sleeping for 0.1 seconds."; + thread::sleep(time::Duration::from_millis(100)); + if !path_exists(&h5_path) { + msg += "Now h5 path does not exist.\n"; + } else { + msg += "Now h5 path exists.\n"; + break; + } + } + msg += "Aborting.\n"; + return Err(msg); + } else { + println!("h5 path exists."); + } + } else { + println!("Path exists."); + } + println!(); + } + d_readers.push(Some(x.unwrap().as_reader())); + ind_readers.push(Some(gex_info.h5_indices[li].as_ref().unwrap().as_reader())); + } else { + d_readers.push(None); + ind_readers.push(None); + } + } + let mut h5_data = Vec::<(usize, Vec, Vec)>::new(); + for li in 0..ctl.origin_info.n() { + h5_data.push((li, Vec::new(), Vec::new())); + } + h5_data.par_iter_mut().for_each(|res| { + let li = res.0; + if !ctl.origin_info.gex_path[li].is_empty() + && !gex_info.gex_matrices[li].initialized() + && ctl.gen_opt.h5_pre + { + res.1 = d_readers[li].as_ref().unwrap().read_raw().unwrap(); + res.2 = ind_readers[li].as_ref().unwrap().read_raw().unwrap(); + } + }); + ctl.perf_stats(&tdi, "setting up readers, zero"); + + // Proceed. 
+ + let mut orbits2 = Vec::>::new(); + for o in orbits.iter() { + let mut o = o.clone(); + let mut to_deletex = vec![false; o.len()]; + for j in 0..o.len() { + let x: &CloneInfo = &info[o[j] as usize]; + let ex = &mut exact_clonotypes[x.clonotype_index]; + let mut to_delete = vec![false; ex.ncells()]; + let mut d_all = vec![Vec::::new(); ex.clones.len()]; + let mut ind_all = vec![Vec::::new(); ex.clones.len()]; + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = ex.clones[l][0].barcode.clone(); + if !gex_info.gex_barcodes.is_empty() { + let p = bin_position(&gex_info.gex_barcodes[li], &bc); + if p >= 0 && !gex_info.gex_matrices[li].initialized() { + let z1 = gex_info.h5_indptr[li][p as usize] as usize; + let z2 = gex_info.h5_indptr[li][p as usize + 1] as usize; // p+1 OK? + if ctl.gen_opt.h5_pre { + d_all[l] = h5_data[li].1[z1..z2].to_vec(); + ind_all[l] = h5_data[li].2[z1..z2].to_vec(); + } else { + d_all[l] = d_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + ind_all[l] = ind_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + } + } + } + } + for (l, (clone, d)) in ex + .clones + .iter() + .take(ex.ncells()) + .zip(to_delete.iter_mut()) + .enumerate() + { + let li = clone[0].dataset_index; + let bc = &clone[0].barcode; + let mut keep = true; + for x in ctl.clono_filt_opt_def.fcell.iter() { + let alt = &ctl.origin_info.alt_bc_fields[li]; + let vars = x.iter_variable_identifiers().collect::>(); + let mut vals = Vec::::new(); + for &var in &vars { + let mut val = String::new(); + let mut found = false; + 'uloop: for au in alt { + if au.0 == var { + if let Some(v) = au.1.get(bc) { + val = v.clone(); + found = true; + break 'uloop; + } + } + } + if !found { + if let Some(&fid) = gex_info.feature_id[li].get(&var.to_string()) { + let p = bin_position(&gex_info.gex_barcodes[li], bc); + if p >= 0 { + let raw_count = get_gex_matrix_entry( + ctl, gex_info, fid, 
&d_all, &ind_all, li, l, + p as usize, var, + ); + val = format!("{raw_count:.2}"); + } + } + } + vals.push(val); + } + let mut c = HashMapContext::new(); + for (&var, val) in vars.iter().zip(vals.iter()) { + if let Ok(val) = val.parse::() { + c.set_value(var.into(), evalexpr::Value::from(val)).unwrap(); + } else if let Ok(val) = val.parse::() { + c.set_value(var.into(), evalexpr::Value::from(val)).unwrap(); + } else { + c.set_value(var.into(), val.clone().into()).unwrap(); + } + } + let res = x.eval_with_context(&c); + let ok = res == Ok(evalexpr::Value::from(true)); + if !ok { + keep = false; + } + } + if !keep { + *d = true; + } + } + erase_if(&mut ex.clones, &to_delete); + if ex.ncells() == 0 { + to_deletex[j] = true; + } + } + erase_if(&mut o, &to_deletex); + if !o.is_empty() { + orbits2.push(o.clone()); + } + } + *orbits = orbits2; + } + Ok(()) +} diff --git a/enclone_stuff/src/filter_umi.rs b/enclone_stuff/src/filter_umi.rs new file mode 100644 index 000000000..f4a85568d --- /dev/null +++ b/enclone_stuff/src/filter_umi.rs @@ -0,0 +1,322 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Filter B cells based on UMI counts. + +use enclone_core::{ + barcode_fate::BarcodeFate, + defs::{CloneInfo, EncloneControl, ExactClonotype}, +}; +use equiv::EquivRel; +use stats_utils::binomial_sum; +use std::cmp::max; +use std::collections::HashMap; +use vector_utils::{erase_if, next_diff1_5, reverse_sort, VecUtils}; + +pub fn filter_umi( + eq: &EquivRel, + orbits: &mut Vec>, + ctl: &EncloneControl, + exact_clonotypes: &mut [ExactClonotype], + info: &[CloneInfo], + fate: &mut [HashMap], +) { + let (mut is_tcr, mut is_bcr) = (true, true); + if ctl.gen_opt.tcr { + is_bcr = false; + } + if ctl.gen_opt.bcr { + is_tcr = false; + } + + // For B cells, filter based on UMI counts. More details in heuristics.html. + // Find all clonotypes having one cell which has two chains, + // one heavy and one light. Get the sum of the chain UMI counts for this cell. 
+ // + // For each cell, let umish be the umi count for its heavy chain having the most umis, and + // similarly define umisl. Let umitot = umish + umisl. + // + // If every cell in a clonotype would have been deleted, first find the exact subclonotype for + // which the sum of its umitot values is greatest, and then in it, find the cell having + // highest umitot value. Protect this cell, so long as it has at least two chains. + + *orbits = Vec::>::new(); + let mut reps = Vec::::new(); + eq.orbit_reps(&mut reps); + if is_tcr { + for rep in reps { + let mut o = Vec::::new(); + eq.orbit(rep, &mut o); + orbits.push(o); + } + } else { + let mut umis = vec![Vec::::new(); ctl.origin_info.n()]; + for &rep in &reps { + let mut o = Vec::::new(); + eq.orbit(rep, &mut o); + if o.solo() { + let x: &CloneInfo = &info[o[0] as usize]; + let ex = &exact_clonotypes[x.clonotype_index]; + if ex.ncells() == 1 && ex.share.duo() && ex.share[0].left != ex.share[1].left { + umis[ex.clones[0][0].dataset_index] + .push(ex.clones[0][0].umi_count + ex.clones[0][1].umi_count); + } + } + } + let mut nu = vec![0; ctl.origin_info.n()]; + let mut umin = vec![0.0; ctl.origin_info.n()]; + for l in 0..ctl.origin_info.n() { + umis[l].sort_unstable(); + nu[l] = umis[l].len(); + if ctl.gen_opt.baseline { + println!( + "\n{} umi counts for dataset {} = {}", + nu[l], + l + 1, + ctl.origin_info.dataset_id[l] + ); + } + if nu[l] > 0 { + let n10 = umis[l][nu[l] / 10] as f64; + let n50 = umis[l][nu[l] / 2] as f64; + umin[l] = n10.min(n50 - (4.0 * n50.sqrt())); + } + if nu[l] > 0 && ctl.gen_opt.baseline { + println!("1% ==> {}", umis[l][umis[l].len() / 100]); + println!("2% ==> {}", umis[l][umis[l].len() / 50]); + println!("5% ==> {}", umis[l][umis[l].len() / 20]); + println!("10% ==> {}", umis[l][umis[l].len() / 10]); + println!("20% ==> {}", umis[l][umis[l].len() / 5]); + println!("50% ==> {}", umis[l][umis[l].len() / 2]); + println!("umin = {:.2}", umin[l]); + } + } + // if ctl.clono_filt_opt_def.umi_filt 
|| ctl.clono_filt_opt_def.umi_filt_mark { + const MIN_BASELINE_CELLS: usize = 20; + for rep in reps { + let mut o = Vec::::new(); + eq.orbit(rep, &mut o); + let mut ncells = 0; + for j in 0..o.len() { + let x: &CloneInfo = &info[o[j] as usize]; + let ex = &exact_clonotypes[x.clonotype_index]; + ncells += ex.ncells(); + } + let mut nbads = 0; + if ncells >= 2 { + let mut to_deletex = vec![false; o.len()]; + let (mut best_ex, mut best_ex_sum) = (0, 0); + let (mut best_cell, mut best_cell_count) = (0, 0); + let mut baselined = true; + let mut protected = false; + for pass in 1..=3 { + if pass == 2 { + if nbads == 0 { + protected = true; + } else { + let p = 0.1; + let bound = 0.01; + + // Find probability of observing nbads or more events of probability + // p in a sample of size ncells, and if that is at least bound, + // don't delete any cells (except onesies). + + if binomial_sum(ncells, ncells - nbads, 1.0 - p) >= bound { + protected = true; + } + } + } + for j in 0..o.len() { + let x: &CloneInfo = &info[o[j] as usize]; + let ex = &mut exact_clonotypes[x.clonotype_index]; + let mut to_delete = vec![false; ex.ncells()]; + let mut ex_sum = 0; + for (k, (clone, d)) in + ex.clones.iter_mut().zip(to_delete.iter_mut()).enumerate() + { + let li = clone[0].dataset_index; + if nu[li] >= MIN_BASELINE_CELLS { + let (mut umish, mut umisl) = (0, 0); + for (s, c) in ex.share.iter().zip(clone.iter()) { + if s.left { + umish = max(umish, c.umi_count); + } else { + umisl = max(umish, c.umi_count); + } + } + let umitot = umish + umisl; + if pass == 1 { + ex_sum += umitot; + } + if pass == 2 + && j == best_ex + && umitot > best_cell_count + && ex.share.len() > 1 + { + best_cell = k; + best_cell_count = umitot; + } + if (umitot as f64) < umin[li] { + if pass == 1 { + nbads += 1; + } else if pass == 3 && protected { + if ex.share.len() == 1 { + *d = true; + if ctl.clono_filt_opt_def.umi_filt_mark { + clone[0].marked = true; + } + } + } else if pass == 3 + && (!baselined + || (best_ex, 
best_cell) != (j, k) + || ex.share.len() == 1) + { + *d = true; + if ctl.clono_filt_opt_def.umi_filt_mark { + clone[0].marked = true; + } + } + } + } else { + baselined = false; + } + } + if pass == 1 && ex_sum > best_ex_sum { + best_ex = j; + best_ex_sum = ex_sum; + } + if pass == 3 { + for i in 0..ex.clones.len() { + if to_delete[i] { + fate[ex.clones[i][0].dataset_index] + .insert(ex.clones[i][0].barcode.clone(), BarcodeFate::Umi); + } + } + if ctl.clono_filt_opt_def.umi_filt { + erase_if(&mut ex.clones, &to_delete); + } + } + } + } + for j in 0..o.len() { + let x: &CloneInfo = &info[o[j] as usize]; + let ex = &mut exact_clonotypes[x.clonotype_index]; + if ex.ncells() == 0 { + to_deletex[j] = true; + } + } + erase_if(&mut o, &to_deletex); + } + if !o.is_empty() { + orbits.push(o.clone()); + } + } + // } + } + + // Filter B cells based on UMI count ratios. This assumes V..J identity to filter. + + if is_bcr { + const MIN_UMI_RATIO: usize = 500; + let mut orbits2 = Vec::>::new(); + 'orbit: for o in orbits.iter() { + let mut ncells = 0; + let mut o = o.clone(); + for j in 0..o.len() { + let x: &CloneInfo = &info[o[j] as usize]; + let ex = &exact_clonotypes[x.clonotype_index]; + ncells += ex.ncells(); + } + let mut nbads = 0; + for pass in 1..=2 { + if pass == 2 { + if nbads == 0 { + orbits2.push(o.clone()); + continue 'orbit; + } else { + let p = 0.1; + let bound = 0.01; + + // Find probability of observing nbads or more events of probability + // p in a sample of size ncells, and if that is at least bound, + // don't delete any cells. 
+ + if binomial_sum(ncells, ncells - nbads, 1.0 - p) >= bound { + orbits2.push(o.clone()); + continue 'orbit; + } + } + } + let mut to_deletex = vec![false; o.len()]; + let mut z = Vec::<(Vec, usize, usize, usize, usize)>::new(); + let mut to_delete = Vec::>::new(); + for j in 0..o.len() { + let x: &CloneInfo = &info[o[j] as usize]; + let ex = &mut exact_clonotypes[x.clonotype_index]; + to_delete.push(vec![false; ex.ncells()]); + for k in 0..ex.ncells() { + let mut tot = 0; + for m in 0..ex.clones[k].len() { + tot += ex.clones[k][m].umi_count; + } + for m in 0..ex.clones[k].len() { + z.push(( + ex.share[m].seq.clone(), + ex.clones[k][m].umi_count, + j, + k, + tot, + )); + } + } + } + reverse_sort(&mut z); + let mut j = 0; + while j < z.len() { + let k = next_diff1_5(&z, j as i32) as usize; + for l in j..k { + if z[j].1 >= MIN_UMI_RATIO * z[l].4 { + to_delete[z[l].2][z[l].3] = true; + } + } + j = k; + } + for j in 0..o.len() { + let x: &CloneInfo = &info[o[j] as usize]; + let ex = &mut exact_clonotypes[x.clonotype_index]; + for l in 0..ex.ncells() { + if to_delete[j][l] { + if ctl.clono_filt_opt_def.umi_ratio_filt_mark { + ex.clones[l][0].marked = true; + } + nbads += 1; + } + } + + if pass == 2 { + for i in 0..ex.clones.len() { + if to_delete[j][i] { + fate[ex.clones[i][0].dataset_index] + .insert(ex.clones[i][0].barcode.clone(), BarcodeFate::UmiRatio); + } + } + if ctl.clono_filt_opt_def.umi_ratio_filt { + erase_if(&mut ex.clones, &to_delete[j]); + if ex.ncells() == 0 { + to_deletex[j] = true; + } + } + } + } + if pass == 2 { + if ctl.clono_filt_opt_def.umi_ratio_filt { + erase_if(&mut o, &to_deletex); + } + if !o.is_empty() { + orbits2.push(o.clone()); + } + } + } + } + *orbits = orbits2; + } +} diff --git a/enclone_stuff/src/flag_defective.rs b/enclone_stuff/src/flag_defective.rs new file mode 100644 index 000000000..78b5ff066 --- /dev/null +++ b/enclone_stuff/src/flag_defective.rs @@ -0,0 +1,255 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. 
+ +// Flag defective reference sequences. + +use amino::aa_seq; +use enclone_core::defs::EncloneControl; +use io_utils::fwriteln; +use itertools::Itertools; +use std::io::Write; +use string_utils::{strme, TextUtils}; +use vdj_ann::refx::RefData; +use vdj_ann::vdj_features::{cdr2_start, cdr3_score, fr3_start, score4, score_fwr3}; +use vector_utils::unique_sort; + +pub fn flag_defective( + ctl: &EncloneControl, + refdata: &RefData, + log: &mut Vec, + broken: &mut Vec, +) { + *log = Vec::::new(); + let mut count = 0; + *broken = vec![false; refdata.refs.len()]; + + // Compute freqs. + + let mut freqs = Vec::>>::new(); + const F: &str = include_str!["fwr3_freqs.data"]; + let mut x = Vec::<(u32, u8)>::new(); + let mut y = Vec::>::new(); + let mut rlast = 0; + let mut ilast = 0; + for line in F.lines() { + let fields = line.split(',').collect::>(); + let r = fields[0].force_usize(); + let i = fields[1].force_usize(); + let count = fields[3].force_usize() as u32; + let res = fields[4].as_bytes()[0]; + if r != rlast || i != ilast { + y.push(x.clone()); + x.clear(); + if r != rlast { + freqs.push(y.clone()); + y.clear(); + } + } + x.push((count, res)); + rlast = r; + ilast = i; + } + y.push(x); + freqs.push(y); + + // Study the reference. + + for (i, ((((&rtype, refs), name), rh), broken)) in refdata + .rtype + .iter() + .zip(refdata.refs.iter()) + .zip(refdata.name.iter()) + .zip(refdata.rheaders_orig.iter()) + .zip(broken.iter_mut()) + .enumerate() + { + // Determine chain type and exclude those other than IGH, IGK, IGL, TRA and TRB. + + let chain_type; + if rtype == 0 { + chain_type = "IGH"; + } else if rtype == 1 { + chain_type = "IGK"; + } else if rtype == 2 { + chain_type = "IGL"; + } else if rtype == 3 { + chain_type = "TRA"; + } else if rtype == 4 { + chain_type = "TRB"; + } else { + continue; + } + + // Look for problems. + + if refdata.is_c(i) { + // This is very ugly. 
We are exempting mouse IGHG2B because is is in our current + // reference but has an extra base at the beginning. See also comments below at + // TRBV21-1. Also, we're not actually checking for mouse. + + if name == "IGHG2B" { + continue; + } + + // Continue. + + let seq = refs.to_ascii_vec(); + let aa0 = aa_seq(&seq, 0); + let aa2 = aa_seq(&seq, 2); + if aa2.contains(&b'*') && !aa0.contains(&b'*') { + count += 1; + *broken = true; + fwriteln!( + log, + "{}. The following C segment reference sequence appears to have \ + an extra base at its beginning:\n", + count + ); + fwriteln!(log, ">{}\n{}\n", rh, strme(&seq)); + } + } else if refdata.is_v(i) { + // This is very ugly. We are exempting human TRBV21-1 because it is in our current + // reference (twice), but has multiple stop codons. It should be deleted from the + // reference, and then we should remove this test. But probably in the future so as + // not to inconvenience users. + + if ctl.gen_opt.species != "mouse" && name == "TRBV21-1" { + *broken = true; + continue; + } + + // This is very ugly. We are exempting human IGHV1-12 because it is in our current + // human reference, but is only 60 amino acids long. Also we're not checking for mouse. + + if name == "IGHV1-12" { + *broken = true; + continue; + } + + // Ugly. Frameshifted. Also this is mouse and not checking for that. + + if name == "TRAV23" { + *broken = true; + continue; + } + + // Ugly. Truncated on right. Human. + + if name == "IGLV5-48" { + *broken = true; + continue; + } + + // Test for broken. 
+ + let seq = refs.to_ascii_vec(); + let aa = aa_seq(&seq, 0); + let mut reasons = Vec::<&'static str>::new(); + if !aa.starts_with(b"M") { + reasons.push("does not begin with a start codon"); + } + let stops = aa.iter().filter(|&n| *n == b'*').count(); + if stops > 1 { + reasons.push("has more than one stop codon"); + } + if aa.len() < 100 { + reasons.push("appears truncated (has less than 100 amino acids)"); + } else if aa.len() < 105 && chain_type == "IGH" { + reasons.push("appears truncated (has less than 105 amino acids)"); + } + if aa.len() >= 30 { + let mut aap = aa.clone(); + aap.push(b'C'); + if cdr3_score(&aap, chain_type, false) > 4 + cdr3_score(&aa, chain_type, false) { + reasons.push("appears to need a C to be appended to its right end"); + } + } + if stops > 0 { + let mut fixable = false; + const TRIM: usize = 10; + for (j, &aj) in aa[..aa.len() - TRIM].iter().enumerate() { + if aj == b'*' { + let mut seqx = seq.clone(); + for _ in 1..=2 { + let _ = seqx.remove(3 * j); + let aax = aa_seq(&seqx, 0); + if !aax.contains(&b'*') { + fixable = true; + } + } + } + } + if fixable { + reasons.push("appears to be frameshifted"); + } + } + if aa.len() >= 31 { + for del in 1..=2 { + let aad = aa_seq(&seq, del); + if cdr3_score(&aad, chain_type, false) > 4 + cdr3_score(&aa, chain_type, false) + { + reasons.push("appears to be frameshifted"); + } + } + } + if reasons.is_empty() { + if let Some(cs2) = cdr2_start(&aa, chain_type, false) { + let fr3 = fr3_start(&aa, chain_type, false).unwrap(); + if cs2 > fr3 { + reasons.push( + "appears to be defective, because our computed \ + CDR2 start exceeds our computed FWR3 start", + ); + } + } + } + if reasons.is_empty() && aa.len() >= 31 { + // Pretty crappy frameshift test. One should see high aa and dna similarity + // to other seqs if shifted. Or use more aas. 
+ let score = cdr3_score(&aa, chain_type, false); + let mut frameshift = false; + for del in 1..=2 { + let aad = aa_seq(&seq, del); + if score <= 6 && cdr3_score(&aad, chain_type, false) >= 3 + score { + frameshift = true; + } + } + if frameshift { + reasons.push("appears to be frameshifted"); + } + } + if reasons.is_empty() { + let r; + if chain_type == "IGH" { + r = 0; + } else if chain_type == "IGK" { + r = 1; + } else if chain_type == "IGL" { + r = 2; + } else if chain_type == "TRA" { + r = 3; + } else { + assert_eq!(chain_type, "TRB"); + r = 4; + } + let score = score_fwr3(&aa, r, &freqs); + if score < 8.0 && score4(&aa, r) < 5 { + reasons.push("appears to be frameshifted or truncated"); + } + } + + // Report results. + + unique_sort(&mut reasons); + if !reasons.is_empty() { + let msg = format!( + "The following V segment reference sequence {}", + reasons.iter().format(", and ") + ); + count += 1; + *broken = true; + fwriteln!(log, "{}. {}:\n", count, msg); + fwriteln!(log, ">{}\n{}\n", rh, strme(&seq)); + } + } + } +} diff --git a/enclone_stuff/src/fwr3_freqs.data b/enclone_stuff/src/fwr3_freqs.data new file mode 100644 index 000000000..a7382a410 --- /dev/null +++ b/enclone_stuff/src/fwr3_freqs.data @@ -0,0 +1,1670 @@ +0,0,0,752,C +0,0,1,4,S +0,0,2,2,Y +0,0,3,2,V +0,0,4,2,Q +0,0,5,2,I +0,0,6,1,W +0,0,7,1,M +0,0,8,1,G +0,0,9,1,F +0,0,10,1,E +0,0,11,1,A +0,1,0,638,Y +0,1,1,113,F +0,1,2,6,L +0,1,3,4,S +0,1,4,4,H +0,1,5,2,N +0,1,6,2,C +0,1,7,1,M +0,2,0,756,Y +0,2,1,5,H +0,2,2,2,I +0,2,3,1,S +0,2,4,1,N +0,2,5,1,L +0,2,6,1,K +0,2,7,1,F +0,2,8,1,D +0,2,9,1,C +0,3,0,361,V +0,3,1,200,T +0,3,2,76,M +0,3,3,75,I +0,3,4,32,L +0,3,5,19,R +0,3,6,4,A +0,3,7,1,Y +0,3,8,1,S +0,3,9,1,C +0,4,0,716,A +0,4,1,30,G +0,4,2,9,T +0,4,3,3,V +0,4,4,2,S +0,4,5,2,Q +0,4,6,2,M +0,4,7,2,E +0,4,8,2,D +0,4,9,1,P +0,4,10,1,F +0,5,0,614,T +0,5,1,104,S +0,5,2,29,M +0,5,3,7,A +0,5,4,6,I +0,5,5,2,P +0,5,6,2,H +0,5,7,1,R +0,5,8,1,N +0,5,9,1,L +0,5,10,1,K +0,5,11,1,F +0,5,12,1,C +0,6,0,744,D 
+0,6,1,8,G +0,6,2,4,T +0,6,3,3,N +0,6,4,3,I +0,6,5,2,Y +0,6,6,2,K +0,6,7,2,A +0,6,8,1,L +0,6,9,1,E +0,7,0,543,E +0,7,1,104,A +0,7,2,55,D +0,7,3,35,V +0,7,4,14,S +0,7,5,5,K +0,7,6,5,G +0,7,7,2,T +0,7,8,2,Q +0,7,9,2,N +0,7,10,1,P +0,7,11,1,L +0,7,12,1,C +0,8,0,252,A +0,8,1,222,S +0,8,2,171,T +0,8,3,62,P +0,8,4,19,N +0,8,5,10,D +0,8,6,9,G +0,8,7,9,E +0,8,8,7,I +0,8,9,4,V +0,8,10,2,L +0,8,11,2,K +0,8,12,1,Y +0,9,0,268,T +0,9,1,263,R +0,9,2,100,K +0,9,3,64,Q +0,9,4,52,D +0,9,5,4,I +0,9,6,3,N +0,9,7,3,E +0,9,8,2,S +0,9,9,2,P +0,9,10,2,M +0,9,11,2,L +0,9,12,2,G +0,9,13,2,A +0,9,14,1,F +0,10,0,580,L +0,10,1,138,V +0,10,2,35,M +0,10,3,5,R +0,10,4,3,P +0,10,5,2,S +0,10,6,2,Q +0,10,7,2,I +0,10,8,1,T +0,10,9,1,H +0,10,10,1,A +0,11,0,613,S +0,11,1,91,N +0,11,2,25,R +0,11,3,17,T +0,11,4,5,A +0,11,5,4,K +0,11,6,4,G +0,11,7,3,Q +0,11,8,3,L +0,11,9,1,V +0,11,10,1,I +0,11,11,1,H +0,11,12,1,D +0,11,13,1,C +0,12,0,319,N +0,12,1,303,S +0,12,2,88,T +0,12,3,22,D +0,12,4,14,R +0,12,5,5,G +0,12,6,4,I +0,12,7,3,Y +0,12,8,3,K +0,12,9,3,A +0,12,10,2,P +0,12,11,2,H +0,12,12,1,M +0,12,13,1,L +0,13,0,423,M +0,13,1,265,L +0,13,2,41,I +0,13,3,15,V +0,13,4,7,W +0,13,5,4,T +0,13,6,4,K +0,13,7,3,F +0,13,8,2,S +0,13,9,2,P +0,13,10,2,D +0,13,11,1,Q +0,13,12,1,N +0,14,0,429,Q +0,14,1,173,K +0,14,2,85,E +0,14,3,42,T +0,14,4,15,S +0,14,5,7,D +0,14,6,5,R +0,14,7,5,L +0,14,8,3,V +0,14,9,2,N +0,14,10,2,H +0,14,11,1,I +0,14,12,1,F +0,15,0,597,L +0,15,1,144,M +0,15,2,11,F +0,15,3,6,I +0,15,4,3,V +0,15,5,2,R +0,15,6,2,Q +0,15,7,2,N +0,15,8,1,T +0,15,9,1,K +0,15,10,1,H +0,16,0,457,Y +0,16,1,115,F +0,16,2,99,S +0,16,3,29,V +0,16,4,27,T +0,16,5,12,D +0,16,6,9,L +0,16,7,6,C +0,16,8,4,H +0,16,9,3,P +0,16,10,3,N +0,16,11,3,G +0,16,12,1,M +0,16,13,1,K +0,16,14,1,I +0,17,0,228,L +0,17,1,224,V +0,17,2,201,A +0,17,3,89,F +0,17,4,5,I +0,17,5,4,S +0,17,6,4,Q +0,17,7,3,Y +0,17,8,3,T +0,17,9,3,R +0,17,10,2,W +0,17,11,2,G +0,17,12,1,M +0,17,13,1,K +0,18,0,405,T +0,18,1,228,Q +0,18,2,59,S +0,18,3,26,I +0,18,4,22,M +0,18,5,6,N 
+0,18,6,6,L +0,18,7,4,R +0,18,8,4,K +0,18,9,3,V +0,18,10,3,H +0,18,11,2,F +0,18,12,1,P +0,18,13,1,A +0,19,0,416,N +0,19,1,323,S +0,19,2,13,T +0,19,3,8,D +0,19,4,3,K +0,19,5,2,Y +0,19,6,2,Q +0,19,7,2,A +0,19,8,1,G +0,20,0,454,K +0,20,1,119,S +0,20,2,48,Q +0,20,3,33,T +0,20,4,25,E +0,20,5,21,N +0,20,6,21,A +0,20,7,16,I +0,20,8,13,R +0,20,9,7,L +0,20,10,5,M +0,20,11,3,V +0,20,12,2,G +0,20,13,1,P +0,20,14,1,F +0,20,15,1,D +0,21,0,556,S +0,21,1,148,A +0,21,2,22,N +0,21,3,18,T +0,21,4,6,P +0,21,5,6,K +0,21,6,5,D +0,21,7,3,F +0,21,8,2,V +0,21,9,1,Y +0,21,10,1,R +0,21,11,1,L +0,21,12,1,G +0,22,0,313,T +0,22,1,268,N +0,22,2,87,K +0,22,3,68,D +0,22,4,6,M +0,22,5,5,S +0,22,6,4,R +0,22,7,4,I +0,22,8,3,Y +0,22,9,3,E +0,22,10,3,A +0,22,11,2,V +0,22,12,2,P +0,22,13,1,Q +0,22,14,1,G +0,23,0,671,D +0,23,1,44,E +0,23,2,31,N +0,23,3,9,S +0,23,4,3,T +0,23,5,3,K +0,23,6,3,G +0,23,7,2,C +0,23,8,1,W +0,23,9,1,V +0,23,10,1,R +0,23,11,1,Q +0,24,0,413,R +0,24,1,114,K +0,24,2,97,V +0,24,3,71,A +0,24,4,19,S +0,24,5,16,L +0,24,6,11,T +0,24,7,11,I +0,24,8,6,P +0,24,9,4,M +0,24,10,3,D +0,24,11,2,E +0,24,12,1,H +0,24,13,1,G +0,24,14,1,F +0,25,0,509,S +0,25,1,231,T +0,25,2,7,N +0,25,3,5,F +0,25,4,3,I +0,25,5,3,H +0,25,6,2,Y +0,25,7,2,R +0,25,8,2,P +0,25,9,2,C +0,25,10,1,W +0,25,11,1,Q +0,25,12,1,K +0,25,13,1,D +0,26,0,563,I +0,26,1,106,L +0,26,2,32,M +0,26,3,31,F +0,26,4,22,V +0,26,5,5,T +0,26,6,5,S +0,26,7,3,G +0,26,8,2,H +0,26,9,1,C +0,27,0,573,T +0,27,1,125,S +0,27,2,21,I +0,27,3,17,A +0,27,4,12,V +0,27,5,8,R +0,27,6,6,F +0,27,7,3,K +0,27,8,2,N +0,27,9,1,Y +0,27,10,1,G +0,27,11,1,D +0,28,0,322,F +0,28,1,140,L +0,28,2,121,A +0,28,3,105,V +0,28,4,31,S +0,28,5,26,I +0,28,6,10,T +0,28,7,8,G +0,28,8,3,R +0,28,9,2,Y +0,28,10,2,P +0,29,0,606,R +0,29,1,115,K +0,29,2,17,Q +0,29,3,6,G +0,29,4,5,N +0,29,5,4,P +0,29,6,4,L +0,29,7,4,H +0,29,8,2,T +0,29,9,2,A +0,29,10,1,Y +0,29,11,1,W +0,29,12,1,S +0,29,13,1,F +0,29,14,1,E +1,0,0,324,C +1,0,1,2,S +1,0,2,2,G +1,0,3,1,V +1,0,4,1,L +1,1,0,283,Y +1,1,1,30,F 
+1,1,2,8,H +1,1,3,3,S +1,1,4,3,C +1,1,5,1,T +1,1,6,1,L +1,1,7,1,D +1,2,0,325,Y +1,2,1,2,F +1,2,2,1,L +1,2,3,1,E +1,2,4,1,C +1,3,0,141,V +1,3,1,121,T +1,3,2,22,D +1,3,3,12,I +1,3,4,11,S +1,3,5,11,M +1,3,6,4,A +1,3,7,3,Y +1,3,8,2,L +1,3,9,1,N +1,3,10,1,H +1,3,11,1,E +1,4,0,208,A +1,4,1,113,G +1,4,2,3,V +1,4,3,2,T +1,4,4,2,R +1,4,5,2,E +1,5,0,94,A +1,5,1,82,V +1,5,2,68,F +1,5,3,44,L +1,5,4,18,I +1,5,5,14,T +1,5,6,4,M +1,5,7,2,G +1,5,8,2,E +1,5,9,1,P +1,5,10,1,D +1,6,0,325,D +1,6,1,2,N +1,6,2,1,V +1,6,3,1,S +1,6,4,1,G +1,7,0,277,E +1,7,1,44,D +1,7,2,3,Q +1,7,3,3,N +1,7,4,2,K +1,7,5,1,S +1,8,0,181,A +1,8,1,84,P +1,8,2,30,S +1,8,3,7,V +1,8,4,7,E +1,8,5,7,C +1,8,6,5,T +1,8,7,3,Q +1,8,8,2,Y +1,8,9,2,D +1,8,10,1,I +1,8,11,1,G +1,9,0,181,E +1,9,1,125,Q +1,9,2,13,K +1,9,3,3,H +1,9,4,2,L +1,9,5,2,G +1,9,6,2,A +1,9,7,1,V +1,9,8,1,T +1,10,0,148,V +1,10,1,144,L +1,10,2,28,M +1,10,3,3,I +1,10,4,2,P +1,10,5,2,G +1,10,6,2,F +1,10,7,1,T +1,11,0,160,S +1,11,1,87,R +1,11,2,26,G +1,11,3,24,N +1,11,4,9,P +1,11,5,7,T +1,11,6,7,K +1,11,7,5,C +1,11,8,3,Q +1,11,9,1,I +1,11,10,1,D +1,12,0,274,S +1,12,1,27,N +1,12,2,10,H +1,12,3,5,I +1,12,4,3,E +1,12,5,3,D +1,12,6,2,T +1,12,7,2,R +1,12,8,2,G +1,12,9,1,Y +1,12,10,1,C +1,13,0,327,I +1,13,1,2,V +1,13,2,1,D +1,14,0,211,T +1,14,1,69,K +1,14,2,22,R +1,14,3,11,S +1,14,4,6,N +1,14,5,4,I +1,14,6,3,E +1,14,7,2,A +1,14,8,1,Q +1,14,9,1,P +1,15,0,300,L +1,15,1,23,F +1,15,2,2,V +1,15,3,1,R +1,15,4,1,M +1,15,5,1,I +1,15,6,1,G +1,15,7,1,C +1,16,0,245,T +1,16,1,70,S +1,16,2,6,I +1,16,3,5,V +1,16,4,2,A +1,16,5,1,N +1,16,6,1,L +1,17,0,253,F +1,17,1,74,Y +1,17,2,1,T +1,17,3,1,H +1,17,4,1,G +1,18,0,250,D +1,18,1,34,S +1,18,2,19,E +1,18,3,16,Q +1,18,4,3,H +1,18,5,2,V +1,18,6,1,Y +1,18,7,1,N +1,18,8,1,K +1,18,9,1,G +1,18,10,1,F +1,18,11,1,A +1,19,0,288,T +1,19,1,19,S +1,19,2,7,A +1,19,3,4,R +1,19,4,4,K +1,19,5,2,Q +1,19,6,2,N +1,19,7,2,I +1,19,8,1,V +1,19,9,1,M +1,20,0,297,G +1,20,1,21,E +1,20,2,2,V +1,20,3,2,T +1,20,4,2,R +1,20,5,2,D +1,20,6,2,A +1,20,7,1,W 
+1,20,8,1,S +1,21,0,307,S +1,21,1,7,Y +1,21,2,6,A +1,21,3,4,F +1,21,4,2,P +1,21,5,2,L +1,21,6,1,T +1,21,7,1,H +1,22,0,300,G +1,22,1,18,R +1,22,2,7,Q +1,22,3,3,V +1,22,4,1,W +1,22,5,1,K +1,23,0,315,S +1,23,1,6,T +1,23,2,3,G +1,23,3,2,R +1,23,4,2,I +1,23,5,1,N +1,23,6,1,L +1,24,0,317,G +1,24,1,7,S +1,24,2,4,A +1,24,3,1,T +1,24,4,1,D +1,25,0,264,S +1,25,1,42,T +1,25,2,10,K +1,25,3,8,I +1,25,4,4,R +1,25,5,1,V +1,25,6,1,N +1,26,0,327,F +1,26,1,2,L +1,26,2,1,G +1,27,0,317,R +1,27,1,4,G +1,27,2,2,S +1,27,3,2,Q +1,27,4,1,W +1,27,5,1,P +1,27,6,1,K +1,27,7,1,D +1,27,8,1,C +1,28,0,133,D +1,28,1,125,S +1,28,2,47,A +1,28,3,6,E +1,28,4,4,C +1,28,5,3,V +1,28,6,3,K +1,28,7,2,T +1,28,8,2,H +1,28,9,2,G +1,28,10,1,P +1,28,11,1,N +1,28,12,1,L +1,29,0,302,P +1,29,1,25,S +1,29,2,3,L +2,0,0,324,C +2,0,1,10,E +2,0,2,3,Y +2,0,3,2,S +2,0,4,1,W +2,0,5,1,I +2,0,6,1,G +2,1,0,278,Y +2,1,1,39,F +2,1,2,10,H +2,1,3,10,A +2,1,4,3,I +2,1,5,1,S +2,1,6,1,D +2,2,0,328,Y +2,2,1,10,Q +2,2,2,2,D +2,2,3,1,H +2,2,4,1,C +2,3,0,294,D +2,3,1,10,L +2,3,2,10,G +2,3,3,10,E +2,3,4,6,I +2,3,5,4,N +2,3,6,3,M +2,3,7,2,Y +2,3,8,1,V +2,3,9,1,T +2,3,10,1,A +2,4,0,320,A +2,4,1,12,S +2,4,2,6,T +2,4,3,2,V +2,4,4,2,D +2,5,0,322,E +2,5,1,10,S +2,5,2,6,D +2,5,3,3,K +2,5,4,1,Q +2,6,0,320,D +2,6,1,10,I +2,6,2,9,E +2,6,3,2,V +2,6,4,1,N +2,7,0,307,E +2,7,1,10,G +2,7,2,9,T +2,7,3,5,K +2,7,4,5,D +2,7,5,2,N +2,7,6,1,Q +2,7,7,1,M +2,7,8,1,I +2,7,9,1,A +2,8,0,223,A +2,8,1,61,P +2,8,2,18,T +2,8,3,17,S +2,8,4,10,L +2,8,5,4,V +2,8,6,3,I +2,8,7,3,D +2,8,8,1,M +2,8,9,1,G +2,8,10,1,F +2,9,0,280,Q +2,9,1,20,R +2,9,2,13,L +2,9,3,10,T +2,9,4,7,E +2,9,5,5,K +2,9,6,3,H +2,9,7,2,W +2,9,8,2,P +2,10,0,206,L +2,10,1,92,A +2,10,2,28,V +2,10,3,6,I +2,10,4,4,T +2,10,5,3,P +2,10,6,3,F +2,11,0,226,G +2,11,1,75,S +2,11,2,11,R +2,11,3,10,T +2,11,4,10,N +2,11,5,7,E +2,11,6,1,W +2,11,7,1,V +2,11,8,1,L +2,12,0,217,S +2,12,1,85,T +2,12,2,18,A +2,12,3,16,N +2,12,4,3,R +2,12,5,1,P +2,12,6,1,L +2,12,7,1,H +2,13,0,322,I +2,13,1,10,G +2,13,2,3,V +2,13,3,3,T 
+2,13,4,3,L +2,13,5,1,R +2,14,0,274,T +2,14,1,29,L +2,14,2,22,S +2,14,3,9,A +2,14,4,5,I +2,14,5,2,G +2,14,6,1,R +2,15,0,321,L +2,15,1,9,M +2,15,2,8,R +2,15,3,2,S +2,15,4,2,K +2,16,0,202,T +2,16,1,61,S +2,16,2,28,A +2,16,3,23,L +2,16,4,8,V +2,16,5,7,I +2,16,6,6,Y +2,16,7,6,F +2,16,8,1,N +2,17,0,247,A +2,17,1,68,G +2,17,2,6,V +2,17,3,6,R +2,17,4,5,T +2,17,5,3,S +2,17,6,3,F +2,17,7,2,H +2,17,8,2,D +2,18,0,198,T +2,18,1,65,S +2,18,2,33,K +2,18,3,29,A +2,18,4,6,D +2,18,5,5,M +2,18,6,3,V +2,18,7,2,N +2,18,8,1,L +2,19,0,233,N +2,19,1,47,S +2,19,2,17,T +2,19,3,12,D +2,19,4,10,F +2,19,5,9,K +2,19,6,6,A +2,19,7,5,G +2,19,8,1,Y +2,19,9,1,Q +2,19,10,1,H +2,20,0,271,G +2,20,1,20,S +2,20,2,20,A +2,20,3,10,R +2,20,4,8,T +2,20,5,8,E +2,20,6,5,D +2,21,0,309,S +2,21,1,10,D +2,21,2,8,L +2,21,3,5,I +2,21,4,4,T +2,21,5,2,K +2,21,6,2,A +2,21,7,1,V +2,21,8,1,P +2,22,0,113,K +2,22,1,55,R +2,22,2,49,S +2,22,3,36,N +2,22,4,24,I +2,22,5,21,A +2,22,6,17,T +2,22,7,10,P +2,22,8,10,L +2,22,9,2,D +2,22,10,1,V +2,22,11,1,Q +2,22,12,1,M +2,22,13,1,F +2,22,14,1,C +2,23,0,249,S +2,23,1,47,D +2,23,2,15,T +2,23,3,11,A +2,23,4,10,V +2,23,5,3,F +2,23,6,2,Y +2,23,7,2,G +2,23,8,2,E +2,23,9,1,N +2,24,0,278,G +2,24,1,35,K +2,24,2,13,I +2,24,3,6,S +2,24,4,6,A +2,24,5,3,C +2,24,6,1,V +2,25,0,330,S +2,25,1,5,T +2,25,2,3,P +2,25,3,2,V +2,25,4,1,D +2,25,5,1,C +2,26,0,277,F +2,26,1,48,G +2,26,2,8,A +2,26,3,2,S +2,26,4,2,R +2,26,5,2,P +2,26,6,1,Y +2,26,7,1,L +2,26,8,1,D +2,27,0,263,R +2,27,1,49,S +2,27,2,12,W +2,27,3,7,Q +2,27,4,3,H +2,27,5,2,L +2,27,6,2,D +2,27,7,2,C +2,27,8,1,G +2,27,9,1,E +2,28,0,171,D +2,28,1,46,E +2,28,2,45,F +2,28,3,30,A +2,28,4,18,S +2,28,5,18,N +2,28,6,7,V +2,28,7,3,G +2,28,8,2,P +2,28,9,1,R +2,28,10,1,H +2,29,0,261,P +2,29,1,44,R +2,29,2,18,S +2,29,3,8,T +2,29,4,4,L +2,29,5,2,V +2,29,6,2,C +2,29,7,1,W +2,29,8,1,I +2,29,9,1,H +3,0,0,759,C +3,0,1,1,Y +3,0,2,1,W +3,0,3,1,V +3,0,4,1,K +3,0,5,1,G +3,1,0,396,F +3,1,1,169,Y +3,1,2,166,L +3,1,3,18,I +3,1,4,6,H +3,1,5,3,V +3,1,6,2,N +3,1,7,1,T 
+3,1,8,1,Q +3,1,9,1,D +3,1,10,1,C +3,2,0,752,Y +3,2,1,6,F +3,2,2,2,C +3,2,3,1,Q +3,2,4,1,L +3,2,5,1,E +3,2,6,1,D +3,3,0,297,T +3,3,1,266,V +3,3,2,55,L +3,3,3,39,K +3,3,4,32,I +3,3,5,29,M +3,3,6,18,S +3,3,7,17,E +3,3,8,3,R +3,3,9,3,F +3,3,10,2,Q +3,3,11,2,A +3,3,12,1,N +3,4,0,592,A +3,4,1,149,G +3,4,2,17,T +3,4,3,2,V +3,4,4,1,Y +3,4,5,1,S +3,4,6,1,I +3,4,7,1,H +3,5,0,561,S +3,5,1,128,A +3,5,2,59,T +3,5,3,7,V +3,5,4,4,L +3,5,5,2,F +3,5,6,1,W +3,5,7,1,P +3,5,8,1,K +3,6,0,741,D +3,6,1,9,L +3,6,2,6,H +3,6,3,3,Y +3,6,4,2,N +3,6,5,1,T +3,6,6,1,F +3,6,7,1,A +3,7,0,267,S +3,7,1,145,G +3,7,2,144,E +3,7,3,68,T +3,7,4,52,R +3,7,5,37,A +3,7,6,23,K +3,7,7,12,D +3,7,8,6,V +3,7,9,3,N +3,7,10,3,I +3,7,11,2,Q +3,7,12,1,Y +3,7,13,1,L +3,8,0,220,L +3,8,1,213,P +3,8,2,83,T +3,8,3,51,E +3,8,4,48,A +3,8,5,40,W +3,8,6,25,I +3,8,7,21,V +3,8,8,19,M +3,8,9,19,G +3,8,10,9,H +3,8,11,6,Q +3,8,12,5,C +3,8,13,3,D +3,8,14,1,S +3,8,15,1,R +3,9,0,543,Q +3,9,1,71,H +3,9,2,34,T +3,9,3,30,E +3,9,4,21,R +3,9,5,19,L +3,9,6,18,K +3,9,7,8,S +3,9,8,5,V +3,9,9,4,N +3,9,10,4,A +3,9,11,3,Y +3,9,12,3,I +3,9,13,1,F +3,10,0,315,S +3,10,1,182,V +3,10,2,125,A +3,10,3,75,T +3,10,4,40,L +3,10,5,17,P +3,10,6,5,I +3,10,7,2,M +3,10,8,2,G +3,10,9,1,R +3,11,0,264,A +3,11,1,226,S +3,11,2,121,D +3,11,3,39,Q +3,11,4,28,T +3,11,5,17,P +3,11,6,15,R +3,11,7,14,G +3,11,8,14,E +3,11,9,8,H +3,11,10,6,V +3,11,11,6,L +3,11,12,3,F +3,11,13,1,Y +3,11,14,1,W +3,11,15,1,K +3,12,0,200,S +3,12,1,166,T +3,12,2,128,A +3,12,3,77,P +3,12,4,76,R +3,12,5,27,K +3,12,6,15,L +3,12,7,14,G +3,12,8,12,Q +3,12,9,12,M +3,12,10,11,I +3,12,11,6,E +3,12,12,5,V +3,12,13,5,H +3,12,14,4,D +3,12,15,2,Y +3,12,16,2,W +3,12,17,2,F +3,13,0,446,I +3,13,1,203,K +3,13,2,91,L +3,13,3,19,V +3,13,4,1,T +3,13,5,1,Q +3,13,6,1,M +3,13,7,1,F +3,13,8,1,D +3,14,0,305,H +3,14,1,69,R +3,14,2,69,Q +3,14,3,59,V +3,14,4,51,K +3,14,5,41,Y +3,14,6,34,E +3,14,7,32,T +3,14,8,32,L +3,14,9,24,N +3,14,10,21,S +3,14,11,16,I +3,14,12,5,F +3,14,13,2,M +3,14,14,2,A +3,14,15,1,G +3,14,16,1,C 
+3,15,0,734,L +3,15,1,9,F +3,15,2,7,M +3,15,3,5,I +3,15,4,2,S +3,15,5,2,P +3,15,6,2,H +3,15,7,1,W +3,15,8,1,V +3,15,9,1,A +3,16,0,358,S +3,16,1,190,H +3,16,2,116,T +3,16,3,16,Y +3,16,4,16,N +3,16,5,14,F +3,16,6,12,L +3,16,7,9,P +3,16,8,9,A +3,16,9,8,Q +3,16,10,6,G +3,16,11,5,I +3,16,12,2,R +3,16,13,2,K +3,16,14,1,V +3,17,0,310,S +3,17,1,255,F +3,17,2,88,L +3,17,3,55,I +3,17,4,32,V +3,17,5,11,G +3,17,6,5,T +3,17,7,4,A +3,17,8,2,N +3,17,9,1,Q +3,17,10,1,K +3,18,0,324,S +3,18,1,121,H +3,18,2,85,R +3,18,3,77,Y +3,18,4,41,Q +3,18,5,20,F +3,18,6,18,K +3,18,7,17,N +3,18,8,14,D +3,18,9,12,G +3,18,10,10,T +3,18,11,8,L +3,18,12,8,A +3,18,13,4,I +3,18,14,3,E +3,18,15,2,C +3,19,0,308,K +3,19,1,126,R +3,19,2,100,S +3,19,3,81,T +3,19,4,48,Q +3,19,5,35,L +3,19,6,23,G +3,19,7,16,F +3,19,8,8,A +3,19,9,7,N +3,19,10,3,E +3,19,11,2,Y +3,19,12,2,M +3,19,13,2,I +3,19,14,2,H +3,19,15,1,C +3,20,0,198,E +3,20,1,100,A +3,20,2,98,R +3,20,3,95,S +3,20,4,84,D +3,20,5,53,K +3,20,6,45,T +3,20,7,37,N +3,20,8,16,Q +3,20,9,14,L +3,20,10,7,I +3,20,11,5,G +3,20,12,4,V +3,20,13,3,F +3,20,14,2,M +3,20,15,1,P +3,20,16,1,H +3,20,17,1,C +3,21,0,206,S +3,21,1,157,K +3,21,2,90,T +3,21,3,85,A +3,21,4,60,E +3,21,5,57,D +3,21,6,37,R +3,21,7,21,N +3,21,8,17,G +3,21,9,10,L +3,21,10,8,Q +3,21,11,8,P +3,21,12,3,V +3,21,13,3,H +3,21,14,1,M +3,21,15,1,F +3,22,0,375,K +3,22,1,93,S +3,22,2,62,T +3,22,3,47,E +3,22,4,43,R +3,22,5,40,P +3,22,6,23,A +3,22,7,17,L +3,22,8,15,I +3,22,9,13,N +3,22,10,9,Y +3,22,11,8,Q +3,22,12,5,G +3,22,13,5,D +3,22,14,3,V +3,22,15,3,F +3,22,16,2,M +3,22,17,1,H +3,23,0,258,N +3,23,1,104,D +3,23,2,83,Q +3,23,3,76,V +3,23,4,41,G +3,23,5,39,S +3,23,6,38,A +3,23,7,25,T +3,23,8,25,K +3,23,9,21,R +3,23,10,21,I +3,23,11,19,H +3,23,12,5,P +3,23,13,3,F +3,23,14,3,E +3,23,15,2,L +3,23,16,1,Y +3,24,0,254,L +3,24,1,222,F +3,24,2,61,I +3,24,3,53,T +3,24,4,48,Y +3,24,5,29,V +3,24,6,24,M +3,24,7,22,A +3,24,8,21,H +3,24,9,11,S +3,24,10,6,P +3,24,11,3,E +3,24,12,2,R +3,24,13,2,G +3,24,14,2,D +3,24,15,1,Q 
+3,24,16,1,N +3,24,17,1,K +3,24,18,1,C +3,25,0,279,T +3,25,1,81,N +3,25,2,73,E +3,25,3,69,F +3,25,4,60,L +3,25,5,44,S +3,25,6,31,Q +3,25,7,29,H +3,25,8,26,R +3,25,9,17,V +3,25,10,15,A +3,25,11,11,K +3,25,12,9,I +3,25,13,7,D +3,25,14,4,Y +3,25,15,4,W +3,25,16,3,G +3,25,17,2,M +3,26,0,323,A +3,26,1,163,V +3,26,2,96,L +3,26,3,72,S +3,26,4,33,I +3,26,5,31,G +3,26,6,11,F +3,26,7,10,Y +3,26,8,9,C +3,26,9,5,T +3,26,10,3,K +3,26,11,2,Q +3,26,12,2,E +3,26,13,1,W +3,26,14,1,P +3,26,15,1,M +3,26,16,1,H +3,27,0,225,T +3,27,1,173,S +3,27,2,137,E +3,27,3,53,R +3,27,4,43,N +3,27,5,42,K +3,27,6,25,Q +3,27,7,22,H +3,27,8,16,F +3,27,9,13,I +3,27,10,4,V +3,27,11,3,Y +3,27,12,3,A +3,27,13,2,M +3,27,14,1,W +3,27,15,1,G +3,27,16,1,D +3,28,0,311,F +3,28,1,229,L +3,28,2,84,Y +3,28,3,43,A +3,28,4,24,I +3,28,5,20,G +3,28,6,17,V +3,28,7,11,E +3,28,8,7,M +3,28,9,4,W +3,28,10,4,N +3,28,11,3,K +3,28,12,2,T +3,28,13,2,S +3,28,14,2,Q +3,28,15,1,C +3,29,0,418,R +3,29,1,209,G +3,29,2,40,K +3,29,3,33,M +3,29,4,16,Q +3,29,5,14,H +3,29,6,10,S +3,29,7,7,V +3,29,8,5,E +3,29,9,4,T +3,29,10,2,Y +3,29,11,2,L +3,29,12,1,W +3,29,13,1,N +3,29,14,1,I +3,29,15,1,D +4,0,0,374,C +4,0,1,2,Y +4,0,2,2,R +4,0,3,1,N +4,0,4,1,L +4,0,5,1,H +4,0,6,1,A +4,1,0,209,L +4,1,1,151,F +4,1,2,6,I +4,1,3,4,V +4,1,4,4,T +4,1,5,3,R +4,1,6,2,Y +4,1,7,2,S +4,1,8,1,H +4,2,0,361,Y +4,2,1,6,F +4,2,2,6,C +4,2,3,3,L +4,2,4,2,S +4,2,5,1,V +4,2,6,1,R +4,2,7,1,N +4,2,8,1,I +4,3,0,195,V +4,3,1,99,L +4,3,2,25,M +4,3,3,22,F +4,3,4,13,T +4,3,5,6,S +4,3,6,6,A +4,3,7,4,R +4,3,8,4,I +4,3,9,2,W +4,3,10,2,Q +4,3,11,1,Y +4,3,12,1,P +4,3,13,1,E +4,3,14,1,D +4,4,0,235,A +4,4,1,114,S +4,4,2,22,G +4,4,3,4,L +4,4,4,3,R +4,4,5,1,V +4,4,6,1,P +4,4,7,1,D +4,4,8,1,C +4,5,0,242,S +4,5,1,122,T +4,5,2,6,A +4,5,3,4,L +4,5,4,3,R +4,5,5,2,I +4,5,6,1,N +4,5,7,1,M +4,5,8,1,G +4,6,0,249,D +4,6,1,108,Q +4,6,2,6,R +4,6,3,6,N +4,6,4,5,H +4,6,5,4,P +4,6,6,2,Y +4,6,7,1,E +4,6,8,1,A +4,7,0,110,G +4,7,1,93,S +4,7,2,59,E +4,7,3,34,N +4,7,4,22,T +4,7,5,21,Q +4,7,6,16,R 
+4,7,7,8,A +4,7,8,6,K +4,7,9,6,D +4,7,10,4,M +4,7,11,1,V +4,7,12,1,I +4,7,13,1,F +4,8,0,140,P +4,8,1,124,L +4,8,2,28,Q +4,8,3,18,T +4,8,4,18,R +4,8,5,13,S +4,8,6,13,A +4,8,7,8,K +4,8,8,6,V +4,8,9,4,H +4,8,10,3,M +4,8,11,3,I +4,8,12,2,E +4,8,13,1,F +4,8,14,1,D +4,9,0,138,E +4,9,1,48,Q +4,9,2,40,S +4,9,3,24,K +4,9,4,23,A +4,9,5,18,T +4,9,6,17,V +4,9,7,16,L +4,9,8,14,N +4,9,9,12,H +4,9,10,12,D +4,9,11,10,G +4,9,12,4,I +4,9,13,3,R +4,9,14,2,P +4,9,15,1,Y +4,10,0,152,A +4,10,1,106,L +4,10,2,71,T +4,10,3,27,V +4,10,4,14,S +4,10,5,7,P +4,10,6,2,R +4,10,7,1,M +4,10,8,1,I +4,10,9,1,C +4,11,0,178,S +4,11,1,54,P +4,11,2,54,A +4,11,3,29,R +4,11,4,18,N +4,11,5,13,L +4,11,6,10,K +4,11,7,8,T +4,11,8,6,F +4,11,9,3,V +4,11,10,2,Q +4,11,11,2,H +4,11,12,2,C +4,11,13,1,Y +4,11,14,1,E +4,11,15,1,D +4,12,0,93,Q +4,12,1,74,E +4,12,2,50,S +4,12,3,36,T +4,12,4,26,K +4,12,5,20,R +4,12,6,18,N +4,12,7,14,L +4,12,8,13,D +4,12,9,12,A +4,12,10,10,H +4,12,11,6,P +4,12,12,4,G +4,12,13,3,V +4,12,14,1,Y +4,12,15,1,M +4,12,16,1,I +4,13,0,136,L +4,13,1,125,I +4,13,2,96,V +4,13,3,10,M +4,13,4,9,S +4,13,5,2,G +4,13,6,1,T +4,13,7,1,P +4,13,8,1,N +4,13,9,1,F +4,14,0,93,T +4,14,1,70,K +4,14,2,62,N +4,14,3,32,H +4,14,4,27,E +4,14,5,26,R +4,14,6,18,S +4,14,7,11,I +4,14,8,8,M +4,14,9,8,D +4,14,10,6,Q +4,14,11,6,G +4,14,12,4,Y +4,14,13,4,V +4,14,14,3,L +4,14,15,2,A +4,14,16,1,P +4,14,17,1,F +4,15,0,346,L +4,15,1,25,M +4,15,2,7,V +4,15,3,1,W +4,15,4,1,T +4,15,5,1,P +4,15,6,1,G +4,16,0,90,T +4,16,1,73,P +4,16,2,66,E +4,16,3,60,S +4,16,4,21,I +4,16,5,16,L +4,16,6,13,Y +4,16,7,9,N +4,16,8,8,H +4,16,9,7,Q +4,16,10,7,F +4,16,11,3,V +4,16,12,3,R +4,16,13,3,A +4,16,14,2,D +4,16,15,1,G +4,17,0,168,S +4,17,1,129,F +4,17,2,39,L +4,17,3,35,C +4,17,4,3,T +4,17,5,3,A +4,17,6,2,D +4,17,7,1,P +4,17,8,1,N +4,17,9,1,I +4,18,0,64,S +4,18,1,45,D +4,18,2,40,H +4,18,3,40,F +4,18,4,29,N +4,18,5,27,P +4,18,6,25,R +4,18,7,23,L +4,18,8,21,E +4,18,9,19,V +4,18,10,15,Y +4,18,11,10,Q +4,18,12,8,K +4,18,13,4,I +4,18,14,4,G +4,18,15,4,C 
+4,18,16,3,T +4,18,17,1,A +4,19,0,130,S +4,19,1,73,E +4,19,2,35,T +4,19,3,26,A +4,19,4,24,Y +4,19,5,14,G +4,19,6,14,F +4,19,7,10,V +4,19,8,9,R +4,19,9,9,L +4,19,10,9,D +4,19,11,8,P +4,19,12,5,K +4,19,13,4,Q +4,19,14,4,M +4,19,15,3,N +4,19,16,2,H +4,19,17,2,C +4,19,18,1,I +4,20,0,63,G +4,20,1,59,K +4,20,2,59,D +4,20,3,54,T +4,20,4,53,N +4,20,5,24,L +4,20,6,22,S +4,20,7,12,Q +4,20,8,9,E +4,20,9,9,A +4,20,10,6,R +4,20,11,5,V +4,20,12,3,I +4,20,13,2,Y +4,20,14,1,P +4,20,15,1,M +4,21,0,78,S +4,21,1,66,N +4,21,2,52,E +4,21,3,51,K +4,21,4,45,D +4,21,5,21,P +4,21,6,20,Q +4,21,7,15,G +4,21,8,14,T +4,21,9,12,R +4,21,10,4,A +4,21,11,3,H +4,21,12,1,L +4,22,0,164,P +4,22,1,67,F +4,22,2,45,S +4,22,3,25,L +4,22,4,20,T +4,22,5,17,E +4,22,6,14,K +4,22,7,5,I +4,22,8,5,A +4,22,9,4,V +4,22,10,4,Q +4,22,11,3,R +4,22,12,2,Y +4,22,13,2,G +4,22,14,2,D +4,22,15,1,N +4,22,16,1,H +4,22,17,1,C +4,23,0,207,R +4,23,1,69,Q +4,23,2,44,C +4,23,3,18,M +4,23,4,17,H +4,23,5,10,S +4,23,6,8,F +4,23,7,2,V +4,23,8,2,T +4,23,9,2,A +4,23,10,1,W +4,23,11,1,I +4,23,12,1,E +4,24,0,137,S +4,24,1,95,E +4,24,2,38,Q +4,24,3,30,K +4,24,4,20,H +4,24,5,13,T +4,24,6,13,R +4,24,7,11,N +4,24,8,6,V +4,24,9,4,L +4,24,10,4,G +4,24,11,3,F +4,24,12,3,D +4,24,13,3,A +4,24,14,2,I +4,25,0,160,A +4,25,1,109,V +4,25,2,37,G +4,25,3,31,P +4,25,4,24,I +4,25,5,8,S +4,25,6,4,T +4,25,7,3,Y +4,25,8,3,R +4,25,9,2,L +4,25,10,1,K +4,26,0,231,S +4,26,1,46,N +4,26,2,17,P +4,26,3,16,T +4,26,4,13,Q +4,26,5,11,L +4,26,6,10,R +4,26,7,8,K +4,26,8,7,F +4,26,9,6,D +4,26,10,4,V +4,26,11,3,H +4,26,12,3,G +4,26,13,3,A +4,26,14,2,E +4,26,15,1,W +4,26,16,1,I +4,27,0,241,F +4,27,1,112,Y +4,27,2,12,S +4,27,3,7,L +4,27,4,4,A +4,27,5,2,V +4,27,6,1,R +4,27,7,1,I +4,27,8,1,H +4,27,9,1,G +4,28,0,186,R +4,28,1,113,G +4,28,2,23,K +4,28,3,19,N +4,28,4,14,H +4,28,5,9,Q +4,28,6,6,E +4,28,7,4,D +4,28,8,2,V +4,28,9,2,F +4,28,10,1,Y +4,28,11,1,W +4,28,12,1,M +4,28,13,1,C +4,29,0,158,D +4,29,1,60,E +4,29,2,38,S +4,29,3,28,N +4,29,4,27,G +4,29,5,24,A +4,29,6,15,Q 
+4,29,7,13,K +4,29,8,6,P +4,29,9,4,R +4,29,10,3,Y +4,29,11,3,T +4,29,12,1,M +4,29,13,1,L +4,29,14,1,H diff --git a/enclone_stuff/src/inconsistent.rs b/enclone_stuff/src/inconsistent.rs new file mode 100644 index 000000000..2f2000f6f --- /dev/null +++ b/enclone_stuff/src/inconsistent.rs @@ -0,0 +1,129 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Test for consistency between VDJ cells and GEX cells. This is designed to work even if +// NCELL is used. We take up to 100 VDJ cells having both heavy and light (or TRB and TRA) +// chains, and having the highest VDJ UMI count total (but using only one cell per exact +// subclonotype), and find those that are GEX cells. +// +// If n cells were taken, and k of those are GEX cells, we require that +// binomial_sum(n, k, 0.7) >= 0.00002. For n = 100, this is the same as requiring that +// k >= 50. Using a binomial sum threshold allows the stringency of the requirement to be +// appropriately lower when n is small. When we tested on 260 libraries, the lowest value +// observed for k/n was 0.65, and the vast majority of values were 0.9 or higher. +// +// This code is inefficient because for every dataset, it searches the entirety of tig_bc, but +// it doesn't matter much because not much time is spent here. 
+ +use enclone_core::defs::{EncloneControl, ExactClonotype, GexInfo, TigData}; +use rayon::prelude::*; +use stats_utils::binomial_sum; +use std::time::Instant; +use vector_utils::{bin_member, bin_position, reverse_sort}; + +pub fn test_vdj_gex_inconsistent( + ctl: &EncloneControl, + tig_bc: &[Vec], + exact_clonotypes: &[ExactClonotype], + vdj_cells: &[Vec], + gex_info: &GexInfo, +) -> Result<(), &'static str> { + let tinc = Instant::now(); + + let mut results = Vec::<(usize, String)>::new(); + for li in 0..ctl.origin_info.n() { + results.push((li, String::new())); + } + results.par_iter_mut().for_each(|res| { + let li = res.0; + if !ctl.origin_info.gex_path[li].is_empty() && !ctl.gen_opt.allow_inconsistent { + let vdj = &vdj_cells[li]; + let gex = &gex_info.gex_cell_barcodes[li]; + let (mut heavy, mut light) = (vec![false; vdj.len()], vec![false; vdj.len()]); + let mut exid = vec![0; vdj.len()]; + let mut inex = vec![false; vdj.len()]; + for (i, ex) in exact_clonotypes.iter().enumerate() { + for clone in &ex.clones { + let p = bin_position(vdj, &clone[0].barcode); + if p >= 0 { + inex[p as usize] = true; + exid[p as usize] = i; + } + } + } + let mut numi = vec![0; vdj.len()]; + for tigi in tig_bc { + if tigi[0].dataset_index == li { + let p = bin_position(vdj, &tigi[0].barcode); + if p >= 0 { + for tig in tigi { + numi[p as usize] += tig.umi_count; + if tig.left { + heavy[p as usize] = true; + } else { + light[p as usize] = true; + } + } + } + } + } + let mut x = Vec::<(usize, bool, usize)>::new(); + for i in 0..vdj.len() { + if heavy[i] && light[i] { + x.push((numi[i], bin_member(gex, &vdj[i]), i)); + } + } + reverse_sort(&mut x); + let mut used = vec![false; exact_clonotypes.len()]; + let (mut total, mut good) = (0, 0); + for xi in x { + let m = xi.2; + if inex[m] && used[exid[m]] { + continue; + } + total += 1; + if xi.1 { + good += 1; + } + if inex[m] { + used[exid[m]] = true; + } + if total == 100 { + break; + } + } + if total >= 1 { + let bino = 
binomial_sum(total, good, 0.7); + if bino < 0.00002 { + res.1 = format!( + "\nThe VDJ dataset with path\n{}\nand the GEX dataset with path\n\ + {}\nshow insufficient sharing of barcodes. \ + Of the {total} VDJ cells that were tested,\n\ + only {good} were GEX cells.\n", + ctl.origin_info.dataset_path[li], ctl.origin_info.gex_path[li] + ); + } + } + } + }); + let mut fail = false; + for r in &results { + if !r.1.is_empty() { + fail = true; + } + } + if fail { + for r in results { + eprint!("{}", r.1); + } + return Err( + "\nThis test is restricted to VDJ cells having both chain types, uses at most \ + one cell\nper exact subclonotype, and uses up to 100 cells having the highest \ + UMI counts.\n\ + \nThe data suggest a laboratory or informatic mixup. If you believe \ + that this is not the case,\nyou can force enclone to run by adding \ + the argument ALLOW_INCONSISTENT to the command line.\n", + ); + } + ctl.perf_stats(&tinc, "testing for inconsistency"); + Ok(()) +} diff --git a/enclone_stuff/src/lib.rs b/enclone_stuff/src/lib.rs new file mode 100644 index 000000000..d156dbfaa --- /dev/null +++ b/enclone_stuff/src/lib.rs @@ -0,0 +1,16 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. + +pub mod analyze_dref; +pub mod disintegrate; +pub mod doublets; +pub mod fcell; +pub mod filter_umi; +pub mod flag_defective; +pub mod inconsistent; +pub mod merge_onesies; +pub mod populate_features; +pub mod some_filters; +pub mod split_orbits; +pub mod start; +pub mod vars; +pub mod weak_chains; diff --git a/enclone_stuff/src/merge_onesies.rs b/enclone_stuff/src/merge_onesies.rs new file mode 100644 index 000000000..5788a1299 --- /dev/null +++ b/enclone_stuff/src/merge_onesies.rs @@ -0,0 +1,115 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Merge onesies where totally unambiguous. Possibly inefficient and should optimize. 
+ +use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype}; +use equiv::EquivRel; +use vector_utils::{lower_bound1_2, unique_sort, upper_bound1_2}; + +pub fn merge_onesies( + orbits: &mut Vec>, + ctl: &EncloneControl, + exact_clonotypes: &[ExactClonotype], + info: &[CloneInfo], + eq: &EquivRel, + disintegrated: &[bool], +) { + if ctl.join_alg_opt.merge_onesies { + // ctl.join_alg_opt.merge_onesies is always true + let mut eqo = EquivRel::new(orbits.len() as i32); + let mut to_orbit = vec![None; info.len()]; + for i in 0..orbits.len() { + for j in 0..orbits[i].len() { + to_orbit[orbits[i][j] as usize] = Some(i); + } + } + let ncells_total = exact_clonotypes.iter().map(ExactClonotype::ncells).sum(); + let mut onesies = Vec::::new(); + for i in 0..info.len() { + if to_orbit[i].is_some() + && info[i].tigs.len() == 1 + && (!ctl.clono_filt_opt_def.weak_onesies || !disintegrated[info[i].clonotype_index]) + { + onesies.push(i); + } + } + let mut alltigs2 = Vec::<(Vec, usize)>::new(); + for i in 0..info.len() { + if to_orbit[i].is_some() && info[i].tigs.len() >= 2 { + for j in 0..info[i].tigs.len() { + alltigs2.push((info[i].tigs[j].clone(), i)); + } + } + } + alltigs2.sort(); + for x in onesies.iter() { + let low = lower_bound1_2(&alltigs2, &info[*x].tigs[0]); + let high = upper_bound1_2(&alltigs2, &info[*x].tigs[0]); + let mut ms = Vec::::new(); + for m in low..high { + if alltigs2[m as usize].0 == info[*x].tigs[0] { + ms.push(m as usize); + } + } + let mut ok = !ms.is_empty(); + let mut exacts = Vec::::new(); + for j in 0..ms.len() { + if eq.class_id(alltigs2[ms[j]].1 as i32) != eq.class_id(alltigs2[ms[0]].1 as i32) { + ok = false; + } + let mut o = Vec::::new(); + eq.orbit(alltigs2[ms[j]].1 as i32, &mut o); + for z in o.iter() { + exacts.push(info[*z as usize].clonotype_index); + } + } + unique_sort(&mut exacts); + if ctl.join_alg_opt.merge_onesies_ctl { + let ncells0 = exact_clonotypes[info[*x].clonotype_index].ncells(); + if ncells0 * 10000 < ncells_total 
{ + ok = false; + } + } + if ok { + let orb1 = to_orbit[*x].unwrap(); + let orb2 = to_orbit[alltigs2[ms[0]].1].unwrap(); + + // Test for donor mixing. + + if !ctl.clono_filt_opt_def.donor { + let mut donors = vec![Vec::>::new(); 2]; + let orbs = [&orb1, &orb2]; + for (pass, orb) in orbs.iter().enumerate() { + for id in orbits[**orb].iter() { + let ex = &exact_clonotypes[info[*id as usize].clonotype_id]; + for i in 0..ex.clones.len() { + donors[pass].push(ex.clones[i][0].donor_index); + } + } + unique_sort(&mut donors[pass]); + } + if donors[0] != donors[1] { + continue; + } + } + + // Make join. + + eqo.join(orb1 as i32, orb2 as i32); + } + } + let mut orbits2 = Vec::>::new(); + let mut repsx = Vec::::new(); + eqo.orbit_reps(&mut repsx); + for r in repsx { + let mut ox = Vec::::new(); + eqo.orbit(r, &mut ox); + let mut o = Vec::::new(); + for oj in ox { + o.append(&mut orbits[oj as usize].clone()); + } + orbits2.push(o); + } + *orbits = orbits2; + } +} diff --git a/enclone_stuff/src/populate_features.rs b/enclone_stuff/src/populate_features.rs new file mode 100644 index 000000000..755c06bf2 --- /dev/null +++ b/enclone_stuff/src/populate_features.rs @@ -0,0 +1,187 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. + +// Populate features. 
+ +use amino::aa_seq; +use enclone_core::defs::EncloneControl; +use io_utils::fwriteln; +use std::fmt::Write as _; +use std::io::Write; +use string_utils::{stringme, strme}; +use tables::print_tabular_vbox; +use vdj_ann::refx::RefData; +use vdj_ann::vdj_features::{cdr1_start, cdr2_start, fr1_start, fr2_start, fr3_start}; + +pub fn populate_features( + ctl: &EncloneControl, + refdata: &RefData, + broken: &[bool], + fr1_starts: &mut Vec, + fr2_starts: &mut Vec>, + fr3_starts: &mut Vec>, + cdr1_starts: &mut Vec>, + cdr2_starts: &mut Vec>, + log: &mut Vec, +) -> Result<(), String> { + *fr1_starts = vec![0; refdata.refs.len()]; + *fr2_starts = vec![None; refdata.refs.len()]; + *fr3_starts = vec![None; refdata.refs.len()]; + *cdr1_starts = vec![None; refdata.refs.len()]; + *cdr2_starts = vec![None; refdata.refs.len()]; + let mut msg = String::new(); + for i in 0..refdata.refs.len() { + if refdata.is_v(i) { + if broken[i] && ctl.gen_opt.require_unbroken_ok { + continue; + } + let aa = aa_seq(&refdata.refs[i].to_ascii_vec(), 0); + let rtype = refdata.rtype[i]; + let chain_type = if rtype == 0 { + "IGH" + } else if rtype == 1 { + "IGK" + } else if rtype == 2 { + "IGL" + } else if rtype == 3 { + "TRA" + } else if rtype == 4 { + "TRB" + } else { + continue; + }; + let fs1 = fr1_start(&aa, chain_type); + fr1_starts[i] = 3 * fs1; + let fs2 = fr2_start(&aa, chain_type, false); + if let Some(fs2) = fs2 { + fr2_starts[i] = Some(3 * fs2); + } else if ctl.gen_opt.require_unbroken_ok { + msg += "\nYou supplied the argument REQUIRE_UNBROKEN_OK, but the FWR2 start \ + could not be computed\nfor this reference sequence:"; + let seq = refdata.refs[i].to_ascii_vec(); + write!(msg, ">{}\n{}\n", refdata.rheaders_orig[i], strme(&seq)) + .expect("formatting to string"); + } + let fs3 = fr3_start(&aa, chain_type, false); + if let Some(fs3) = fs3 { + fr3_starts[i] = Some(3 * fs3); + } else if ctl.gen_opt.require_unbroken_ok { + msg += "\nYou supplied the argument REQUIRE_UNBROKEN_OK, but the 
FWR3 start \ + could not be computed\nfor this reference sequence:"; + let seq = refdata.refs[i].to_ascii_vec(); + write!(msg, ">{}\n{}\n", refdata.rheaders_orig[i], strme(&seq)) + .expect("formatting to string"); + } + let cs1 = cdr1_start(&aa, chain_type, false); + if let Some(cs1) = cs1 { + cdr1_starts[i] = Some(3 * cs1); + if let Some(fs2) = fs2 { + if cs1 > fs2 && ctl.gen_opt.require_unbroken_ok { + msg += + "\nYou supplied the argument REQUIRE_UNBROKEN_OK, but the CDR1 start \ + exceeds the FWR2 start for this reference sequence:"; + let seq = refdata.refs[i].to_ascii_vec(); + write!(msg, ">{}\n{}\n", refdata.rheaders_orig[i], strme(&seq)) + .expect("formatting to string"); + } + } + } else if ctl.gen_opt.require_unbroken_ok { + msg += "\nYou supplied the argument REQUIRE_UNBROKEN_OK, but the CDR1 start \ + could not be computed\nfor this reference sequence:\n"; + let seq = refdata.refs[i].to_ascii_vec(); + write!(msg, ">{}\n{}\n", refdata.rheaders_orig[i], strme(&seq)) + .expect("formatting to string"); + } + let cs2 = cdr2_start(&aa, chain_type, false); + if let Some(cs2) = cs2 { + cdr2_starts[i] = Some(3 * cs2); + if let Some(fs3) = fs3 { + if ctl.gen_opt.require_unbroken_ok && cs2 > fs3 { + msg += + "\nYou supplied the argument REQUIRE_UNBROKEN_OK, but the CDR2 start \ + exceeds the FWR3 start for this reference sequence:"; + let seq = refdata.refs[i].to_ascii_vec(); + write!(msg, ">{}\n{}\n", refdata.rheaders_orig[i], strme(&seq)) + .expect("formatting to string"); + } + } + } else if ctl.gen_opt.require_unbroken_ok { + msg += "\nYou supplied the argument REQUIRE_UNBROKEN_OK, but the CDR2 start \ + could not be computed\nfor this reference sequence:"; + let seq = refdata.refs[i].to_ascii_vec(); + write!(msg, ">{}\n{}\n", refdata.rheaders_orig[i], strme(&seq)) + .expect("formatting to string"); + } + if let Some(cs1) = cs1 { + if fs1 > cs1 && ctl.gen_opt.require_unbroken_ok { + msg += "\nYou supplied the argument REQUIRE_UNBROKEN_OK, but the FWR1 start \ 
+ exceeds the CDR1 start for this reference sequence:\n"; + let seq = refdata.refs[i].to_ascii_vec(); + write!(msg, ">{}\n{}\n", refdata.rheaders_orig[i], strme(&seq)) + .expect("formatting to string"); + } + } + if let (Some(cs2), Some(fs2)) = (cs2, fs2) { + if fs2 > cs2 && ctl.gen_opt.require_unbroken_ok { + msg += "\nYou supplied the argument REQUIRE_UNBROKEN_OK, but the FWR2 start \ + exceeds the CDR2 start for this reference sequence:"; + let seq = refdata.refs[i].to_ascii_vec(); + write!(msg, ">{}\n{}\n", refdata.rheaders_orig[i], strme(&seq)) + .expect("formatting to string"); + } + } + } + } + if !msg.is_empty() { + return Err(msg); + } + + // Report on broken reference sequences. This comes after the json loading because possibly + // the user supplied the wrong reference, so there is no value in criticizing the reference + // in that case. + + if !log.is_empty() && !ctl.gen_opt.cellranger && !ctl.gen_opt.accept_broken { + let mut log = Vec::::new(); + fwriteln!( + log, + "\nSome errors were detected in the reference sequences supplied to enclone.\n\ + Please see comments at end for what you can do about this.\n\n", + ); + fwriteln!(log, +"🌼 Dear user, some defects were detected in the reference sequences supplied to enclone. 🌼\n\ + 🌼 Some of these defects may be small. Generally they are associated with V segments that 🌼\n\ + 🌼 are frameshifted or truncated, or with C segments that have an extra base at the 🌼\n\ + 🌼 beginning. We are letting you know about this because they could result in 🌼\n\ + 🌼 misannotation. 🌼\n" + ); + + let rows = [ + vec![ + "You can make enclone ignore these defects by adding the additional argument" + .to_string(), + ], + vec![ + "ACCEPT_BROKEN to the enclone command line. 
Or you can obtain the same" + .to_string(), + ], + vec![ + "behavior by defining the environment variable ENCLONE_ACCEPT_BROKEN.".to_string(), + ], + ]; + let mut log = stringme(&log); + print_tabular_vbox(&mut log, &rows, 2, b"l".as_ref(), false, true); + let mut log = log.as_bytes().to_vec(); + fwriteln!(log, ""); + fwriteln!( + log, + "This is probably OK, but if your sample is human or mouse, you may wish to either:\n\ + • rerun cellranger using the cleaned up reference sequences that come prepackaged with \ + it\n (noting that your might have used an older, less clean version of that)\n\ + • or add the argument BUILT_IN to enclone, which will force use of the built-in \ + reference\n \ + sequences. This will be a bit slower because all the contigs will need to be\n \ + reannotated. If you're using mouse, you'll also need to add the argument MOUSE.\n" + ); + return Err(stringme(&log)); + } + Ok(()) +} diff --git a/enclone_stuff/src/some_filters.rs b/enclone_stuff/src/some_filters.rs new file mode 100644 index 000000000..14bda3894 --- /dev/null +++ b/enclone_stuff/src/some_filters.rs @@ -0,0 +1,453 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
+ +use crate::doublets::delete_doublets; +use crate::merge_onesies::merge_onesies; +use crate::split_orbits::split_orbits; +use crate::weak_chains::weak_chains; +use enclone_core::barcode_fate::BarcodeFate; +use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype}; +use enclone_print::define_mat::{define_mat, setup_define_mat}; +use enclone_print::print_utils3::define_column_info; +use enclone_proto::types::DonorReferenceItem; +use equiv::EquivRel; +use qd::Double; +use rayon::prelude::*; +use std::cmp::max; +use std::collections::{HashMap, HashSet}; +use std::time::Instant; +use vdj_ann::refx::RefData; +use vector_utils::{erase_if, next_diff1_2, unique_sort}; + +pub fn some_filters( + orbits: &mut Vec>, + is_bcr: bool, + to_bc: &HashMap<(usize, usize), Vec>, + sr: &[Vec], + ctl: &EncloneControl, + exact_clonotypes: &[ExactClonotype], + info: &[CloneInfo], + raw_joins: &[Vec], + eq: &EquivRel, + disintegrated: &[bool], + fate: &mut [HashMap], + refdata: &RefData, + dref: &[DonorReferenceItem], +) { + // Delete exact subclonotypes that appear to represent doublets. + + delete_doublets( + orbits, + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + info, + raw_joins, + refdata, + dref, + fate, + ); + + // Given a signature s having at least two chains, if the total cells in the two-chain + // signatures that are different from it but share a chain with it is at least 20 times + // greater, delete s. + // + // Note duplication of calls to define_mat with other code. This is expensive. 
+ + let tsig = Instant::now(); + const SIG_MULT: usize = 20; + let mut results = Vec::<(usize, Vec<(usize, String, BarcodeFate)>, Vec)>::new(); + for i in 0..orbits.len() { + results.push((i, Vec::new(), Vec::new())); + } + results.par_iter_mut().for_each(|res| { + let i = res.0; + let o = orbits[i].clone(); + let (od, exacts) = setup_define_mat(&o, info); + let mat = define_mat( + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + &exacts, + &od, + info, + raw_joins, + refdata, + dref, + ); + + // Find all the signatures and cell counts associated to each. + + let mut freq = Vec::<(usize, Vec)>::new(); + { + let mut types = Vec::<(Vec, usize)>::new(); + for (u, &e) in exacts.iter().enumerate() { + let mut t = Vec::::new(); + for (col, m) in mat.iter().enumerate() { + if m[u].is_some() { + t.push(col); + } + } + if t.len() >= 2 { + types.push((t, exact_clonotypes[e].ncells())); + } + } + types.sort(); + let mut i = 0; + while i < types.len() { + let j = next_diff1_2(&types, i as i32) as usize; + let mut mult = 0; + for t in &types[i..j] { + mult += t.1; + } + freq.push((mult, types[i].0.clone())); + i = j; + } + } + /* + let mut msg = "\nfrequencies:\n".to_string(); // XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + use itertools::Itertools; // XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + for i in 0..freq.len() { // XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + msg += &mut format!("{} ==> {}\n", freq[i].0, freq[i].1.iter().format(",")); // XXX + } // XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + */ + + // Decide which signatures to delete. 
+ + let mut dels = HashSet::>::new(); + for i in 0..freq.len() { + let mut n2 = 0; + for j in 0..freq.len() { + if j != i && freq[j].1.len() == 2 { + let mut share = false; + for x in freq[j].1.iter() { + if freq[i].1.contains(x) { + share = true; + } + } + if share { + n2 += freq[j].0; + } + } + } + if n2 > SIG_MULT * freq[i].0 { + dels.insert(freq[i].1.clone()); + /* + msg += &mut format!("delete {}\n", freq[i].1.iter().format(",")); // XXXXXXXXXX + */ + } + } + /* + if dels.len() > 0 { println!("{}", msg); } // XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + */ + for u in 0..exacts.len() { + let mut t = Vec::::new(); + for (col, m) in mat.iter().enumerate() { + if m[u].is_some() { + t.push(col); + } + } + if dels.contains(&t) && ctl.clono_filt_opt_def.signature { + res.2.push(exacts[u]); + let ex = &exact_clonotypes[exacts[u]]; + for i in 0..ex.ncells() { + res.1.push(( + ex.clones[i][0].dataset_index, + ex.clones[i][0].barcode.clone(), + BarcodeFate::Signature, + )); + } + } + } + }); + let mut to_delete = vec![false; exact_clonotypes.len()]; + for i in 0..results.len() { + for j in 0..results[i].1.len() { + fate[results[i].1[j].0].insert(results[i].1[j].1.clone(), results[i].1[j].2.clone()); + } + for j in 0..results[i].2.len() { + to_delete[results[i].2[j]] = true; + } + } + let mut orbits2 = Vec::>::new(); + for o in orbits.iter() { + let mut o = o.clone(); + let mut del = vec![false; o.len()]; + for j in 0..o.len() { + let id = info[o[j] as usize].clonotype_index; + if to_delete[id] { + del[j] = true; + } + } + erase_if(&mut o, &del); + orbits2.push(o); + } + *orbits = orbits2; + ctl.perf_stats(&tsig, "signature filtering"); + + // Merge onesies where totally unambiguous. + + let tmerge = Instant::now(); + merge_onesies(orbits, ctl, exact_clonotypes, info, eq, disintegrated); + ctl.perf_stats(&tmerge, "merging onesies"); + + // Check for disjoint orbits. 
+ + let tsplit = Instant::now(); + split_orbits( + orbits, + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + info, + raw_joins, + refdata, + dref, + ); + ctl.perf_stats(&tsplit, "splitting orbits 1"); + + // Test for weak chains. + + let tweak = Instant::now(); + weak_chains( + orbits, + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + info, + raw_joins, + fate, + refdata, + dref, + ); + ctl.perf_stats(&tweak, "weak chain filtering"); + + // Check for disjoint orbits (again). + + let tsplit = Instant::now(); + split_orbits( + orbits, + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + info, + raw_joins, + refdata, + dref, + ); + ctl.perf_stats(&tsplit, "splitting orbits 2"); + + // Find and mark for deletion exact subclonotypes having a variant base in V..J that, + // accounting for all the cells in all the exact subclonotypes, never occurs as Q60 + // doesn't occur as Q40 twice, and disagrees with the reference. + + let mut results = Vec::<(usize, Vec<(usize, String, BarcodeFate)>, Vec)>::new(); + for i in 0..orbits.len() { + results.push((i, Vec::new(), Vec::new())); + } + results.par_iter_mut().for_each(|res| { + let i = res.0; + let o = orbits[i].clone(); + let (od, exacts) = setup_define_mat(&o, info); + let mat = define_mat( + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + &exacts, + &od, + info, + raw_joins, + refdata, + dref, + ); + let cols = mat.len(); + let rsi = define_column_info(ctl, &exacts, exact_clonotypes, &mat, refdata); + + // Create vars, copied from vars_and_shares. 
+ + let mut vars = Vec::>::new(); + for cx in 0..cols { + let mut n = 0; + for z in 0..rsi.seqss[cx].len() { + n = max(n, rsi.seqss[cx][z].len()); + } + let mut v = Vec::::new(); + for p in 0..n { + let mut bases = Vec::::new(); + for s in 0..rsi.seqss[cx].len() { + if p >= rsi.seqss[cx][s].len() { + continue; + } + bases.push(rsi.seqss[cx][s][p]); + } + unique_sort(&mut bases); + if bases.len() > 1 { + v.push(p); + } + } + vars.push(v); + } + + // Pretest if using JOIN_BASIC_H. The code would crash without this. + + let mut neuter = false; + if ctl.join_alg_opt.basic_h.is_some() { + let mut ns = vec![Vec::::new(); cols]; + for (u, &clonotype_id) in exacts.iter().enumerate() { + let ex = &exact_clonotypes[clonotype_id]; + for (m, nn) in mat.iter().zip(ns.iter_mut()) { + if let Some(m) = m[u] { + if ex.share[m].annv.len() > 1 { + continue; + } + let n = ex.share[m].seq_del.len(); + nn.push(n); + } + } + } + for mut nn in ns { + unique_sort(&mut nn); + if nn.len() > 1 { + neuter = true; + } + } + } + + // Proceed. 
+ + // (column, pos, base, qual, row) + let mut vquals = Vec::<(usize, usize, u8, u8, usize)>::new(); + for u in 0..exacts.len() { + if neuter { + continue; + } + let clonotype_id = exacts[u]; + let ex = &exact_clonotypes[clonotype_id]; + for col in 0..cols { + if let Some(m) = mat[col][u] { + if ex.share[m].annv.len() > 1 { + continue; + } + let n = ex.share[m].seq_del.len(); + let vref = &exact_clonotypes[exacts[u]].share[m].vs.to_ascii_vec(); + let jref = &exact_clonotypes[exacts[u]].share[m].js.to_ascii_vec(); + for z in 0..vars[col].len() { + let p = vars[col][z]; + // not sure how this can happen + if ctl.join_alg_opt.basic_h.is_some() && p >= ex.share[m].seq_del.len() { + neuter = true; + continue; + } + let b = ex.share[m].seq_del[p]; + let mut refdiff = false; + if p < vref.len() - ctl.heur.ref_v_trim && b != vref[p] { + refdiff = true; + } + if p >= n - (jref.len() - ctl.heur.ref_j_trim) + && b != jref[jref.len() - (n - p)] + { + refdiff = true; + } + if refdiff { + for j in 0..ex.clones.len() { + let qual = ex.clones[j][m].quals[p]; + vquals.push((col, p, b, qual, u)); + } + } + } + } + } + } + if neuter { + vquals.clear(); + } + vquals.sort_unstable(); + let mut j = 0; + while j < vquals.len() { + let mut k = j + 1; + while k < vquals.len() { + if vquals[k].0 != vquals[j].0 + || vquals[k].1 != vquals[j].1 + || vquals[k].2 != vquals[j].2 + { + break; + } + k += 1; + } + let mut q60 = false; + let mut q40 = 0; + for v in &vquals[j..k] { + if v.3 >= 60 { + q60 = true; + } else if v.3 >= 40 { + q40 += 1; + } + } + if !q60 && q40 < 2 { + let u = vquals[j].4; + if ctl.clono_filt_opt.qual_filter { + res.2.push(exacts[u]); + } + let ex = &exact_clonotypes[exacts[u]]; + for i in 0..ex.ncells() { + res.1.push(( + ex.clones[i][0].dataset_index, + ex.clones[i][0].barcode.clone(), + BarcodeFate::Qual, + )); + } + } + j = k; + } + }); + let mut to_delete = vec![false; exact_clonotypes.len()]; + let mut dels = Vec::::new(); + for i in 0..results.len() { + for j in 
0..results[i].1.len() {
+            fate[results[i].1[j].0].insert(results[i].1[j].1.clone(), results[i].1[j].2.clone());
+        }
+        for x in results[i].2.iter() {
+            to_delete[*x] = true;
+        }
+    }
+    dels.sort_unstable();
+    for o in orbits.iter_mut() {
+        let mut del = vec![false; o.len()];
+        for (&oj, d) in o.iter().zip(del.iter_mut()) {
+            let id = info[oj as usize].clonotype_index;
+            if to_delete[id] {
+                *d = true;
+            }
+        }
+        erase_if(o, &del);
+    }
+
+    // Check for disjoint orbits (again again).
+
+    let tsplit = Instant::now();
+    split_orbits(
+        orbits,
+        is_bcr,
+        to_bc,
+        sr,
+        ctl,
+        exact_clonotypes,
+        info,
+        raw_joins,
+        refdata,
+        dref,
+    );
+    // *orbits = orbits.iter().flatten().map(|x| vec![*x]).collect();
+    ctl.perf_stats(&tsplit, "splitting orbits 3");
+}
diff --git a/enclone_stuff/src/split_orbits.rs b/enclone_stuff/src/split_orbits.rs
new file mode 100644
index 000000000..c828148b7
--- /dev/null
+++ b/enclone_stuff/src/split_orbits.rs
@@ -0,0 +1,192 @@
+// Copyright (c) 2021 10x Genomics, Inc. All rights reserved.
+
+use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype};
+use enclone_print::define_mat::{define_mat, setup_define_mat};
+use enclone_proto::types::DonorReferenceItem;
+use equiv::EquivRel;
+use qd::Double;
+use std::collections::HashMap;
+use vdj_ann::refx::RefData;
+use vector_utils::{bin_position, unique_sort, VecUtils};
+
+/// Check for disjoint orbits, and split them.
+///
+/// For each orbit, the chain matrix is computed by `define_mat`, and an equivalence relation
+/// is built on the elements of the orbit:
+/// * the elements of two exact subclonotypes are joined if their rows of the matrix have a
+///   common entry in some "left" column and also in some column that is not "left";
+/// * an exact subclonotype having a single chain (a onesie) is joined to the other onesies
+///   having the same sequence, and to the multi-chain exact subclonotypes containing that
+///   sequence, provided that those all lie in a single equivalence class.
+///
+/// An orbit having just one class is kept as is.  Otherwise it is replaced by one orbit per
+/// class.  `orbits` is overwritten with the result.  The other arguments are only read, and
+/// most of them are simply handed to `define_mat`.
+pub fn split_orbits(
+    orbits: &mut Vec<Vec<i32>>,
+    is_bcr: bool,
+    to_bc: &HashMap<(usize, usize), Vec<String>>,
+    sr: &[Vec<Double>],
+    ctl: &EncloneControl,
+    exact_clonotypes: &[ExactClonotype],
+    info: &[CloneInfo],
+    raw_joins: &[Vec<usize>],
+    refdata: &RefData,
+    dref: &[DonorReferenceItem],
+) {
+    let mut orbits2 = Vec::<Vec<i32>>::new();
+    for o in orbits.iter() {
+        let (od, exacts) = setup_define_mat(o, info);
+        let mat = define_mat(
+            is_bcr,
+            to_bc,
+            sr,
+            ctl,
+            exact_clonotypes,
+            &exacts,
+            &od,
+            info,
+            raw_joins,
+            refdata,
+            dref,
+        );
+        let cols = mat.len();
+
+        // Define map of indices into exacts.
+
+        let nexacts = exacts.len();
+        let mut to_exacts = HashMap::<usize, usize>::with_capacity(nexacts);
+        for (u, &e) in exacts.iter().enumerate() {
+            to_exacts.insert(e, u);
+        }
+
+        // Define map of exacts to positions in the orbit.  Each orbit entry is an index
+        // into info, which provides the exact subclonotype it belongs to.
+
+        let mut to_infos = vec![Vec::<usize>::new(); nexacts];
+        for (i, &oi) in o.iter().enumerate() {
+            let u = to_exacts[&info[oi as usize].clonotype_index];
+            to_infos[u].push(i);
+        }
+
+        // Determine which columns are "left", meaning IGH or TRB.  The first exact
+        // subclonotype having an entry in the column decides.
+
+        let mut left = vec![false; cols];
+        for (col, is_left) in mat.iter().zip(left.iter_mut()) {
+            for (u, &entry) in col.iter().enumerate() {
+                if let Some(c) = entry {
+                    *is_left = exact_clonotypes[exacts[u]].share[c].left;
+                    break;
+                }
+            }
+        }
+
+        // Form the row of the matrix for each exact subclonotype.  These rows are used
+        // twice below, so they are computed once here.
+
+        let rows: Vec<Vec<Option<usize>>> = (0..nexacts)
+            .map(|u| mat.iter().map(|col| col[u]).collect())
+            .collect();
+
+        // Determine which pairs of configurations share both chain types, and if so, call
+        // them joined.
+
+        let mut matu = rows.clone();
+        unique_sort(&mut matu);
+        let mut eqm = vec![vec![false; matu.len()]; matu.len()];
+        for (mj1, eqm) in matu.iter().zip(eqm.iter_mut()) {
+            for (mj2, eqm) in matu.iter().zip(eqm.iter_mut()) {
+                let (mut l, mut r) = (false, false);
+                for ((&mm1, &mm2), &ll) in mj1.iter().zip(mj2.iter()).zip(left.iter()) {
+                    if mm1.is_some() && mm2.is_some() {
+                        if ll {
+                            l = true;
+                        } else {
+                            r = true;
+                        }
+                    }
+                }
+                if l && r {
+                    *eqm = true;
+                }
+            }
+        }
+
+        // Propagate this to an equivalence relation on the orbit elements.
+
+        let mut eqx = EquivRel::new(o.len() as i32);
+        let mut lists = vec![Vec::<usize>::new(); matu.len()];
+        for (u, row) in rows.iter().enumerate() {
+            lists[bin_position(&matu, row) as usize].push(u);
+        }
+        for (l1, eqm) in lists.iter().zip(eqm.into_iter()) {
+            for (l2, eqm) in lists.iter().zip(eqm.into_iter()) {
+                if eqm {
+                    let u1 = l1[0];
+                    for &u2 in l2.iter() {
+                        for &i1 in to_infos[u1].iter() {
+                            for &i2 in to_infos[u2].iter() {
+                                eqx.join(i1 as i32, i2 as i32);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // Join onesies where possible.  This should probably be more efficient.
+
+        for (&e1, info1) in exacts.iter().zip(to_infos.iter()) {
+            let ex1 = &exact_clonotypes[e1];
+            if !ex1.share.solo() {
+                continue;
+            }
+
+            // Join to the onesies having the same sequence, and gather the multi-chain
+            // exact subclonotypes that contain the sequence.
+
+            let mut is = Vec::<usize>::new();
+            for (&e2, info2) in exacts.iter().zip(to_infos.iter()) {
+                let ex2 = &exact_clonotypes[e2];
+                if ex2.share.solo() {
+                    if ex1.share[0].seq == ex2.share[0].seq {
+                        eqx.join(info1[0] as i32, info2[0] as i32);
+                    }
+                } else {
+                    for j in 0..ex2.share.len() {
+                        if ex2.share[j].seq == ex1.share[0].seq {
+                            is.push(info2[0]);
+                        }
+                    }
+                }
+            }
+
+            // Join to the gathered exact subclonotypes only if they all lie in one class.
+
+            let mut rs = Vec::<usize>::new();
+            for &ij in &is {
+                rs.push(eqx.class_id(ij as i32) as usize);
+            }
+            unique_sort(&mut rs);
+            if rs.solo() {
+                eqx.join(info1[0] as i32, is[0] as i32);
+            }
+        }
+
+        // Divide the orbit if needed.
+
+        if eqx.norbits() == 1 {
+            orbits2.push(o.clone());
+        } else {
+            let mut repsx = Vec::<i32>::new();
+            eqx.orbit_reps(&mut repsx);
+            for r in repsx {
+                let mut ox = Vec::<i32>::new();
+                eqx.orbit(r, &mut ox);
+                let mut o2 = Vec::<i32>::new();
+                for ko in ox {
+                    o2.push(o[ko as usize]);
+                }
+                orbits2.push(o2);
+            }
+        }
+    }
+    *orbits = orbits2;
+}
diff --git a/enclone_stuff/src/start.rs b/enclone_stuff/src/start.rs
new file mode 100644
index 000000000..b537b6c41
--- /dev/null
+++ b/enclone_stuff/src/start.rs
@@ -0,0 +1,931 @@
+// Copyright (c) 2021 10X Genomics, Inc. All rights reserved.
+//
+// See README for documentation.
+
+use crate::analyze_dref::analyze_donor_ref;
+use crate::disintegrate::disintegrate_onesies;
+use crate::fcell::filter_by_fcell;
+use crate::filter_umi::filter_umi;
+use crate::flag_defective::flag_defective;
+use crate::inconsistent::test_vdj_gex_inconsistent;
+use crate::populate_features::populate_features;
+use crate::some_filters::some_filters;
+use debruijn::dna_string::DnaString;
+use enclone::allele::{find_alleles, sub_alts};
+use enclone::graph_filter::graph_filter;
+use enclone::info::build_info;
+use enclone::join::join_exacts;
+use enclone::misc1::{cross_filter, lookup_heavy_chain_reuse};
+use enclone::misc2::{check_for_barcode_reuse, find_exact_subclonotypes, search_for_shm_indels};
+use enclone::misc3::sort_tig_bc;
+use enclone_args::read_json::parse_json_annotations_files;
+use enclone_core::barcode_fate::BarcodeFate;
+use enclone_core::defs::{AlleleData, CloneInfo, TigData};
+use enclone_core::enclone_structs::{EncloneExacts, EncloneIntermediates, EncloneSetup};
+use enclone_core::hcomp::heavy_complexity;
+use enclone_print::define_mat::{define_mat, setup_define_mat};
+use enclone_print::loupe::make_donor_refs;
+use equiv::EquivRel;
+use io_utils::{fwriteln, open_for_read};
+use itertools::Itertools;
+use qd::dd;
+use std::{
+    collections::HashMap,
+    env,
+    fs::File,
+    io::{BufRead, BufWriter, Write},
+    time::Instant,
+};
+use
string_utils::{add_commas, TextUtils}; +use vector_utils::{bin_member, erase_if, next_diff12_3, sort_sync2, unique_sort}; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// This is a copy of stirling2_ratio_table from the stirling_numbers crate, that has been modified +// to use higher precision internal math. This has also been speeded up, and in the process +// made less readable. + +use qd::Double; +use rayon::prelude::*; + +pub fn stirling2_ratio_table_double(n_max: usize) -> Vec> { + let mut s = Vec::>::new(); + let zero = dd![0.0]; + let one = dd![1.0]; + for n in 0..=n_max { + s.push(vec![zero; n + 1]); + } + s[0][0] = one; + let mut z = Vec::::new(); + let mut n2n1 = vec![dd![0.0]; n_max + 1]; + for (n, nn) in n2n1.iter_mut().enumerate().skip(2) { + *nn = Double::from((n - 2) as u32) / Double::from((n - 1) as u32); + } + let mut k1k = vec![dd![0.0]; n_max]; + for (k, kk) in k1k.iter_mut().enumerate().skip(1) { + *kk = Double::from((k - 1) as u32) / Double::from(k as u32); + } + let mut njn = Vec::<(usize, Double)>::new(); + for i in 0..n_max + 1 { + njn.push((i, dd![0.0])); + } + njn.par_iter_mut().for_each(|res| { + let n = res.0; + if n >= 1 { + let mut p = one; + for j in 1..=n { + p *= Double::from(j as u32) / Double::from(n as u32); + } + res.1 = p; + } + }); + + // This is the slow part of the function. 
+ + for n in 1..=n_max { + s[n][0] = zero; + for k in 1..n - 1 { + z[k - 1] *= k1k[k]; + } + if n >= 2 { + z.push(n2n1[n].powi((n - 1) as i32)); + } + for k in 1..n { + let x = z[k - 1]; // = ((k-1)/k)^(n-1) + s[n][k] = s[n - 1][k] + s[n - 1][k - 1] * x; + } + s[n][n] = njn[n].1; + } + s +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +pub fn main_enclone_start(setup: EncloneSetup) -> Result { + let tr = Instant::now(); + let ctl = &setup.ctl; + let gex_info = &setup.gex_info; + let refdata = &setup.refdata; + let is_bcr = setup.is_bcr; + let to_ref_index = &setup.to_ref_index; + + // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + + // Flag defective reference sequences. + + let mut log = Vec::::new(); + let mut broken = Vec::::new(); + flag_defective(ctl, refdata, &mut log, &mut broken); + ctl.perf_stats(&tr, "flagging defective references"); + + // Parse the json annotations file. + + let tparse = Instant::now(); + let mut tig_bc = Vec::>::new(); + let mut vdj_cells = Vec::>::new(); + let mut gex_cells = Vec::>::new(); + let mut gex_cells_specified = Vec::::new(); + let mut fate = vec![HashMap::::new(); ctl.origin_info.n()]; + parse_json_annotations_files( + ctl, + &mut tig_bc, + refdata, + to_ref_index, + &mut vdj_cells, + &mut gex_cells, + &mut gex_cells_specified, + &mut fate, + )?; + ctl.perf_stats(&tparse, "loading from json"); + + // Populate features. 
+ + let tpop = Instant::now(); + let mut fr1_starts = Vec::::new(); + let mut fr2_starts = Vec::>::new(); + let mut fr3_starts = Vec::>::new(); + let mut cdr1_starts = Vec::>::new(); + let mut cdr2_starts = Vec::>::new(); + populate_features( + ctl, + refdata, + &broken, + &mut fr1_starts, + &mut fr2_starts, + &mut fr3_starts, + &mut cdr1_starts, + &mut cdr2_starts, + &mut log, + )?; + if ctl.gen_opt.require_unbroken_ok { + return Ok(EncloneIntermediates::default()); + } + for tigi in &mut tig_bc { + for x in tigi { + x.fr1_start = fr1_starts[x.v_ref_id]; + x.fr2_start = fr2_starts[x.v_ref_id]; + x.fr3_start = fr3_starts[x.v_ref_id]; + x.cdr1_start = cdr1_starts[x.v_ref_id]; + x.cdr2_start = cdr2_starts[x.v_ref_id]; + } + } + ctl.perf_stats(&tpop, "populating features"); + + // Test for no data. + + let tproto = Instant::now(); + if ctl.origin_info.n() == 0 { + return Err("\nNo TCR or BCR data have been specified.\n".to_string()); + } + + // Search for SHM indels. + + search_for_shm_indels(ctl, &tig_bc); + if ctl.gen_opt.indels { + return Ok(EncloneIntermediates::default()); + } + + // Record fate of non-cells. + + if ctl.gen_opt.ncell { + for tigi in &tig_bc { + let bc = &tigi[0].barcode; + let li = tigi[0].dataset_index; + if !bin_member(&vdj_cells[li], bc) { + fate[li].insert(bc.clone(), BarcodeFate::NotAsmCell); + } + } + } + + // Filter using light --> heavy graph. + + graph_filter(ctl, &mut tig_bc, ctl.gen_opt.graph, &mut fate); + + // Sort tig_bc. + + sort_tig_bc(ctl, &mut tig_bc, refdata); + + // Cross filter. + + cross_filter(ctl, &mut tig_bc, &mut fate); + + // Look for barcode reuse. + + check_for_barcode_reuse(ctl, &tig_bc)?; + ctl.perf_stats(&tproto, "in proto stuff"); + + // Find exact subclonotypes. 
+ + let mut exact_clonotypes = find_exact_subclonotypes(ctl, &tig_bc, refdata, &mut fate); + if ctl.gen_opt.utr_con || ctl.gen_opt.con_con { + return Ok(EncloneIntermediates::default()); + } + if !ctl.gen_opt.trace_barcode.is_empty() { + for ex in &exact_clonotypes { + for clone in &ex.clones { + if clone[0].barcode == ctl.gen_opt.trace_barcode { + println!( + "\nfound {} in an initial exact subclonotype having {} cells", + ctl.gen_opt.trace_barcode, + ex.ncells(), + ); + } + } + } + } + + // Test for consistency between VDJ cells and GEX cells. + + test_vdj_gex_inconsistent(ctl, &tig_bc, &exact_clonotypes, &vdj_cells, gex_info)?; + + // Filter out some foursie artifacts. + + let t = Instant::now(); + let mut to_delete = vec![false; exact_clonotypes.len()]; + let mut twosies = Vec::<(&[u8], &[u8])>::new(); + for ex in &exact_clonotypes { + if ex.share.len() == 2 && (ex.share[0].left ^ ex.share[1].left) && ex.ncells() >= 10 { + twosies.push((ex.share[0].seq.as_ref(), ex.share[1].seq.as_ref())); + } + } + unique_sort(&mut twosies); + for (ex, d) in exact_clonotypes.iter().zip(to_delete.iter_mut()) { + if ex.share.len() == 4 { + for (i1, s1) in ex.share.iter().enumerate() { + for s2 in &ex.share[i1 + 1..4] { + if s1.left ^ s2.left { + let p = (s1.seq.as_ref(), s2.seq.as_ref()); + if bin_member(&twosies, &p) { + *d = true; + for clone in &ex.clones { + fate[clone[0].dataset_index] + .insert(clone[0].barcode.clone(), BarcodeFate::FoursieKill); + } + } + } + } + } + } + } + if ctl.clono_filt_opt_def.weak_foursies { + erase_if(&mut exact_clonotypes, &to_delete); + } + + // Filter if MAX_HEAVIES = 1 set. 
+ + if ctl.gen_opt.max_heavies == 1 { + let mut to_delete = vec![false; exact_clonotypes.len()]; + for i in 0..exact_clonotypes.len() { + let ex = &exact_clonotypes[i]; + let mut heavies = 0; + for j in 0..ex.share.len() { + if ex.share[j].left { + heavies += 1; + } + } + if heavies > 1 { + to_delete[i] = true; + } + } + erase_if(&mut exact_clonotypes, &to_delete); + } + ctl.perf_stats(&t, "filtering foursies"); + + // Build info about clonotypes. Note that this edits the V reference sequence to perform + // an indel in some cases. + + let tinfo = Instant::now(); + let mut info: Vec = build_info(refdata, ctl, &mut exact_clonotypes, &mut fate); + ctl.perf_stats(&tinfo, "building info"); + + // Derive consensus sequences for alternate alleles of V segments. Then create donor + // reference sequences for Loupe. + + let talt = Instant::now(); + // {(donor, ref id, alt seq, support, is_ref)}: + let mut alt_refs = Vec::<(usize, usize, DnaString, usize, bool)>::new(); + if !ctl.gen_opt.no_alt_alleles { + alt_refs = find_alleles(refdata, ctl, &exact_clonotypes); + } + ctl.perf_stats(&talt, "finding alt alleles"); + if !ctl.gen_opt.dref_file.is_empty() { + let f = File::create(&ctl.gen_opt.dref_file); + if f.is_err() { + eprintln!( + "\nError trying to write ctl.gen_opt.dref_file = {}.", + ctl.gen_opt.dref_file + ); + } + let mut f = BufWriter::new(f.unwrap()); + let mut count = 0; + for i in 0..alt_refs.len() { + let donor = alt_refs[i].0; + let ref_id = alt_refs[i].1; + if i > 0 && (donor != alt_refs[i - 1].0 || ref_id != alt_refs[i - 1].1) { + count = 0; + } + let alt_seq = &alt_refs[i].2; + fwriteln!( + f, + ">{}:{}:{}:{} (reference record id : donor name : allele number : gene name)\n{}", + refdata.id[ref_id], + ctl.origin_info.donor_id[donor], + count + 1, + refdata.name[ref_id], + alt_seq.to_string() + ); + count += 1; + } + } + let tdonor = Instant::now(); + let drefs = make_donor_refs(&alt_refs, refdata); + ctl.perf_stats(&tdonor, "making donor refs"); + + // 
Analyze donor reference. + + analyze_donor_ref(refdata, ctl, &alt_refs); + + // Update reference sequences for V segments by substituting in alt alleles if better. + + sub_alts(refdata, ctl, &alt_refs, &mut info, &mut exact_clonotypes); + + // Compute to_bc, which maps (dataset_index, clonotype_id) to {barcodes}. + // This is intended as a replacement for some old code below. + + let tbc = Instant::now(); + let mut to_bc = HashMap::<(usize, usize), Vec>::new(); + for (i, ex) in exact_clonotypes.iter().enumerate() { + for clone in &ex.clones { + let x = &clone[0]; + to_bc + .entry((x.dataset_index, i)) + .or_default() + .push(x.barcode.clone()); + } + } + ctl.perf_stats(&tbc, "computing to_bc"); + + // Make stirling ratio table. Not sure that fixing the size of this is safe. + + let tsr = Instant::now(); + let sr = stirling2_ratio_table_double(3000); + ctl.perf_stats(&tsr, "computing stirling number table"); + + // Compute complexity. + + let tcomp = Instant::now(); + if ctl.join_alg_opt.comp_filt < 1_000_000 { + let jun = heavy_complexity(refdata, &exact_clonotypes, ctl, &drefs); + for u in 0..exact_clonotypes.len() { + let ex = &mut exact_clonotypes[u]; + for m in 0..ex.share.len() { + if ex.share.len() == 2 && ex.share[m].left { + ex.share[m].jun = jun[u].clone(); + } + } + } + } + ctl.perf_stats(&tcomp, "computing complexity"); + + // Form equivalence relation on exact subclonotypes. We also keep the raw joins, consisting + // of pairs of info indices, that were originally joined. + + let mut join_info = Vec::<(usize, usize, bool, Vec)>::new(); + let mut raw_joins = Vec::<(i32, i32)>::new(); + let mut eq: EquivRel = join_exacts( + is_bcr, + &to_bc, + refdata, + ctl, + &exact_clonotypes, + &info, + &mut join_info, + &mut raw_joins, + &sr, + &drefs, + ); + + // If NWEAK_ONESIES is not specified, disintegrate certain onesie clonotypes into single cell + // clonotypes. This requires editing of exact_clonotypes, info, eq, join_info and raw_joins. 
+ + let mut disintegrated = Vec::::new(); + disintegrate_onesies( + ctl, + &mut disintegrated, + &mut eq, + &mut exact_clonotypes, + &mut info, + &mut join_info, + &mut raw_joins, + ); + + // Update to_bc. + + let txxx = Instant::now(); + let mut to_bc = HashMap::<(usize, usize), Vec>::new(); + for (i, ex) in exact_clonotypes.iter().enumerate() { + for clone in &ex.clones { + let x = &clone[0]; + to_bc + .entry((x.dataset_index, i)) + .or_default() + .push(x.barcode.clone()); + } + } + + // Restructure raw joins. + + raw_joins.sort_unstable(); + let raw_joins = { + let mut raw_joins2 = vec![Vec::::new(); info.len()]; + for r in raw_joins { + raw_joins2[r.0 as usize].push(r.1 as usize); + raw_joins2[r.1 as usize].push(r.0 as usize); + } + raw_joins2 + }; + + // Lock info. + + let info = &info; + + // Lookup for heavy chain reuse (special purpose experimental option). + + lookup_heavy_chain_reuse(ctl, &exact_clonotypes, info, &eq); + if ctl.gen_opt.heavy_chain_reuse { + return Ok(EncloneIntermediates::default()); + } + if !ctl.gen_opt.trace_barcode.is_empty() { + for ex in &exact_clonotypes { + for clone in &ex.clones { + if clone[0].barcode == ctl.gen_opt.trace_barcode { + println!( + "\nfound {} in a pre-filter exact subclonotype having {} cells", + ctl.gen_opt.trace_barcode, + ex.ncells(), + ); + } + } + } + } + ctl.perf_stats(&txxx, "in some odds and ends"); + + // Filter B cells based on UMI counts. + + let tumi = Instant::now(); + let mut orbits = Vec::>::new(); + filter_umi( + &eq, + &mut orbits, + ctl, + &mut exact_clonotypes, + info, + &mut fate, + ); + if !ctl.gen_opt.trace_barcode.is_empty() { + for ex in &exact_clonotypes { + for clone in &ex.clones { + if clone[0].barcode == ctl.gen_opt.trace_barcode { + println!( + "\nfound {} in an post-umi-filter exact subclonotype having {} cells", + ctl.gen_opt.trace_barcode, + ex.ncells(), + ); + } + } + } + } + + // Remove cells that are not called cells by GEX or feature barcodes. 
+ + let mut orbits = { + let mut orbits2 = Vec::>::new(); + for mut o in orbits { + let mut to_deletex = vec![false; o.len()]; + for (&x, dx) in o.iter().zip(to_deletex.iter_mut()) { + let x: &CloneInfo = &info[x as usize]; + let ex = &mut exact_clonotypes[x.clonotype_index]; + let mut to_delete = vec![false; ex.ncells()]; + for (clone, d) in ex.clones.iter().take(ex.ncells()).zip(to_delete.iter_mut()) { + let li = clone[0].dataset_index; + let bc = &clone[0].barcode; + if ctl.gen_opt.cellranger { + if gex_cells_specified[li] && !bin_member(&gex_cells[li], bc) { + *d = true; + fate[li].insert(bc.clone(), BarcodeFate::NotGexCell); + } + } else if !ctl.origin_info.gex_path[li].is_empty() { + let gbc = &gex_info.gex_cell_barcodes[li]; + if !bin_member(gbc, bc) { + fate[li].insert(bc.clone(), BarcodeFate::NotGexCell); + if !ctl.clono_filt_opt_def.ngex { + *d = true; + } + } + } + } + erase_if(&mut ex.clones, &to_delete); + if ex.ncells() == 0 { + *dx = true; + } + } + erase_if(&mut o, &to_deletex); + if !o.is_empty() { + orbits2.push(o); + } + } + orbits2 + }; + + // Filter using constraints imposed by FCELL. + + filter_by_fcell(ctl, &mut orbits, info, &mut exact_clonotypes, gex_info)?; + ctl.perf_stats(&tumi, "umi filtering and such"); + + // Break up clonotypes containing a large number of chains. 
These are + // very likely to be false merges + let mut orbits: Vec> = orbits + .into_iter() + .flat_map(|orbit| { + let (od, exacts) = setup_define_mat(&orbit, info); + let mat = define_mat( + is_bcr, + &to_bc, + &sr, + ctl, + &exact_clonotypes, + &exacts, + &od, + info, + &raw_joins, + refdata, + &drefs, + ); + let num_chains = mat.len(); + if num_chains < ctl.join_alg_opt.split_max_chains { + vec![orbit] + } else { + let exacts_of_chains = mat + .iter() + .enumerate() + .flat_map(|(chain_num, chain_in_exact)| { + exacts + .iter() + .zip_eq(chain_in_exact.iter()) + .filter_map(move |(e, chain)| chain.map(|_| (e, chain_num))) + }) + .into_group_map() + .into_iter() + .map(|(k, v)| (v, *k)) + .into_group_map(); + + let mut group_of_exacts = HashMap::new(); + let mut group_num = 0; + for (chains, chain_exacts) in exacts_of_chains { + if chains.len() == 1 { + for e in chain_exacts { + group_of_exacts.insert(e, group_num); + group_num += 1; + } + } else { + for e in chain_exacts { + group_of_exacts.insert(e, group_num); + } + group_num += 1; + } + } + + let mut groups = vec![vec![]; group_num]; + + for (_, exact_clonotype_id, val) in &od { + groups[group_of_exacts[exact_clonotype_id]].push(*val); + } + + // To split every subclonotype + // od + // .into_iter() + // .group_by(|o| o.1) + // .into_iter() + // .map(|(_, vals)| vals.map(|v| v.2).collect()) + // .collect(); + groups + } + }) + .collect(); + + // Run some filters. + + some_filters( + &mut orbits, + is_bcr, + &to_bc, + &sr, + ctl, + &exact_clonotypes, + info, + &raw_joins, + &eq, + &disintegrated, + &mut fate, + refdata, + &drefs, + ); + + // Pre evaluate (PRE_EVAL). + + if ctl.gen_opt.pre_eval || ctl.join_alg_opt.basic_h.is_some() { + // + // Echo command. 
+ + if ctl.gen_opt.echo { + let args: Vec = env::args().collect(); + println!("{}", args.iter().format(" ")); + } + if ctl.gen_opt.echoc { + let args: Vec = env::args().collect(); + println!("# {}", args.iter().format(" ")); + } + + // Gather exact subclonotypes. + + let mut exacts = Vec::>::new(); + { + let mut results = Vec::<(usize, Vec)>::new(); + for i in 0..orbits.len() { + results.push((i, Vec::::new())); + } + results.par_iter_mut().for_each(|res| { + let i = res.0; + let o = &orbits[i]; + let mut od = Vec::<(Vec, usize, i32)>::new(); + for id in o.iter() { + let x: &CloneInfo = &info[*id as usize]; + od.push((x.origin.clone(), x.clonotype_id, *id)); + } + od.sort(); + let mut j = 0; + while j < od.len() { + let k = next_diff12_3(&od, j as i32) as usize; + res.1.push(od[j].1); + j = k; + } + }); + for r in results { + exacts.push(r.1); + } + } + + // Apply POST_FILTER. + + if !ctl.gen_opt.post_filter.is_empty() { + let mut post_filter = Vec::<(String, String)>::new(); + let f = open_for_read![&ctl.gen_opt.post_filter]; + for (i, line) in f.lines().enumerate() { + let s = line.unwrap(); + if i == 0 { + assert_eq!(s, "dataset,barcode"); + } else { + post_filter.push((s.before(",").to_string(), s.after(",").to_string())); + } + } + post_filter.sort(); + for ex in exact_clonotypes.iter_mut() { + let mut to_delete = vec![false; ex.ncells()]; + for (clone, d) in ex.clones.iter().zip(to_delete.iter_mut()) { + let x = &clone[0]; + if !bin_member( + &post_filter, + &( + ctl.origin_info.dataset_id[x.dataset_index].clone(), + x.barcode.clone(), + ), + ) { + *d = true; + } + } + erase_if(&mut ex.clones, &to_delete); + } + let mut to_delete = vec![false; exacts.len()]; + for (ex, d) in exacts.iter().zip(to_delete.iter_mut()) { + let mut ncells = 0; + for &e in ex { + ncells += exact_clonotypes[e].ncells(); + } + if ncells == 0 { + *d = true; + } + } + erase_if(&mut exacts, &to_delete); + } + + // Set up metrics. 
+ + let mut merges2 = 0; + let mut mixes = 0; + let mut mixed_clonotypes = 0; + let mut mixed_clonotype_sizes = 0; + let mut cells1 = 0; + let mut clonotypes1 = 0; + let mut cells2 = 0; + let mut clonotypes2 = 0; + let mut cells_by_donor = vec![0_usize; ctl.origin_info.donor_list.len()]; + + // Reverse sort clonotypes by size. + + let mut n = vec![0; exacts.len()]; + for i in 0..exacts.len() { + for j in 0..exacts[i].len() { + let ex = &exact_clonotypes[exacts[i][j]]; + n[i] += ex.ncells(); + } + } + sort_sync2(&mut n, &mut exacts); + exacts.reverse(); + n.reverse(); + + // Process clonotypes. + + for i in 0..exacts.len() { + let mut mixed = false; + clonotypes1 += 1; + cells1 += n[i]; + if n[i] >= 2 { + clonotypes2 += 1; + } + cells2 += n[i]; + if ctl.gen_opt.pre_eval_show && n[i] > 1 { + println!("\nclonotype"); + } + let mut cells_by_donor_this = vec![0; ctl.origin_info.donor_list.len()]; + for j in 0..exacts[i].len() { + let ex = &exact_clonotypes[exacts[i][j]]; + if ctl.gen_opt.pre_eval_show && n[i] > 1 { + let donor = ex.clones[0][0].donor_index; + if donor.is_some() { + print!("{} ", ctl.origin_info.donor_list[donor.unwrap()]); + } + for k in 0..ex.share.len() { + if ex.share[k].left { + print!( + "{},{}\t", + refdata.name[ex.share[k].v_ref_id], + refdata.name[ex.share[k].j_ref_id] + ); + } + } + for k in 0..ex.share.len() { + if !ex.share[k].left { + print!( + "{},{}\t", + refdata.name[ex.share[k].v_ref_id], + refdata.name[ex.share[k].j_ref_id] + ); + } + } + println!(); + } + for k in 0..ex.clones.len() { + let x = &ex.clones[k][0]; + if x.donor_index.is_some() { + cells_by_donor[x.donor_index.unwrap()] += 1; + cells_by_donor_this[x.donor_index.unwrap()] += 1; + } + } + } + let mut mixes_this = 0; + if ctl.origin_info.donor_list.len() > 1 && ctl.clono_filt_opt_def.donor { + for j1 in 0..exacts[i].len() { + let ex1 = &exact_clonotypes[exacts[i][j1]]; + for j2 in j1..exacts[i].len() { + let ex2 = &exact_clonotypes[exacts[i][j2]]; + for k1 in 
0..ex1.clones.len() { + let x1 = &ex1.clones[k1][0]; + for k2 in 0..ex2.clones.len() { + if (j1, k1) < (j2, k2) { + let x2 = &ex2.clones[k2][0]; + if x1.donor_index.is_some() + && x2.donor_index.is_some() + && x1.donor_index.unwrap() != x2.donor_index.unwrap() + { + mixes_this += 1; + mixes += 1; + mixed = true; + } + } + } + } + } + } + } + if ctl.gen_opt.pre_eval_show && exacts[i].len() > 1 { + println!("mixes = {mixes_this}"); + } + let mut merges2_this = 0; + for n in cells_by_donor_this { + if n > 1 { + merges2_this += (n * (n - 1)) / 2; + merges2 += (n * (n - 1)) / 2; + } + } + if ctl.gen_opt.pre_eval_show && exacts[i].len() > 1 { + println!("merges = {merges2_this}"); + } + if mixed { + mixed_clonotypes += 1; + mixed_clonotype_sizes += n[i]; + } + } + let mut cross = 0; + let mut intra = 0; + for i1 in 0..cells_by_donor.len() { + if cells_by_donor[i1] > 1 { + intra += cells_by_donor[i1] * (cells_by_donor[i1] - 1) / 2; + } + for i2 in i1 + 1..cells_by_donor.len() { + cross += cells_by_donor[i1] * cells_by_donor[i2]; + } + } + println!("\nnumber of intradonor comparisons = {}", add_commas(intra)); + println!( + "number of intradonor cell-cell merges (quadratic) = {}", + add_commas(merges2) + ); + println!("number of cross-donor comparisons = {}", add_commas(cross)); + println!( + "number of cross-donor comparisons that mix donors = {}", + add_commas(mixes) + ); + let rate = (mixes as f64) * 1_000_000_000.0 / (cross as f64); + println!("rate of cross donor mixing = {rate:.2} x 10^-9"); + let bogus = (intra as f64) * (mixes as f64) / (cross as f64); + println!( + "estimated number of false intradonor merges = {}", + add_commas(bogus.round() as usize) + ); + println!("number of mixed clonotypes = {mixed_clonotypes}"); + println!( + "percent of non-single-cell mixed clonotypes = {:.2}", + 100.0 * mixed_clonotypes as f64 / clonotypes2 as f64 + ); + println!("sum of mixed clonotype sizes = {mixed_clonotype_sizes}"); + println!("total number of cells in clonotypes = 
{cells1}"); + println!( + "mean clonotype size = {:.3}", + cells1 as f64 / clonotypes1 as f64 + ); + println!( + "mean non-single-cell clonotype size = {:.3}\n", + cells2 as f64 / clonotypes2 as f64 + ); + std::process::exit(0); + } + + // Mark VDJ noncells. + + let tmark = Instant::now(); + if ctl.clono_filt_opt_def.non_cell_mark { + for ex in exact_clonotypes.iter_mut() { + for clone in ex.clones.iter_mut() { + let di = clone[0].dataset_index; + if !bin_member(&vdj_cells[di], &clone[0].barcode) { + clone[0].marked = true; + } + } + } + } + ctl.perf_stats(&tmark, "marking vdj noncells"); + if !ctl.gen_opt.trace_barcode.is_empty() { + for ex in &exact_clonotypes { + for clone in &ex.clones { + if clone[0].barcode == ctl.gen_opt.trace_barcode { + println!( + "\nfound {} in an intermediate exact subclonotype having {} cells", + ctl.gen_opt.trace_barcode, + ex.ncells(), + ); + } + } + } + } + Ok(EncloneIntermediates { + setup, + ex: EncloneExacts { + to_bc, + exact_clonotypes, + raw_joins, + info: info.to_vec(), + orbits, + vdj_cells, + join_info, + drefs, + sr, + fate, + is_bcr, + allele_data: AlleleData { + alt_refs, + var_pos: Vec::new(), + var_bases: Vec::new(), + }, + }, + }) +} diff --git a/enclone_stuff/src/vars.rs b/enclone_stuff/src/vars.rs new file mode 100644 index 000000000..6cc56473e --- /dev/null +++ b/enclone_stuff/src/vars.rs @@ -0,0 +1,127 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +use enclone_core::defs::{EncloneControl, GexInfo}; +use regex::Regex; +use std::collections::HashMap; +use std::time::Instant; +use string_utils::TextUtils; +use vector_utils::unique_sort; + +pub fn match_vars(ctl: &mut EncloneControl, gex_info: &GexInfo) -> Result<(), String> { + // Find matching features for _g etc. 
+ + let tstar = Instant::now(); + ctl.clono_print_opt.regex_match = + vec![HashMap::>::new(); ctl.origin_info.n()]; + let ends0 = [ + "_g", "_ab", "_ag", "_cr", "_cu", "_g_μ", "_ab_μ", "_ag_μ", "_cr_μ", "_cu_μ", "_g_%", + ]; + let ends1 = [ + "_g", "_ab", "_ag", "_cr", "_cu", "_g", "_ab", "_ag", "_cr", "_cu", "_g", + ]; + let suffixes = ["", "_min", "_max", "_μ", "_Σ"]; + let mut ends = Vec::::new(); + let mut endsz = Vec::::new(); + for (ix, x) in ends0.iter().enumerate() { + for y in suffixes.iter() { + ends.push(format!("{x}{y}")); + endsz.push(ends1[ix].to_string()); + } + } + let mut vars = ctl.clono_print_opt.lvars.clone(); + vars.append(&mut ctl.parseable_opt.pcols.clone()); + for con in ctl.clono_filt_opt_def.fcell.iter() { + for var in con.iter_variable_identifiers() { + vars.push(var.to_string()); + } + } + unique_sort(&mut vars); + ctl.perf_stats(&tstar, "doing miscellaneous stuff"); + let tomega = Instant::now(); + for x in vars.iter() { + for (iy, y) in ends.iter().enumerate() { + let mut xc = x.clone(); + if x.ends_with("_cell") { + xc = xc.rev_before("_cell").to_string(); + } + if xc.ends_with(y) { + let mut p = xc.rev_before(y); + if p.contains(':') { + p = p.after(":"); + } + let pp = format!("{p}{}", endsz[iy]); + if !p.is_empty() && Regex::new(p).is_ok() { + let mut ok = true; + let mut px = false; + for &b in p.as_bytes() { + if !(b.is_ascii_uppercase() + || b.is_ascii_lowercase() + || b.is_ascii_digit() + || b".-_[]()|*".contains(&b)) + { + ok = false; + break; + } + if b"[]()|*".contains(&b) { + px = true; + } + } + if ok && px { + let reg = Regex::new(&format!("^{p}$")); + for li in 0..ctl.origin_info.n() { + let mut js = Vec::::new(); + for j in 0..gex_info.gex_features[li].len() { + let f = &gex_info.gex_features[li][j]; + let ff = f.split('\t').collect::>(); + let mut ok = false; + if ff[2].starts_with("Antibody") { + if y.contains("_ab") { + ok = true; + } + } else if ff[2].starts_with("CRISPR") { + if y.contains("_cr") { + ok = true; + } 
+ } else if ff[2].starts_with("Custom") { + if y.contains("_cu") { + ok = true; + } + } else if ff[2].starts_with("Antigen") { + if y.contains("_ag") { + ok = true; + } + } else if y.contains("_g") { + ok = true; + } + if ok + && (reg.as_ref().unwrap().is_match(ff[0]) + || reg.as_ref().unwrap().is_match(ff[1])) + { + js.push(j); + } + } + if !js.is_empty() { + ctl.clono_print_opt.regex_match[li].insert(pp.clone(), js); + } + } + let mut matches = false; + for li in 0..ctl.origin_info.n() { + if ctl.clono_print_opt.regex_match[li].contains_key(&pp) { + matches = true; + } + } + if !matches { + return Err(format!( + "\nLead variable {x} contains a pattern that matches \ + no features.\n" + )); + } + break; + } + } + } + } + } + ctl.perf_stats(&tomega, "messing with variables"); + Ok(()) +} diff --git a/enclone_stuff/src/weak_chains.rs b/enclone_stuff/src/weak_chains.rs new file mode 100644 index 000000000..05196854f --- /dev/null +++ b/enclone_stuff/src/weak_chains.rs @@ -0,0 +1,116 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Based on the number of cells in each column, decide which exact subclonotypes +// look like junk. Preliminary heuristic. + +use enclone_core::{ + barcode_fate::BarcodeFate, + defs::{CloneInfo, EncloneControl, ExactClonotype}, +}; +use enclone_print::define_mat::{define_mat, setup_define_mat}; +use enclone_proto::types::DonorReferenceItem; +use qd::Double; +use rayon::prelude::*; +use std::collections::HashMap; +use vdj_ann::refx::RefData; +use vector_utils::erase_if; + +pub fn weak_chains( + orbits: &mut Vec>, + is_bcr: bool, + to_bc: &HashMap<(usize, usize), Vec>, + sr: &[Vec], + ctl: &EncloneControl, + exact_clonotypes: &[ExactClonotype], + info: &[CloneInfo], + raw_joins: &[Vec], + fate: &mut [HashMap], + refdata: &RefData, + dref: &[DonorReferenceItem], +) { + // Note mat calculation duplicated with print_clonotypes and also doublet detection. 
+ + let mut results = Vec::<(usize, Vec<(usize, String, BarcodeFate)>, Vec)>::new(); + for i in 0..orbits.len() { + results.push((i, Vec::new(), Vec::new())); + } + results.par_iter_mut().for_each(|res| { + let i = res.0; + let o = orbits[i].clone(); + let (od, exacts) = setup_define_mat(&o, info); + let mat = define_mat( + is_bcr, + to_bc, + sr, + ctl, + exact_clonotypes, + &exacts, + &od, + info, + raw_joins, + refdata, + dref, + ); + let cols = mat.len(); + if cols > 2 { + let nexacts = exacts.len(); + let mut ncells = vec![0; cols]; + let mut col_entries = vec![Vec::::new(); cols]; + for (u, &clonotype_id) in exacts.iter().enumerate().take(nexacts) { + for ((nc, ce), m) in ncells + .iter_mut() + .zip(col_entries.iter_mut()) + .zip(mat.iter().take(cols)) + { + let mid = m[u]; + if mid.is_some() { + ce.push(u); + *nc += exact_clonotypes[clonotype_id].clones.len(); + } + } + } + let mut total_cells = 0; + for j in 0..exacts.len() { + total_cells += exact_clonotypes[exacts[j]].ncells(); + } + for j in 0..cols { + if ncells[j] <= 20 && 8 * ncells[j] < total_cells { + for d in col_entries[j].iter() { + if ctl.clono_filt_opt_def.weak_chains { + res.2.push(exacts[*d]); + } + let ex = &exact_clonotypes[exacts[*d]]; + for i in 0..ex.ncells() { + res.1.push(( + ex.clones[i][0].dataset_index, + ex.clones[i][0].barcode.clone(), + BarcodeFate::WeakChains, + )); + } + } + } + } + } + }); + let mut to_delete = vec![false; exact_clonotypes.len()]; + let mut dels = Vec::::new(); + for i in 0..results.len() { + for j in 0..results[i].1.len() { + fate[results[i].1[j].0].insert(results[i].1[j].1.clone(), results[i].1[j].2.clone()); + } + for x in results[i].2.iter() { + to_delete[*x] = true; + } + } + dels.sort_unstable(); + for o in orbits.iter_mut() { + let del = o + .iter() + .map(|&oj| { + let id = info[oj as usize].clonotype_index; + to_delete[id] + }) + .collect::>(); + erase_if(o, &del); + } +} diff --git a/enclone_tail/Cargo.toml b/enclone_tail/Cargo.toml deleted file mode 
100644 index 6bfb2d983..000000000 --- a/enclone_tail/Cargo.toml +++ /dev/null @@ -1,48 +0,0 @@ -[package] -name = "enclone_tail" -version = "0.4.49" -authors = ["""David Jaffe , - Keri Dockter , - Shaun Jackman , - Sreenath Krishnan , - Meryl Lewis , - Patrick Marks , - Wyatt McDonnell """] -edition = "2018" -license = "LICENSE.txt" -publish = false - -# Please do not edit crate versions within this file. Instead edit the file master.toml -# in the root of the enclone repo. - -[dependencies] -amino = "0.1.1" -ansi_escape = "0.1.0" -debruijn = "0.3.2" -enclone_core = { path = "../enclone_core" } -enclone_proto = { path = "../enclone_proto" } -equiv = "0.1.1" -io_utils = "0.2" -itertools = "0.9.0" -mirror_sparse_matrix = "0.1.4" -ndarray = "0.13" -perf_stats = "0.1.2" -pretty_trace = "0.3.2" -rayon = "1.0.2" -serde = "1.0.90" -serde_derive = "1.0.102" -serde_json = "*" -stats_utils = "0.1.1" -string_utils = "0.1.1" -tables = "0.1.2" -tar = "0.4.29" -vdj_ann = { git = "https://github.com/10XGenomics/rust-toolbox.git", rev="183e2d657e6436494072a32cf8da4f7b753d1e69" } -vector_utils = "0.1.3" -# to replace by this after bumping version -# vector_utils = "0.1.0" - -[dependencies.hdf5] -features = ["conda"] -git = "https://github.com/pmarks/hdf5-rs.git" -rev = "0c98e57b2af1f4247708c198b324ba3a8bc18dba" - diff --git a/enclone_tail/src/display_tree.rs b/enclone_tail/src/display_tree.rs deleted file mode 100644 index ab86cccfb..000000000 --- a/enclone_tail/src/display_tree.rs +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Text display of a rooted directed tree, in which each vertex has a name and each edge has a -// floating point length. A width parameter sets the approximate page width in characters, -// with edge lengths scaled roughly to match this. -// -// The design is adapted from https://gitlab.com/Noughmad/ptree, by Miha Čančula. 
- -use itertools::Itertools; -use std::cmp::max; -use vector_utils::*; - -// vnames: vertex names -// directed edges: {(v, w, weight)} -// r: index of the root vertex -// max_width: max page width - -pub fn display_tree( - vnames: &Vec, - edges: &Vec<(usize, usize, f64)>, - r: usize, - width: usize, -) -> String { - // Test input data and create an index. - - let mut edges = edges.clone(); - let mut n = 0; - for i in 0..edges.len() { - n = max(n, edges[i].0 + 1); - n = max(n, edges[i].1 + 1); - } - assert!(r < n); - let mut index = vec![Vec::::new(); n]; - for i in 0..edges.len() { - index[edges[i].0].push(i); - index[edges[i].1].push(i); - } - assert_eq!(n, vnames.len()); - - // For each vertex, define a path, which is the sequence of edge indices from the root to it. - - let mut vpaths = vec![Vec::::new(); n]; - for v in 0..n { - let mut w = v; - while w != r { - for j in index[w].iter() { - if edges[*j].1 == w { - vpaths[v].push(*j); - w = edges[*j].0; - } - } - } - vpaths[v].reverse(); - } - let mut vs = Vec::::new(); - for v in 0..n { - vs.push(v); - } - - // Sort. The output lines now correspond to the entries of this vector. - - sort_sync2(&mut vpaths, &mut vs); - - // In the special case where every edge has length zero, change all to one. - - let mut max_e = 0 as f64; - for e in edges.iter() { - max_e = max_e.max(e.2); - } - if max_e == 0.0 { - for i in 0..edges.len() { - edges[i].2 = 1.0; - } - } - - // For each path, define its constant and variable length components. - - let mut clen = vec![0; n]; - let mut vlen = vec![Vec::::new(); n]; - for i in 0..vpaths.len() { - clen[i] = vnames[i].chars().count() + 2 * vpaths[i].len(); - for j in vpaths[i].iter() { - vlen[i].push(edges[*j].2.max(0.0)); - } - } - - // Define length multiplier. This is very inefficient. Then scale the lengths. 
- - let mut mult = 1.0; - { - let fwidth = width as f64; - let mut last_change = "".to_string(); - loop { - let mut len = 0.0 as f64; - let mut max_w = 1.0 as f64; - for i in 0..vpaths.len() { - let mut l = clen[i] as f64; - for j in 0..vlen[i].len() { - let w = (vlen[i][j] * mult).round().max(1.0); - max_w = max_w.max(w); - l += w; - } - len = len.max(l); - } - if len <= fwidth && len >= 1.05 * fwidth { - break; - } else if max_w == 1.0 && len >= fwidth { - break; - } else if len > fwidth { - mult = 0.95 * mult; - last_change = "minus".to_string(); - } else { - if last_change == "minus".to_string() { - break; - } - mult = 1.05 * mult; - last_change = "plus".to_string(); - } - } - } - for i in 0..edges.len() { - edges[i].2 = (edges[i].2 * mult).round().max(1.0); - } - - // Generate the lines. - - let mut x = String::new(); - for i in 0..n { - for j in 0..vpaths[i].len() { - let e = vpaths[i][j]; - let t = edges[e].2 as usize; - let hedge = format!("{}", vec!['═'; t].iter().format("")); - let hnone = format!("{}", vec![' '; t].iter().format("")); - let mut last_edge = true; - for k in i + 1..n { - if j >= vpaths[k].len() { - break; - } - if vpaths[k][j] != vpaths[i][j] { - last_edge = false; - } - } - - // alternatives (all variable length) - - // 1. "╠═══ " - - // 2. "║ " - - // 3. "╠═══ " - - // 4. "╚═══ " - - // 5. 
" " - - if j >= vpaths[i - 1].len() { - if !last_edge { - x += &format!("╠{} ", hedge); - } else { - x += &format!("╚{} ", hedge); - } - } else if vpaths[i][j] != vpaths[i - 1][j] { - if !last_edge { - x += &format!("╠{} ", hedge); - } else { - x += &format!("╚{} ", hedge); - } - } else { - if !last_edge { - x += &format!("║{} ", hnone); - } else { - x += &format!(" {} ", hnone); - } - } - } - x += &format!("{}\n", vnames[vs[i]]); - } - x -} diff --git a/enclone_tail/src/group.rs b/enclone_tail/src/group.rs deleted file mode 100644 index 14b8fa21d..000000000 --- a/enclone_tail/src/group.rs +++ /dev/null @@ -1,1686 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Group and print clonotypes. For now, limited grouping functionality. - -use crate::display_tree::*; -use crate::neighbor::*; -use crate::newick::*; -use amino::*; -use ansi_escape::ansi_to_html::*; -use ansi_escape::*; -use enclone_core::defs::*; -use enclone_core::print_tools::*; -use enclone_proto::types::*; -use equiv::EquivRel; -use io_utils::*; -use itertools::*; -use perf_stats::*; -use stats_utils::*; -use std::cmp::max; -use std::collections::HashMap; -use std::env; -use std::fs::File; -use std::io::Write; -use std::io::*; -use std::mem::swap; -use std::path::Path; -use std::time::{Instant, SystemTime, UNIX_EPOCH}; -use string_utils::*; -use tables::*; -use tar::{Builder, Header}; -use vdj_ann::refx::*; -use vector_utils::*; - -pub fn group_and_print_clonotypes( - tall: &Instant, - refdata: &RefData, - pics: &Vec, - exacts: &Vec>, - rsi: &Vec, - exact_clonotypes: &Vec, - ctl: &EncloneControl, - out_datas: &mut Vec>>, - join_info: &Vec<(usize, usize, bool, Vec)>, - gex_info: &GexInfo, - dref: &Vec, -) { - // Build index to join info. - - let mut to_join_info = vec![Vec::::new(); exact_clonotypes.len()]; - for i in 0..join_info.len() { - to_join_info[join_info[i].0].push(i); - to_join_info[join_info[i].1].push(i); - } - - // Set up for parseable output. 
- - let mut parseable_fields = Vec::::new(); - set_speakers(&ctl, &mut parseable_fields); - #[allow(bare_trait_objects)] - let mut pout = match ctl.parseable_opt.pout.as_str() { - "" => (Box::new(stdout()) as Box), - "stdout" => (Box::new(stdout()) as Box), - "stdouth" => (Box::new(stdout()) as Box), - _ => { - let path = Path::new(&ctl.parseable_opt.pout); - Box::new(File::create(&path).unwrap()) as Box - } - }; - let mut pcols = ctl.parseable_opt.pcols.clone(); - for i in 0..pcols.len() { - pcols[i] = pcols[i].replace("_Σ", "_sum"); - pcols[i] = pcols[i].replace("_μ", "_mean"); - } - if pcols.is_empty() { - pcols = parseable_fields.clone(); - } - if !ctl.parseable_opt.pout.is_empty() - && ctl.parseable_opt.pout != "stdout".to_string() - && ctl.parseable_opt.pout != "stdouth".to_string() - { - fwriteln!(pout, "{}", pcols.iter().format(",")); - } - let mut pcols2 = Vec::::new(); - for i in 0..pcols.len() { - if pcols[i].contains(":") { - pcols2.push(pcols[i].before(":").to_string()); - } else { - pcols2.push(pcols[i].clone()); - } - } - pcols = pcols2; - - // Set up for fasta output. - - #[allow(bare_trait_objects)] - let mut fout = match ctl.gen_opt.fasta_filename.as_str() { - "" => (Box::new(stdout()) as Box), - "stdout" => (Box::new(stdout()) as Box), - _ => { - let path = Path::new(&ctl.gen_opt.fasta_filename); - Box::new(File::create(&path).unwrap()) as Box - } - }; - #[allow(bare_trait_objects)] - let mut faaout = match ctl.gen_opt.fasta_aa_filename.as_str() { - "" => (Box::new(stdout()) as Box), - "stdout" => (Box::new(stdout()) as Box), - _ => { - let path = Path::new(&ctl.gen_opt.fasta_aa_filename); - Box::new(File::create(&path).unwrap()) as Box - } - }; - - // Set up for clustal output. 
- - let (mut clustal_aa, mut clustal_dna) = (None, None); - if ctl.gen_opt.clustal_aa.len() > 0 && ctl.gen_opt.clustal_aa != "stdout".to_string() { - let file = File::create(&ctl.gen_opt.clustal_aa).unwrap(); - clustal_aa = Some(Builder::new(file)); - } - if ctl.gen_opt.clustal_dna.len() > 0 && ctl.gen_opt.clustal_dna != "stdout".to_string() { - let file = File::create(&ctl.gen_opt.clustal_dna).unwrap(); - clustal_dna = Some(Builder::new(file)); - } - - // Set up for phylip output. - - let (mut phylip_aa, mut phylip_dna) = (None, None); - if ctl.gen_opt.phylip_aa.len() > 0 && ctl.gen_opt.phylip_aa != "stdout".to_string() { - let file = File::create(&ctl.gen_opt.phylip_aa).unwrap(); - phylip_aa = Some(Builder::new(file)); - } - if ctl.gen_opt.phylip_dna.len() > 0 && ctl.gen_opt.phylip_dna != "stdout".to_string() { - let file = File::create(&ctl.gen_opt.phylip_dna).unwrap(); - phylip_dna = Some(Builder::new(file)); - } - - // Group clonotypes and make output. - - let mut last_width = 0; - let mut e: EquivRel = EquivRel::new(pics.len() as i32); - if ctl.clono_group_opt.heavy_cdr3_aa { - let mut all = Vec::<(String, usize)>::new(); - for i in 0..pics.len() { - for x in exacts[i].iter() { - for m in 0..exact_clonotypes[*x].share.len() { - let y = &exact_clonotypes[*x].share[m]; - if y.left { - all.push((y.cdr3_aa.clone(), i)); - } - } - } - } - all.sort(); - let mut i = 0; - while i < all.len() { - let j = next_diff1_2(&all, i as i32) as usize; - for k in i + 1..j { - e.join(all[i].1 as i32, all[k].1 as i32); - } - i = j; - } - } - if ctl.clono_group_opt.vj_refname || ctl.clono_group_opt.vj_refname_strong { - let mut all = Vec::<(Vec, usize)>::new(); - for i in 0..pics.len() { - let ex = &exact_clonotypes[exacts[i][0]]; - let mut s = Vec::::new(); - for j in 0..ex.share.len() { - s.push(refdata.name[ex.share[j].v_ref_id].clone()); - s.push(refdata.name[ex.share[j].j_ref_id].clone()); - } - s.sort(); - all.push((s, i)); - } - // Note duplication with above code. 
- all.sort(); - let mut i = 0; - while i < all.len() { - let j = next_diff1_2(&all, i as i32) as usize; - for k in i + 1..j { - let m1 = all[i].1; - let m2 = all[k].1; - if ctl.clono_group_opt.vj_refname_strong { - let ex1 = &exact_clonotypes[exacts[m1][0]]; - let ex2 = &exact_clonotypes[exacts[m2][0]]; - let mut lens1 = Vec::<(usize, usize)>::new(); - let mut lens2 = Vec::<(usize, usize)>::new(); - for j in 0..ex1.share.len() { - lens1.push((ex1.share[j].seq_del.len(), ex1.share[j].cdr3_aa.len())); - } - for j in 0..ex2.share.len() { - lens2.push((ex2.share[j].seq_del.len(), ex2.share[j].cdr3_aa.len())); - } - lens1.sort(); - lens2.sort(); - if lens1 != lens2 { - continue; - } - } - e.join(m1 as i32, m2 as i32); - } - i = j; - } - } - let mut groups = 0; - let mut greps = Vec::::new(); - e.orbit_reps(&mut greps); - - // Sort so that larger groups (as measured by cells) come first. - - let mut grepsn = Vec::<(usize, usize)>::new(); - for i in 0..greps.len() { - let mut o = Vec::::new(); - e.orbit(greps[i], &mut o); - if o.len() < ctl.clono_group_opt.min_group { - continue; - } - let mut n = 0; - for j in 0..o.len() { - let x = o[j] as usize; - let s = &exacts[x]; - for k in 0..s.len() { - n += exact_clonotypes[s[k]].clones.len(); - } - } - grepsn.push((n, i)); - } - reverse_sort(&mut grepsn); - - // Echo command. - - let mut logx = Vec::::new(); - if ctl.gen_opt.echo { - let args: Vec = env::args().collect(); - fwriteln!(logx, "\n{}", args.iter().format(" ")); - if ctl.gen_opt.html { - fwriteln!(logx, ""); - } - } - - // Now print clonotypes. - - for z in 0..grepsn.len() { - let i = grepsn[z].1; - let n = grepsn[z].0; - let mut o = Vec::::new(); - e.orbit(greps[i], &mut o); - groups += 1; - - // Generate human readable output. Getting the newlines right is tricky, so - // they're marked. - - if !ctl.gen_opt.noprint { - if !ctl.gen_opt.html && !ctl.gen_opt.ngroup { - fwriteln!(logx, ""); // NEWLINE 1 - } - - // If we just printed a clonotype box, output a bar. 
- - if last_width > 0 { - if ctl.gen_opt.ngroup || ctl.gen_opt.html { - fwriteln!(logx, ""); // NEWLINE 2 - } - if ctl.pretty { - let mut log = Vec::::new(); - emit_eight_bit_color_escape(&mut log, 44); - fwrite!(logx, "{}", strme(&log)); - } - fwrite!(logx, "╺{}╸", "━".repeat(last_width - 2)); - if !ctl.gen_opt.ngroup { - fwriteln!(logx, ""); // NEWLINE 3 - } - fwriteln!(logx, ""); // NEWLINE 4 - if ctl.pretty { - let mut log = Vec::::new(); - emit_end_escape(&mut log); - fwrite!(logx, "{}", strme(&log)); - } - } - - // If NGROUP is not on, output a GROUP line, including a newline at the end. - - if !ctl.gen_opt.ngroup { - if ctl.pretty { - let mut log = Vec::::new(); - emit_bold_escape(&mut log); - emit_eight_bit_color_escape(&mut log, 27); - fwrite!(logx, "{}", strme(&log)); - } - fwrite!( - logx, - "[{}] GROUP = {} CLONOTYPES = {} CELLS", - groups, - o.len(), - n - ); - if ctl.pretty { - let mut log = Vec::::new(); - emit_end_escape(&mut log); - fwrite!(logx, "{}", strme(&log)); - } - fwriteln!(logx, ""); // NEWLINE 5 - } - } - let mut group_ncells = 0; - for j in 0..o.len() { - let oo = o[j] as usize; - for l in 0..exacts[oo].len() { - group_ncells += exact_clonotypes[exacts[oo][l]].ncells(); - } - } - for j in 0..o.len() { - let oo = o[j] as usize; - if !ctl.gen_opt.noprint { - if z > 0 || j > 0 || !(ctl.gen_opt.html && ctl.gen_opt.ngroup) { - fwrite!(logx, "\n"); // NEWLINE 6 - } - if ctl.gen_opt.svg { - const FONT_SIZE: usize = 15; - let s = format!("[{}.{}] {}", groups, j + 1, pics[oo]); - - // Generate svg. This does not generate the shortest possible string. One - // thing that could be done is to use only one text tag and instead use - // relative positions in the tspan tags to avoid repeating the font family, - // etc. But there are probably other economizations. - // - // The other thing is that the aspect ratio is just a little bit off. 
- - fwrite!( - logx, - "{}", - convert_text_with_ansi_escapes_to_svg(&s, "Menlo", FONT_SIZE) - ); - } else { - fwrite!(logx, "[{}.{}] {}", groups, j + 1, pics[oo]); - } - } - let x = &pics[oo]; - let mut y = Vec::::new(); - for c in x.chars() { - y.push(c); - } - y.reverse(); - let mut m = 2; - while m < y.len() { - if y[m] == '\n' { - break; - } - m += 1; - } - last_width = m - 1; - - // Print join info. - - let mut ji = Vec::::new(); - for u in exacts[oo].iter() { - ji.append(&mut to_join_info[*u].clone()); - } - unique_sort(&mut ji); - for i in 0..ji.len() { - fwriteln!(logx, "{}", strme(&join_info[ji[i]].3)); - } - - // Generate clustal output. See: - // 1. http://meme-suite.org/doc/clustalw-format.html - // 2. https://www.ebi.ac.uk/seqdb/confluence/display/THD/Help+-+Clustal+Omega+FAQ - // at "What do the consensus symbols mean in the alignment?". - - if ctl.gen_opt.clustal_aa.len() > 0 { - let stdout = ctl.gen_opt.clustal_aa == "stdout".to_string(); - let mut data = Vec::::new(); - if stdout { - fwriteln!(logx, ""); - fwriteln!(logx, "CLUSTALW\n"); - } else { - fwriteln!(data, "CLUSTALW\n"); - } - let mut aa = Vec::>::new(); - let mut names = Vec::::new(); - for (k, u) in exacts[oo].iter().enumerate() { - let ex = &exact_clonotypes[*u]; - let mut seq = Vec::::new(); - for m in 0..rsi[oo].mat.len() { - if rsi[oo].mat[m][k].is_none() { - seq.append(&mut vec![b'-'; rsi[oo].seq_del_lens[m] / 3]); - } else { - let r = rsi[oo].mat[m][k].unwrap(); - let s = ex.share[r].seq_del_amino.clone(); - let mut s = aa_seq(&s, 0); - seq.append(&mut s); - } - } - names.push(format!("{}.{}.{}", groups, j + 1, k + 1,)); - aa.append(&mut vec![seq; 1]); - } - const W: usize = 60; - const PAD: usize = 4; - let mut name_width = 0; - for i in 0..names.len() { - name_width = std::cmp::max(name_width, names[i].len()); - } - for start in (0..aa[0].len()).step_by(W) { - if start > 0 { - if stdout { - fwriteln!(logx, ""); - } else { - fwriteln!(data, ""); - } - } - let stop = 
std::cmp::min(start + W, aa[0].len()); - for i in 0..aa.len() { - if stdout { - fwrite!(logx, "{}", names[i]); - fwrite!( - logx, - "{}", - strme(&vec![b' '; name_width + PAD - names[i].len()]) - ); - fwriteln!(logx, "{} {}", strme(&aa[i][start..stop]), stop - start); - } else { - fwrite!(data, "{}", names[i]); - fwrite!( - data, - "{}", - strme(&vec![b' '; name_width + PAD - names[i].len()]) - ); - fwriteln!(data, "{} {}", strme(&aa[i][start..stop]), stop - start); - } - } - if stdout { - fwrite!(logx, "{}", strme(&vec![b' '; name_width + PAD])); - } else { - fwrite!(data, "{}", strme(&vec![b' '; name_width + PAD])); - } - for p in start..stop { - let mut res = Vec::::new(); - for i in 0..aa.len() { - res.push(aa[i][p]); - } - unique_sort(&mut res); - if res.solo() { - if stdout { - fwrite!(logx, "*"); - } else { - fwrite!(data, "*"); - } - } else { - let mut con = false; - 'pass: for pass in 1..=2 { - let x: Vec<&[u8]>; - // Conservative mutations - if pass == 1 { - x = vec![ - b"STA", b"NEQK", b"NHQK", b"NDEQ", b"QHRK", b"MILV", - b"MILF", b"HY", b"FYW", - ]; - } else { - // Semi-conservative mutations - x = vec![ - b"CSA", b"ATV", b"SAG", b"STNK", b"STPA", b"SGND", - b"SNDEQK", b"NDEQHK", b"NEQHRK", b"FVLIM", b"HFY", - ]; - } - for y in x.iter() { - let mut sub = true; - for c in res.iter() { - if !y.contains(c) { - sub = false; - break; - } - } - if sub { - let sym; - if pass == 1 { - sym = ":"; - } else { - sym = "."; - } - if stdout { - fwrite!(logx, "{}", sym); - } else { - fwrite!(data, "{}", sym); - } - con = true; - break 'pass; - } - } - } - if !con { - if stdout { - fwrite!(logx, " "); - } else { - fwrite!(data, " "); - } - } - } - } - if stdout { - fwriteln!(logx, ""); - } else { - fwriteln!(data, ""); - } - } - if !stdout { - let mut header = Header::new_gnu(); - header.set_size(data.len() as u64); - header.set_cksum(); - header.set_mode(0o0644); - let now = SystemTime::now(); - header.set_mtime(now.duration_since(UNIX_EPOCH).unwrap().as_secs()); - 
let filename = format!("{}.{}", groups, j + 1); - clustal_aa - .as_mut() - .unwrap() - .append_data(&mut header, &filename, &data[..]) - .unwrap(); - } - } - if ctl.gen_opt.clustal_dna.len() > 0 { - let stdout = ctl.gen_opt.clustal_dna == "stdout".to_string(); - let mut data = Vec::::new(); - if stdout { - fwriteln!(logx, ""); - fwriteln!(logx, "CLUSTALW\n"); - } else { - fwriteln!(data, "CLUSTALW\n"); - } - let mut dna = Vec::>::new(); - let mut names = Vec::::new(); - for (k, u) in exacts[oo].iter().enumerate() { - let ex = &exact_clonotypes[*u]; - let mut seq = Vec::::new(); - for m in 0..rsi[oo].mat.len() { - if rsi[oo].mat[m][k].is_none() { - seq.append(&mut vec![b'-'; rsi[oo].seq_del_lens[m]]); - } else { - let r = rsi[oo].mat[m][k].unwrap(); - let mut s = ex.share[r].seq_del_amino.clone(); - seq.append(&mut s); - } - } - names.push(format!("{}.{}.{}", groups, j + 1, k + 1,)); - dna.append(&mut vec![seq; 1]); - } - const W: usize = 60; - const PAD: usize = 4; - let mut name_width = 0; - for i in 0..names.len() { - name_width = std::cmp::max(name_width, names[i].len()); - } - for start in (0..dna[0].len()).step_by(W) { - if start > 0 { - if stdout { - fwriteln!(logx, ""); - } else { - fwriteln!(data, ""); - } - } - let stop = std::cmp::min(start + W, dna[0].len()); - for i in 0..dna.len() { - if stdout { - fwrite!(logx, "{}", names[i]); - fwrite!( - logx, - "{}", - strme(&vec![b' '; name_width + PAD - names[i].len()]) - ); - fwriteln!(logx, "{} {}", strme(&dna[i][start..stop]), stop - start); - } else { - fwrite!(data, "{}", names[i]); - fwrite!( - data, - "{}", - strme(&vec![b' '; name_width + PAD - names[i].len()]) - ); - fwriteln!(data, "{} {}", strme(&dna[i][start..stop]), stop - start); - } - } - if stdout { - fwrite!(logx, "{}", strme(&vec![b' '; name_width + PAD])); - } else { - fwrite!(data, "{}", strme(&vec![b' '; name_width + PAD])); - } - for p in start..stop { - let mut res = Vec::::new(); - for i in 0..dna.len() { - res.push(dna[i][p]); - } - 
unique_sort(&mut res); - if res.solo() { - if stdout { - fwrite!(logx, "*"); - } else { - fwrite!(data, "*"); - } - } else { - if stdout { - fwrite!(logx, " "); - } else { - fwrite!(data, " "); - } - } - } - if stdout { - fwriteln!(logx, ""); - } else { - fwriteln!(data, ""); - } - } - if !stdout { - let mut header = Header::new_gnu(); - header.set_size(data.len() as u64); - header.set_cksum(); - header.set_mode(0o0644); - let now = SystemTime::now(); - header.set_mtime(now.duration_since(UNIX_EPOCH).unwrap().as_secs()); - let filename = format!("{}.{}", groups, j + 1); - clustal_dna - .as_mut() - .unwrap() - .append_data(&mut header, &filename, &data[..]) - .unwrap(); - } - } - - // Generate sequential PHYLIP output. See: - // 1. http://www.atgc-montpellier.fr/phyml/usersguide.php?type=phylip - // 2. http://evolution.genetics.washington.edu/phylip/doc/sequence.html. - // We don't fold lines because it may not be necessary. See giant value for W; - // code left in place in case it turns out that folding is needed. This will involve - // a bit more than lowering W. 
- - if ctl.gen_opt.phylip_aa.len() > 0 { - let stdout = ctl.gen_opt.phylip_aa == "stdout".to_string(); - let mut data = Vec::::new(); - let mut nbases = 0; - for m in 0..rsi[oo].mat.len() { - nbases += rsi[oo].seq_del_lens[m]; - } - if stdout { - fwriteln!(logx, ""); - fwriteln!(logx, "{} {}", exacts[oo].len(), nbases / 3); - } else { - fwriteln!(data, "{} {}", exacts[oo].len(), nbases / 3); - } - let mut aa = Vec::>::new(); - let mut names = Vec::::new(); - for (k, u) in exacts[oo].iter().enumerate() { - let ex = &exact_clonotypes[*u]; - let mut seq = Vec::::new(); - for m in 0..rsi[oo].mat.len() { - if rsi[oo].mat[m][k].is_none() { - seq.append(&mut vec![b'-'; rsi[oo].seq_del_lens[m] / 3]); - } else { - let r = rsi[oo].mat[m][k].unwrap(); - let s = ex.share[r].seq_del_amino.clone(); - let mut s = aa_seq(&s, 0); - seq.append(&mut s); - } - } - names.push(format!("{}", k + 1,)); - aa.append(&mut vec![seq; 1]); - } - const W: usize = 10000; - const PAD: usize = 4; - let mut name_width = 0; - for i in 0..names.len() { - name_width = std::cmp::max(name_width, names[i].len()); - } - for start in (0..aa[0].len()).step_by(W) { - if start > 0 { - if stdout { - fwriteln!(logx, ""); - } else { - fwriteln!(data, ""); - } - } - let stop = std::cmp::min(start + W, aa[0].len()); - for i in 0..aa.len() { - if stdout { - fwrite!(logx, "{}", names[i]); - fwrite!( - logx, - "{}", - strme(&vec![b' '; name_width + PAD - names[i].len()]) - ); - fwriteln!(logx, "{}", strme(&aa[i][start..stop])); - } else { - fwrite!(data, "{}", names[i]); - fwrite!( - data, - "{}", - strme(&vec![b' '; name_width + PAD - names[i].len()]) - ); - fwriteln!(data, "{}", strme(&aa[i][start..stop])); - } - } - if stdout { - fwrite!(logx, "{}", strme(&vec![b' '; name_width + PAD])); - } else { - fwrite!(data, "{}", strme(&vec![b' '; name_width + PAD])); - } - } - if !stdout { - let mut header = Header::new_gnu(); - header.set_size(data.len() as u64); - header.set_cksum(); - header.set_mode(0o0644); - let now = 
SystemTime::now(); - header.set_mtime(now.duration_since(UNIX_EPOCH).unwrap().as_secs()); - let filename = format!("{}.{}", groups, j + 1); - phylip_aa - .as_mut() - .unwrap() - .append_data(&mut header, &filename, &data[..]) - .unwrap(); - } - } - if ctl.gen_opt.phylip_dna.len() > 0 { - let stdout = ctl.gen_opt.phylip_dna == "stdout".to_string(); - let mut data = Vec::::new(); - let mut nbases = 0; - for m in 0..rsi[oo].mat.len() { - nbases += rsi[oo].seq_del_lens[m]; - } - if stdout { - fwriteln!(logx, ""); - fwriteln!(logx, "{} {}", exacts[oo].len(), nbases); - } else { - fwriteln!(data, "{} {}", exacts[oo].len(), nbases); - } - let mut dna = Vec::>::new(); - let mut names = Vec::::new(); - for (k, u) in exacts[oo].iter().enumerate() { - let ex = &exact_clonotypes[*u]; - let mut seq = Vec::::new(); - for m in 0..rsi[oo].mat.len() { - if rsi[oo].mat[m][k].is_none() { - seq.append(&mut vec![b'-'; rsi[oo].seq_del_lens[m]]); - } else { - let r = rsi[oo].mat[m][k].unwrap(); - let mut s = ex.share[r].seq_del_amino.clone(); - seq.append(&mut s); - } - } - names.push(format!("{}", k + 1,)); - dna.append(&mut vec![seq; 1]); - } - const W: usize = 10000; - const PAD: usize = 4; - let mut name_width = 0; - for i in 0..names.len() { - name_width = std::cmp::max(name_width, names[i].len()); - } - for start in (0..dna[0].len()).step_by(W) { - if start > 0 { - if stdout { - fwriteln!(logx, ""); - } else { - fwriteln!(data, ""); - } - } - let stop = std::cmp::min(start + W, dna[0].len()); - for i in 0..dna.len() { - if stdout { - fwrite!(logx, "{}", names[i]); - fwrite!( - logx, - "{}", - strme(&vec![b' '; name_width + PAD - names[i].len()]) - ); - fwriteln!(logx, "{} {}", strme(&dna[i][start..stop]), stop - start); - } else { - fwrite!(data, "{}", names[i]); - fwrite!( - data, - "{}", - strme(&vec![b' '; name_width + PAD - names[i].len()]) - ); - fwriteln!(data, "{} {}", strme(&dna[i][start..stop]), stop - start); - } - } - if stdout { - fwrite!(logx, "{}", strme(&vec![b' '; 
name_width + PAD])); - } else { - fwrite!(data, "{}", strme(&vec![b' '; name_width + PAD])); - } - } - if !stdout { - let mut header = Header::new_gnu(); - header.set_size(data.len() as u64); - header.set_cksum(); - header.set_mode(0o0644); - let now = SystemTime::now(); - header.set_mtime(now.duration_since(UNIX_EPOCH).unwrap().as_secs()); - let filename = format!("{}.{}", groups, j + 1); - phylip_dna - .as_mut() - .unwrap() - .append_data(&mut header, &filename, &data[..]) - .unwrap(); - } - } - - // Generate experimental tree output (options NEWICK0 and TREE). - - if ctl.gen_opt.newick || ctl.gen_opt.tree != "".to_string() { - // Compute the n x n distance matrix for the exact subclonotypes. - - let n = exacts[oo].len(); - let cols = rsi[oo].mat.len(); - let mut dist = vec![vec![0; n]; n]; - for i1 in 0..n { - for i2 in 0..n { - let ex1 = &exact_clonotypes[exacts[oo][i1]]; - let ex2 = &exact_clonotypes[exacts[oo][i2]]; - for m in 0..cols { - if rsi[oo].mat[m][i1].is_some() && rsi[oo].mat[m][i2].is_some() { - let r1 = rsi[oo].mat[m][i1].unwrap(); - let r2 = rsi[oo].mat[m][i2].unwrap(); - let seq1 = &ex1.share[r1].seq_del_amino; - let seq2 = &ex2.share[r2].seq_del_amino; - for j in 0..seq1.len() { - if seq1[j] != seq2[j] { - dist[i1][i2] += 1; - } - } - } - } - } - } - - // Add a zeroeth entry for a "root subclonotype" which is defined to have the - // donor reference away from the recombination region, and is undefined within it. - // Define its distance to actual exact subclonotypes by only computing away from - // the recombination region. This yields an (n+1) x (n+1) matrix. 
- - let mut droot = vec![0; n]; - for i in 0..n { - let ex = &exact_clonotypes[exacts[oo][i]]; - for m in 0..cols { - if rsi[oo].mat[m][i].is_some() { - let r = rsi[oo].mat[m][i].unwrap(); - let seq = &ex.share[r].seq_del_amino; - let mut vref = refdata.refs[rsi[oo].vids[m]].to_ascii_vec(); - if rsi[oo].vpids[m].is_some() { - vref = dref[rsi[oo].vpids[m].unwrap()].nt_sequence.clone(); - } - let jref = refdata.refs[rsi[oo].jids[m]].to_ascii_vec(); - let z = seq.len(); - for p in 0..z { - let b = seq[p]; - if p < vref.len() - ctl.heur.ref_v_trim && b != vref[p] { - droot[i] += 1; - } - if p >= z - (jref.len() - ctl.heur.ref_j_trim) - && b != jref[jref.len() - (z - p)] - { - droot[i] += 1; - } - } - } - } - } - let mut distp = vec![vec![0.0; n + 1]; n + 1]; - for i1 in 0..n { - for i2 in 0..n { - distp[i1 + 1][i2 + 1] = dist[i1][i2] as f64; - } - } - for i in 0..n { - distp[i + 1][0] = droot[i] as f64; - distp[0][i + 1] = droot[i] as f64; - } - - // Generate the neighborhood joining tree associated to these data. - - let mut tree = neighbor_joining(&distp); - let mut nvert = 0; - for i in 0..tree.len() { - nvert = max(nvert, tree[i].0 + 1); - nvert = max(nvert, tree[i].1 + 1); - } - - // Use the root to direct the edges. - - let r = 0; - let mut index = vec![Vec::::new(); nvert]; - for i in 0..tree.len() { - index[tree[i].0].push(i); - index[tree[i].1].push(i); - } - let mut rooted = vec![false; nvert]; - rooted[r] = true; - let mut roots = vec![r]; - for i in 0..nvert { - let v = roots[i]; - for j in index[v].iter() { - let e = &mut tree[*j]; - - if e.1 == v && !rooted[e.0] { - swap(&mut e.0, &mut e.1); - } - if e.0 == v && !rooted[e.1] { - rooted[e.1] = true; - roots.push(e.1); - } - } - } - - // Output in Newick format. 
- - if ctl.gen_opt.newick { - let mut vnames = Vec::::new(); - for i in 0..=n { - vnames.push(format!("{}", i)); - } - let mut edges = Vec::<(usize, usize, String)>::new(); - for i in 0..tree.len() { - edges.push((tree[i].0, tree[i].1, format!("{:.2}", tree[i].2))); - } - for i in n + 1..nvert { - vnames.push(format!("I{}", i - n)); - } - let nw = newick(&vnames, 0, &edges); - fwriteln!(logx, "\n{}", nw); - } - - // Output as visual tree. - - if ctl.gen_opt.tree != "".to_string() { - let mut edges = Vec::<(usize, usize, f64)>::new(); - let mut nvert = 0; - for i in 0..tree.len() { - edges.push((tree[i].0, tree[i].1, tree[i].2)); - nvert = max(nvert, tree[i].0 + 1); - nvert = max(nvert, tree[i].1 + 1); - } - - // Make edge names. - - let mut vnames = Vec::::new(); - for i in 0..nvert { - let mut len = 0.0; - for j in 0..edges.len() { - if edges[j].1 == i { - len = edges[j].2; - } - } - let mut c = String::new(); - if i > 0 && i <= n && ctl.gen_opt.tree == "const".to_string() { - let ex = &exact_clonotypes[exacts[oo][i - 1]]; - let mut h = Vec::::new(); - for m in 0..ex.share.len() { - if ex.share[m].left { - if ex.share[m].c_ref_id.is_none() { - h.push("?".to_string()); - } else { - h.push(refdata.name[ex.share[m].c_ref_id.unwrap()].clone()); - } - } - } - c = format!(",{}", h.iter().format("+")); - } - if i == 0 { - vnames.push("•".to_string()); - } else if i <= n { - if ctl.pretty { - vnames.push(format!("{} [{:.2}{}]", i, len, c)); - } else { - vnames.push(format!("{} [{:.2}{}]", i, len, c)); - } - } else { - vnames.push(format!("• [{:.2}{}]", len, c)); - } - } - - // Display the tree. - - let nw = display_tree(&vnames, &edges, 0, 100); - fwrite!(logx, "\n{}", nw); - } - } - - // Generate FASTA output. 
- - if ctl.gen_opt.fasta_filename.len() > 0 { - for (k, u) in exacts[oo].iter().enumerate() { - for m in 0..rsi[oo].mat.len() { - if rsi[oo].mat[m][k].is_some() { - let r = rsi[oo].mat[m][k].unwrap(); - let ex = &exact_clonotypes[*u]; - if ctl.gen_opt.fasta_filename != "stdout".to_string() { - fwriteln!( - fout, - ">group{}.clonotype{}.exact{}.chain{}", - groups, - j + 1, - k + 1, - m + 1 - ); - } else { - fwriteln!( - logx, - ">group{}.clonotype{}.exact{}.chain{}", - groups, - j + 1, - k + 1, - m + 1 - ); - } - let mut seq = ex.share[r].seq.clone(); - let mut cid = ex.share[r].c_ref_id; - if cid.is_none() { - for l in 0..exacts[oo].len() { - if rsi[oo].mat[m][l].is_some() { - let r2 = rsi[oo].mat[m][l].unwrap(); - let ex2 = &exact_clonotypes[exacts[oo][l]]; - let cid2 = ex2.share[r2].c_ref_id; - if cid2.is_some() { - cid = cid2; - break; - } - } - } - } - if cid.is_some() { - let mut cseq = refdata.refs[cid.unwrap()].to_ascii_vec(); - seq.append(&mut cseq); - if ctl.gen_opt.fasta_filename != "stdout".to_string() { - fwriteln!(fout, "{}", strme(&seq)); - } else { - fwriteln!(logx, "{}", strme(&seq)); - } - } - } - } - } - } - - // Generate fasta amino acid output. 
- - if ctl.gen_opt.fasta_aa_filename.len() > 0 { - for (k, u) in exacts[oo].iter().enumerate() { - for m in 0..rsi[oo].mat.len() { - if rsi[oo].mat[m][k].is_some() { - let r = rsi[oo].mat[m][k].unwrap(); - let ex = &exact_clonotypes[*u]; - if ctl.gen_opt.fasta_aa_filename != "stdout".to_string() { - fwriteln!( - faaout, - ">group{}.clonotype{}.exact{}.chain{}", - groups, - j + 1, - k + 1, - m + 1 - ); - } else { - fwriteln!( - logx, - ">group{}.clonotype{}.exact{}.chain{}", - groups, - j + 1, - k + 1, - m + 1 - ); - } - let mut seq = ex.share[r].seq.clone(); - let mut cid = ex.share[r].c_ref_id; - if cid.is_none() { - for l in 0..exacts[oo].len() { - if rsi[oo].mat[m][l].is_some() { - let r2 = rsi[oo].mat[m][l].unwrap(); - let ex2 = &exact_clonotypes[exacts[oo][l]]; - let cid2 = ex2.share[r2].c_ref_id; - if cid2.is_some() { - cid = cid2; - break; - } - } - } - } - if cid.is_some() { - let mut cseq = refdata.refs[cid.unwrap()].to_ascii_vec(); - seq.append(&mut cseq); - if ctl.gen_opt.fasta_aa_filename != "stdout".to_string() { - fwriteln!(faaout, "{}", strme(&aa_seq(&seq, 0))); - } else { - fwriteln!(logx, "{}", strme(&aa_seq(&seq, 0))); - } - } - } - } - } - } - - // Generate parseable output. 
- - if ctl.parseable_opt.pout.len() > 0 - && (!ctl.gen_opt.noprint - || (ctl.parseable_opt.pout != "stdout".to_string() - && ctl.parseable_opt.pout != "stdouth".to_string())) - { - let mut rows = Vec::>::new(); - for m in 0..out_datas[oo].len() { - out_datas[oo][m].insert("group_id".to_string(), format!("{}", groups)); - out_datas[oo][m] - .insert("group_ncells".to_string(), format!("{}", group_ncells)); - out_datas[oo][m].insert("clonotype_id".to_string(), format!("{}", j + 1)); - } - if ctl.parseable_opt.pout == "stdout".to_string() { - fwriteln!(logx, "{}", pcols.iter().format(",")); - } - if ctl.parseable_opt.pout == "stdouth".to_string() { - rows.push(pcols.clone()); - } - let x = &out_datas[oo]; - for (u, y) in x.iter().enumerate() { - if !ctl.parseable_opt.pbarcode { - if ctl.parseable_opt.pout != "stdouth".to_string() { - for (i, c) in pcols.iter().enumerate() { - if i > 0 { - if ctl.parseable_opt.pout != "stdout".to_string() { - fwrite!(pout, ","); - } else { - fwrite!(logx, ","); - } - } - if y.contains_key(c) { - let val = &y[c]; - if !val.contains(',') { - if ctl.parseable_opt.pout != "stdout".to_string() { - fwrite!(pout, "{}", val); - } else { - fwrite!(logx, "{}", val); - } - } else { - if ctl.parseable_opt.pout != "stdout".to_string() { - fwrite!(pout, "\"{}\"", val); - } else { - fwrite!(logx, "\"{}\"", val); - } - } - } else { - if ctl.parseable_opt.pout != "stdout".to_string() { - fwrite!(pout, ""); - } else { - fwrite!(logx, ""); - } - } - } - if ctl.parseable_opt.pout != "stdout".to_string() { - fwriteln!(pout, ""); - } else { - fwriteln!(logx, ""); - } - } else { - let mut row = Vec::::new(); - for c in pcols.iter() { - if y.contains_key(c) { - let val = &y[c]; - row.push(val.clone()); - } else { - row.push("".to_string()); - } - } - rows.push(row); - } - } else { - let ex = &exact_clonotypes[exacts[oo][u]]; - let n = ex.ncells(); - if ctl.parseable_opt.pout != "stdouth".to_string() { - for m in 0..n { - for (i, c) in pcols.iter().enumerate() 
{ - if i > 0 { - if ctl.parseable_opt.pout != "stdout".to_string() { - fwrite!(pout, ","); - } else { - fwrite!(logx, ","); - } - } - if y.contains_key(c) { - let mut id = 0; - let vals = y[c].split(';').collect::>(); - if vals.len() > 1 { - id = m; - } - if id >= vals.len() { - panic!( - "id >= vals.len() where id = {} and vals.len() \ - = {},\nparseable variable = {}, barcodes include \ - {}, n = {}, y[c] = {}", - id, - vals.len(), - c, - ex.clones[0][0].barcode, - n, - y[c], - ); - } - let val = vals[id]; - if !val.contains(',') { - if ctl.parseable_opt.pout != "stdout".to_string() { - fwrite!(pout, "{}", val); - } else { - fwrite!(logx, "{}", val); - } - } else { - if ctl.parseable_opt.pout != "stdout".to_string() { - fwrite!(pout, "\"{}\"", val); - } else { - fwrite!(logx, "\"{}\"", val); - } - } - } else { - if ctl.parseable_opt.pout != "stdout".to_string() { - fwrite!(pout, ""); - } else { - fwrite!(logx, ""); - } - } - } - if ctl.parseable_opt.pout != "stdout".to_string() { - fwriteln!(pout, ""); - } else { - fwriteln!(logx, ""); - } - } - } else { - for m in 0..n { - let mut row = Vec::::new(); - for c in pcols.iter() { - if y.contains_key(c) { - let mut id = 0; - let vals = y[c].split(';').collect::>(); - if vals.len() > 1 { - id = m; - } - let val = vals[id]; - row.push(val.to_string()); - } else { - row.push("".to_string()); - } - } - rows.push(row); - } - } - } - } - if ctl.parseable_opt.pout == "stdouth".to_string() { - let mut log = Vec::::new(); - let mut justify = Vec::::new(); - for x in rows[0].iter() { - justify.push(justification(&x)); - } - print_tabular(&mut log, &rows, 2, Some(justify)); - fwrite!(logx, "{}", strme(&log)); - } - } - } - } - - // Finish CLUSTAL. - - if clustal_aa.is_some() { - clustal_aa.unwrap().finish().unwrap(); - } - if clustal_dna.is_some() { - clustal_dna.unwrap().finish().unwrap(); - } - - // Compute two umi stats. 
- - let nclono = exacts.len(); - let mut umish = Vec::::new(); - let mut umisl = Vec::::new(); - for i in 0..nclono { - for j in 0..exacts[i].len() { - let ex = &exact_clonotypes[exacts[i][j]]; - for k in 0..ex.clones.len() { - for l in 0..ex.share.len() { - if ex.share[l].left { - umish.push(ex.clones[k][l].umi_count); - } else { - umisl.push(ex.clones[k][l].umi_count); - } - } - } - } - } - umish.sort(); - umisl.sort(); - let (mut middleh, mut denomh) = (0, 0); - for j in umish.len() / 3..(2 * umish.len()) / 3 { - middleh += umish[j]; - denomh += 1; - } - let mut middle_mean_umish = 0.0; - if denomh > 0 { - middle_mean_umish = (middleh as f64) / (denomh as f64); - } - let (mut middlel, mut denoml) = (0, 0); - for j in umisl.len() / 3..(2 * umisl.len()) / 3 { - middlel += umisl[j]; - denoml += 1; - } - let mut middle_mean_umisl = 0.0; - if denoml > 0 { - middle_mean_umisl = (middlel as f64) / (denoml as f64); - } - - // Compute n1 and n23. - - let mut n1 = 0; - let mut n23 = 0; - for i in 0..nclono { - for j in 0..exacts[i].len() { - let ex = &exact_clonotypes[exacts[i][j]]; - if ex.nchains() == 1 { - n1 += ex.ncells(); - } else if ex.nchains() == 2 || ex.nchains() == 3 { - n23 += ex.ncells(); - } - } - } - - // Print summary stats. - - if ctl.gen_opt.summary { - fwriteln!(logx, "\nSUMMARY STATISTICS"); - fwriteln!(logx, "1. 
overall"); - let mut nclono2 = 0; - let mut ncells = 0; - let mut ncc = Vec::<(usize, usize)>::new(); - let mut sd = Vec::<(Option, Option)>::new(); - for i in 0..nclono { - let mut n = 0; - for j in 0..exacts[i].len() { - let ex = &exact_clonotypes[exacts[i][j]]; - n += ex.ncells(); - for k in 0..ex.clones.len() { - let x = &ex.clones[k][0]; - sd.push((x.origin_index, x.donor_index)); - } - } - if n >= 2 { - nclono2 += 1; - } - ncells += n; - ncc.push((rsi[i].mat.len(), n)); - } - sd.sort(); - let mut sdx = Vec::<(Option, Option, usize)>::new(); - let mut i = 0; - while i < sd.len() { - let j = next_diff(&sd, i); - sdx.push((sd[i].0, sd[i].1, j - i)); - i = j; - } - fwriteln!(logx, " • number of datasets = {}", ctl.origin_info.n()); - fwriteln!(logx, " • number of donors = {}", ctl.origin_info.donors); - - // Print mean reads per cell if known. - - let mut known = true; - for i in 0..ctl.origin_info.n() { - if ctl.origin_info.cells_cellranger[i].is_none() { - known = false; - } else if ctl.origin_info.mean_read_pairs_per_cell_cellranger[i].is_none() { - known = false; - } - } - if known { - let (mut cells, mut read_pairs) = (0, 0); - for i in 0..ctl.origin_info.n() { - let c = ctl.origin_info.cells_cellranger[i].unwrap(); - let rpc = ctl.origin_info.mean_read_pairs_per_cell_cellranger[i].unwrap(); - cells += c; - read_pairs += cells * rpc; - } - let rpc = ((read_pairs as f64) / (cells as f64)).round(); - fwriteln!(logx, " • cells (from cellranger) = {}", cells); - fwriteln!(logx, " • read pairs per cell (from cellranger) = {}", rpc); - } - - // Print computational performance stats. - - if !ctl.gen_opt.summary_clean { - fwriteln!( - logx, - " • total elapsed time = {:.1} seconds", - elapsed(&tall) - ); - #[cfg(not(target_os = "macos"))] - fwriteln!(logx, " • peak memory = {:.1} GB", peak_mem_usage_gb()); - } - - // Compute marking stats. 
- - let (mut nmarked, mut nmarked_good, mut ndubious) = (0, 0, 0); - let (mut nfake, mut nfake_marked, mut ngood, mut ngood_marked) = (0, 0, 0, 0); - if ctl.gen_opt.mark_stats || ctl.gen_opt.mark_stats2 { - for i in 0..nclono { - let mut datasets = Vec::::new(); - let mut ncells = 0; - for j in 0..exacts[i].len() { - let ex = &exact_clonotypes[exacts[i][j]]; - ncells += ex.ncells(); - for l in 0..ex.ncells() { - datasets.push(ex.clones[l][0].dataset_index); - } - } - datasets.sort(); - let mut freq = Vec::<(u32, usize)>::new(); - make_freq(&datasets, &mut freq); - let mut fake = false; - let mut di = -1; - if freq.len() == 1 || freq[0].0 >= 10 * freq[1].0 { - di = freq[0].1 as isize; - } - if ncells >= 10 && di >= 0 { - nfake += ncells - 1; - fake = true; - } - if ncells >= 2 { - for j in 0..exacts[i].len() { - let ex = &exact_clonotypes[exacts[i][j]]; - - // Determine if cell is called a B cell. - - for l in 0..ex.ncells() { - let mut b = false; - let li = ex.clones[l][0].dataset_index; - let bc = &ex.clones[l][0].barcode; - if gex_info.cell_type[li].contains_key(&bc.clone()) { - if gex_info.cell_type[li][&bc.clone()].starts_with('B') { - b = true; - } - } - - // Record accordingly. 
- - if ex.clones[l][0].dataset_index as isize == di || !b { - ndubious += 1; - } - if !fake - && ncells >= 10 - && b - && (ex.share.len() == 2 || ex.share.len() == 3) - { - ngood += 1; - if ex.clones[l][0].marked { - ngood_marked += 1; - } - } - } - } - } - if fake { - ngood += 1; - } - let mut fake_marks = 0; - for j in 0..exacts[i].len() { - let ex = &exact_clonotypes[exacts[i][j]]; - for l in 0..ex.ncells() { - if ex.clones[l][0].marked { - nmarked += 1; - if fake { - nfake_marked += 1; - fake_marks += 1; - } - let chains_ok = ex.nchains() >= 2 && ex.nchains() <= 3; - let mut b = false; - let li = ex.clones[l][0].dataset_index; - let bc = &ex.clones[l][0].barcode; - if gex_info.cell_type[li].contains_key(&bc.clone()) { - if gex_info.cell_type[li][&bc.clone()].starts_with('B') { - b = true; - } - } - if chains_ok && freq.len() >= 2 && b { - nmarked_good += 1; - } - } - } - } - if fake_marks == ncells { - nfake_marked -= 1; - } - } - } - - // Print other stats. - - fwriteln!(logx, "2. for the selected clonotypes"); - - // Print summary table for chains / clonotypes / cells. 
- - ncc.sort(); - let mut i = 0; - let mut rows = Vec::>::new(); - let row = vec![ - "chains".to_string(), - "clonotypes with this".to_string(), - "cells in these".to_string(), - "%".to_string(), - ]; - rows.push(row); - let row = vec![ - "".to_string(), - "number of chains".to_string(), - "clonotypes".to_string(), - "".to_string(), - ]; - rows.push(row); - let row = vec!["\\hline".to_string(); 4]; - rows.push(row); - while i < ncc.len() { - let j = next_diff1_2(&ncc, i as i32) as usize; - let nchains_this = ncc[i].0; - let nclono_this = j - i; - let mut ncells_this = 0; - for k in i..j { - ncells_this += ncc[k].1; - } - let row = vec![ - format!("{}", nchains_this), - format!("{}", nclono_this), - format!("{}", ncells_this), - format!("{:.1}", percent_ratio(ncells_this, ncells)), - ]; - rows.push(row); - i = j; - } - let row = vec![ - "total".to_string(), - format!("{}", nclono), - format!("{}", ncells), - "100.0".to_string(), - ]; - rows.push(row); - let mut log = String::new(); - print_tabular_vbox(&mut log, &rows, 2, &b"l|r|r|r".to_vec(), false, false); - log = log.replace("\n", "\n "); - fwrite!(logx, " {}", log); - - // Print other cell/clonotype stats. - - fwriteln!( - logx, - "• number of clonotypes having at least two cells = {}", - nclono2 - ); - fwriteln!(logx, " • number of cells having 1 chain = {}", n1); - fwriteln!(logx, " • number of cells having 2 or 3 chains = {}", n23); - - // Print UMI stats. - - fwriteln!( - logx, - " • mean over middle third of contig UMI counts (heavy chain / TRB) = {:.2}", - middle_mean_umish, - ); - fwriteln!( - logx, - " • mean over middle third of contig UMI counts (light chain / TRA) = {:.2}", - middle_mean_umisl, - ); - - // Print marking stats. 
- - if ctl.gen_opt.mark_stats { - fwriteln!(logx, " --------------------------------"); - fwriteln!(logx, " • number of dubious cells = {}", ndubious); - fwriteln!(logx, " • number of marked cells = {}", nmarked); - fwriteln!(logx, " • number of good marked cells = {}", nmarked_good); - } - if ctl.gen_opt.mark_stats2 { - fwriteln!(logx, " --------------------------------"); - fwriteln!( - logx, - " • number of fake expanded clonotype cells = {}", - nfake - ); - fwriteln!( - logx, - " • number of these that are marked = {}", - nfake_marked - ); - fwriteln!(logx, " • residual = {}", nfake - nfake_marked); - fwriteln!( - logx, - " • number of good expanded clonotype cells = {}", - ngood - ); - fwriteln!( - logx, - " • number of these that are marked = {}", - ngood_marked - ); - } - - // Print origin (sample)/donor table. - - let mut rows = Vec::>::new(); - let row = vec![ - "origin".to_string(), - "donor".to_string(), - "cells".to_string(), - ]; - rows.push(row); - let row = vec!["\\hline".to_string(); 3]; - rows.push(row); - for i in 0..sdx.len() { - let mut row = Vec::::new(); - if sdx[i].0.is_some() { - row.push(format!( - "{}", - ctl.origin_info.origin_list[sdx[i].0.unwrap()] - )); - } else { - row.push("?".to_string()); - } - if sdx[i].1.is_some() { - row.push(format!("{}", ctl.origin_info.donor_list[sdx[i].1.unwrap()])); - } else { - row.push("?".to_string()); - } - row.push(format!("{}", sdx[i].2)); - rows.push(row); - } - let mut log = String::new(); - print_tabular_vbox(&mut log, &rows, 2, &b"llr".to_vec(), false, false); - log = log.replace("\n", "\n "); - fwrite!(logx, " {}", log); - } - - // Print summary csv stats. - - if ctl.gen_opt.summary_csv { - println!("\nmiddle_mean_umis_heavy,middle_mean_umis_light,n_twothreesie"); - println!("{:.2},{:.2},{}", middle_mean_umish, middle_mean_umisl, n23); - } - - // Print to stdout. 
- - if !ctl.gen_opt.html { - print!("{}", compress_ansi_escapes(&strme(&logx))); - } else { - // Note that we do not link to the css file, because it is less fragile then including - // the font face information directly. In particular, the css file could be accidentally - // deleted or renamed, which would break previously generated user html files. This - // actually happened! - let s = convert_text_with_ansi_escapes_to_html( - strme(&logx), - "", // source - &ctl.gen_opt.html_title, - &format!("", font_face_in_css()), - "DejaVuSansMono", - 14, - ); - print!("{}", s); - } - - // Test for required number of false positives. - - if ctl.gen_opt.required_fps.is_some() { - let mut fps = 0; - for i in 0..pics.len() { - if pics[i].contains("WARNING:") { - fps += 1; - } - } - if fps != ctl.gen_opt.required_fps.unwrap() { - eprintln!( - "\nA \"false positive\" is a clonotype that contains cells from multiple\n\ - donors. You invoked enclone with the argument REQUIRED_FPS={}, but we found\n\ - {} false positives, so the requirement is not met.\n", - ctl.gen_opt.required_fps.unwrap(), - fps - ); - std::process::exit(1); - } - } -} diff --git a/enclone_tail/src/lib.rs b/enclone_tail/src/lib.rs deleted file mode 100644 index caa95eb2b..000000000 --- a/enclone_tail/src/lib.rs +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) 2020 10x Genomics, Inc. All rights reserved. - -extern crate enclone_core; - -pub mod display_tree; -pub mod group; -pub mod neighbor; -pub mod newick; -pub mod plot; -pub mod string_width; -pub mod tail; diff --git a/enclone_tail/src/neighbor.rs b/enclone_tail/src/neighbor.rs deleted file mode 100644 index 81d5a56b5..000000000 --- a/enclone_tail/src/neighbor.rs +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Implement the phylogenetic tree neighbor joining algorithm, with one tweak (see below). -// -// Saitou N., Nei M. (1987). 
The neighbor-joining method: a new method for reconstructing -// phylogenetic trees. Molecular Biology and Evolution 4: 406–425. PMID 3447015. -// -// We follow https://en.wikipedia.org/wiki/Neighbor_joining. -// -// Tweak: negative edge lengths are replaced by zero as suggested by Kuhner and Felsenstein (1994). -// Kuhner M.K., Felsenstein J. (1994). A simulation comparison of phylogeny algorithms under equal and unequal evolutionary rates. Molecular Biology and Evolution 11(3): 459-468. PMID 8015439. -// -// The single input argument should be a symmetric n x n matrix, n >= 1. -// The output is a vector of 2n-3 edges, represented as (v, w, distance). -// -// Note that this algorithm is O(n^3). - -pub fn neighbor_joining(d: &Vec>) -> Vec<(usize, usize, f64)> { - let (mut d, mut d2) = (d.clone(), d.clone()); - let n0 = d.len(); - assert!(n0 >= 1); - for i in 0..n0 { - assert_eq!(d[i].len(), n0); - } - for i in 0..n0 { - for j in i + 1..n0 { - assert_eq!(d[i][j], d[j][i]); - } - } - if n0 == 1 { - return Vec::new(); - } else if n0 == 2 { - return vec![(0, 1, d[0][1])]; - } - let mut verts = vec![0; n0]; - for i in 0..n0 { - verts[i as usize] = i; - } - let mut edges = vec![(0, 0, 0.0); 2 * n0 - 3]; - let mut q = vec![vec![0.0; n0]; n0]; - for n in (3..=n0).rev() { - for i in 0..n { - for j in i + 1..n { - q[i][j] = (n - 2) as f64 * d[i][j]; - for k in 0..n { - q[i][j] -= d[i][k] + d[j][k]; - } - q[j][i] = q[i][j]; - } - } - let (mut f, mut g) = (0, 1); - let mut m = q[0][1]; - for i in 0..n { - for j in i + 1..n { - if q[i][j] < m { - f = i; - g = j; - m = q[i][j]; - } - } - } - let mut df = (n - 2) as f64 * d[f][g]; - for k in 0..n { - df += d[f][k] - d[g][k]; - } - df /= (2 * (n - 2)) as f64; - let dg = d[f][g] - df; - let vnew = n0 + (n0 - n); - edges[2 * (n0 - n)] = (verts[f], vnew, df); - edges[2 * (n0 - n) + 1] = (verts[g], vnew, dg); - verts[f] = vnew; - for k in g..n - 1 as usize { - verts[k] = verts[k + 1]; - } - for i in 0..n { - for j in 0..n as usize { - 
d2[i][j] = d[i][j]; - } - } - for k in 0..n { - if k != f { - d2[f][k] = (d[f][k] + d[g][k] - d[f][g]) / 2.0; - d2[k][f] = d2[f][k]; - } - } - for i in 0..n { - if i != g { - for j in 0..n { - if j != g { - let (mut ip, mut jp) = (i, j); - if ip > g { - ip -= 1; - } - if jp > g { - jp -= 1; - } - d[ip][jp] = d2[i][j]; - } - } - } - } - if n == 3 { - edges[2 * n0 - 4] = (verts[0], verts[1], d[0][1]); - } - } - for i in 0..edges.len() { - edges[i].2 = edges[i].2.max(0.0); - } - edges -} diff --git a/enclone_tail/src/newick.rs b/enclone_tail/src/newick.rs deleted file mode 100644 index 88f67f0f6..000000000 --- a/enclone_tail/src/newick.rs +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Convert a rooted directed tree into Newick format, -// see https://en.wikipedia.org/wiki/Newick_format. -// -// This is not an efficient implementation. -// -// The input graph must be acyclic and connected. Otherwise bad things may happen. Not checked. -// -// Input data: -// 1. vertex names = vnames -// 2. index of root vertex = r -// 3. edges (v, w, edge-name) = edges, where v and w are zero-based indices of vertices. -// -// Upon entry, the edge-names should be string representations of weights. - -use itertools::Itertools; -use std::cmp::max; - -pub fn newick(vnames: &Vec, r: usize, edges: &Vec<(usize, usize, String)>) -> String { - // Set up. - - let mut edges = edges.clone(); - let mut n = 0; - for i in 0..edges.len() { - n = max(n, edges[i].0 + 1); - n = max(n, edges[i].1 + 1); - } - assert!(r < n); - let mut index = vec![Vec::::new(); n]; - for i in 0..edges.len() { - index[edges[i].0].push(i); - index[edges[i].1].push(i); - } - assert_eq!(n, vnames.len()); - - // Incorporate the vertex names into the weights. - - for i in 0..edges.len() { - edges[i].2 = format!("{}:{}", vnames[edges[i].1], edges[i].2); - } - - // Gradually chew back the edges and roll up the labels as we go. 
- - let mut used = vec![false; n]; - let mut nused = 0; - // while nused < n - 1 { - loop { - let nused0 = nused; - for v in 0..n { - if !used[v] && index[v].len() > 1 { - let mut subterminal = true; - for i in index[v].iter() { - let e = &edges[*i]; - if e.0 == v { - let w = e.1; - if index[w].len() > 1 { - subterminal = false; - break; - } - } - } - if subterminal { - let mut labels = Vec::::new(); - let mut j = 0; - for i in index[v].iter() { - let e = &edges[*i]; - if e.0 == v { - let w = e.1; - assert!(!used[w]); - used[w] = true; - nused += 1; - labels.push(e.2.clone()); - } else { - j = *i; - } - } - index[v] = vec![j]; - let label = labels.iter().format(","); - if v != r { - edges[j].2 = format!("({}){}", label, edges[j].2); - } else { - edges[j].2 = format!("({}){};", label, vnames[edges[j].0]); - } - } - } - } - if nused == nused0 { - break; - } - } - if nused == n - 1 { - edges[index[r][0]].2.clone() - } else { - format!("({}){};", edges[index[r][0]].2, vnames[r]) - } -} diff --git a/enclone_tail/src/plot.rs b/enclone_tail/src/plot.rs deleted file mode 100644 index e7a4fa24a..000000000 --- a/enclone_tail/src/plot.rs +++ /dev/null @@ -1,573 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// The purpose of this file is the function plot_clonotypes. It plots clonotypes as partial -// hexagonal closest packings. This is visually kind of satisfying, but also a bit weird looking. -// In some cases, by eye, you can see rounder forms that could be created by relocating some of -// the cells. 
- -use crate::string_width::*; -use ansi_escape::*; -use enclone_core::defs::*; -use io_utils::*; -use std::fs::File; -use std::io::Write; -use std::io::*; -use string_utils::*; -use vdj_ann::refx::*; -use vector_utils::*; - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// For radius r and n = 0, 1, ..., consider a counterclockwise spiral of lattice-packed disks of -// radius r, starting at the origin and going first to the right. Return the coordinates of the -// center of the nth disk. See this picture: -// https://www.researchgate.net/profile/Guorui_Li4/publication/220270050/figure/fig1/ -// AS:393993713143808@1470946829076/The-hexagonal-coordinate-system.png -// There is no attempt at efficiency. - -fn hex_coord(n: usize, r: f64) -> (f64, f64) { - // Special case. - if n == 0 { - return (0.0, 0.0); - } - // If the hexagons are numbered 0, 1, ... outward, which hexagon "hid" are we on and - // which position "hpos" on that are we at? - let mut hid = 1; - let mut k = 6; - let mut hpos = n - 1; - loop { - if hpos < k { - break; - } - hpos -= k; - hid += 1; - k += 6; - } - // Find coordinates. - let c = r * 3.0f64.sqrt() / 2.0; // center to center distance, divided by 2 - let mut x = hid as f64 * 2.0 * c; - let mut y = 0.0; - let mut p = hpos; - if p > 0 { - // Traverse the six faces, as far as we have to go. 
- for _ in 0..hid { - x -= c; - y += 1.5; - p -= 1; - if p == 0 { - break; - } - } - if p > 0 { - for _ in 0..hid { - x -= 2.0 * c; - p -= 1; - if p == 0 { - break; - } - } - if p > 0 { - for _ in 0..hid { - x -= c; - y -= 1.5; - p -= 1; - if p == 0 { - break; - } - } - if p > 0 { - for _ in 0..hid { - x += c; - y -= 1.5; - p -= 1; - if p == 0 { - break; - } - } - if p > 0 { - for _ in 0..hid { - x += 2.0 * c; - p -= 1; - if p == 0 { - break; - } - } - if p > 0 { - for _ in 0..hid - 1 { - x += c; - y += 1.5; - p -= 1; - if p == 0 { - break; - } - } - } - } - } - } - } - } - x *= 2.0 / 3.0f64.sqrt(); - y *= 2.0 / 3.0f64.sqrt(); - (x, y) -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Pack circles of given radii. There is probably a literature on this, and this is probably -// a very crappy algorithm. The answer is certainly not optimal. The run time is O(n^2) where -// the constant includes a factor of 10^5. Return centers for the circles. - -fn pack_circles(r: &Vec) -> Vec<(f64, f64)> { - let mut c = Vec::<(f64, f64)>::new(); - if r.is_empty() { - return c; - } - c.push((0.0, 0.0)); - let mut bigr = r[0]; - let mut rand = 0i64; - // We use a ridiculously large sample. Reducing it to 1000 substantially reduces symmetry. - // Presumably as the number of clusters increases, the sample would need to be increased - // (ideally) to increase symmetry. - const SAMPLE: usize = 100000; - const MUL: f64 = 1.5; - for i in 1..r.len() { - let mut q = Vec::<(f64, f64, f64)>::new(); - loop { - for _ in 0..SAMPLE { - // Get a random point in [-1,+1] x [-1,+1]. Using a hand-rolled random number - // generator (from the internet) for speed and reproducibility, although there - // might be something better in the standard packages. 
- let rand1 = 6_364_136_223_846_793_005i64 - .wrapping_mul(rand) - .wrapping_add(1_442_695_040_888_963_407); - let rand2 = 6_364_136_223_846_793_005i64 - .wrapping_mul(rand1) - .wrapping_add(1_442_695_040_888_963_407); - rand = rand2; - let mut r1 = (2.0 * (rand1 % 1_000_000i64) as f64 / 1_000_000.0) - 1.0; - let mut r2 = (2.0 * (rand2 % 1_000_000i64) as f64 / 1_000_000.0) - 1.0; - // Make it bigger. - r1 *= (bigr + r[i]) * MUL; - r2 *= (bigr + r[i]) * MUL; - // See if circle at (r1,r2) overlaps any of the existing circles. - let mut ok = true; - for k in 0..i { - let d = ((c[k].0 - r1) * (c[k].0 - r1) + (c[k].1 - r2) * (c[k].1 - r2)).sqrt(); - if d < r[i] + r[k] { - ok = false; - break; - } - } - if ok { - q.push((r1 * r1 + r2 * r2, r1, r2)); - } - } - q.sort_by(|a, b| a.partial_cmp(b).unwrap()); - if !q.is_empty() { - break; - } - } - c.push((q[0].1, q[0].2)); - bigr = bigr.max(r[i] + (c[i].0 * c[i].0 + c[i].1 * c[i].1).sqrt()); - } - c -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Given a collection of circles having specified colors, create an svg string that shows the -// circles on a canvas of fixed size. The circles are moved and resized accordingly. 
- -fn circles_to_svg( - center: &Vec<(f64, f64)>, - radius: &Vec, - color: &Vec, - width: usize, - height: usize, - boundary: usize, -) -> String { - let n = center.len(); - assert!(!center.is_empty()); - assert!(radius.len() == n); - assert!(color.len() == n); - assert!(boundary < width); - assert!(boundary < height); - for i in 0..n { - assert!(radius[i] > 0.0); - } - let mut out = format!( - "\n", - width, height - ); - let mut center = center.clone(); - let mut radius = radius.clone(); - let mut xmin = center[0].0; - let mut xmax = center[0].0; - let mut ymin = center[0].1; - let mut ymax = center[0].1; - for i in 0..n { - xmin = xmin.min(center[i].0 - radius[i]); - xmax = xmax.max(center[i].0 + radius[i]); - ymin = ymin.min(center[i].1 - radius[i]); - ymax = ymax.max(center[i].1 + radius[i]); - } - let width = width - boundary; - let height = height - boundary; - let scale = ((width as f64) / (xmax - xmin)).min((height as f64) / (ymax - ymin)); - for i in 0..n { - center[i].0 -= xmin; - center[i].1 -= ymin; - center[i].0 *= scale; - center[i].1 *= scale; - radius[i] *= scale; - center[i].0 += boundary as f64; - center[i].1 += boundary as f64; - } - for i in 0..center.len() { - out += &format!( - "\n", - center[i].0, center[i].1, radius[i], color[i] - ); - } - - out += "\n"; - out -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -// Here, and in "enclone help color", we swap the order of colors, placing the last three before -// the first three. This is because the last three seem to make a better three-color palette. 
- -fn substitute_enclone_color(color: &mut String) { - if *color == "@1".to_string() { - *color = "rgb(0,95,175)".to_string(); - } else if *color == "@2".to_string() { - *color = "rgb(215,135,175)".to_string(); - } else if *color == "@3".to_string() { - *color = "rgb(0,175,135)".to_string(); - } else if *color == "@4".to_string() { - *color = "rgb(215,95,0)".to_string(); - } else if *color == "@5".to_string() { - *color = "rgb(95,175,255)".to_string(); - } else if *color == "@6".to_string() { - *color = "rgb(215,175,0)".to_string(); - } -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -pub fn plot_clonotypes( - ctl: &EncloneControl, - refdata: &RefData, - exacts: &Vec>, - exact_clonotypes: &Vec, -) { - if ctl.gen_opt.plot_file.is_empty() { - return; - } - if exacts.is_empty() { - eprintln!("\nThere are no clonotypes to plot, giving up.\n"); - std::process::exit(1); - } - - let mut const_names = Vec::::new(); - for id in refdata.cs.iter() { - if refdata.rtype[*id] == 0 { - const_names.push(refdata.name[*id].clone()); - } - } - unique_sort(&mut const_names); - if ctl.gen_opt.plot_by_isotype && const_names.len() > 12 { - eprintln!( - "\nCurrently PLOT_BY_ISOTYPE only works if there are at most 12 constant \ - region names. If this is a problem, please let us know and we will generalize it.\n" - ); - std::process::exit(1); - } - let mut clusters = Vec::<(Vec, Vec<(f64, f64)>)>::new(); - let mut radii = Vec::::new(); - const SEP: f64 = 1.0; // separation between clusters - let mut origins = Vec::::new(); - - // Go through the clonotypes. - - for i in 0..exacts.len() { - let mut colors = Vec::::new(); - let mut coords = Vec::<(f64, f64)>::new(); - let mut n = 0; - - // For PLOT_BY_MARK, find the dataset having the largest number of cells. 
- - let mut dsx = 0; - if ctl.gen_opt.plot_by_mark { - let mut ds = Vec::::new(); - for j in 0..exacts[i].len() { - let ex = &exact_clonotypes[exacts[i][j]]; - for j in 0..ex.clones.len() { - ds.push(ex.clones[j][0].dataset_index); - } - } - ds.sort(); - let mut freq = Vec::<(u32, usize)>::new(); - make_freq(&ds, &mut freq); - dsx = freq[0].1; - } - - // Go through the exact subclonotypes in a clonotype. - - for j in 0..exacts[i].len() { - let ex = &exact_clonotypes[exacts[i][j]]; - for j in 0..ex.clones.len() { - let mut color = "black".to_string(); - - // Determine color for PLOT_BY_ISOTYPE. - - if ctl.gen_opt.plot_by_isotype { - let mut crefs = Vec::>::new(); - for l in 0..ex.share.len() { - if ex.share[l].left { - crefs.push(ex.share[l].c_ref_id); - } - } - unique_sort(&mut crefs); - let mut color_id = 0; - if crefs.solo() && crefs[0].is_some() { - let c = &refdata.name[crefs[0].unwrap()]; - let p = bin_position(&const_names, &c) as usize; - color_id = 1 + p; - } - let x = print_color13(color_id); - color = format!("rgb({},{},{})", x.0, x.1, x.2); - - // Determine color for PLOT_BY_MARK. - } else if ctl.gen_opt.plot_by_mark { - let dom = ex.clones[j][0].dataset_index == dsx; - let marked = ex.clones[j][0].marked; - if dom { - if !marked { - color = "red".to_string(); - } else { - color = "rgb(255,200,200)".to_string(); - } - } else { - if !marked { - color = "blue".to_string(); - } else { - color = "rgb(200,200,255)".to_string(); - } - } - - // Determine color in other cases. 
- } else { - if ex.clones[j][0].origin_index.is_some() { - let s = &ctl.origin_info.origin_list[ex.clones[j][0].origin_index.unwrap()]; - origins.push(s.clone()); - if ctl.gen_opt.origin_color_map.contains_key(&s.clone()) { - color = ctl.gen_opt.origin_color_map[s].clone(); - } - } - if ctl.gen_opt.origin_color_map.is_empty() { - let mut dataset_colors = false; - for c in ctl.origin_info.color.iter() { - if !c.is_empty() { - dataset_colors = true; - } - } - let di = ex.clones[j][0].dataset_index; - if dataset_colors { - color = ctl.origin_info.color[di].clone(); - } else { - let bc = &ex.clones[j][0].barcode; - if ctl.origin_info.barcode_color[di].contains_key(bc) { - color = ctl.origin_info.barcode_color[di][bc].clone(); - } - } - } - } - colors.push(color); - coords.push(hex_coord(n, 1.0)); - n += 1; - } - } - unique_sort(&mut origins); - - // Move the colors around to get vertical separation, e.g. blues on the left, reds - // on the right. - - colors.sort(); - coords.sort_by(|a, b| a.partial_cmp(b).unwrap()); - - // Substitute enclone colors. - - for j in 0..colors.len() { - substitute_enclone_color(&mut colors[j]); - } - - // Save. 
- - let mut radius = 0.0f64; - for j in 0..coords.len() { - radius = - radius.max(1.0 + (coords[j].0 * coords[j].0 + coords[j].1 * coords[j].1).sqrt()); - } - radius += SEP; - clusters.push((colors, coords)); - radii.push(radius); - } - let centers = pack_circles(&radii); - for i in 0..clusters.len() { - for j in 0..clusters[i].1.len() { - clusters[i].1[j].0 += centers[i].0; - clusters[i].1[j].1 += centers[i].1; - } - } - let mut center = Vec::<(f64, f64)>::new(); - let mut radius = Vec::::new(); - let mut color = Vec::::new(); - for i in 0..clusters.len() { - for j in 0..clusters[i].0.len() { - color.push(clusters[i].0[j].clone()); - center.push((clusters[i].1[j].0, clusters[i].1[j].1)); - radius.push(1.0); - } - } - const WIDTH: usize = 400; - const HEIGHT: usize = 400; - const BOUNDARY: usize = 10; - for i in 0..center.len() { - center[i].1 = -center[i].1; // otherwise inverted, not sure why - } - let mut svg = circles_to_svg(¢er, &radius, &color, WIDTH, HEIGHT, BOUNDARY); - - // Add legend. 
- - if ctl.gen_opt.use_legend || ctl.gen_opt.plot_by_isotype || ctl.gen_opt.plot_by_mark { - let (mut colors, mut labels) = (Vec::::new(), Vec::::new()); - let mut max_string_width = 0.0f64; - if ctl.gen_opt.plot_by_isotype { - for i in 0..const_names.len() { - labels.push(const_names[i].clone()); - let color_id = i + 1; - let x = print_color13(color_id); - let color = format!("rgb({},{},{})", x.0, x.1, x.2); - colors.push(color); - } - labels.push("undetermined".to_string()); - let color_id = 0; - let x = print_color13(color_id); - let color = format!("rgb({},{},{})", x.0, x.1, x.2); - colors.push(color); - } else if ctl.gen_opt.plot_by_mark { - colors.push("red".to_string()); - labels.push("in most common dataset, !marked".to_string()); - colors.push("rgb(255,200,200)".to_string()); - labels.push("in most common dataset, marked".to_string()); - colors.push("blue".to_string()); - labels.push("not in most common dataset, !marked".to_string()); - colors.push("rgb(200,200,255)".to_string()); - labels.push("not in most common dataset, marked".to_string()); - } else { - if ctl.gen_opt.legend.len() == 0 { - for s in origins.iter() { - let mut color = "black".to_string(); - if ctl.gen_opt.origin_color_map.contains_key(&s.clone()) { - color = ctl.gen_opt.origin_color_map[s].clone(); - } - colors.push(color); - } - } else { - origins.clear(); - for i in 0..ctl.gen_opt.legend.len() { - colors.push(ctl.gen_opt.legend[i].0.clone()); - origins.push(ctl.gen_opt.legend[i].1.clone()); - } - } - for i in 0..colors.len() { - substitute_enclone_color(&mut colors[i]); - } - labels = origins.clone(); - } - for s in labels.iter() { - max_string_width = max_string_width.max(arial_width(s, FONT_SIZE)); - } - - // Calculate the actual height of the svg. 
- - let mut actual_height = 0.0f64; - let fields = svg.split(' ').collect::>(); - let mut y = 0.0; - for i in 0..fields.len() { - if fields[i].starts_with("cy=") { - y = fields[i].between("\"", "\"").force_f64(); - } - if fields[i].starts_with("r=") { - let r = fields[i].between("\"", "\"").force_f64(); - actual_height = actual_height.max(y + r); - } - } - - // Build the legend. - - let n = labels.len(); - const FONT_SIZE: usize = 20; - const LEGEND_CIRCLE_RADIUS: usize = 4; - const LEGEND_BOX_STROKE_WIDTH: usize = 2; - let legend_height = (FONT_SIZE + BOUNDARY / 2) * n + BOUNDARY; - let legend_width = BOUNDARY as f64 * 2.5 + max_string_width; - let legend_ystart = actual_height + (BOUNDARY as f64) * 1.5; - svg = svg.rev_before("<").to_string(); - svg += &format!( - "\n", - BOUNDARY, legend_ystart, legend_width, legend_height, LEGEND_BOX_STROKE_WIDTH - ); - for i in 0..labels.len() { - let y = legend_ystart as f64 - + BOUNDARY as f64 * 2.5 - + ((FONT_SIZE + BOUNDARY / 2) * i) as f64; - svg += &format!( - "{}\n", - BOUNDARY * 3, - y, - FONT_SIZE, - labels[i] - ); - svg += &format!( - "\n", - BOUNDARY * 2, - y - BOUNDARY as f64 / 2.0, - LEGEND_CIRCLE_RADIUS, - colors[i] - ); - } - let (svg1, svg2) = (svg.before("height="), svg.after("height=\"").after("\"")); - let new_height = legend_ystart + (legend_height + LEGEND_BOX_STROKE_WIDTH) as f64; - svg = format!("{}height=\"{}\"{}", svg1, new_height, svg2); - } - - // Output the svg file. 
- - if ctl.gen_opt.plot_file != "stdout".to_string() { - let f = File::create(&ctl.gen_opt.plot_file); - if f.is_err() { - eprintln!( - "\nThe file {} in your PLOT argument could not be created.\n", - ctl.gen_opt.plot_file - ); - std::process::exit(1); - } - let mut f = BufWriter::new(f.unwrap()); - fwriteln!(f, "{}", svg); - } else { - print!("{}", svg); - if !ctl.gen_opt.noprint { - println!(""); - } - } -} diff --git a/enclone_tail/src/string_width.rs b/enclone_tail/src/string_width.rs deleted file mode 100644 index acb58ff0e..000000000 --- a/enclone_tail/src/string_width.rs +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -use vector_utils::*; - -// Estimate the width in pixels of an Arial string at a given font size. -// This uses a hardcoded table of widths of Arial 1000 point characters. The table -// is incomplete, and we use a fixed value for all other characters. - -pub fn arial_width(s: &str, font_size: usize) -> f64 { - const DEFAULT_WIDTH: usize = 1000; - let mut len = 0; - for c in s.chars() { - let p = bin_position1_2(&ARIAL_1000_WIDTH_TABLE, &c); - if p < 0 { - len += DEFAULT_WIDTH; - } else { - len += ARIAL_1000_WIDTH_TABLE[p as usize].1; - } - } - len as f64 * font_size as f64 / 1000.0 -} - -// Note that the following table must be sorted. 
- -const ARIAL_1000_WIDTH_TABLE: [(char, usize); 66] = [ - (' ', 278), - ('-', 334), - ('.', 278), - ('0', 557), - ('1', 557), - ('2', 557), - ('3', 557), - ('4', 557), - ('5', 557), - ('6', 557), - ('7', 557), - ('8', 557), - ('9', 557), - ('A', 667), - ('B', 667), - ('C', 723), - ('D', 723), - ('E', 667), - ('F', 611), - ('G', 778), - ('H', 723), - ('I', 278), - ('J', 500), - ('K', 667), - ('L', 557), - ('M', 834), - ('N', 723), - ('O', 778), - ('P', 667), - ('Q', 778), - ('R', 723), - ('S', 667), - ('T', 611), - ('U', 723), - ('V', 667), - ('W', 944), - ('X', 667), - ('Y', 667), - ('Z', 611), - ('_', 557), - ('a', 557), - ('b', 557), - ('c', 500), - ('d', 557), - ('e', 557), - ('f', 278), - ('g', 557), - ('h', 557), - ('i', 223), - ('j', 223), - ('k', 500), - ('l', 223), - ('m', 834), - ('n', 557), - ('o', 557), - ('p', 557), - ('q', 557), - ('r', 334), - ('s', 500), - ('t', 278), - ('u', 557), - ('v', 500), - ('w', 723), - ('x', 500), - ('y', 500), - ('z', 500), -]; - -// Computed using this html code from the internet. For each character, -// edit the code, refresh the html window, and click the button. -// Obviously one write write a code that did all characters and generated -// the rust code directly with one button push. - -/* - - - - Calculate the text width with JavaScript - - -

- - - - -*/ diff --git a/enclone_tail/src/tail.rs b/enclone_tail/src/tail.rs deleted file mode 100644 index 61f8e6a3b..000000000 --- a/enclone_tail/src/tail.rs +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright (c) 2020 10X Genomics, Inc. All rights reserved. - -// Group and print clonotypes. For now, limited grouping functionality. - -use crate::group::*; -use crate::plot::*; -use enclone_core::defs::*; -use enclone_proto::types::*; -use io_utils::*; -use ndarray::s; -use rayon::prelude::*; -use std::collections::HashMap; -use std::io::Write; -use std::time::Instant; -use string_utils::*; -use tables::*; -use vdj_ann::refx::*; -use vector_utils::*; - -pub fn tail_code( - tall: &Instant, - refdata: &RefData, - pics: &Vec, - exacts: &Vec>, - rsi: &Vec, - exact_clonotypes: &Vec, - ctl: &EncloneControl, - mut out_datas: &mut Vec>>, - join_info: &Vec<(usize, usize, bool, Vec)>, - gex_info: &GexInfo, - tests: &Vec, - controls: &Vec, - h5_data: &Vec<(usize, Vec, Vec)>, - d_readers: &Vec>, - ind_readers: &Vec>, - dref: &Vec, -) { - // Group and print clonotypes. - - group_and_print_clonotypes( - &tall, - &refdata, - &pics, - &exacts, - &rsi, - &exact_clonotypes, - &ctl, - &mut out_datas, - &join_info, - &gex_info, - &dref, - ); - - // Do gene scan. 
- - if ctl.gen_opt.gene_scan_test.is_some() { - println!("\nFEATURE SCAN\n"); - let mut test_cells = 0; - for i in tests.iter() { - for u in exacts[*i].iter() { - test_cells += exact_clonotypes[*u].ncells(); - } - } - println!( - "{} clonotypes containing {} cells in test set", - tests.len(), - test_cells - ); - let mut control_cells = 0; - for i in controls.iter() { - for u in exacts[*i].iter() { - control_cells += exact_clonotypes[*u].ncells(); - } - } - println!( - "{} clonotypes containing {} cells in control set\n", - controls.len(), - control_cells - ); - if tests.len() == 0 { - eprintln!("Gene scan failed, no test clonotypes.\n"); - std::process::exit(1); - } - if controls.len() == 0 { - eprintln!("Gene scan failed, no control clonotypes.\n"); - std::process::exit(1); - } - println!("enriched features\n"); - let mut results = Vec::<(usize, Vec, f64, f64, f64)>::new(); - let nf = gex_info.gex_features[0].len(); - for fid in 0..nf { - results.push((fid, Vec::::new(), 0.0, 0.0, 0.0)); - } - results.par_iter_mut().for_each(|res| { - let fid = res.0; - // NOT SURE THIS IS BACKWARD COMPATIBLE! - let gene = gex_info.gex_features[0][fid] - .after("\t") - .after("\t") - .contains("Gene"); - let mut test_values = Vec::::new(); - let mut control_values = Vec::::new(); - for pass in 1..=2 { - let tc; - let vals; - if pass == 1 { - tc = &tests; - vals = &mut test_values; - } else { - tc = &controls; - vals = &mut control_values; - } - for j in 0..tc.len() { - for m in 0..exacts[tc[j]].len() { - let ex = &exact_clonotypes[exacts[tc[j]][m]]; - for l in 0..ex.clones.len() { - let li = ex.clones[l][0].dataset_index; - let bc = ex.clones[l][0].barcode.clone(); - let p = bin_position(&gex_info.gex_barcodes[li], &bc); - if p >= 0 { - let mut raw_count = 0 as f64; - if gex_info.gex_matrices[li].initialized() { - raw_count = - gex_info.gex_matrices[li].value(p as usize, fid) as f64; - } else { - let z1 = gex_info.h5_indptr[li][p as usize] as usize; - // p+1 OK? 
- let z2 = gex_info.h5_indptr[li][p as usize + 1] as usize; - let d: Vec; - let ind: Vec; - if ctl.gen_opt.h5_pre { - d = h5_data[li].1[z1..z2].to_vec(); - ind = h5_data[li].2[z1..z2].to_vec(); - } else { - d = d_readers[li] - .as_ref() - .unwrap() - .read_slice(s![z1..z2]) - .unwrap() - .to_vec(); - ind = ind_readers[li] - .as_ref() - .unwrap() - .read_slice(s![z1..z2]) - .unwrap() - .to_vec(); - } - for j in 0..d.len() { - if ind[j] == fid as u32 { - raw_count = d[j] as f64; - break; - } - } - } - let mult: f64; - if gene { - mult = gex_info.gex_mults[li]; - } else { - mult = gex_info.fb_mults[li]; - } - if !ctl.gen_opt.full_counts { - vals.push(raw_count * mult); - } else { - vals.push(raw_count); - } - } - } - } - } - } - let mut test_mean = 0.0; - for i in 0..test_values.len() { - test_mean += test_values[i]; - } - test_mean /= test_values.len() as f64; - let mut control_mean = 0.0; - for i in 0..control_values.len() { - control_mean += control_values[i]; - } - control_mean /= control_values.len() as f64; - let mut vals = Vec::::new(); - let threshold = ctl.gen_opt.gene_scan_threshold.clone().unwrap(); - for i in 0..threshold.var.len() { - if threshold.var[i] == "t".to_string() { - vals.push(test_mean); - } else { - vals.push(control_mean); - } - } - if threshold.satisfied(&vals) { - fwrite!(res.1, "{}", gex_info.gex_features[0][fid]); - res.2 = test_mean; - res.3 = control_mean; - res.4 = test_mean / control_mean; - } - }); - let mut rows = Vec::>::new(); - let row = vec![ - "id".to_string(), - "name".to_string(), - "library_type".to_string(), - "test".to_string(), - "control".to_string(), - "enrichment".to_string(), - ]; - rows.push(row); - for fid in 0..nf { - if results[fid].1.len() > 0 { - let stuff = strme(&results[fid].1); - let fields = stuff.split('\t').collect::>(); - let mut row = Vec::::new(); - row.push(fields[0].to_string()); - row.push(fields[1].to_string()); - row.push(fields[2].to_string()); - row.push(format!("{:.2}", results[fid].2)); - 
row.push(format!("{:.2}", results[fid].3)); - row.push(format!("{:.2}", results[fid].4)); - rows.push(row); - } - } - let mut log = Vec::::new(); - print_tabular(&mut log, &rows, 2, Some(b"lllrrr".to_vec())); - print!("{}", strme(&log)); - } - - // Plot clonotypes. - - plot_clonotypes(&ctl, &refdata, &exacts, &exact_clonotypes); -} diff --git a/enclone_vars/Cargo.toml b/enclone_vars/Cargo.toml new file mode 100644 index 000000000..c2c096720 --- /dev/null +++ b/enclone_vars/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "enclone_vars" +version = "0.5.219" +authors = ["""David Jaffe , + Nigel Delaney , + Keri Dockter , + Jessica Hamel , + Lance Hepler , + Shaun Jackman , + Sreenath Krishnan , + Meryl Lewis , + Alvin Liang , + Patrick Marks , + Wyatt McDonnell """] +edition = "2021" +license-file = "LICENSE.txt" +publish = false +exclude = ["src/bin/var_test.rs", "src/vars"] + +# Please do not edit crate versions within this file. Instead edit the file master.toml +# in the root of the enclone repo. + +[dependencies] +io_utils = { version = "0.3", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +itertools.workspace = true +pretty_trace = { version = "0.5", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +string_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } +vector_utils = { version = "0.1", git = "https://github.com/10XGenomics/rust-toolbox.git", branch = "master" } diff --git a/enclone_vars/LICENSE.txt b/enclone_vars/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/enclone_vars/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/enclone_vars/src/bin/export_code.rs b/enclone_vars/src/bin/export_code.rs new file mode 100644 index 000000000..2837fde90 --- /dev/null +++ b/enclone_vars/src/bin/export_code.rs @@ -0,0 +1,20 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. 
+ +// Actually export code. + +// Read the vars file and export code. This is a partial implementation. + +use enclone_vars::export_code::export_code; +use io_utils::{fwrite, open_for_write_new}; +use pretty_trace::PrettyTrace; + +use std::io::Write; + +fn main() { + PrettyTrace::new().on(); + let outs = export_code(0); + for out in outs { + let mut f = open_for_write_new![&out.0]; + fwrite!(f, "{}", out.1); + } +} diff --git a/enclone_vars/src/bin/var_sort.rs b/enclone_vars/src/bin/var_sort.rs new file mode 100644 index 000000000..e10b32753 --- /dev/null +++ b/enclone_vars/src/bin/var_sort.rs @@ -0,0 +1,17 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Sort and replace the vars file. + +use enclone_vars::sort_vars; +use pretty_trace::PrettyTrace; +use std::io::Write; + +fn main() { + PrettyTrace::new().on(); + let old = std::fs::read_to_string("enclone_vars/src/vars").unwrap(); + let new = sort_vars(&old); + if new != old { + let mut f = std::fs::File::create("enclone_vars/src/vars").unwrap(); + f.write_all(new.as_bytes()).unwrap(); + } +} diff --git a/enclone_vars/src/bin/var_test.rs b/enclone_vars/src/bin/var_test.rs new file mode 100644 index 000000000..9a59fb74f --- /dev/null +++ b/enclone_vars/src/bin/var_test.rs @@ -0,0 +1,12 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Parse the vars file to test if it's valid. + +use enclone_vars::var::parse_variables; +use pretty_trace::PrettyTrace; + +fn main() { + PrettyTrace::new().on(); + let old = std::fs::read_to_string("enclone_vars/src/vars").unwrap(); + let _ = parse_variables(&old); +} diff --git a/enclone_vars/src/export_code.rs b/enclone_vars/src/export_code.rs new file mode 100644 index 000000000..35fd3187e --- /dev/null +++ b/enclone_vars/src/export_code.rs @@ -0,0 +1,854 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. + +// Read the vars file and export code. This is a partial implementation. +// Output is {(filename, contents)}. 
+// +// This writes a temporary file. + +use crate::var::{parse_variables, Variable}; +use io_utils::{fwrite, fwriteln, open_for_write_new}; +use itertools::Itertools; +use std::fmt::Write as _; +use std::io::{BufWriter, Write}; +use std::process::Command; +use string_utils::TextUtils; + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Find upper case strings in var. + +fn get_uppers(var: &str) -> Vec<(String, usize)> { + let uppers = { + let mut uppers = Vec::<(String, usize)>::new(); + let mut s = String::with_capacity(var.len()); + let mut start = 0; + for (i, ch) in var.chars().enumerate() { + if ch.is_ascii_uppercase() { + s.push(ch); + } else if !s.is_empty() { + uppers.push((s.to_string(), start)); + start = i + 1; + s.clear(); + } + } + if !s.is_empty() { + uppers.push((s, start)); + } + uppers + }; + if uppers.len() > 1 { + eprintln!("\nIllegal variable {var}, has more than one uppercase string in it.\n"); + std::process::exit(1); + } + uppers +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +fn process_var( + v: &Variable, + exact: &str, + cell: &str, + code: &str, + f: &mut BufWriter, + class: &str, +) { + let var = &v.name; + let uppers = get_uppers(var); + let mut rega = false; + let mut dataset = false; + let mut name = false; + let bc = var == "BC"; + let info = var == "INFO"; + for upper in &uppers { + if upper.0 == "REGA" { + rega = true; + } else if upper.0 == "DATASET" { + dataset = true; + } else if upper.0 == "NAME" { + name = true; + } + } + let upper = !uppers.is_empty(); + if !upper || rega || dataset || name || bc || info { + let mut passes = 1; + if v.level == "cell-exact" { + passes = 2; + } + for pass in 1..=passes { + let mut var = var.clone(); + if pass == 2 { + var += "_cell"; + } + emit_code_to_test_for_var(&var, f, class); + fwriteln!(f, "{}", code); + if pass == 1 { + fwriteln!(f, "({}, {}, \"{}\".to_string())", 
exact, cell, v.level); + } else { + fwriteln!(f, "let _exact = {};", exact); // to circumvent warning + fwriteln!(f, "(String::new(), {}, \"{}\".to_string())", cell, v.level); + } + } + } +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +fn parse_value_return_lines(code: &mut String, level: &str, exact: &mut String, cell: &mut String) { + *exact = "String::new()".to_string(); + *cell = "Vec::new()".to_string(); + let lines = code.lines().collect::>(); + let n = lines.len(); + if n > 0 { + let mut sub = 0; + for i in (0..lines.len()).rev() { + if lines[i].contains("exact: ") { + *exact = lines[i].after("exact: ").to_string(); + sub += 1; + } else if lines[i].contains("cell: ") { + *cell = lines[i].after("cell: ").to_string(); + sub += 1; + } + } + let mut code2 = String::new(); + for &line in &lines[..lines.len() - sub] { + writeln!(code2, "{line}").unwrap(); + } + *code = code2; + } + if level == "cell-exact" { + assert!(!exact.is_empty()); + assert!(!cell.is_empty()); + } + if level == "cell" { + assert!(!cell.is_empty()); + } +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +fn run_rustfmt(f: &str) { + let new = Command::new("rustfmt") + .arg(f) + .output() + .unwrap_or_else(|_| panic!("{}", "failed to execute rustfmt".to_string())); + if new.status.code() != Some(0) { + eprintln!("\nrustfmt failed\n"); + eprintln!("You can observe the problem by typing rustfmt {f}.\n"); + std::process::exit(1); + } +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Emit code that tests for a given variable, allowing for up to three bracket expressions +// in the variable. See "Test for implemented" before for precisely what is supported. +// also +// ...REGA... +// ...{}...REGA... 
+ +fn quote_str_or_char(s: &str) -> String { + if s.len() == 1 { + format!("'{s}'") + } else { + format!("\"{s}\"") + } +} + +fn emit_code_to_test_for_var(var: &str, f: &mut BufWriter, class: &str) { + let uppers = get_uppers(var); + assert!(uppers.len() <= 1); + let bc = var == "BC"; + let info = var == "INFO"; + let (rega, dataset, name) = { + let mut rega = None; + let mut dataset = None; + let mut name = None; + for upper in uppers { + if upper.0 == "REGA" { + rega = Some(upper.1); + } else if upper.0 == "DATASET" { + dataset = Some(upper.1); + } else if upper.0 == "NAME" { + name = Some(upper.1); + } + } + (rega, dataset, name) + }; + let nranges = var.matches('{').count(); + + // Test for implemented. + + assert_eq!(nranges, var.matches('}').count()); + assert!(nranges <= 1 || rega.is_none()); + assert!(nranges == 0 || dataset.is_none()); + assert!(nranges == 0 || name.is_none()); + assert!(nranges <= 3); + + // Proceed. + + if nranges == 0 { + if rega.is_none() && dataset.is_none() && name.is_none() && !bc && !info { + fwriteln!(f, r###"}} else if vname == "{}" {{"###, var); + } else if info && class == "lvar" { + fwriteln!( + f, + r###"}} else if bin_member(&ctl.gen_opt.info_fields, var) {{"### + ); + } else if bc && class == "lvar" { + fwriteln!(f, r###"}} else if bin_member(alt_bcs, var) {{"###); + } else if name.is_some() { + let (start, stop) = (var.before("NAME"), var.after("NAME")); + fwriteln!( + f, + r###"}} else if vname.starts_with({start_c}) + && vname.after({start_c}).ends_with({stop_c}) + && ( bin_member(&ctl.origin_info.dataset_list, + &vname.between2("{start}", "{stop}").to_string()) + || bin_member(&ctl.origin_info.origin_list, + &vname.between2("{start}", "{stop}").to_string()) + || bin_member(&ctl.origin_info.donor_list, + &vname.between2("{start}", "{stop}").to_string()) + || bin_member(&ctl.origin_info.tag_list, + &vname.between2("{start}", "{stop}").to_string()) + ) {{"###, + start_c = quote_str_or_char(start), + stop_c = 
quote_str_or_char(stop), + start = start, + stop = stop, + ); + fwriteln!( + f, + r###"let name = vname.between2("{}", "{}");"###, + start, + stop + ); + } else if dataset.is_some() { + let (start, stop) = (var.before("DATASET"), var.after("DATASET")); + fwriteln!( + f, + r###"}} else if vname.starts_with({start_c}) + && vname.after("{start}").ends_with({stop_c}) + && bin_member(&ctl.origin_info.dataset_list, + vname.between2("{start}", "{stop}")) {{"###, + start_c = quote_str_or_char(start), + stop_c = quote_str_or_char(stop), + start = start, + stop = stop, + ); + fwriteln!( + f, + r###"let dataset = vname.between2("{}", "{}"));"###, + start, + stop + ); + } else { + let start = var.before("REGA"); + let stop = var.after("REGA"); + fwriteln!( + f, + r###"}} else if vname.starts_with({start_c}) + && vname.after("{start}").ends_with({stop_c}) + && !vname.between2("{start}", "{stop}").contains('_') + && Regex::new(vname.between2("{start}", "{stop}")).is_ok() {{"###, + start_c = quote_str_or_char(start), + stop_c = quote_str_or_char(stop), + start = start, + stop = stop, + ); + // Note inefficiency here, as we are instantiating the regular expression for every + // exact subclonotype, whereas it only needs to be done once (in principle). 
+ fwriteln!( + f, + r###"let reg = Regex::new(vname.between2("{}", "{}")).unwrap();"###, + start, + stop + ); + } + } else if nranges == 1 { + if rega.is_none() { + let begin = var.before("{"); + let end = var.after("}"); + let low = var.after("{").before(".."); + let high = var.after("{").between("..", "}"); + let mut conditions = Vec::::new(); + conditions.push(format!( + r###"vname.starts_with({})"###, + quote_str_or_char(begin) + )); + conditions.push(format!( + r###"vname.ends_with({})"###, + quote_str_or_char(end) + )); + conditions.push(format!( + r###"vname.between2("{begin}", "{end}").parse::().is_ok()"###, + )); + if !low.is_empty() { + conditions.push(format!( + r###"vname.between2("{begin}", "{end}").force_i64() >= {low}"###, + )); + } + if !high.is_empty() { + conditions.push(format!( + r###"vname.between2("{begin}", "{end}").force_i64() <= {high}"###, + )); + } + fwriteln!(f, "}} else if {} {{ ", conditions.iter().format(" && ")); + fwriteln!( + f, + r###"let arg1 = vname.between2("{}", "{}").force_i64();"###, + begin, + end, + ); + } else { + // {}REGA + let begin = var.before("{"); + let start = var.between("}", "REGA"); + let stop = var.after("REGA"); + let low = var.after("{").before(".."); + let high = var.after("{").between("..", "}"); + let mut conditions = Vec::::new(); + conditions.push(format!(r###"vname.starts_with("{begin}")"###)); + conditions.push(format!(r###"vname.ends_with("{stop}")"###)); + conditions.push(format!( + r###"vname.between2("{begin}", "{stop}").contains('{start}')"###, + )); + conditions.push(format!( + r###"vname.between("{begin}", "{start}").parse::().is_ok()"###, + )); + if !low.is_empty() { + conditions.push(format!( + r###"vname.between("{begin}", "{start}").force_i64() >= {low}"###, + )); + } + if !high.is_empty() { + conditions.push(format!( + r###"vname.between("{begin}", "{start}").force_i64() <= {high}"###, + )); + } + conditions.push(format!( + r###"!vname.after("{begin}").between2("{start}", 
"{stop}").contains('_')"###, + )); + conditions.push(format!( + r###"Regex::new(vname.between2("{start}", "{stop}")).is_ok()"###, + )); + fwriteln!(f, "}} else if {} {{ ", conditions.iter().format(" && ")); + fwriteln!( + f, + r###"let arg1 = vname.between("{}", "{}").force_i64();"###, + begin, + start, + ); + fwriteln!( + f, + r###"let reg = Regex::new(vname.after("{}").between2("{}", "{}")).unwrap();"###, + begin, + start, + stop + ); + } + } else if nranges == 2 { + // This code has not been exercised. + let begin = var.before("{"); + let middle = var.between("}", "{"); + let end = var.rev_after("}").to_string(); + let low1 = var.after("{").before(".."); + let high1 = var.after("{").between("..", "}"); + let low2 = var.rev_after("{").before(".."); + let high2 = var.rev_after("{").between("..", "}"); + let mut conditions = Vec::::new(); + conditions.push(format!(r###"vname.starts_with("{begin}")"###)); + conditions.push(format!( + r###"vname.after("{}").contains({})"###, + begin, + quote_str_or_char(middle), + )); + conditions.push(format!( + r###"vname.after("{begin}").after("{middle}").ends_with("{end}")"###, + )); + conditions.push(format!( + r###"vname.between2("{begin}", "{middle}").parse::().is_ok()"###, + )); + if !low1.is_empty() { + conditions.push(format!( + r###"vname.between2("{begin}", "{middle}").force_i64() >= {low1}"###, + )); + } + if !high1.is_empty() { + conditions.push(format!( + r###"vname.between2("{begin}", "{middle}").force_i64() <= {high1}"###, + )); + } + conditions.push(format!( + r###"vname.after("{begin}").between2("{middle}", "{end}").parse::().is_ok()"###, + )); + if !low2.is_empty() { + conditions.push(format!( + r###"vname.after("{begin}").between2("{middle}", "{end}").force_i64() >= {low2}"###, + )); + } + if !high2.is_empty() { + conditions.push(format!( + r###"vname.after("{begin}").between2("{middle}", "{end}").force_i64() <= {high2}"###, + )); + } + fwriteln!(f, "}} else if {} {{ ", conditions.iter().format(" && ")); + 
fwriteln!( + f, + r###"let arg1 = vname.between2("{}", "{}").force_i64();"###, + begin, + middle, + ); + fwriteln!( + f, + r###"let arg2 = vname.after("{}"),between2("{}", "{}").force_i64();"###, + begin, + middle, + end, + ); + } else { + let begin = var.before("{"); + let mid1 = var.between("}", "{"); + let mid2 = var.after("}").between("}", "{"); + let end = var.rev_after("}"); + let low1 = var.after("{").before(".."); + let high1 = var.after("{").between("..", "}"); + let low2 = var.after("{").after("{").before(".."); + let high2 = var.after("{").after("{").between("..", "}"); + let low3 = var.rev_after("{").before(".."); + let high3 = var.rev_after("{").between("..", "}"); + let mut conditions = Vec::::new(); + conditions.push(format!(r###"vname.starts_with("{begin}")"###)); + conditions.push(format!( + r###"vname.after("{}").contains({})"###, + begin, + quote_str_or_char(mid1), + )); + conditions.push(format!( + r###"vname.after("{}").after("{}").contains({})"###, + begin, + mid1, + quote_str_or_char(mid2), + )); + conditions.push(format!( + r###"vname.after("{}").after("{}").after("{}").ends_with({})"###, + begin, + mid1, + mid2, + quote_str_or_char(end), + )); + conditions.push(format!( + r###"vname.between("{begin}", "{mid1}").parse::().is_ok()"###, + )); + if !low1.is_empty() { + conditions.push(format!( + r###"vname.between("{begin}", "{mid1}").force_i64() >= {low1}"###, + )); + } + if !high1.is_empty() { + conditions.push(format!( + r###"vname.between("{begin}", "{mid1}").force_i64() <= {high1}"###, + )); + } + if !low2.is_empty() { + conditions.push(format!( + r###"vname.after("{begin}").between("{mid1}", "{mid2}").force_i64() >= {low2}"###, + )); + } + if !high2.is_empty() { + conditions.push(format!( + r###"vname.after("{begin}").between("{mid1}", "{mid2}").force_i64() <= {high2}"###, + )); + } + conditions.push(format!( + r###"vname.after("{begin}").after("{mid1}").between("{mid2}", "{end}").parse::().is_ok()"###, + )); + if !low3.is_empty() { + 
conditions.push(format!( + r###"vname.after("{begin}").after("{mid1}").between("{mid2}", "{end}").force_i64() >= {low3}"###, + )); + } + if !high3.is_empty() { + conditions.push(format!( + r###"vname.after("{begin}").after("{mid1}").between("{mid2}", "{end}").force_i64() <= {high3}"###, + )); + } + fwriteln!(f, "}} else if {} {{ ", conditions.iter().format(" && ")); + fwriteln!( + f, + r###"let arg1 = vname.between("{}", "{}").force_i64();"###, + begin, + mid1, + ); + fwriteln!( + f, + r###"let arg2 = vname.after("{}").between("{}", "{}").force_i64();"###, + begin, + mid1, + mid2, + ); + fwriteln!( + f, + r###"let arg3 = vname.after("{}").after("{}").between("{}", "{}").force_i64();"###, + begin, + mid1, + mid2, + end, + ); + } +} + +// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ + +// Export code. + +pub fn export_code(level: usize) -> Vec<(String, String)> { + let mut outs = Vec::<(String, String)>::new(); + + // Define code start/stop for cvar_vdj. + + let cvar_vdj_start = r###" + + // Copyright (c) 2021 10x Genomics, Inc. All rights reserved. + // This file is auto-generated by the crate enclone_vars, please do not edit. 
+ + use amino::*; + use crate::print_utils1::*; + use crate::print_utils3::*; + use enclone_core::align_to_vdj_ref::*; + use enclone_core::defs::*; + use enclone_core::median::*; + use enclone_core::opt_d::*; + use enclone_proto::types::*; + use itertools::Itertools; + use stats_utils::*; + use std::cmp::min; + use std::collections::HashMap; + use string_utils::*; + use vdj_ann::refx::RefData; + use vector_utils::*; + + pub fn proc_cvar_auto( + j: usize, + pass: usize, + var: &String, + ex: &ExactClonotype, + exacts: &Vec, + exact_clonotypes: &Vec, + mid: usize, + col: usize, + u: usize, + rsi: &ColInfo, + refdata: &RefData, + dref: &Vec, + ctl: &EncloneControl, + extra_args: &Vec, + pcols_sort: &Vec, + cx: &mut Vec>, + varmat: &Vec>>, + out_data: &mut Vec>, + stats: &mut Vec<(String, Vec)>, + allele_data: &AlleleData, + ) -> Result { + + let mut vname = var.clone(); + if var.contains(':') { + vname = var.after(":").to_string(); + } + let cvars = &ctl.clono_print_opt.cvars; + let mut abbrc = format!("{}{}", var, col + 1); + if var.contains(':') { + abbrc = var.before(":").to_string(); + } + let val = + if false { + (String::new(), Vec::::new(), String::new()) + + "###; + + let cvar_vdj_stop = r###" + + } else { + ("$UNDEFINED".to_string(), Vec::::new(), String::new()) + }; + if val.0 == "$UNDEFINED" { + Ok(false) + } else { + let (exact, cell, _level) = &val; + let mut varc = format!("{}{}", var, col + 1); + if !exact.is_empty() { + if j < rsi.cvars[col].len() && cvars.contains(var) { + cx[col][j] = exact.clone(); + } + if pass == 2 + && ((!ctl.parseable_opt.pout.is_empty() + && (ctl.parseable_opt.pchains == "max" + || col < ctl.parseable_opt.pchains.force_usize())) + || !extra_args.is_empty()) + { + abbrc = abbrc.replace("_Σ", "_sum"); + abbrc = abbrc.replace("_μ", "_mean"); + varc = varc.replace("_Σ", "_sum"); + varc = varc.replace("_μ", "_mean"); + + // Strip escape character sequences from exact. Can happen in notes, + // maybe other places. 
+ + let mut val_clean = String::new(); + let mut chars = Vec::::new(); + let valx = exact.to_string(); + for c in valx.chars() { + chars.push(c); + } + let mut escaped = false; + for l in 0..chars.len() { + if chars[l] == '' { + escaped = true; + } + if escaped { + if chars[l] == 'm' { + escaped = false; + } + continue; + } + val_clean.push(chars[l]); + } + + // Proceed. + + // let varc = format!("{}{}", v, col + 1); + if pcols_sort.is_empty() + || bin_member(pcols_sort, &varc) + || bin_member(extra_args, &varc) + { + out_data[u].insert(abbrc.clone(), val_clean); + } + } + if val.1.is_empty() { + stats.push((abbrc, vec![exact.to_string(); ex.ncells()])); + } else { + stats.push((abbrc, cell.to_vec())); + } + } else if !cell.is_empty() + && pass == 2 + && ((ctl.parseable_opt.pchains == "max" + || col < ctl.parseable_opt.pchains.force_usize()) + || !extra_args.is_empty()) + && (pcols_sort.is_empty() || bin_member(pcols_sort, &varc)) + { + let vals = format!("{}", cell.iter().format(POUT_SEP)); + out_data[u].insert(abbrc, vals); + } + Ok(true) + } + } + + "###; + + // Build cvar auto file. + + let actual_out = "enclone_print/src/proc_cvar_auto.rs".to_string(); + let mut temp_out = "enclone_exec/testx/outputs/proc_cvar_auto.rs".to_string(); + let mut vars_loc = "enclone_vars/src/vars".to_string(); + if level == 1 { + temp_out = format!("../{temp_out}"); + vars_loc = format!("../{vars_loc}"); + } + { + let mut f = open_for_write_new![&temp_out]; + fwrite!(f, "{}", cvar_vdj_start); + let vars = std::fs::read_to_string(&vars_loc).unwrap(); + let vars = parse_variables(&vars); + for v in vars.iter() { + if v.inputs == "cvar_vdj" { + let (mut exact, mut cell) = (String::new(), String::new()); + let mut code = v.code.clone(); + parse_value_return_lines(&mut code, &v.level, &mut exact, &mut cell); + process_var(v, &exact, &cell, &code, &mut f, "cvar"); + } + } + fwrite!(f, "{}", cvar_vdj_stop); + } + + // Rustfmt and save. 
+ + run_rustfmt(&temp_out); + let f = std::fs::read_to_string(&temp_out).unwrap(); + outs.push((actual_out, f)); + + // Define code start/stop for lvar_vdj. + + let lvar_vdj_start = r###" + + // Copyright (c) 2021 10x Genomics, Inc. All rights reserved. + // This file is auto-generated by the crate enclone_vars, please do not edit. + + use amino::*; + use enclone_core::defs::*; + use enclone_core::median::*; + use enclone_proto::types::*; + use itertools::Itertools; + use ndarray::s; + use regex::Regex; + use std::cmp::{max, min}; + use std::collections::HashMap; + use string_utils::*; + use vdj_ann::refx::RefData; + use vector_utils::*; + use hdf5::Reader; + + pub fn proc_lvar_auto( + i: usize, + pass: usize, + var: &String, + exacts: &Vec, + exact_clonotypes: &Vec, + u: usize, + rsi: &ColInfo, + refdata: &RefData, + ctl: &EncloneControl, + extra_args: &Vec, + out_data: &mut Vec>, + stats: &mut Vec<(String, Vec)>, + lvars: &Vec, + row: &mut Vec, + fate: &Vec>, + dref: &Vec, + varmat: &Vec>>, + fp: &Vec>, + n_vdj_gex: &Vec, + vdj_cells: &Vec>, + gex_info: &GexInfo, + groups: &HashMap>, + mults: &Vec, + nd_fields: &Vec, + gex_counts_unsorted: &Vec, + gex_fcounts_unsorted: &Vec, + n_gexs: &Vec, + d_readers: &Vec>, + ind_readers: &Vec>, + h5_data: &Vec<(usize, Vec, Vec)>, + alt_bcs: &Vec, + ) -> Result { + + let clonotype_id = exacts[u]; + let ex = &exact_clonotypes[clonotype_id]; + let mat = &rsi.mat; + let cols = varmat[0].len(); + let verbose = ctl.gen_opt.row_fill_verbose; + let mut vname = var.clone(); + let mut abbr = var.clone(); + if var.contains(':') { + abbr = var.before(":").to_string(); + vname = var.after(":").to_string(); + } + + macro_rules! 
speak { + ($u:expr, $var:expr, $val:expr) => { + if pass == 2 && (!ctl.parseable_opt.pout.is_empty() || !extra_args.is_empty()) { + let mut v = $var.to_string(); + v = v.replace("_Σ", "_sum"); + v = v.replace("_μ", "_mean"); + if ctl.parseable_opt.pcols.is_empty() + || bin_member(&ctl.parseable_opt.pcols_sortx, &v) + || bin_member(&extra_args, &v) + { + out_data[$u].insert(v, $val); + } + } + }; + } + + let val = + if false { + (String::new(), Vec::::new(), String::new()) + + "###; + + let lvar_vdj_stop = r###" + + } else { + ("$UNDEFINED".to_string(), Vec::::new(), String::new()) + }; + if val.0 == "$UNDEFINED" { + Ok(false) + } else { + let (exact, cell, level) = &val; + if level == "cell" && !var.ends_with("_cell") { + if verbose { + eprint!("lvar {} ==> {}; ", var, String::new()); + eprintln!("i = {}, lvars.len() = {}", i, lvars.len()); + } + if i < lvars.len() { + row.push(String::new()) + } + if pass == 2 { + speak!(u, abbr, String::new()); + } + stats.push((abbr.to_string(), cell.clone())); + if pass == 2 { + speak!(u, abbr, format!("{}", cell.iter().format(POUT_SEP))); + } + } else if ( !exact.is_empty() && !var.ends_with("_cell") ) || cell.is_empty() { + if verbose { + eprint!("lvar {} ==> {}; ", var, exact); + eprintln!("i = {}, lvars.len() = {}", i, lvars.len()); + } + if i < lvars.len() { + row.push(exact.clone()) + } + if pass == 2 { + speak!(u, abbr, exact.to_string()); + } + if cell.is_empty() { + stats.push((abbr, vec![exact.to_string(); ex.ncells()])); + } else { + stats.push((abbr, cell.to_vec())); + } + } else if !cell.is_empty() { + if pass == 2 { + speak!(u, abbr, format!("{}", cell.iter().format(POUT_SEP))); + } + stats.push((abbr, cell.to_vec())); + } + Ok(true) + } + } + + "###; + + // Build lvar auto file. 
+ + let actual_out = "enclone_print/src/proc_lvar_auto.rs".to_string(); + let mut temp_out = "enclone_exec/testx/outputs/proc_lvar_auto.rs".to_string(); + let mut vars_loc = "enclone_vars/src/vars".to_string(); + if level == 1 { + temp_out = format!("../{temp_out}"); + vars_loc = format!("../{vars_loc}"); + } + { + let mut f = open_for_write_new![&temp_out]; + fwrite!(f, "{}", lvar_vdj_start); + let vars = std::fs::read_to_string(&vars_loc).unwrap(); + let vars = parse_variables(&vars); + for v in vars.iter() { + if v.inputs == "lvar_vdj" { + let (mut exact, mut cell) = (String::new(), String::new()); + let mut code = v.code.clone(); + parse_value_return_lines(&mut code, &v.level, &mut exact, &mut cell); + process_var(v, &exact, &cell, &code, &mut f, "lvar"); + } + } + fwrite!(f, "{}", lvar_vdj_stop); + } + + // Rustfmt and save. + + run_rustfmt(&temp_out); + let f = std::fs::read_to_string(&temp_out).unwrap(); + outs.push((actual_out, f)); + + // Return. + + outs +} diff --git a/enclone_vars/src/lib.rs b/enclone_vars/src/lib.rs new file mode 100644 index 000000000..23e734b55 --- /dev/null +++ b/enclone_vars/src/lib.rs @@ -0,0 +1,94 @@ +// Copyright (c) 2021 10x Genomics, Inc. All rights reserved. 
+ +pub mod export_code; +pub mod var; + +use std::fmt::Write; +use string_utils::TextUtils; +use vector_utils::sort_sync2; + +pub fn sort_vars(input: &str) -> String { + let mut preamble = String::new(); + let mut in_vars = false; + let div = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\ + ━━━━━━━━━━━━━━━━━━━━━"; + let mut groups = Vec::::new(); + let mut this_group = String::new(); + for line in input.lines() { + if !in_vars && line != div { + writeln!(preamble, "{line}").unwrap(); + } else if !in_vars && line == div { + in_vars = true; + this_group = format!("{div}\n"); + } else if line == div { + groups.push(this_group); + this_group = format!("{div}\n"); + } else { + writeln!(this_group, "{line}").unwrap(); + } + } + let mut vars = groups + .iter() + .map(|group| group.between("name: ", "\n").to_string()) + .collect::>(); + sort_sync2(&mut vars, &mut groups); + drop(vars); + let mut out = preamble; + for group in groups { + out += group.as_str(); + } + writeln!(out, "{div}").unwrap(); + out +} + +// Functions to encode and decode arithmetic operators. Because the symbols - and / appear in gene +// names, and because these symbols are also arithmetic operators, we need a system for hiding +// them. The system is that we encode/decode all the standard arithmetic operators + - * / +// according to the following table, but only if they appear with characters on both sides, with +// neither character being a blank. +// +// NORMAL ENCODED +// + ©add© +// - ©sub© +// * ©mul© +// / ©div© +// +// The funny symbol © is the copyright symbol. 
+ +pub fn encode_arith(x: &str) -> String { + let mut m = Vec::::new(); + for c in x.chars() { + m.push(c); + } + let mut encoded = String::new(); + for i in 0..m.len() { + let mut saved = false; + if i >= 1 && i < m.len() - 1 && m[i - 1] != ' ' && m[i + 1] != ' ' { + if m[i] == '+' { + encoded += "©add©"; + saved = true; + } else if m[i] == '-' { + encoded += "©sub©"; + saved = true; + } else if m[i] == '*' { + encoded += "©mul©"; + saved = true; + } else if m[i] == '/' { + encoded += "©div©"; + saved = true; + } + } + if !saved { + encoded.push(m[i]); + } + } + encoded +} + +pub fn decode_arith(x: &str) -> String { + let mut x = x.replace("©add©", "+"); + x = x.replace("©sub©", "-"); + x = x.replace("©mul©", "*"); + x = x.replace("©div©", "/"); + x +} diff --git a/enclone_vars/src/var.rs b/enclone_vars/src/var.rs new file mode 100644 index 000000000..854cd7c66 --- /dev/null +++ b/enclone_vars/src/var.rs @@ -0,0 +1,164 @@ +// Copyright (c) 2021 10X Genomics, Inc. All rights reserved. + +// Variable specification. Fields are currently Strings, but could be given more structure. + +use std::fmt::Write as _; +use string_utils::{stringme, TextUtils}; + +pub struct Variable { + pub name: String, + pub inputs: String, + pub limits: String, + pub class: String, + pub level: String, + pub val: String, + pub doc: String, + pub brief: String, + pub page: String, + pub avail: String, + pub notes: String, + pub code: String, +} + +// Parse variables, and exit if requirements are not satisfied. 
+ +pub fn parse_variables(input: &str) -> Vec { + const FIELDS: [&str; 12] = [ + "name", "inputs", "limits", "class", "level", "val", "doc", "brief", "page", "avail", + "notes", "code", + ]; + const INDENT: &str = " "; + let mut in_vars = false; + let div = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\ + ━━━━━━━━━━━━━━━━━━━━━"; + let mut groups = Vec::>::new(); + let mut this_group = Vec::::new(); + let mut fc = 0; + for (i, line) in input.lines().enumerate() { + if !in_vars && line != div { + } else if !in_vars && line == div { + in_vars = true; + this_group.clear(); + fc = 0; + } else if line == div { + if this_group.len() != FIELDS.len() { + eprintln!("\nWrong number of fields before line {}.\n", i + 1); + std::process::exit(1); + } + groups.push(this_group.clone()); + this_group.clear(); + fc = 0; + } else { + if fc > FIELDS.len() { + eprintln!("\nToo many fields at line {}.\n", i + 1); + std::process::exit(1); + } + let new = fc < FIELDS.len() && line.starts_with(&format!("{}:", FIELDS[fc])); + if new { + if line == format!("{}:", FIELDS[fc]) { + this_group.push(String::new()); + } else { + for k in FIELDS[fc].len() + 1..INDENT.len() { + if k >= line.as_bytes().len() || line.as_bytes()[k] != b' ' { + eprintln!( + "\nIllegal indentation or trailing blanks at line {}:\n{}\n", + i + 1, + line, + ); + std::process::exit(1); + } + } + this_group.push(stringme(&line.as_bytes()[INDENT.len()..])); + } + fc += 1; + if fc > FIELDS.len() { + eprintln!("\nToo many fields at line {}.\n", i + 1); + std::process::exit(1); + } + } else { + if !line.starts_with(INDENT) { + eprintln!( + "\nIllegal field or indentation rule violation at line {}:\n{}\n", + i + 1, + line, + ); + std::process::exit(1); + } + let n = this_group.len(); + if !this_group[n - 1].ends_with('\n') { + this_group[n - 1] += "\n"; + } + write!(this_group[n - 1], " {}", line.after(INDENT)).unwrap(); + if FIELDS[fc - 1] == "code" { + this_group[n - 1] += "\n"; + } + } + } + } 
+ + // Form variables. + + let mut vars = Vec::::new(); + for g in groups.iter() { + vars.push(Variable { + name: g[0].clone(), + inputs: g[1].clone(), + limits: g[2].clone(), + class: g[3].clone(), + level: g[4].clone(), + val: g[5].clone(), + doc: g[6].clone(), + brief: g[7].clone(), + page: g[8].clone(), + avail: g[9].clone(), + notes: g[10].clone(), + code: g[11].clone(), + }); + } + + // Test for duplicated entries. + + for i in 1..vars.len() { + if vars[i].name == vars[i - 1].name { + eprintln!( + "\nThe variable name {} appears more than once.\n", + vars[i].name + ); + std::process::exit(1); + } + } + + // Test upper-case rule. + + let classes = ["BC", "DATASET", "FEATURE", "INFO", "NAME", "REGA", "VARDEF"]; + for var in &vars { + let chars = var.name.as_bytes(); + let mut j = 0; + while j < chars.len() { + if chars[j] < b'A' || chars[j] > b'Z' { + j += 1; + } else { + let mut k = j + 1; + while k < chars.len() { + if chars[k] < b'A' || chars[k] > b'Z' { + break; + } + k += 1; + } + let s: &str = &var.name[j..k]; + if !classes.contains(&s) { + eprintln!( + "\nFound illegal class {} in variable name {}.\n", + s, var.name + ); + std::process::exit(1); + } + j = k; + } + } + } + + // Return. + + vars +} diff --git a/enclone_vars/src/vars b/enclone_vars/src/vars new file mode 100644 index 000000000..8ffb72078 --- /dev/null +++ b/enclone_vars/src/vars @@ -0,0 +1,3970 @@ +# Copyright (c) 2021 10X Genomics, Inc. All rights reserved. +# +# This is the start of a variable registry for enclone. Every variable must specify all the +# following fields, in the same order. +# +# The exact formatting as exhibited by existing variables # must be followed. This includes +# indentation. +# +# name: name of the variable, which may include special strings so as to represent +# variable classes; this excludes the chain number suffix (1,...) for cvars. +# +# Upper case characters always define variable class strings. 
The following are +# recognized: +# BC user-defined variable from BC or META/bc +# DATASET name of a dataset +# FEATURE name of a feature +# INFO user-defined variable from INFO +# NAME name, see n_ at enclone help lvars +# REGA regular expression for amino acids +# VARDEF user-defined variable from VAR_DEF +# +# Inclusive integer ranges: these have the form {a..b} or {a..} or {..b} or {..}. Up to +# three ranges are allowed in a variable. +# +# inputs: data structures needed as input +# ex: ExactClonotype = exact subclonotype structure +# mid: usize = chain id in clonotype +# refdata +# fm: gex_info.feature_metrics = feature values by barcode +# col +# exacts +# exact_clonotypes +# rsi +# +# limits: limitations on availability of the variable +# +# class: lvar or cvar +# +# level: scope of the variable, one of +# cell, cell-exact, exact, clono, dataset, global +# +# val: the type of the value, one of +# positive_integer, float[0,100].precision(1) +# +# doc: documentation +# +# brief: brief documentation, for use in compact html tables +# +# page: reference page +# +# avail: availability, public or private or ? (temp) +# +# notes: things to clarify +# +# code: code for computing the variable value +# There are one or two special lines at the end with form like +# exact: +# where exact can be another keyword. 
+ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: BC +inputs: lvar_vdj +limits: +class: lvar +level: cell +val: string +doc: TBD +brief: user-defined variable from BC or META/bc +page: enclone help input +avail: public +notes: +code: let mut r = Vec::::new(); + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = ex.clones[l][0].barcode.clone(); + let mut val = String::new(); + let alt = &ctl.origin_info.alt_bc_fields[li]; + for j in 0..alt.len() { + if alt[j].0 == *var && alt[j].1.contains_key(&bc.clone()) { + val = alt[j].1[&bc.clone()].clone(); + } + } + r.push(val); + } + cell: r +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: DATASET_barcode +inputs: ? +limits: only implemented for parseable output +class: lvar +level: cell +val: string +doc: TBD +brief: barcode from the given dataset (or null) +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: DATASET_barcodes +inputs: ? +limits: only implemented for parseable output +class: lvar +level: exact +val: string +doc: TBD +brief: barcodes from the given dataset +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE +inputs: ? +limits: +class: lvar +level: cell-exact +val: float +doc: TBD +brief: median count for a gene expression or antibody feature +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_% +inputs: ? 
+limits: +class: lvar +level: cell-exact +val: float[0,100].precision(1) +doc: TBD +brief: median percent of total expression for a particular gene +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_%_cell +inputs: ? +limits: +class: lvar +level: cell +val: float[0,100].precision(1) +doc: TBD +brief: percent of total expression for a particular gene +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_cell +inputs: ? +limits: +class: lvar +level: cell +val: nonnegative_integer +doc: TBD +brief: count for a gene expression or antibody feature +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_cellular_r +inputs: fm +limits: +class: lvar +level: dataset +val: float[0,100].precision(1) +doc: For a given feature, the percent of reads that are identified by the + cellranger pipeline as lying in a cell. +brief: percent of feature reads in cells, for a given dataset +page: enclone help display +avail: ? +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_cellular_u +inputs: fm +limits: +class: lvar +level: dataset +val: float[0,100].precision(1) +doc: For a given feature, the percent of UMIs that are identified by the + cellranger pipeline as lying in a cell. +brief: percent of feature UMIs in cells, for a given dataset +page: enclone help display +avail: ? +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_max +inputs: ? 
+limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: maximum count for a feature +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_mean +inputs: ? +limits: +class: lvar +level: exact +val: float +doc: TBD +brief: mean count for a feature (same as μ for mean) +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_min +inputs: ? +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: minimum count for a feature +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_sum +inputs: ? +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: sum of counts for a feature (same as Σ for sum) +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_Σ +inputs: ? +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: sum of counts for a feature (same as sum for Σ) +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: FEATURE_μ +inputs: ? 
+limits: +class: lvar +level: exact +val: float +doc: TBD +brief: mean count for a feature (same as mean for μ) +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: INFO +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: string +doc: TBD +brief: user-defined variable from INFO +page: enclone help input +avail: public +notes: +code: let mut val = String::new(); + for q in 0..ctl.gen_opt.info_fields.len() { + if *var == ctl.gen_opt.info_fields[q] + && ex.share.len() == 2 && ex.share[0].left != ex.share[1].left { + let mut tag = String::new(); + for j in 0..ex.share.len() { + if ex.share[j].left { + tag += strme(&ex.share[j].seq); + } + } + tag += "_"; + for j in 0..ex.share.len() { + if !ex.share[j].left { + tag += strme(&ex.share[j].seq); + } + } + if ctl.gen_opt.info_data.contains_key(&tag) { + val = ctl.gen_opt.info_data[&tag][q].clone(); + } + } + } + exact: val +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: VARDEF +inputs: ? 
+limits: +class: lvar +level: cell-exact +val: string +doc: TBD +brief: user-defined variable from VAR_DEF +page: enclone help input +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: aa% +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float[0,100].precision(1) +doc: TBD +brief: amino acid identity with donor reference +page: enclone help cvars +avail: public +notes: +code: let xm = &ex.share[mid]; + let mut diffs = 0; + let mut denom = 0; + let aa_seq = &xm.aa_mod_indel; + let mut vref = refdata.refs[xm.v_ref_id].to_ascii_vec(); + if xm.v_ref_id_donor_alt_id.is_some() { + vref = dref[xm.v_ref_id_donor.unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[xm.j_ref_id].to_ascii_vec(); + let z = 3 * aa_seq.len() + 1; + for p in 0..aa_seq.len() { + if aa_seq[p] == b'-' { + diffs += 1; + denom += 1; + continue; + } + if 3 * p + 3 <= vref.len() - ctl.heur.ref_v_trim { + denom += 1; + if aa_seq[p] != codon_to_aa(&vref[3 * p..3 * p + 3]) { + diffs += 1; + } + } + if 3 * p > z - (jref.len() - ctl.heur.ref_j_trim) + 3 { + denom += 1; + if aa_seq[p] + != codon_to_aa(&jref[jref.len() - (z - 3 * p)..jref.len() - (z - 3 * p) + 3]) + { + diffs += 1; + } + } + } + exact: format!("{:.1}", percent_ratio(denom - diffs, denom)) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: allele +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: nonnegative_integer +doc: TBD +brief: donor allele for V region id +page: enclone help cvars +avail: public +notes: +code: let mut allele = 0; + if ex.share[mid].v_ref_id_donor_alt_id.is_some() { + allele = ex.share[mid].v_ref_id_donor_alt_id.unwrap() + 1; + } + exact: format!("{}", allele) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: allele_d +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD 
+brief: donor allele evidence +page: enclone help cvars +avail: public +notes: +code: let mut refs = Vec::>::new(); + let alt_refs = &allele_data.alt_refs; + refs.push(refdata.refs[ex.share[mid].v_ref_id].to_ascii_vec()); + for i in 0..alt_refs.len() { + // The following does not work correctly if an exact subclonotype contains cells + // from more than one donor. But that is extremely rare. + if ex.clones[0][0].donor_index.is_some() { + if alt_refs[i].0 == ex.clones[0][0].donor_index.unwrap() + && alt_refs[i].1 == ex.share[mid].v_ref_id { + refs.push(alt_refs[i].2.to_ascii_vec()); + } + } + } + let mut m = refs[0].len(); + for i in 1..refs.len() { + m = min(m, refs[i].len()); + } + let mut ps = Vec::::new(); + let mut variant = Vec::>::new(); + for p in 0..m { + let mut bases = Vec::::new(); + for i in 0..refs.len() { + bases.push(refs[i][p]); + } + let mut bases_sorted = bases.clone(); + unique_sort(&mut bases_sorted); + if bases_sorted.len() > 1 { + ps.push(p); + variant.push(bases); + } + } + let mut xs = Vec::::new(); + for i in 0..refs.len() { + let mut x = String::new(); + for j in 0..ps.len() { + x.push(variant[j][i] as char); + } + xs.push(x); + } + let mut me = String::new(); + for j in 0..ps.len() { + let base = ex.share[mid].seq_del_amino[ps[j]]; + me.push(base as char); + } + let mut details = String::new(); + if ps.len() > 0 { + details = format!("{me} : {}", xs.iter().format(",")); + } + exact: details +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: barcode +inputs: ? +limits: only implemented for parseable output, and as an automatic field for clonotype tables in + PER_CELL mode +class: lvar +level: cell +val: string +doc: TBD +brief: barcode of the cell +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: barcodes +inputs: ? 
+limits: only implemented for parseable output +class: lvar +level: exact +val: string +doc: TBD +brief: barcodes for the exact subclonotype +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdiff +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: differences of const region with universal reference +page: enclone help cvars +avail: public +notes: +code: let cstart = ex.share[mid].j_stop; + let clen = ex.share[mid].full_seq.len() - cstart; + let cid = ex.share[mid].c_ref_id; + let mut cdiff = String::new(); + let mut ndiffs = 0; + if cid.is_some() { + let r = &refdata.refs[cid.unwrap()]; + // Decode the reference once, rather than once per compared base. + let rbases = r.to_ascii_vec(); + let mut extra = 0; + if clen > r.len() { + extra = clen - r.len(); + } + for i in 0..min(clen, r.len()) { + let tb = ex.share[mid].full_seq[cstart + i]; + let rb = rbases[i]; + if tb != rb { + ndiffs += 1; + // Only the first five differences are spelled out. + if ndiffs <= 5 { + cdiff += &format!("{}{}", i, tb as char); + } + } + } + if ndiffs > 5 { + cdiff += "..."; + } + if extra > 0 { + cdiff += &format!("+{}", extra); + } + } else if clen > 0 { + cdiff = format!("+{}", clen); + } + exact: cdiff +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr3_aa_conp +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: CDR3 amino acid consensus, symbols at variants +page: enclone help cvars +avail: public +notes: +code: exact: cdr3_aa_con("p", col, exacts, exact_clonotypes, rsi) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr3_aa_conx +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: CDR3 amino acid clonotype consensus, Xs at variants +page: enclone help cvars +avail: public +notes: +code: exact: cdr3_aa_con("x", col, exacts, exact_clonotypes, rsi) 
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr3_start +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: positive_integer +doc: TBD +brief: nucleotide start of CDR3 sequence on full sequence +page: enclone help parseable +avail: public +notes: Check if type is really positive integer. +code: exact: ex.share[mid].cdr3_start.to_string() +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr{1..2}_aa_ref +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: CDR* amino acid sequence for universal reference +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + if arg1 == 1 { + if x.cdr1_start.is_some() + && x.fr2_start.is_some() + && x.cdr1_start.unwrap() <= x.fr2_start.unwrap() + { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.cdr1_start.unwrap()..x.fr2_start.unwrap()] + .to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + } else if x.cdr2_start.is_some() + && x.fr3_start.is_some() + && x.cdr2_start.unwrap() <= x.fr3_start.unwrap() + { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.cdr2_start.unwrap()..x.fr3_start.unwrap()] + .to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr{1..2}_dna_ref +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: CDR* nucleotide sequence for universal reference +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + if arg1 == 1 { + if x.cdr1_start.is_some() + && x.fr2_start.is_some() + && x.cdr1_start.unwrap() <= x.fr2_start.unwrap() + { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.cdr1_start.unwrap()..x.fr2_start.unwrap()] + .to_vec(); + y = stringme(&dna); + } + } else if 
x.cdr2_start.is_some() + && x.fr3_start.is_some() + && x.cdr2_start.unwrap() <= x.fr3_start.unwrap() + { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.cdr2_start.unwrap()..x.fr3_start.unwrap()] + .to_vec(); + y = stringme(&dna); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr{1..3}_aa +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: CDR* amino acid sequence +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let c; + if arg1 == 1 { + c = get_cdr1(x, 0, 0); + if c.is_some() { + y = stringme(&aa_seq(c.unwrap().as_bytes(), 0)); + } + } else if arg1 == 2 { + c = get_cdr2(x, 0, 0); + if c.is_some() { + y = stringme(&aa_seq(c.unwrap().as_bytes(), 0)); + } + } else { + y = x.cdr3_aa.clone(); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr{1..3}_aa_north +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: North version of CDR* amino acid sequence +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let c; + if arg1 == 1 { + let (mut left, mut right) = (0, 0); + if x.left { + left = 3; + right = 3; + } + c = get_cdr1(x, left, right); + if c.is_some() { + y = stringme(&aa_seq(c.unwrap().as_bytes(), 0)); + } + } else if arg1 == 2 { + let (left, right); + if ex.share[mid].left { + left = 2; + right = 3; + } else { + left = 1; + right = 0; + } + c = get_cdr2(x, left, right); + if c.is_some() { + y = stringme(&aa_seq(c.unwrap().as_bytes(), 0)); + } + } else { + c = get_cdr3(x, -1, -1); + if c.is_some() { + y = stringme(&aa_seq(c.unwrap().as_bytes(), 0)); + } + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr{1..3}_aa_{..}_{..}_ext +inputs: 
cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: CDR* region with specified extension length +page: enclone help cvars +avail: public +notes: +code: let (left, right) = (arg2 * 3, arg3 * 3); + let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let mut dna = Vec::::new(); + if arg1 == 1 { + if x.cdr1_start.is_some() + && x.fr2_start.is_some() + && x.cdr1_start.unwrap() <= x.fr2_start.unwrap() + && x.cdr1_start.unwrap() as i64 - left >= 0 + && x.cdr1_start.unwrap() as i64 - left < x.seq_del_amino.len() as i64 + && x.fr2_start.unwrap() as i64 + right > 0 + && x.fr2_start.unwrap() as i64 + right <= x.seq_del_amino.len() as i64 + { + for p in x.cdr1_start.unwrap() as i64 - left.. + x.fr2_start.unwrap() as i64 + right { + let p = p as usize; + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa)?; + y = stringme(&aa_seq(&dna, 0)); + } + } else if arg1 == 2 { + if x.cdr2_start.is_some() + && x.fr3_start.is_some() + && x.cdr2_start.unwrap() <= x.fr3_start.unwrap() + && x.cdr2_start.unwrap() as i64 - left >= 0 + && x.cdr2_start.unwrap() as i64 - left < x.seq_del_amino.len() as i64 + && x.fr3_start.unwrap() as i64 + right > 0 + && x.fr3_start.unwrap() as i64 + right <= x.seq_del_amino.len() as i64 + { + for p in x.cdr2_start.unwrap() as i64 - left.. 
+ x.fr3_start.unwrap() as i64 + right { + let p = p as usize; + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa)?; + y = stringme(&aa_seq(&dna, 0)); + } + } else if x.cdr3_start as i64 - left >= 0 + && x.cdr3_start as i64 - left < x.seq_del_amino.len() as i64 + && x.cdr3_start as i64 + 3 * x.cdr3_aa.len() as i64 + right > 0 + && x.cdr3_start as i64 + 3 * x.cdr3_aa.len() as i64 + right + <= x.seq_del_amino.len() as i64 + { + for p in + x.cdr3_start as i64 - left.. + x.cdr3_start as i64 + 3 * x.cdr3_aa.len() as i64 + right + { + let p = p as usize; + for j in 0..x.ins.len() { + if x.ins[j].0 == p { + let mut z = x.ins[j].1.clone(); + dna.append(&mut z); + } + } + if x.seq_del_amino[p] != b'-' { + dna.push(x.seq_del_amino[p]); + } + } + test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa)?; + y = stringme(&aa_seq(&dna, 0)); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr{1..3}_dna +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: CDR* nucleotide sequence +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let c; + if arg1 == 1 { + c = get_cdr1(x, 0, 0); + } else if arg1 == 2 { + c = get_cdr2(x, 0, 0); + } else { + c = Some(x.cdr3_dna.clone()); + } + if c.is_some() { + y = c.unwrap(); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cdr{1..3}_len +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: positive_integer +doc: TBD +brief: length of CDR* amino acid sequence +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let c; + if arg1 == 1 { + c = get_cdr1(x, 0, 
0); + } else if arg1 == 2 { + c = get_cdr2(x, 0, 0); + } else { + c = Some(x.cdr3_dna.clone()); + } + if c.is_some() { + y = format!("{}", c.unwrap().len() / 3); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cigar +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: CIGAR string defining edit of the V..J contig sequence +page: enclone help cvars +avail: public +notes: +code: let vref = refdata.refs[rsi.vids[col]].to_ascii_vec(); + let mut dref = Vec::::new(); + if rsi.dids[col].is_some() { + dref = refdata.refs[rsi.dids[col].unwrap()].to_ascii_vec(); + } + let d2ref = Vec::::new(); + let jref = refdata.refs[rsi.jids[col]].to_ascii_vec(); + let td = &ex.share[mid]; + let tig = &td.seq; + let ops = align_to_vdj_ref( + tig, + &vref, + &dref, + &d2ref, + &jref, + "", // drefname + ex.share[mid].left, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + ) + .0; + exact: cigar(&ops, 0, tig.len(), tig.len()) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: clen +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: length of observed constant region +page: enclone help cvars +avail: public +notes: +code: exact: format!("{}", ex.share[mid].full_seq.len() - ex.share[mid].j_stop) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: clonotype_id +inputs: ? 
+limits: only implemented for parseable output +class: lvar +level: clono +val: positive_integer +doc: TBD +brief: identifier of clonotype within clonotype group +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: clonotype_ncells +inputs: lvar_vdj +limits: +class: lvar +level: clono +val: positive_integer +doc: TBD +brief: number of cells in the clonotype +page: enclone help parseable +avail: public +notes: +code: let mut n = 0; + for u in exacts.iter() { + n += exact_clonotypes[*u].ncells(); + } + exact: format!("{}", n) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: clust +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: positive_integer +doc: TBD +brief: cluster id +page: UNDOCUMENTED +avail: private +notes: +code: let mut clust = Vec::<usize>::new(); + for j in 0..ex.clones.len() { + let mut cid = 0; + let bc = &ex.clones[j][0].barcode; + let li = ex.clones[j][0].dataset_index; + if gex_info.cluster[li].contains_key(&bc.clone()) { + cid = gex_info.cluster[li][&bc.clone()]; + } + clust.push(cid); + } + let mut clustf = Vec::<String>::new(); + for x in clust.iter() { + clustf.push(format!("{}", x)); + } + clust.sort_unstable(); + cell: clustf + exact: abbrev_list(&clust) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: comp +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: CDR3 complexity number +page: enclone help cvars +avail: public +notes: +code: let (comp, _edit) = comp_edit(ex, mid, col, refdata, dref, rsi); + exact: format!("{}", comp) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: const +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: constant region name +page: enclone help cvars 
+avail: public +notes: +code: let mut constx = Vec::::new(); + let cid = ex.share[mid].c_ref_id; + if cid.is_some() { + constx.push(refdata.name[cid.unwrap()].clone()); + } else { + constx.push("?".to_string()); + } + unique_sort(&mut constx); + // This is overcomplicated because there is now at most one + // const entry per exact subclonotype. + exact: format!("{}", constx.iter().format(",")) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: const_id +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: positive_integer_or_null +doc: TBD +brief: numerical identifier of constant region (or null) +page: enclone help parseable +avail: public +notes: +code: let mut const_id = String::new(); + if ex.share[mid].c_ref_id.is_some() { + const_id = format!("{}", refdata.id[ex.share[mid].c_ref_id.unwrap()]); + } + exact: const_id +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: count_REGA +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative_integer +doc: TBD +brief: count amino acid motifs +page: enclone help lvars +avail: public +notes: +code: let mut n = 0; + for j in 0..ex.share.len() { + let aa = aa_seq(&ex.share[j].seq, 0); // seems inefficient + n += reg.find_iter(strme(&aa)).count(); + } + cell: vec![format!("{}", n); ex.ncells()] + exact: format!("{}", n) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: count_cdr_REGA +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative_integer +doc: TBD +brief: count amino acid motifs using only cdr +page: enclone help lvars +avail: public +notes: +code: let mut n = 0; + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() && ex.share[j].fr2_start.is_some() { + let cdr1 = ex.share[j].cdr1_start.unwrap(); + let fwr2 = ex.share[j].fr2_start.unwrap(); + if cdr1 < fwr2 { + let aa = 
aa_seq(&ex.share[j].seq[cdr1..fwr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + if ex.share[j].cdr2_start.is_some() && ex.share[j].fr3_start.is_some() { + let cdr2 = ex.share[j].cdr2_start.unwrap(); + let fwr3 = ex.share[j].fr3_start.unwrap(); + if cdr2 < fwr3 { + let aa = aa_seq(&ex.share[j].seq[cdr2..fwr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + let cdr3 = ex.share[j].cdr3_start; + let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0); + n += reg.find_iter(strme(&aa)).count(); + } + cell: vec![format!("{}", n); ex.ncells()] + exact: format!("{}", n) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: count_cdr{1..3}_REGA +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative_integer +doc: TBD +brief: count amino acid motifs using only the given CDR +page: enclone help lvars +avail: public +notes: +code: let mut n = 0; + if arg1 == 1 { + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() && ex.share[j].fr2_start.is_some() { + let cdr1 = ex.share[j].cdr1_start.unwrap(); + let fwr2 = ex.share[j].fr2_start.unwrap(); + if cdr1 < fwr2 { + let aa = aa_seq(&ex.share[j].seq[cdr1..fwr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else if arg1 == 2 { + for j in 0..ex.share.len() { + if ex.share[j].cdr2_start.is_some() && ex.share[j].fr3_start.is_some() { + let cdr2 = ex.share[j].cdr2_start.unwrap(); + let fwr3 = ex.share[j].fr3_start.unwrap(); + if cdr2 < fwr3 { + let aa = aa_seq(&ex.share[j].seq[cdr2..fwr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else { + for j in 0..ex.share.len() { + let cdr3 = ex.share[j].cdr3_start; + let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + cell: vec![format!("{}", n); ex.ncells()] + exact: format!("{}", n) 
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: count_fwr_REGA +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative_integer +doc: TBD +brief: count amino acid motifs using only FWR +page: enclone help lvars +avail: public +notes: +code: let mut n = 0; + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() { + let fwr1 = ex.share[j].fr1_start; + let cdr1 = ex.share[j].cdr1_start.unwrap(); + if fwr1 < cdr1 { + let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + if ex.share[j].fr2_start.is_some() && ex.share[j].cdr2_start.is_some() { + let fwr2 = ex.share[j].fr2_start.unwrap(); + let cdr2 = ex.share[j].cdr2_start.unwrap(); + if fwr2 < cdr2 { + let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + if ex.share[j].fr3_start.is_some() { + let fwr3 = ex.share[j].fr3_start.unwrap(); + let cdr3 = ex.share[j].cdr3_start; + if fwr3 < cdr3 { + let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[fwr4..], 0); + n += reg.find_iter(strme(&aa)).count(); + } + cell: vec![format!("{}", n); ex.ncells()] + exact: format!("{}", n) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: count_fwr{1..4}_REGA +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative_integer +doc: TBD +brief: count amino acid motifs using only the given FWR +page: enclone help lvars +avail: public +notes: +code: let mut n = 0; + if arg1 == 1 { + for j in 0..ex.share.len() { + if ex.share[j].cdr1_start.is_some() { + let fwr1 = ex.share[j].fr1_start; + let cdr1 = ex.share[j].cdr1_start.unwrap(); + if fwr1 < cdr1 { + let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0); + n += 
reg.find_iter(strme(&aa)).count(); + } + } + } + } else if arg1 == 2 { + for j in 0..ex.share.len() { + if ex.share[j].fr2_start.is_some() && ex.share[j].cdr2_start.is_some() { + let fwr2 = ex.share[j].fr2_start.unwrap(); + let cdr2 = ex.share[j].cdr2_start.unwrap(); + if fwr2 < cdr2 { + let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else if arg1 == 3 { + for j in 0..ex.share.len() { + if ex.share[j].fr3_start.is_some() { + let fwr3 = ex.share[j].fr3_start.unwrap(); + let cdr3 = ex.share[j].cdr3_start; + if fwr3 < cdr3 { + let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + } + } else { + for j in 0..ex.share.len() { + let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len(); + let aa = aa_seq(&ex.share[j].seq[fwr4..], 0); + n += reg.find_iter(strme(&aa)).count(); + } + } + cell: vec![format!("{}", n); ex.ncells()] + exact: format!("{}", n) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: cred +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: float +doc: TBD +brief: median credibility assessed using GEX data +page: enclone help lvars +avail: public +notes: +code: let mut credsx = Vec::::new(); + for l in 0..ex.clones.len() { + let bc = &ex.clones[l][0].barcode; + let li = ex.clones[l][0].dataset_index; + if gex_info.pca[li].contains_key(&bc.clone()) { + let mut creds = 0; + let mut z = Vec::<(f64, String)>::new(); + let x = &gex_info.pca[li][&bc.clone()]; + for y in gex_info.pca[li].iter() { + let mut dist2 = 0.0; + for m in 0..x.len() { + dist2 += (y.1[m] - x[m]) * (y.1[m] - x[m]); + } + z.push((dist2, y.0.clone())); + } + z.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let top = n_vdj_gex[li]; + for i in 0..top { + if bin_member(&vdj_cells[li], &z[i].1) { + creds += 1; + } + } + let pc = 100.0 * creds as f64 / top as f64; + credsx.push(pc); + } else { + credsx.push(0.0); + 
} + } + let credsx_unsorted = credsx.clone(); + credsx.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mut r = Vec::::new(); + for j in 0..credsx_unsorted.len() { + r.push(format!("{:.1}", credsx_unsorted[j])); + } + cell: r + exact: format!("{:.1}", median_f64(&credsx)) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d1_name +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: optimal D segment name +page: enclone help cvars +avail: public +notes: +code: let mut opt_name = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, dref, &mut scores, &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col]); + let mut opt = Vec::new(); + if !ds.is_empty() { + opt = ds[0].clone(); + } + if opt.is_empty() { + opt_name = "none".to_string(); + } else { + for i in 0..opt.len() { + if i > 0 { + opt_name += ":"; + } + opt_name += &refdata.name[opt[i]]; + } + } + } + exact: opt_name +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d1_score +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float +doc: TBD +brief: score of optimal D segment +page: enclone help cvars +avail: public +notes: +code: let mut score = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, dref, &mut scores, &mut ds, + ctl.gen_opt.jscore_match, + 
ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col]); + // Report the score of the top-ranked D assignment itself. The gap to + // the runner-up is a different quantity and is what d_delta reports. + let mut best = 0.0; + if !scores.is_empty() { + best = scores[0]; + } + score = format!("{:.1}", best) + } + exact: score +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d2_name +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: second best D segment name +page: enclone help cvars +avail: public +notes: +code: let mut opt2_name = String::new(); + if ex.share[mid].left { + let mut scores = Vec::<f64>::new(); + let mut ds = Vec::<Vec<usize>>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, dref, &mut scores, &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col]); + let mut opt2 = Vec::new(); + if ds.len() > 1 { + opt2 = ds[1].clone(); + } + if opt2.is_empty() { + opt2_name = "none".to_string(); + } else { + for i in 0..opt2.len() { + if i > 0 { + opt2_name += ":"; + } + opt2_name += &refdata.name[opt2[i]]; + } + } + } + exact: opt2_name +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d2_score +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float +doc: TBD +brief: score of second best D segment +page: enclone help cvars +avail: public +notes: +code: let mut scorex = String::new(); + if ex.share[mid].left { + let mut scores = Vec::<f64>::new(); + let mut ds = Vec::<Vec<usize>>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, dref, &mut scores, &mut ds, + 
ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col]); + let mut score = 0.0; + if scores.len() > 1 { + score = scores[1]; + } + scorex = format!("{:.1}", score) + } + exact: scorex +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_delta +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float +doc: TBD +brief: score delta between top two D gene assignments +page: enclone help cvars +avail: public +notes: +code: let mut del = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, dref, &mut scores, &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col]); + let mut delta = 0.0; + if scores.len() > 1 { + delta = scores[0] - scores[1]; + } + del = format!("{:.1}", delta) + } + exact: del +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_donor +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: distance from donor reference +page: enclone help cvars +avail: public +notes: +code: let vid = ex.share[mid].v_ref_id; + let mut vref = refdata.refs[vid].to_ascii_vec(); + if rsi.vpids[col].is_some() { + vref = dref[rsi.vpids[col].unwrap()].nt_sequence.clone(); + } + let jid = ex.share[mid].j_ref_id; + let jref = &refdata.refs[jid].to_ascii_vec(); + let tig = &ex.share[mid].seq_del; + let n = tig.len(); + let mut diffs = 0; + for p in 0..n { + if tig[p] == b'-' { + continue; + } + if p < vref.len() - ctl.heur.ref_v_trim && 
tig[p] != vref[p] { + diffs += 1; + } else if p >= n - (jref.len() - ctl.heur.ref_j_trim) + && tig[p] != jref[jref.len() - (n - p)] + { + diffs += 1; + } + } + exact: format!("{}", diffs) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_frame +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: reading frame of D segment (0, 1, 2 or null) +page: enclone help parseable +avail: public +notes: +code: let mut d_frame = String::new(); + if ex.share[mid].d_start.is_some() { + d_frame = format!( + "{}", + (ex.share[mid].d_start.unwrap() - ex.share[mid].v_start) % 3 + ); + } + exact: d_frame +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_id +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: positive_integer +doc: TBD +brief: D region id +page: enclone help parseable +avail: public +notes: +code: let did = if rsi.dids[col].is_some() { + format!("{}", refdata.id[rsi.dids[col].unwrap()]) + } else { + String::new() + }; + exact: did +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_inconsistent_% +inputs: ? +limits: +class: lvar +level: global +val: float[0,100].precision(1) +doc: TBD +brief: inconsistency percent for D gene assignment +page: d_genes.html +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_inconsistent_n +inputs: ? 
+limits: +class: lvar +level: global +val: positive_integer +doc: TBD +brief: sample size for computation of d_inconsistent_% +page: enclone help lvars +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_name +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: D region name +page: enclone help parseable +avail: public +notes: +code: let dname = if rsi.dids[col].is_some() { + refdata.name[rsi.dids[col].unwrap()].clone() + } else { + String::new() + }; + exact: dname +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_start +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: start of D on full nucleotide sequence (or null) +page: enclone help parseable +avail: public +notes: Starts at 0 or 1? +code: let mut d_start = String::new(); + if ex.share[mid].d_start.is_some() { + d_start = format!("{}", ex.share[mid].d_start.unwrap()); + } + exact: d_start +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_univ +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: distance from universal reference +page: enclone help cvars +avail: public +notes: +code: let vid = ex.share[mid].v_ref_id; + let vref = &refdata.refs[vid].to_ascii_vec(); + let jid = ex.share[mid].j_ref_id; + let jref = &refdata.refs[jid].to_ascii_vec(); + let tig = &ex.share[mid].seq_del; + let n = tig.len(); + let mut diffs = 0; + for p in 0..n { + if tig[p] == b'-' { + continue; + } + if p < vref.len() - ctl.heur.ref_v_trim && tig[p] != vref[p] { + diffs += 1; + } else if p >= n - (jref.len() - ctl.heur.ref_j_trim) + && tig[p] != jref[jref.len() - (n - p)] + { + diffs += 1; + } + } + exact: format!("{}", diffs) 
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: d_Δ +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float +doc: TBD +brief: score delta between top two D gene assignments +page: enclone help cvars +avail: public +notes: +code: let mut del = String::new(); + if ex.share[mid].left { + let mut scores = Vec::::new(); + let mut ds = Vec::>::new(); + let mid = rsi.mat[col][u].unwrap(); + opt_d( + ex.share[mid].v_ref_id, + ex.share[mid].j_ref_id, + &ex.share[mid].seq_del, + &ex.share[mid].annv, + &ex.share[mid].cdr3_aa, + refdata, dref, &mut scores, &mut ds, + ctl.gen_opt.jscore_match, + ctl.gen_opt.jscore_mismatch, + ctl.gen_opt.jscore_gap_open, + ctl.gen_opt.jscore_gap_extend, + ctl.gen_opt.jscore_bits_multiplier, + rsi.vpids[col]); + let mut delta = 0.0; + if scores.len() > 1 { + delta = scores[0] - scores[1]; + } + del = format!("{:.1}", delta) + } + exact: del +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: datasets +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: string +doc: TBD +brief: dataset names +page: enclone help lvars +avail: public +notes: +code: let mut datasets = Vec::::new(); + for j in 0..ex.clones.len() { + datasets.push(ctl.origin_info.dataset_id[ex.clones[j][0].dataset_index].clone()); + } + let mut datasets_unique = datasets.clone(); + unique_sort(&mut datasets_unique); + cell: datasets + exact: format!("{}", datasets_unique.iter().format(",")) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: dna% +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float[0,100].precision(1) +doc: TBD +brief: nucleotide identity with donor reference +page: enclone help cvars +avail: public +notes: +code: let xm = &ex.share[mid]; + let mut diffs = 0; + let mut denom = 0; + let seq = &xm.seq_del_amino; + let mut vref = 
refdata.refs[xm.v_ref_id].to_ascii_vec(); + /* test the same Option that is unwrapped on the next line */ + if xm.v_ref_id_donor.is_some() { + vref = dref[xm.v_ref_id_donor.unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[xm.j_ref_id].to_ascii_vec(); + let z = seq.len(); + for p in 0..z { + let b = seq[p]; + if b == b'-' { + diffs += 1; + denom += 1; + continue; + } + if p < vref.len() - ctl.heur.ref_v_trim { + denom += 1; + if b != vref[p] { + diffs += 1; + } + } + if p >= z - (jref.len() - ctl.heur.ref_j_trim) { + denom += 1; + if b != jref[jref.len() - (z - p)] { + diffs += 1; + } + } + } + exact: format!("{:.1}", percent_ratio(denom - diffs, denom)) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: donors +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: string +doc: TBD +brief: donor names +page: enclone help lvars +avail: public +notes: +code: let mut donors = Vec::::new(); + for j in 0..ex.clones.len() { + if ex.clones[j][0].donor_index.is_some() { + donors + .push(ctl.origin_info.donor_list[ex.clones[j][0].donor_index.unwrap()] + .clone()); + } else { + donors.push("?".to_string()); + } + } + let donors_unsorted = donors.clone(); + unique_sort(&mut donors); + cell: donors_unsorted + exact: format!("{}", donors.iter().format(",")) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: dref +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: nucleotide distance to donor reference +page: enclone help lvars +avail: public +notes: +code: + let mut diffs = 0; + for m in 0..cols { + if mat[m][u].is_some() { + let r = mat[m][u].unwrap(); + let seq = &ex.share[r].seq_del_amino; + let mut vref = refdata.refs[rsi.vids[m]].to_ascii_vec(); + if rsi.vpids[m].is_some() { + vref = dref[rsi.vpids[m].unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[rsi.jids[m]].to_ascii_vec(); + let z = seq.len(); + for p in 0..z { + let b = seq[p]; + if
p < vref.len() - ctl.heur.ref_v_trim && b != vref[p] { + diffs += 1; + } + if p >= z - (jref.len() - ctl.heur.ref_j_trim) + && b != jref[jref.len() - (z - p)] + { + diffs += 1; + } + } + } + } + exact: format!("{}", diffs) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: dref_aa +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: amino acid distance to donor reference +page: enclone help lvars +avail: public +notes: +code: let mut diffs = 0; + for m in 0..cols { + if mat[m][u].is_some() { + let r = mat[m][u].unwrap(); + let aa_seq = &ex.share[r].aa_mod_indel; + let mut vref = refdata.refs[rsi.vids[m]].to_ascii_vec(); + if rsi.vpids[m].is_some() { + vref = dref[rsi.vpids[m].unwrap()].nt_sequence.clone(); + } + let jref = refdata.refs[rsi.jids[m]].to_ascii_vec(); + let z = 3 * aa_seq.len() + 1; + for p in 0..aa_seq.len() { + if aa_seq[p] == b'-' { + diffs += 1; + continue; + } + if 3 * p + 3 <= vref.len() - ctl.heur.ref_v_trim + && aa_seq[p] != codon_to_aa(&vref[3 * p..3 * p + 3]) + { + diffs += 1; + } + if 3 * p > z - (jref.len() - ctl.heur.ref_j_trim) + 3 + && aa_seq[p] + != codon_to_aa( + &jref[jref.len() - (z - 3 * p)..jref.len() - (z - 3 * p) + 3], + ) + { + diffs += 1; + } + } + } + } + exact: format!("{}", diffs) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: dref_max +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: nucleotide distance to donor reference, max over chains +page: enclone help lvars +avail: public +notes: +code: + let mut mx = 0; + for m in 0..cols { + let mut diffs = 0; + if mat[m][u].is_some() { + let r = mat[m][u].unwrap(); + let seq = &ex.share[r].seq_del_amino; + let mut vref = refdata.refs[rsi.vids[m]].to_ascii_vec(); + if rsi.vpids[m].is_some() { + vref = dref[rsi.vpids[m].unwrap()].nt_sequence.clone(); + } + let jref = 
refdata.refs[rsi.jids[m]].to_ascii_vec(); + let z = seq.len(); + for p in 0..z { + let b = seq[p]; + if p < vref.len() - ctl.heur.ref_v_trim && b != vref[p] { + diffs += 1; + } + if p >= z - (jref.len() - ctl.heur.ref_j_trim) + && b != jref[jref.len() - (z - p)] + { + diffs += 1; + } + } + mx = std::cmp::max(mx, diffs); + } + } + exact: format!("{}", mx) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: edit +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: edit versus reference CDR3 +page: enclone help cvars +avail: public +notes: +code: let (_comp, edit) = comp_edit(ex, mid, col, refdata, dref, rsi); + exact: edit +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: entropy +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: float +doc: TBD +brief: entropy of gene expression values +page: UNDOCUMENTED +avail: private +notes: +code: let mut total_counts = Vec::::new(); + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = ex.clones[l][0].barcode.clone(); + if !gex_info.gex_barcodes.is_empty() { + let p = bin_position(&gex_info.gex_barcodes[li], &bc); + if p >= 0 { + let mut raw_count = 0; + if gex_info.gex_matrices[li].initialized() { + let row = gex_info.gex_matrices[li].row(p as usize); + for j in 0..row.len() { + let f = row[j].0; + let n = row[j].1; + if gex_info.is_gex[li][f] { + raw_count += n; + } + } + } else { + let z1 = gex_info.h5_indptr[li][p as usize] as usize; + let z2 = gex_info.h5_indptr[li][p as usize + 1] as usize; // is p+1 OK?? 
+ let d: Vec; + let ind: Vec; + if ctl.gen_opt.h5_pre { + d = h5_data[li].1[z1..z2].to_vec(); + ind = h5_data[li].2[z1..z2].to_vec(); + } else { + d = d_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + ind = ind_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + } + for j in 0..d.len() { + if gex_info.is_gex[li][ind[j] as usize] { + raw_count += d[j] as usize; + } + } + } + total_counts.push(raw_count); + } else { + /* barcode not found in gex_barcodes: push a placeholder so total_counts[l] stays aligned with ex.clones[l], which the entropy loop below indexes by l */ + total_counts.push(0); + } + } + } + let mut entropies = Vec::::new(); + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = ex.clones[l][0].barcode.clone(); + if !gex_info.gex_barcodes.is_empty() { + let mut entropy = 0.0; + let p = bin_position(&gex_info.gex_barcodes[li], &bc); + if p >= 0 { + if gex_info.gex_matrices[li].initialized() { + let row = gex_info.gex_matrices[li].row(p as usize); + for j in 0..row.len() { + let f = row[j].0; + let n = row[j].1; + if gex_info.is_gex[li][f] { + let q = n as f64 / total_counts[l] as f64; + entropy -= q * q.log2(); + } + } + } else { + let z1 = gex_info.h5_indptr[li][p as usize] as usize; + let z2 = gex_info.h5_indptr[li][p as usize + 1] as usize; // is p+1 OK??
+ let d: Vec; + let ind: Vec; + if ctl.gen_opt.h5_pre { + d = h5_data[li].1[z1..z2].to_vec(); + ind = h5_data[li].2[z1..z2].to_vec(); + } else { + d = d_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + ind = ind_readers[li] + .as_ref() + .unwrap() + .read_slice(s![z1..z2]) + .unwrap() + .to_vec(); + } + for j in 0..d.len() { + if gex_info.is_gex[li][ind[j] as usize] { + let n = d[j] as usize; + let q = n as f64 / total_counts[l] as f64; + entropy -= q * q.log2(); + } + } + } + } + entropies.push(entropy); + } + } + let entropies_unsorted = entropies.clone(); + entropies.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mut entropy = 0.0; + if !entropies.is_empty() { + entropy = median_f64(&entropies); + } + let mut e = Vec::::new(); + for x in entropies_unsorted.iter() { + e.push(format!("{:.2}", x)); + } + cell: e + exact: format!("{:.2}", entropy) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: exact_subclonotype_id +inputs: ? 
+limits: +class: lvar +level: exact +val: positive_integer +doc: TBD +brief: identifier of exact subclonotype +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: far +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: Hamming distance to farthest neighbor +page: enclone help lvars +avail: public +notes: +code: let mut dist = -1_isize; + for i2 in 0..varmat.len() { + if i2 == u || fp[i2] != fp[u] { + continue; + } + let mut d = 0_isize; + for c in fp[u].iter() { + for j in 0..varmat[u][*c].len() { + if varmat[u][*c][j] != varmat[i2][*c][j] { + d += 1; + } + } + } + dist = max(dist, d); + } + let d; + if dist == -1_isize { + d = "".to_string(); + } else { + d = format!("{}", dist); + } + exact: d +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: fb{1..} +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: string +doc: TBD +brief: sequence of the nth most frequent feature barcode +page: enclone help lvars +avail: private +notes: +code: let ncols = gex_info.fb_top_matrices[0].ncols(); + let n = (arg1 - 1) as usize; + let fb = if n < ncols { + gex_info.fb_top_matrices[0].col_label(n) + } else { + String::new() + }; + exact: (*fb).to_string() +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: fb{1..}_n +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative integer +doc: TBD +brief: number of UMIs for the nth most frequent feature barcode +page: enclone help lvars +avail: private +notes: +code: let ncols = gex_info.fb_top_matrices[0].ncols(); + let n = (arg1 - 1) as usize; + let median; + let mut counts; + if n >= ncols { + median = 0; + counts = vec!["0".to_string(); ex.ncells()]; + } else { + counts = Vec::::new(); + let mut counts_sorted = Vec::::new(); + for l in 
0..ex.clones.len() { + let bc = ex.clones[l][0].barcode.clone(); + let p = bin_position(&gex_info.fb_top_barcodes[0], &bc); + if p < 0 { + counts.push("0".to_string()); + counts_sorted.push(0); + } else { + let x = gex_info.fb_top_matrices[0].value(p as usize, n); + counts.push(format!("{}", x)); + counts_sorted.push(x); + } + } + counts_sorted.sort_unstable(); + median = rounded_median(&counts_sorted); + } + cell: counts + exact: format!("{}", median) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: filter +inputs: lvar_vdj +limits: +class: lvar +level: cell +val: string +doc: TBD +brief: name of filter that would be applied (if filters off) +page: enclone help lvars +avail: public +notes: +code: let mut fates = Vec::::new(); + for j in 0..ex.clones.len() { + let mut f = String::new(); + let bc = &ex.clones[j][0].barcode; + let li = ex.clones[j][0].dataset_index; + if fate[li].contains_key(&bc.clone()) { + f = fate[li][&bc.clone()].clone(); + f = f.between(" ", " ").to_string(); + } + fates.push(f); + } + cell: fates +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: fwr{1..4}_aa +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: FWR* amino acid sequence +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let c; + if arg1 == 1 { + c = get_fwr1(x); + } else if arg1 == 2 { + c = get_fwr2(x); + } else if arg1 == 3 { + c = get_fwr3(x); + } else { + let x = &ex.share[mid]; + let start = rsi.cdr3_starts[col] + 3 * rsi.cdr3_lens[col]; + let stop = rsi.seq_del_lens[col]; + let dna = &x.seq_del_amino[start..stop]; + c = Some(stringme(dna)); + } + if c.is_some() { + y = stringme(&aa_seq(c.unwrap().as_bytes(), 0)); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: fwr{1..4}_aa_ref +inputs: 
cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: FWR* amino acid seq for universal reference +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + if arg1 == 1 { + if x.cdr1_start.is_some() && x.fr1_start <= x.cdr1_start.unwrap() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec()[x.fr1_start..x.cdr1_start + .unwrap()] + .to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + } else if arg1 == 2 { + if x.fr2_start.unwrap() <= x.cdr2_start.unwrap() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.fr2_start.unwrap()..x.cdr2_start.unwrap()] + .to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + } else if arg1 == 3 { + if x.fr3_start.is_some() && x.fr3_start.unwrap() <= x.cdr3_start - x.ins_len() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec(); + if x.cdr3_start <= dna.len() { + let dna = dna[x.fr3_start.unwrap()..x.cdr3_start - x.ins_len()].to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + } + } else { + let heavy = refdata.rtype[x.j_ref_id] == 0; + let aa_len; + if heavy { + aa_len = 10; + } else { + aa_len = 9; + } + let dna = refdata.refs[x.j_ref_id].to_ascii_vec(); + let dna = dna[dna.len() - 1 - 3 * aa_len..dna.len() - 1].to_vec(); + y = stringme(&aa_seq(&dna, 0)); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: fwr{1..4}_dna +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: FWR* nucleotide sequence +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let c; + if arg1 == 1 { + c = get_fwr1(x); + } else if arg1 == 2 { + c = get_fwr2(x); + } else if arg1 == 3 { + c = get_fwr3(x); + } else { + let x = &ex.share[mid]; + let start = rsi.cdr3_starts[col] + 3 * rsi.cdr3_lens[col]; + let stop = rsi.seq_del_lens[col]; + let dna = &x.seq_del_amino[start..stop]; + c = Some(stringme(dna)); + } + if 
c.is_some() { + y = c.unwrap(); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: fwr{1..4}_dna_ref +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: FWR* nucleotide seq for universal reference +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + if arg1 == 1 { + if x.cdr1_start.is_some() && x.fr1_start <= x.cdr1_start.unwrap() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec()[x.fr1_start..x.cdr1_start + .unwrap()] + .to_vec(); + y = stringme(&dna); + } + } else if arg1 == 2 { + if x.fr2_start.unwrap() <= x.cdr2_start.unwrap() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec() + [x.fr2_start.unwrap()..x.cdr2_start.unwrap()] + .to_vec(); + y = stringme(&dna); + } + } else if arg1 == 3 { + if x.fr3_start.is_some() && x.fr3_start.unwrap() <= x.cdr3_start - x.ins_len() { + let dna = refdata.refs[x.v_ref_id].to_ascii_vec(); + if x.cdr3_start <= dna.len() { + let dna = dna[x.fr3_start.unwrap()..x.cdr3_start - x.ins_len()].to_vec(); + y = stringme(&dna); + } + } + } else { + let heavy = refdata.rtype[x.j_ref_id] == 0; + let aa_len; + if heavy { + aa_len = 10; + } else { + aa_len = 9; + } + let dna = refdata.refs[x.j_ref_id].to_ascii_vec(); + let dna = dna[dna.len() - 1 - 3 * aa_len..dna.len() - 1].to_vec(); + y = stringme(&dna); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: fwr{1..4}_len +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: length of FWR* amino acid sequence +page: enclone help cvars +avail: public +notes: +code: let x = &ex.share[mid]; + let mut y = "unknown".to_string(); + let c; + if arg1 == 1 { + c = get_fwr1(x); + } else if arg1 == 2 { + c = get_fwr2(x); + } else if arg1 == 3 { + c = get_fwr3(x); + } else { + let x = &ex.share[mid]; + let start =
rsi.cdr3_starts[col] + 3 * rsi.cdr3_lens[col]; + let stop = rsi.seq_del_lens[col]; + let dna = &x.seq_del_amino[start..stop]; + c = Some(stringme(dna)); + } + if c.is_some() { + y = format!("{}", c.unwrap().len() / 3); + } + exact: y +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: gex +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: float +doc: TBD +brief: number of GEX UMIs (or median) +page: enclone help lvars +avail: public +notes: +code: let mut f = Vec::::new(); + for x in gex_fcounts_unsorted.iter() { + f.push(format!("{}", *x)); + } + let mut counts = gex_counts_unsorted.clone(); + counts.sort_unstable(); + let gex_median = rounded_median(&counts); + cell: f + exact: format!("{}", gex_median) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: gex_max +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: maximum number of GEX UMIs across exact subclonotype +page: enclone help lvars +avail: public +notes: +code: exact: format!("{}", gex_counts_unsorted.iter().max().unwrap()) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: gex_mean +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: float +doc: TBD +brief: mean of GEX UMIs across exact subclonotype (= gex_μ) +page: enclone help lvars +avail: public +notes: +code: let gex_sum = gex_fcounts_unsorted.iter().sum::(); + let gex_mean = gex_sum / gex_fcounts_unsorted.len() as f64; + exact: format!("{}", gex_mean.round() as usize) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: gex_min +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: minimum number of GEX UMIs across exact subclonotype +page: enclone help lvars +avail: public +notes: +code: exact: format!("{}", 
gex_counts_unsorted.iter().min().unwrap()) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: gex_sum +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: sum of GEX UMIs across exact subclonotype (= gex_Σ) +page: enclone help lvars +avail: public +notes: +code: let gex_sum = gex_fcounts_unsorted.iter().sum::(); + exact: format!("{}", gex_sum.round() as usize) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: gex_Σ +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: sum of GEX UMIs across exact subclonotype (= gex_sum) +page: enclone help lvars +avail: public +notes: +code: let gex_sum = gex_fcounts_unsorted.iter().sum::(); + exact: format!("{}", gex_sum.round() as usize) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: gex_μ +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: float +doc: TBD +brief: mean of GEX UMIs across exact subclonotype (= gex_mean) +page: enclone help lvars +avail: public +notes: +code: let gex_sum = gex_fcounts_unsorted.iter().sum::(); + let gex_mean = gex_sum / gex_fcounts_unsorted.len() as f64; + exact: format!("{}", gex_mean.round() as usize) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: group_id +inputs: ? +limits: only implemented for parseable output +class: lvar +level: group +val: positive_integer +doc: TBD +brief: identifier of clonotype group +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: group_ncells +inputs: ? 
+limits: only implemented for parseable output +class: lvar +level: group +val: positive_integer +doc: TBD +brief: number of cells in clonotype group +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: g{0..} +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: positive_integer +doc: TBD +brief: exact subclonotype group, by Hamming distance +page: enclone help lvars +avail: public +notes: +code: let d = arg1 as usize; + let answer = if groups.contains_key(&d) { + format!("{}", groups[&d][u] + 1) + } else { + String::new() + }; + exact: answer +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: hcomp +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: CDR3 complexity number, heavy chain only, canonical only, computed better +page: enclone help lvars +avail: public +notes: +code: let mut hcomp = String::new(); + if ex.share.len() == 2 { + for j in 0..ex.share.len() { + if ex.share[j].left { + hcomp = format!("{}", ex.share[j].jun.hcomp); + } + } + } + exact: hcomp +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: inkt +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: string +doc: TBD +brief: evidence for iNKT cell +page: enclone help lvars +avail: public +notes: +code: let mut s = String::new(); + let alpha_g = ex.share[0].inkt_alpha_chain_gene_match; + let alpha_j = ex.share[0].inkt_alpha_chain_junction_match; + let beta_g = ex.share[0].inkt_beta_chain_gene_match; + let beta_j = ex.share[0].inkt_beta_chain_junction_match; + if alpha_g || alpha_j { + s += "𝝰"; + if alpha_g { + s += "g"; + } + if alpha_j { + s += "j"; + } + } + if beta_g || beta_j { + s += "𝝱"; + if beta_g { + s += "g"; + } + if beta_j { + s += "j"; + } + } + exact: s 
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: ivalbcumis +inputs: cvar_vdj +limits: +class: cvar +level: cell +val: string +doc: TBD +brief: invalidated UMIs, preceded by barcode +page: UNDOCUMENTED +avail: private +notes: +code: let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = String::new(); + if ex.clones[k][mid].invalidated_umis.is_some() { + let mut bc_umis = ex.clones[k][mid].invalidated_umis.clone().unwrap(); + for i in 0..bc_umis.len() { + bc_umis[i] = + format!("{}{}", ex.clones[k][mid].barcode.before("-"), bc_umis[i]); + } + n = format!("{}", bc_umis.iter().format(",")); + } + vals.push(n.to_string()); + } + cell: vals +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: ivalumis +inputs: cvar_vdj +limits: +class: cvar +level: cell +val: string +doc: TBD +brief: invalidated UMIs +page: UNDOCUMENTED +avail: private +notes: +code: let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = String::new(); + if ex.clones[k][mid].invalidated_umis.is_some() { + n = format!( + "{}", + ex.clones[k][mid] + .invalidated_umis + .as_ref() + .unwrap() + .iter() + .format(",") + ); + } + vals.push(n.to_string()); + } + cell: vals +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: j_id +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: positive_integer +doc: TBD +brief: J region id +page: enclone help parseable +avail: public +notes: +code: exact: format!("{}", refdata.id[rsi.jids[col]]) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: j_name +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: J region name +page: enclone help parseable +avail: public +notes: +code: exact: refdata.name[rsi.jids[col]].clone() 
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: jun_ins +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: number of inserted bases in heavy chain junction, only computed in two-chain case +page: enclone help lvars +avail: public +notes: +code: let mut jun_ins = String::new(); + if ex.share.len() == 2 { + for j in 0..ex.share.len() { + if ex.share[j].left { + jun_ins = format!("{}", ex.share[j].jun.jun_ins); + } + } + } + exact: jun_ins +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: jun_mat +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: number of matching bases in heavy chain junction, only computed in two-chain case +page: enclone help lvars +avail: public +notes: +code: let mut jun_mat = String::new(); + if ex.share.len() == 2 { + for j in 0..ex.share.len() { + if ex.share[j].left { + jun_mat = format!("{}", ex.share[j].jun.matches); + } + } + } + exact: jun_mat +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: jun_sub +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: number of mismatched bases in heavy chain junction, only computed in two-chain case +page: enclone help lvars +avail: public +notes: +code: let mut jun_mis = String::new(); + if ex.share.len() == 2 { + for j in 0..ex.share.len() { + if ex.share[j].left { + jun_mis = format!("{}", ex.share[j].jun.mismatches); + } + } + } + exact: jun_mis +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: mait +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: string +doc: TBD +brief: evidence for MAIT cell +page: enclone help lvars +avail: public +notes: +code: let mut s = String::new(); + let alpha_g = 
ex.share[0].mait_alpha_chain_gene_match; + let alpha_j = ex.share[0].mait_alpha_chain_junction_match; + let beta_g = ex.share[0].mait_beta_chain_gene_match; + let beta_j = ex.share[0].mait_beta_chain_junction_match; + if alpha_g || alpha_j { + s += "𝝰"; + if alpha_g { + s += "g"; + } + if alpha_j { + s += "j"; + } + } + if beta_g || beta_j { + s += "𝝱"; + if beta_g { + s += "g"; + } + if beta_j { + s += "j"; + } + } + exact: s +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: mark +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative integer +doc: TBD +brief: is cell marked +page: UNDOCUMENTED +avail: private +notes: +code: let mut n = 0; + for j in 0..ex.clones.len() { + if ex.clones[j][0].marked { + n += 1; + } + } + exact: format!("{}", n) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: mem +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative integer +doc: TBD +brief: for human or mouse BCR, number of GEX UMIs that are characterized as membrane +page: UNDOCUMENTED +avail: private +notes: +code: let mut n = 0; + let mut y = Vec::::new(); + if ctl.gen_opt.using_secmem { + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = &ex.clones[l][0].barcode; + let mut count = 0; + if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { + count = ctl.origin_info.secmem[li][&bc.clone()].1; + n += count; + } + y.push(format!("{}", count)); + } + } + cell: y + exact: format!("{}", n) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: n +inputs: lvar_vdj +limits: PER_CELL values are not displayed because they are all 1, + which would be distracting; also there is no _cell version +class: lvar +level: cell-exact +val: positive_integer +doc: TBD +brief: number of cells +page: enclone help lvars +avail: public +notes: +code: let counts 
= vec!["1.0".to_string(); mults[u]]; + cell: counts + exact: format!("{}", mults[u]) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: n_NAME +inputs: lvar_vdj +limits: PER_CELL values are not displayed because they are all 1, + which would be distracting; also there is no _cell version +class: lvar +level: cell-exact +val: nonnegative_integer +doc: TBD +brief: number of cells associated to the given name +page: enclone help lvars +avail: public +notes: +code: let mut count = 0; + let mut counts = Vec::<String>::new(); + for j in 0..ex.clones.len() { + let x = &ex.clones[j][0]; + if ctl.origin_info.dataset_id[x.dataset_index] == name { + count += 1; + counts.push("1.0".to_string()); + } else if x.origin_index.is_some() + && ctl.origin_info.origin_list[x.origin_index.unwrap()] == name + { + count += 1; + counts.push("1.0".to_string()); + } else if x.donor_index.is_some() + && ctl.origin_info.donor_list[x.donor_index.unwrap()] == name + { + count += 1; + counts.push("1.0".to_string()); + } else if x.tag_index.is_some() + && ctl.origin_info.tag_list[x.tag_index.unwrap()] == name + { + count += 1; + counts.push("1.0".to_string()); + } + } + cell: counts + exact: format!("{}", count) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: n_b +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative_integer +doc: TBD +brief: number of B cells +page: UNDOCUMENTED +avail: private +notes: +code: let mut n_b = 0; + let mut ns = Vec::<String>::new(); + for j in 0..ex.clones.len() { + let bc = &ex.clones[j][0].barcode; + let li = ex.clones[j][0].dataset_index; + if gex_info.cell_type[li].contains_key(&bc.clone()) { + if gex_info.cell_type[li][&bc.clone()].starts_with('B') { + n_b += 1; + ns.push("1.0".to_string()); + } else { + ns.push("0.0".to_string()); + } + } + } + cell: ns + exact: format!("{}", n_b) 
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: n_gex +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: float +doc: TBD +brief: median number of cells seen by GEX pipeline +page: enclone help lvars +avail: public +notes: +code: let mut n = Vec::::new(); + let mut n_gex = 0; + for x in n_gexs.iter() { + n.push(format!("{}", *x)); + n_gex += *x; + } + cell: n + exact: format!("{}", n_gex) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: n_other +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative_integer +doc: TBD +brief: when using nd, the number of cells assigned to another dataset +page: enclone help lvars +avail: public +notes: +code: let mut n = 0; + let mut ns = Vec::::new(); + for j in 0..ex.clones.len() { + let mut found = false; + let di = ex.clones[j][0].dataset_index; + let f = format!("n_{}", ctl.origin_info.dataset_id[di]); + for i in 0..nd_fields.len() { + if f == nd_fields[i] { + found = true; + } + } + if !found { + n += 1; + ns.push("1.0".to_string()); + } else { + ns.push("0.0".to_string()); + } + } + cell: ns + exact: format!("{}", n) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: nbc +inputs: lvar_vdj +limits: +class: lvar +level: cell +val: string +doc: TBD +brief: numeric barcode +page: enclone help lvars +avail: public +notes: +code: let mut nbc = Vec::::new(); + for j in 0..ex.clones.len() { + let bc = ex.clones[j][0].barcode.before("-").as_bytes(); + let mut n = 0_u64; + for k in 0..bc.len() { + if k > 0 { + n *= 4; + } + if bc[k] == b'C' { + n += 1; + } else if bc[k] == b'G' { + n += 2; + } else if bc[k] == b'T' { + n += 3; + } + } + nbc.push(format!("{:010}", n)); + } + cell: nbc +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: nchains +inputs: lvar_vdj +limits: 
+class: lvar +level: clono +val: positive_integer +doc: TBD +brief: number of chains in the clonotype +page: enclone help lvars +avail: public +notes: +code: exact: format!("{}", rsi.mat.len()) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: nchains_present +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: positive_integer +doc: The number of chains that are present in a given exact subclonotype. +brief: number of chains present in an exact subclonotype +page: enclone help lvars +avail: public +notes: +code: exact: format!("{}", exact_clonotypes[exacts[u]].share.len()) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: ndiff{1..}vj +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: number of base differences with exact subclonotype of that number +page: enclone help cvars +avail: public +notes: +code: let nd; + let mat = &rsi.mat; + let u0 = (arg1 - 1) as usize; + if u0 < exacts.len() && mat[col][u0].is_some() && mat[col][u].is_some() { + let m0 = mat[col][u0].unwrap(); + let m = mat[col][u].unwrap(); + let mut ndiff = 0; + let ex0 = &exact_clonotypes[exacts[u0]]; + let ex = &exact_clonotypes[exacts[u]]; + for p in 0..ex0.share[m0].seq_del.len() { + if ex0.share[m0].seq_del[p] != ex.share[m].seq_del[p] { + ndiff += 1; + } + } + nd = format!("{}", ndiff) + } else { + nd = "_".to_string() + } + exact: nd +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: near +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: nonnegative_integer +doc: TBD +brief: Hamming distance to nearest neighbor +page: enclone help lvars +avail: public +notes: +code: let near; + let mut dist = 1_000_000; + for i2 in 0..varmat.len() { + if i2 == u || fp[i2] != fp[u] { + continue; + } + let mut d = 0; + for c in fp[u].iter() { + for j in 0..varmat[u][*c].len() { 
+ if varmat[u][*c][j] != varmat[i2][*c][j] { + d += 1; + } + } + } + dist = min(dist, d); + } + if dist == 1_000_000 { + near = "".to_string() + } else { + near = format!("{}", dist) + } + exact: near +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: nival +inputs: cvar_vdj +limits: +class: cvar +level: cell +val: nonnegative_integer +doc: TBD +brief: number of invalidated UMIs +page: UNDOCUMENTED +avail: private +notes: +code: let mut valsx = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = 0; + if ex.clones[k][mid].invalidated_umis.is_some() { + n = ex.clones[k][mid].invalidated_umis.as_ref().unwrap().len(); + } + valsx.push(format!("{}", n)); + } + cell: valsx +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: nnval +inputs: cvar_vdj +limits: +class: cvar +level: cell +val: nonnegative_integer +doc: TBD +brief: number of non-validated UMIs +page: UNDOCUMENTED +avail: private +notes: +code: let mut valsx = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = 0; + if ex.clones[k][mid].non_validated_umis.is_some() { + n = ex.clones[k][mid].non_validated_umis.as_ref().unwrap().len(); + } + valsx.push(format!("{}", n)); + } + cell: valsx +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: notes +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: notes for exact subclonotype +page: enclone help cvars +avail: public +notes: +code: exact: ex.share[mid].vs_notesx.clone() +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: npe +inputs: lvar_vdj +limits: +class: lvar +level: cell +val: string +doc: TBD +brief: # of clonotype cells within PCA distance n of this cell +page: UNDOCUMENTED +avail: private +notes: +code: exact: String::new() 
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: nval +inputs: cvar_vdj +limits: +class: cvar +level: cell +val: nonnegative_integer +doc: TBD +brief: number of validated UMIs +page: UNDOCUMENTED +avail: private +notes: +code: let mut valsx = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = 0; + if ex.clones[k][mid].validated_umis.is_some() { + n = ex.clones[k][mid].validated_umis.as_ref().unwrap().len(); + } + valsx.push(format!("{}", n)); + } + cell: valsx +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: nvalbcumis +inputs: cvar_vdj +limits: +class: cvar +level: cell +val: string +doc: TBD +brief: non-validated UMIs, preceded by barcode +page: UNDOCUMENTED +avail: private +notes: +code: let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = String::new(); + if ex.clones[k][mid].non_validated_umis.is_some() { + let mut bc_umis = ex.clones[k][mid].non_validated_umis.clone().unwrap(); + for i in 0..bc_umis.len() { + bc_umis[i] = + format!("{}{}", ex.clones[k][mid].barcode.before("-"), bc_umis[i]); + } + n = format!("{}", bc_umis.iter().format(",")); + } + vals.push(n.to_string()); + } + cell: vals +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: nvalumis +inputs: cvar_vdj +limits: +class: cvar +level: cell +val: string +doc: TBD +brief: non-validated UMIs +page: UNDOCUMENTED +avail: private +notes: +code: let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + let mut n = String::new(); + if ex.clones[k][mid].non_validated_umis.is_some() { + n = format!( + "{}", + ex.clones[k][mid] + .non_validated_umis + .as_ref() + .unwrap() + .iter() + .format(",") + ); + } + vals.push(n.to_string()); + } + cell: vals +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: origins +inputs: lvar_vdj +limits: +class: lvar +level: 
cell-exact +val: string +doc: TBD +brief: origin names +page: enclone help lvars +avail: public +notes: +code: let mut origins = Vec::::new(); + for j in 0..ex.clones.len() { + if ex.clones[j][0].origin_index.is_some() { + origins.push( + ctl.origin_info.origin_list[ex.clones[j][0].origin_index.unwrap()].clone(), + ); + } else { + origins.push("?".to_string()); + } + } + let origins_unsorted = origins.clone(); + unique_sort(&mut origins); + cell: origins_unsorted + exact: format!("{}", origins.iter().format(",")) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: pe +inputs: lvar_vdj +limits: +class: lvar +level: cell +val: string +doc: TBD +brief: PCA equivalence class at distance ≤ n +page: UNDOCUMENTED +avail: private +notes: +code: exact: String::new() +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: ppe +inputs: lvar_vdj +limits: +class: lvar +level: cell +val: string +doc: TBD +brief: % of gex cells within PCA dist n of this cell that are in this clonotype +page: UNDOCUMENTED +avail: private +notes: +code: exact: String::new() +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: q{0..}_ +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: read quality scores at position n +page: enclone help cvars +avail: public +notes: +code: let mut val = String::new(); + if (arg1 as usize) < ex.share[mid].seq.len() { + let mut quals = Vec::::new(); + for j in 0..ex.clones.len() { + quals.push(ex.clones[j][mid].quals[arg1 as usize]); + } + val = format!("{}", quals.iter().format(",")); + } + exact: val +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: r +inputs: cvar_vdj +limits: +class: cvar +level: cell-exact +val: float +doc: TBD +brief: median number of reads supporting chain +page: enclone help cvars +avail: 
public +notes: +code: let mut nreads = Vec::::new(); + let mut nreads_sorted = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(format!("{}", ex.clones[j][mid].read_count)); + nreads_sorted.push(ex.clones[j][mid].read_count); + } + nreads_sorted.sort_unstable(); + cell: nreads + exact: format!("{}", rounded_median(&nreads_sorted)) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: r_max +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: maximum chain read count across exact subclonotype +page: enclone help cvars +avail: public +notes: +code: let mut nreads = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + exact: format!("{}", *nreads.iter().max().unwrap()) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: r_mean +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float +doc: TBD +brief: mean chain reads across exact subclonotype (= r_μ) +page: enclone help cvars +avail: public +notes: +code: let mut nreads = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + let rtot: usize = nreads.iter().sum(); + let r_mean = (rtot as f64 / nreads.len() as f64).round() as usize; + exact: format!("{}", r_mean) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: r_min +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: minimum chain read count across exact subclonotype +page: enclone help cvars +avail: public +notes: +code: let mut nreads = Vec::::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + exact: format!("{}", *nreads.iter().min().unwrap()) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: r_sum +inputs: 
cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: sum of chain read counts across exact subclonotype (= r_Σ) +page: enclone help cvars +avail: public +notes: +code: let mut nreads = Vec::<usize>::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + let rtot: usize = nreads.iter().sum(); + exact: format!("{}", rtot) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: r_Σ +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: sum of chain reads across exact subclonotype (= r_sum) +page: enclone help cvars +avail: public +notes: +code: let mut nreads = Vec::<usize>::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + let rtot: usize = nreads.iter().sum(); + exact: format!("{}", rtot) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: r_μ +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float +doc: TBD +brief: mean chain read count across exact subclonotype (= r_mean) +page: enclone help cvars +avail: public +notes: +code: let mut nreads = Vec::<usize>::new(); + for j in 0..ex.clones.len() { + nreads.push(ex.clones[j][mid].read_count); + } + let rtot: usize = nreads.iter().sum(); + let r_mean = (rtot as f64 / nreads.len() as f64).round() as usize; + exact: format!("{}", r_mean) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: sec +inputs: lvar_vdj +limits: +class: lvar +level: cell-exact +val: nonnegative integer +doc: TBD +brief: for human or mouse BCR, number of GEX UMIs that are characterized as secreted +page: UNDOCUMENTED +avail: private +notes: +code: let mut n = 0; + let mut y = Vec::<String>::new(); + if ctl.gen_opt.using_secmem { + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = &ex.clones[l][0].barcode; + let mut count = 
0; + if ctl.origin_info.secmem[li].contains_key(&bc.clone()) { + count = ctl.origin_info.secmem[li][&bc.clone()].0; + n += count; + } + y.push(format!("{}", count)); + } + } + cell: y + exact: format!("{}", n) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: seq +inputs: ? +limits: only implemented for parseable output +class: cvar +level: exact +val: string +doc: TBD +brief: full nucleotide sequence of exact subclonotype +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: share_indices_aa +inputs: ? +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: shared amino acid positions +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: share_indices_dna +inputs: ? +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: shared nucleotide positions +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: type +inputs: lvar_vdj +limits: +class: lvar +level: exact +val: string +doc: TBD +brief: cell type +page: UNDOCUMENTED +avail: public +notes: +code: let mut cell_types = Vec::::new(); + /* + for j in 0..ex.clones.len() { + let mut cell_type = "".to_string(); + let bc = &ex.clones[j][0].barcode; + let li = ex.clones[j][0].dataset_index; + if gex_info.cell_type[li].contains_key(&bc.clone()) { + cell_type = gex_info.cell_type[li][&bc.clone()].clone(); + } + cell_types.push(cell_type); + } + */ + cell_types.sort(); + exact: abbrev_list(&cell_types) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: u +inputs: cvar_vdj +limits: +class: cvar +level: cell-exact +val: nonnegative_integer +doc: TBD 
+brief: median number of UMIs supporting chain +page: enclone help cvars +avail: public +notes: +code: let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + let median_numis = rounded_median(&numis); + let mut vals = Vec::::new(); + for k in 0..ex.ncells() { + vals.push(format!("{}", ex.clones[k][mid].umi_count)); + } + cell: vals + exact: format!("{}", median_numis) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: u_max +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: For a chain in an exact subclonotype, the maximum number of UMIs, ranging across the + cells. +brief: maximum chain UMIs across exact subclonotype +page: enclone help cvars +avail: public +notes: +code: let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + exact: format!("{}", numis.iter().max().unwrap()) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: u_mean +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float +doc: TBD +brief: mean chain UMIs across exact subclonotype (= u_μ) +page: enclone help cvars +avail: public +notes: +code: let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + let utot: usize = numis.iter().sum(); + let u_mean = (utot as f64 / numis.len() as f64).round() as usize; + exact: format!("{}", u_mean) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: u_min +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: minimum chain UMIs across exact subclonotype +page: enclone help cvars +avail: public +notes: +code: let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + 
numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + exact: format!("{}", numis.iter().min().unwrap()) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: u_sum +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: sum of chain UMIs for exact subclonotype (= u_Σ) +page: enclone help cvars +avail: public +notes: +code: let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + let utot: usize = numis.iter().sum(); + exact: format!("{}", utot) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: u_Σ +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: sum of chain UMIs across exact subclonotype (= u_sum) +page: enclone help cvars +avail: public +notes: +code: let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + let utot: usize = numis.iter().sum(); + exact: format!("{}", utot) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: u_μ +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float +doc: TBD +brief: mean chain UMIs for exact subclonotype (= u_mean) +page: enclone help cvars +avail: public +notes: +code: let mut numis = Vec::::new(); + for j in 0..ex.clones.len() { + numis.push(ex.clones[j][mid].umi_count); + } + numis.sort_unstable(); + let utot: usize = numis.iter().sum(); + let u_mean = (utot as f64 / numis.len() as f64).round() as usize; + exact: format!("{}", u_mean) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: udiff +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: differences of 5'-UTR region with universal reference +page: enclone help cvars +avail: public +notes: +code: 
let ulen = ex.share[mid].v_start; + let uid = ex.share[mid].u_ref_id; + let mut udiff = String::new(); + let mut ndiffs = 0; + if uid.is_some() { + let r = &refdata.refs[uid.unwrap()]; + let mut extra = 0; + if ulen > r.len() { + extra = ulen - r.len(); + } + for i in 0..ulen { + let mut rpos = i; + if ulen < r.len() { + rpos += r.len() - ulen; + } else { + if i + r.len() < ulen { + continue; + } + rpos -= ulen - r.len(); + } + let tb = ex.share[mid].full_seq[i]; + let rb = r.to_ascii_vec()[rpos]; + if tb != rb { + ndiffs += 1; + if ndiffs <= 5 { + udiff += &format!("{}{}", rpos, tb as char); + } + } + } + if ndiffs > 5 { + udiff += "..."; + } + if extra > 0 { + udiff += &format!("+{}", extra); + } + } else if ulen > 0 { + udiff = format!("+{}", ulen); + } + exact: udiff +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: ulen +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: nonnegative_integer +doc: TBD +brief: length of observed 5'-UTR sequence +page: enclone help cvars +avail: public +notes: +code: exact: format!("{}", ex.share[mid].v_start) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: utr_id +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: positive_integer +doc: TBD +brief: numerical identifier of 5'-UTR region (or null) +page: enclone help parseable +avail: public +notes: +code: let mut u = String::new(); + let uid = ex.share[mid].u_ref_id; + if uid.is_some() { + u = format!("{}", refdata.id[uid.unwrap()]); + } + exact: u +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: utr_name +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: name of 5'-UTR region (or null) +page: enclone help parseable +avail: public +notes: +code: let mut u = String::new(); + let uid = ex.share[mid].u_ref_id; + if uid.is_some() { + u = 
refdata.name[uid.unwrap()].clone(); + } + exact: u +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: v_id +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: positive_integer +doc: TBD +brief: V region id +page: enclone help parseable +avail: public +notes: +code: exact: format!("{}", refdata.id[rsi.vids[col]]) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: v_name +inputs: cvar_vdj +limits: +class: cvar +level: clono +val: string +doc: TBD +brief: V region name +page: enclone help cvars +avail: public +notes: +code: exact: refdata.name[rsi.vids[col]].clone() +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: v_name_orig +inputs: cvar_vdj +limits: +class: cvar +level: cell-exact +val: string +doc: TBD +brief: V region name that was originally assigned +page: enclone help cvars +avail: public +notes: +code: let mut vals = Vec::<String>::new(); + for k in 0..ex.ncells() { + vals.push(refdata.name[ex.clones[k][mid].v_ref_id].clone()); + } + let mut vals_uniq = vals.clone(); + unique_sort(&mut vals_uniq); + cell: vals + exact: format!("{}", vals_uniq.iter().format(",")) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: v_start +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: positive_integer_or_null +doc: TBD +brief: start of V on full nucleotide sequence +page: enclone help parseable +avail: public +notes: Positive integer, or nonnegative integer? 
+code: exact: format!("{}", ex.share[mid].v_start) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: valbcumis +inputs: cvar_vdj +limits: +class: cvar +level: cell +val: string +doc: TBD +brief: validated UMIs, preceded by barcode +page: UNDOCUMENTED +avail: private +notes: +code: let mut vals = Vec::<String>::new(); + for k in 0..ex.ncells() { + let mut n = String::new(); + if ex.clones[k][mid].validated_umis.is_some() { + let mut bc_umis = ex.clones[k][mid].validated_umis.clone().unwrap(); + for i in 0..bc_umis.len() { + bc_umis[i] = + format!("{}{}", ex.clones[k][mid].barcode.before("-"), bc_umis[i]); + } + n = format!("{}", bc_umis.iter().format(",")); + } + vals.push(n.to_string()); + } + cell: vals +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: valumis +inputs: cvar_vdj +limits: +class: cvar +level: cell +val: string +doc: TBD +brief: validated UMIs +page: UNDOCUMENTED +avail: private +notes: +code: let mut vals = Vec::<String>::new(); + for k in 0..ex.ncells() { + let mut n = String::new(); + if ex.clones[k][mid].validated_umis.is_some() { + n = format!( + "{}", + ex.clones[k][mid] + .validated_umis + .as_ref() + .unwrap() + .iter() + .format(",") + ); + } + vals.push(n.to_string()); + } + cell: vals +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: var +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: string +doc: TBD +brief: bases at position in chain that vary across the clonotype +page: enclone help cvars +avail: public +notes: +code: exact: stringme(&varmat[u][col]) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: var_aa +inputs: ? 
+limits: only implemented for parseable output +class: cvar +level: exact +val: string +doc: TBD +brief: variant residue indices in clonotype (including synonymous) +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: var_indices_aa +inputs: ? +limits: only implemented for parseable output +class: cvar +level: clono +val: string +doc: TBD +brief: variable amino acid positions +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: var_indices_dna +inputs: ? +limits: only implemented for parseable output +class: cvar +level: clono +val: string +doc: TBD +brief: variable nucleotide positions +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: vj_aa +inputs: ? +limits: only implemented for parseable output +class: cvar +level: exact +val: string +doc: TBD +brief: amino acid sequence of V..J +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: vj_aa_nl +inputs: ? +limits: only implemented for parseable output +class: cvar +level: exact +val: string +doc: TBD +brief: amino acid sequence of V..J, excluding leader +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: vj_seq +inputs: ? +limits: only implemented for parseable output +class: cvar +level: exact +val: string +doc: TBD +brief: nucleotide sequence of V..J +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: vj_seq_nl +inputs: ? 
+limits: only implemented for parseable output +class: cvar +level: exact +val: string +doc: TBD +brief: nucleotide sequence of V..J, excluding leader +page: enclone help parseable +avail: public +notes: +code: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: vjlen +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: positive_integer +doc: TBD +brief: length in bases of V..J +page: enclone help cvars +avail: public +notes: +code: exact: format!("{}", ex.share[mid].j_stop - ex.share[mid].v_start) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +name: white +inputs: cvar_vdj +limits: +class: cvar +level: exact +val: float +doc: TBD +brief: percent of sequences implicated in whitelist expansion +page: UNDOCUMENTED +avail: private +notes: +code: let mut bch = vec![Vec::<(usize, String, usize, usize)>::new(); 2]; + for l in 0..ex.clones.len() { + let li = ex.clones[l][0].dataset_index; + let bc = &ex.clones[l][0].barcode; + let mut numi = 0; + for j in 0..ex.clones[l].len() { + numi += ex.clones[l][j].umi_count; + } + bch[0].push((li, bc[0..8].to_string(), numi, l)); + bch[1].push((li, bc[8..16].to_string(), numi, l)); + } + let mut junk = 0; + let mut bad = vec![false; ex.clones.len()]; + for l in 0..2 { + bch[l].sort(); + let mut m = 0; + while m < bch[l].len() { + let n = next_diff12_4(&bch[l], m as i32) as usize; + for u1 in m..n { + for u2 in m..n { + if bch[l][u1].2 >= 10 * bch[l][u2].2 { + bad[bch[l][u2].3] = true; + } + } + } + m = n; + } + } + for u in 0..bad.len() { + if bad[u] { + junk += 1; + } + } + let junk_rate = percent_ratio(junk, ex.clones.len()); + exact: format!("{:.1}", junk_rate) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ diff --git a/enclone_versions/Cargo.toml b/enclone_versions/Cargo.toml deleted file mode 100644 index 77b5c74d7..000000000 --- 
a/enclone_versions/Cargo.toml +++ /dev/null @@ -1,44 +0,0 @@ -[package] -name = "enclone_versions" -version = "0.4.49" -authors = ["""David Jaffe , - Keri Dockter , - Shaun Jackman , - Sreenath Krishnan , - Meryl Lewis , - Patrick Marks , - Wyatt McDonnell """] -edition = "2018" -license = "LICENSE.txt" -publish = false - -[dependencies] -assert_cmd = "0.12.0" -bio = "0.31.0" -byteorder = "1.3.2" -bytes = "0.5.5" -chrono = "0.4.11" -dirs = "2.0.2" -failure = "0.1.5" -flate2 = "1.0.16" -io_utils = "0.2" -itertools = "0.9.0" -ndarray = "0.13" -pager = "0.15.0" -permutation = "0.2.5" -petgraph = "0.4.13" -pretty_trace = "0.3.2" -prost = "0.6.1" -prost-build = "0.6.1" -rayon = "1.0.2" -regex = "1.3.1" -serde = "1.0.90" -serde_derive = "1.0.102" -stats_utils = "0.1.1" -stirling_numbers = "0.1.2" -string_utils = "0.1.1" -tar = "0.4.29" -tilde-expand = "0.1.1" -vector_utils = "0.1.3" - - diff --git a/fonts/DejaVuLGCSansMono-Bold.ttf b/fonts/DejaVuLGCSansMono-Bold.ttf deleted file mode 100644 index 3913466ea..000000000 Binary files a/fonts/DejaVuLGCSansMono-Bold.ttf and /dev/null differ diff --git a/fonts/DejaVuLGCSansMono.ttf b/fonts/DejaVuLGCSansMono.ttf deleted file mode 100644 index 30fce765a..000000000 Binary files a/fonts/DejaVuLGCSansMono.ttf and /dev/null differ diff --git a/fonts/README b/fonts/README deleted file mode 100644 index 421f1801f..000000000 --- a/fonts/README +++ /dev/null @@ -1,13 +0,0 @@ -These monospace fonts were downloaded from https://dejavu-fonts.github.io/Download.html -on 4/16/2020. The actual link was - -http://sourceforge.net/projects/dejavu/files/dejavu/2.37/dejavu-lgc-fonts-ttf-2.37.tar.bz2. - -They are freely licensed, see -https://github.com/dejavu-fonts/dejavu-fonts/blob/master/LICENSE. - -Originally we were going to use Menlo, which is on public sites, but the licensing of it -is unclear and there's no explanation where the files came from. 
- -The reason we're supplying monospace fonts at all is that the only universally available -monospace font is Courier New, which poorly displays the enclone clonotypes. diff --git a/fonts/google_font_test.html b/fonts/google_font_test.html deleted file mode 100644 index 965ad7c46..000000000 --- a/fonts/google_font_test.html +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - - - - -This tests a Google font to see if it works for clonotypes. -Things are not quite lined up perfectly on the right. We tested all the Google -fonts (list below) and none worked at the time of testing, however possibly that might change. - -
-
- -
-Roboto Mono        Ralph Levien       Source Code Pro        PT Mono          Nanum Gothic Coding
-Ubuntu Mono        Share Tech Mono    IBM Plex Mono          Anonymous Pro    Fira Mono
-Cousine            VT323              Space Mono             Overpass Mono    Courier Prime
-Cutive Mono        Oxygen Mono        B612 Mono              Nova Mono        Fira Code
-Major Mono Display
-
- -
-
-[1] GROUP = 1 CLONOTYPES = 54 CELLS
-
-[1.1] CLONOTYPE = 54 CELLS
-┌───────────┬──────────────────────────────────────┬───────────────────────────────┐
-│           │  CHAIN 1                             │  CHAIN 2                      │
-│           │  144.1.2|IGHV3-49 ◆ 53|IGHJ3         │  282|IGKV3-11 ◆ 218|IGKJ5     │
-│           ├──────────────────────────────────────┼───────────────────────────────┤
-│           │   1 11111111111111111 1              │    1111111111111              │
-│           │  51 11112222222222333 4              │  6 0001111111111              │
-│           │  53 67890123456789012 1              │  4 7890123456789              │
-│           │     ═══════CDR3══════                │    ═════CDR3════              │
-│reference  │  VV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W SR CQQ◦◦◦◦◦◦◦◦◦◦              │
-│donor ref  │  FV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W SR CQQ◦◦◦◦◦◦◦◦◦◦              │
-├───────────┼──────────────────────────────────────┼───────────────────────────────┤
-│#   n        .x ................. x    u  const    x .x...........      u  const│
-│1  48      │  FV CTRDRDLRGATDAFDIW S  101  IGHG1  │  R CQQRSNWPPSITF   3454  IGKC │
-│2   3      │  FM CTRDRDLRGATDAFDIW S   89  IGHG1  │  R CHQRSNWPPSITF   7663  IGKC │
-│3   1      │  FV CTRDRDLRGATDAFDIW S  172  IGHG1  │  S CQQRSNWPPSITF  12603  IGKC │
-│4   1      │  FV CTRDRDLRGATDAFDIW S   33  IGHG1  │  R CQQRSNWPPSITF    116  IGKC │
-│5   1      │                                      │  R CQQRSNWPPSITF   3471  IGKC │
-└───────────┴──────────────────────────────────────┴───────────────────────────────┘
-
-
- - diff --git a/gex_files b/gex_files deleted file mode 100644 index af08b3563..000000000 --- a/gex_files +++ /dev/null @@ -1,25 +0,0 @@ -Here we spec the GEX file requirements. This is complicated by differences between the -customer (CS) and internal (PD) pipeline outs file structures and the need for backward -compatibility. - -These requirements are likely to change over time. - -CS PD -metrics_summary.csv metrics_summary_csv.csv -raw_feature_bc_matrix.h5 raw_feature_bc_matrix.h5 * -analysis/clustering/graphclust/clusters.csv analysis_csv/clustering/graphclust/clusters.csv -analysis/pca/10_components/projection.csv analysis_csv/pca/10_components/projection.csv -(not present) analysis_csv/clustering/graphclust/celltypes/celltypes.csv ** - -*: named raw_gene_bc_matrices_h5.h5 in older pipeline versions -**: not present in older pipeline versions - -Given a GEX path p, enclone will look for these files in p, if p ends with outs, and if not, -first p/outs, then p. - -For internal runs, if the GEX path p does not exist, and (after removing PRE), p is an integer, -then we assume it is an internal identifier and look for it in the internal pipeline outputs. - -If the NH5 option is specified to enclone, it will write out a file feature_barcode_matrix.bin to -this same directory (p or p/outs, as above), provided that feature_barcode_matrix.bin does not -already exist. Subsequently, this file will be used for input whether or not NH5 is specified. 
diff --git a/img/by_dataset.svg b/img/by_dataset.svg new file mode 100644 index 000000000..12127d552 --- /dev/null +++ b/img/by_dataset.svg @@ -0,0 +1,3228 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +123085 + +123089 + +124547 + + diff --git a/img/cat_var.svg b/img/cat_var.svg new file mode 100644 index 000000000..54ec6bcfa --- /dev/null +++ b/img/cat_var.svg @@ -0,0 +1,965 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +IGHV3-64D, IGLV3-1 + +IGHV3-49, IGKV3-11 + +IGHV3-7, IGLV5-37 + +IGHV3-7, IGLV4-69 + +IGHV1-69D, IGKV3-20 + +IGHV4-30-4, IGKV1D-39 + +IGHV4-59, IGKV1D-39 + +IGHV3-21, IGKV1-27 + +IGHV5-51, IGLV1-47 + +other + + diff --git a/img/iso.svg b/img/iso.svg index 9c9cd97d7..37a9aec76 100644 --- a/img/iso.svg +++ b/img/iso.svg @@ -1,1861 +1,1909 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -IGHA1 - -IGHA2 - -IGHD - -IGHE - -IGHG1 - -IGHG2 - -IGHG3 - -IGHG4 - -IGHM - -undetermined - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +IGHA1 + +IGHA2 + +IGHD + +IGHE + +IGHG1 + +IGHG2 + +IGHG3 + +IGHG4 + +IGHM + +undetermined + diff --git a/img/quad_hive.svg b/img/quad_hive.svg new file mode 100644 index 000000000..790d57a4f --- /dev/null +++ b/img/quad_hive.svg @@ -0,0 +1,3009 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/img/samples.svg b/img/samples.svg index d5fd00092..68ee49a9c 100644 --- a/img/samples.svg +++ b/img/samples.svg @@ -1,1562 +1,1661 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - -123085 - -123089 - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +123085 + +123089 + diff --git a/img/sim_mat_plot.svg b/img/sim_mat_plot.svg new file mode 100644 index 000000000..4d8996022 --- /dev/null +++ b/img/sim_mat_plot.svg @@ -0,0 +1,78 @@ + + + + +variable    mean  # +CDKN1A_g     3.1  1 +CDKN1B_g     0.5  2 +RBX1_g       1.0  3 +IGLC1_g     19.2  4 +IGLV3-21_g  16.8  5 +1 +2 +3 +4 +5 + +1.00 + +0.48 + +0.55 + +0.07 + +0.03 + +0.48 + +1.00 + +0.41 + +0.03 + +0.00 + +0.55 + +0.41 + +1.00 + +0.04 + +0.10 + +0.07 + +0.03 + +0.04 + +1.00 + +0.36 + +0.03 + +0.00 + +0.10 + +0.36 + +1.00 + + diff --git a/img/twin_plot.svg b/img/twin_plot.svg new file mode 100644 index 
000000000..7c9824fee --- /dev/null +++ b/img/twin_plot.svg @@ -0,0 +1,3029 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +IGHA1 + +IGHA2 + +IGHD + +IGHE + +IGHG1 + +IGHG2 + +IGHG3 + +IGHG4 + +IGHM + +undetermined + + diff --git a/img/two_genes.svg b/img/two_genes.svg new file mode 100644 index 000000000..75c143d31 --- /dev/null +++ b/img/two_genes.svg @@ -0,0 +1,655 @@ + + +HLA-A_g versus CD74_g + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +CD74_g + + +HLA-A_g + + + + + + + + + +500 + + + +1000 + + + + +50 + + + +100 + + + +150 + + + +200 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/img/var.png b/img/var.png new file mode 100644 index 000000000..fcc2602f3 Binary files /dev/null and b/img/var.png differ diff --git a/img/visual1.png b/img/visual1.png new file mode 100644 index 000000000..f1ce6b9c1 Binary files /dev/null and b/img/visual1.png differ diff --git 
a/img/visual2.png b/img/visual2.png new file mode 100644 index 000000000..97287a423 Binary files /dev/null and b/img/visual2.png differ diff --git a/img/visual_archive1.png b/img/visual_archive1.png new file mode 100644 index 000000000..81027811d Binary files /dev/null and b/img/visual_archive1.png differ diff --git a/index.html b/index.html deleted file mode 100644 index 54b6d3eb3..000000000 --- a/index.html +++ /dev/null @@ -1,752 +0,0 @@ - - - - - - -enclone (bit.ly/enclone) - - - - - - - - - - - -
-enclone banner - -
-

enclone (beta)

-

Accurate and user-friendly computational tool for clonal grouping to study the adaptive immune system

-

- -

10x Genomics Chromium Single Cell V(D)J data - containing B cell - receptor (BCR) and T cell receptor (TCR) RNA sequences are entered as input data to - enclone. Based on the - input, enclone finds and organizes cells arising from the same progenitors into groups - (clonotypes) and - compactly displays each clonotype along with its salient features, including mutated amino - acids.

-

-enclone (beta) is provided as a tool for use by the community to accelerate immunology research. -enclone is only supported via -enclone@10xgenomics.com. -The clonotype assignment algorithm that is part of enclone will be integrated into a future -release of Cell Ranger. -

- -

enclone has been designed for immunologists but anyone can download and experiment with it.

- -

Background: when you get sick, your body mounts an immune response by selectively -amplifying immune cells and mutations within these selected cells. enclone allows you to see the -history of -single immune cells within a biological sample (such as a blood draw or biopsy). This history -reflects how the cognate receptors of these cells evolved in response to antigens, including -viruses, bacteria, and tumors.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 1. Introduction  8. Help 
 2. Objective  9. Understanding enclone output 
 3. Why enclone 10. Combining multiomic data 
 4. Data input  11. Visualizing multiple clonotypes at once 
 5. Software  12. The power of enclone 
 6. Installing enclone  13. Questions 
 7. Running enclone  14. Where am I? 
- -
- -
- -

-

Introduction

-

The body defends itself from antigens, like viruses, bacteria, and tumors, by recognizing the -antigens and mounting an immune response through selective amplification of immune cells and -mutations within selected cells. enclone enables profiling of the history of single immune -cells within a biological sample (such as a blood draw or a biopsy) by mapping the evolution of the -cognate BCRs and TCRs of those cells responding to antigen exposure. This history reflects how the -cognate receptors of these cells evolved in response to various antigens.

- -

-

Objective

- - -

Using enclone to profile B and T cell receptors for any sample using Chromium Single Cell -V(D)J as input enables you to make the best use of your data. You can explore the biology of these -cells without help from a computational expert!

- -

The objective of enclone is to:

- - - - - - -
-

- Find and display clonotypes: - groups of T and B cells sharing the same fully rearranged common ancestor. -

-
-

Find: - It is easy to mistakenly put unrelated cells in - the same clonotype, or "pollute" a clonotype with extraneous chains. - enclone's algorithms make clonotype finding accurate.

-

Display: - It is challenging to compactly represent a - large repertoire of data. enclone enables compact, easy-to-grasp data - display.

-
- -

The diversity of BCR and TCR chains, containing various combinations of V, D, and/or J segments, -broadens the immune repertoire to protect against a wide variety of pathogens. The figure below -illustrates the concept of a BCR clonotype. A similar concept applies to TCRs but without -somatic hypermutations.

- - -what is a clonotype - -

Each cell in a clonotype is typically represented by two or three chains, and this information is -present and directly observable in single cell V(D)J data. enclone computationally approximates -the clonotypes -from the data with high accuracy (see below). The methods of enclone are described -briefly in the online documentation for enclone, and will -also be described separately in more detail. -

- - - -

-Clonotyping performance. To test the performance of enclone, we combined data from 443 BCR -enriched libraries from 30 donors. enclone detected a total of -381,506 -clonotypes. Of the detected clonotypes, 9,573 contained at least -two cells, -of which 15 (0.16%) -contained receptors from multiple donors, and thus were erroneous. -The low error rate is a consequence of the paired chain data and the enclone -algorithm, which minimizes the placement of unrelated cells in the same clonotype. -

- -
- -

-

Why use enclone?

-

-enclone has unique features! -

-

-Unique insights into 10x Genomics data: enclone has been designed and tested -extensively to -gain in-depth insight and perspective regarding 10x Genomics single cell V(D)J datasets. Other -similar tools -may be used, but frequently, enclone will provide a different answer, which in turn may affect -the biological interpretation of the data. -

-

-Speed: enclone is very fast, allowing analysis of datasets in seconds. -

-

-Easy installation: The software is easy to install and to use. -

- -
- -

-

Inputs to enclone

- -

10x Genomics single cell 5' data

- -

-BCR or TCR RNA sequences generated using the 10x Genomics -Chromium Single Cell Immune Profiling Solution and Cell Ranger 3.1 or higher are the inputs to -enclone. enclone can also process and display gene expression and Feature Barcode data -from the same cells. The latter can be used to quantify cell surface proteins, antigen binding, CRISPR -sgRNA, and other cellular features. You can see a list of publications that use 10x VDJ data -here. - -

- -
- -

-

The enclone software

- -

enclone is beta software†† released under this license. -Binary executables for Linux and Mac can be directly downloaded from this page, as can sample 10x -Genomics datasets. -enclone can be run on a laptop, desktop, or server. -

-

-To use enclone, basic knowledge of the command line is necessary. The command line is easy to -learn, and a -colleague may be able to help you if you are unfamiliar. Additional skills, like programming, -are not required. The command line can be dynamically changed to select specific clonotypes and -fields you wish to -see. enclone is fast, typically responding in seconds (if run on a single dataset). -

- -

-enclone, in addition to Cell Ranger and -Loupe -(into which the core algorithm of enclone will be integrated at a later point in time), -supports the -analysis of V(D)J and other data from the -Chromium Single Cell Immune Profiling -solution. -

- -

- ††beta software means that it is still being actively developed, with -features being added or modified, -which on rare occasions may break syntax that previously worked. See -this page for the history of changes. -

- -
- -

-

Installing enclone

- -

-You can run enclone directly from a Linux or Mac terminal window; see -here for Windows options; -see here if you have a problem. -

- - - - - - - -
Type this  -
curl -sSf -L bit.ly/enclone_install | sh -s SIZE
-
- where SIZE is -small, medium or large, according to: -
- -
- - - - - - - - - - - - - - - - - - - - - -
-

- small -

-
-

- load small dataset collection (one dataset, 123085) -

-
-

- 30 MB -

-
-

- do this if your internet connection is very slow -

-
-

- medium -

-
-

- load medium dataset collection -

-
-

- 350 MB -

-
-

- do this for a moderate number of datasets (~15) -

-
-

- large -

-
-

- load large dataset collection -

-
-

- 2600 MB -

-
-

- do this for a large number of datasets (~120) -

-
- -

The command does three things:

-
    -
  1. Puts the enclone executable (for Linux or Mac as appropriate) in ~/bin.
  2. -
  3. If needed, adds a line to your bash initialization file so that ~/bin is included. -
  4. -
  5. Puts enclone datasets in ~/enclone.
  6. -
-Additional details can be found here. - -Restart your terminal session; you can now run enclone.

- -

To update, type the same command! - -Only required files will be downloaded. -See history for what has changed.

- -
- -

-

Running enclone

- -

Running enclone can be as simple as typing e.g.

-
enclone BCR=/home/my_name/experiment_123
-
-

where the path is where your Cell Ranger outputs live, but there are many options to learn -about. For example, if you want to combine many datasets, you can do that, but you probably -need to provide a metadata file that describes the datasets. You can find most of the enclone -documentation within its online menus. To get started you should:

-
    -
  1. -

    Type enclone help, to make sure your terminal window works for -enclone.

    -
  2. -
  3. -

    Type enclone to get to the main enclone help menu.

    -
  4. -
-
- -

-

Help

- -

enclone help is split between material on this site and pages that are invocable from the -enclone command line. All the latter pages are also listed here:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
commandwhat it provides
enclone helphelp to test for correct setup
enclonewhat you see here; guide to all the docs
enclone help quickquick guide to getting started
enclone help howoutline of how enclone works, see also heuristics page, below
enclone help commandinfo about enclone command line argument processing
enclone help glossaryglossary of terms used by enclone, and conventions
enclone help example1explanation of an example
enclone help example2example showing gene expression and feature barcodes
enclone help inputhow to provide input to enclone
enclone help input_techhow to provide input to enclone (technical notes)
enclone help parseableparseable output
enclone help filterclonotype filtering options, scanning for feature enrichment
enclone help specialspecial filtering options
enclone help lvarslead column options
enclone help cvarsper chain column options
enclone help aminoper chain column options for amino acids
enclone help displayother clonotype display options
enclone help indelsinsertion and deletion handling
enclone help colorhow enclone uses color, and related things
enclone help faqfrequently asked questions
enclone help developera few things for developers
enclone help allconcatenation of all the help pages (USE THIS TO SEARCH ALL THE HELP PAGES)
- -
- -For completeness, here are all the other pages on this site: - -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
pageaudience
history of changeseveryone
detecting illusory clonotypeseveryone
how to compile enclonepeople who want to contribute code
licenseeveryone
Windowspeople using Windows computers
notes on heuristics - (only a little bit now) - people who want to know details on how the algorithm works
honeycomb plotseveryone
making phylogenetic treeseveryone
installation troubleshooting - if you have trouble installing
installation details - if you're curious about what the install command does
iNKT and MAIT cells - people interested in iNKT and MAIT cells
- -
- -
- -

-

Understanding enclone output

- -

The example below shows how enclone displays clonotypes. Understanding this display is -important for using enclone. Consult the available enclone documentation and use -the sample datasets to understand enclone features and output.

- -enclone annotated example - -Notice the compression in two directions: -
    -
  1. Vertically to group cells into a single line if they have identical V(D)J - transcripts - (instead of showing one line for every cell).
  -
  2. Horizontally, a flexible concept, to show by default all positions exhibiting a - difference from the reference and all positions in the CDR3 (instead of showing - all transcript positions, only "interesting" positions are shown).
  -
- -

The same exact output would be obtained by typing:

-
enclone BCR=123085 CDR3=CQQRSNWPPSITF
-
-

The directory 123085 is in the directory ~/enclone/datasets and -contains some files from a Cell Ranger run, obtained from a human ovarian cancer sample.

- -

-How does enclone find my data? -It uses a search path called PRE that is preset to -~/enclone/datasets,~/enclone/datasets2, and which can be set to any value, either -by setting PRE=... on the command line, or by setting the environment variable -ENCLONE_PRE. To find your data, enclone prepends PRE to the value of -BCR or TCR given on the command line. -For example, all of the following argument combinations do the same thing: -
1. BCR=123085 (using the default value of PRE) -
2. PRE=~/enclone/datasets BCR=123085 -
3. PRE=~/enclone BCR=datasets/123085 -
4. BCR=~/enclone/datasets/123085. -
There is also an argument META that is convenient for specifying multiple -datasets. See here for how. -
-
-Please note that while paths can have non-Latin characters, best practice is to not have -blanks, tabs, etc. in path names. enclone can be made to work with such characters by double -quoting the paths, but it makes things harder, and other programs you might use may break. -

- -

The argument CDR3=CQQRSNWPPSITF causes enclone to display only clonotypes in which -the given CDR3 sequence occurs. Many other filters are provided. In the absence of filters, all -clonotypes are shown. Clonotypes are shown from largest to smallest, and the output is -automatically paged, so you can scroll through it.

-

By default, enclone prints clonotypes in this human-readable form. You can also instruct -enclone -to print clonotypes in machine-readable forms that are suitable for input to other programs.

-
- -

-

Combining multiomic data

- -

Gene expression and Feature Barcode data can be displayed simultaneously alongside VDJ data. For -example, here we add columns for the same clonotype, showing the median number of UMIs detected -for all genes, a particular gene, and a particular antibody:

- -
[1] GROUP = 1 CLONOTYPES = 54 CELLS
-
-[1.1] CLONOTYPE = 54 CELLS
-┌───────────────────────────────────┬──────────────────────────────────────┬───────────────────────────────┐
-│                                   │  CHAIN 1                             │  CHAIN 2                      │
-│                                   │  144.1.2|IGHV3-49 ◆ 53|IGHJ3         │  282|IGKV3-11 ◆ 218|IGKJ5     │
-│                                   ├──────────────────────────────────────┼───────────────────────────────┤
-│                                   │   1 11111111111111111 1              │    1111111111111              │
-│                                   │  51 11112222222222333 4              │  6 0001111111111              │
-│                                   │  53 67890123456789012 1              │  4 7890123456789              │
-│                                   │     ═══════CDR3══════                │    ═════CDR3════              │
-│reference                          │  VV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W SR CQQ◦◦◦◦◦◦◦◦◦◦              │
-│donor ref                          │  FV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W SR CQQ◦◦◦◦◦◦◦◦◦◦              │
-├───────────────────────────────────┼──────────────────────────────────────┼───────────────────────────────┤
-│#   n    gex  IGHV3-49_g  CD19_ab    .x ................. x    u  const    x .x...........      u  const│
-│1  48   9743         301       72  │  FV CTRDRDLRGATDAFDIW S  101  IGHG1  │  R CQQRSNWPPSITF   3454  IGKC │
-│2   3   9224         544       53  │  FM CTRDRDLRGATDAFDIW S   89  IGHG1  │  R CHQRSNWPPSITF   7663  IGKC │
-│3   1  15850        1530       78  │  FV CTRDRDLRGATDAFDIW S  172  IGHG1  │  S CQQRSNWPPSITF  12603  IGKC │
-│4   1   1519          11       21  │  FV CTRDRDLRGATDAFDIW S   33  IGHG1  │  R CQQRSNWPPSITF    116  IGKC │
-│5   1   5347         142       23  │                                      │  R CQQRSNWPPSITF   3471  IGKC │
-└───────────────────────────────────┴──────────────────────────────────────┴───────────────────────────────┘
-
- -

To obtain this, we added the extra arguments -GEX=123749 LVARSP=gex,IGHV3-49_g,CD19_ab -to the previous command. The GEX part points to the directory containing gene -expression and feature barcode data. The LVARSP part defines the additional columns -to be displayed.

-

Other types of data can be brought in via Feature Barcoding. For example, the response to -multiple antigens can be measured using -LIBRA-seq -and these data can be displayed as additional columns.

-
- -

-

Visualizing multiple clonotypes

- - - - - - - - -
- honeycomb plot - -

After selecting multiple clonotypes in enclone, you can display them using - a "honeycomb" plot.

-

In this instance, pre- and post-vaccination samples were collected from four individuals, - many datasets were generated for each sample, and these were combined in a single call - to enclone. Clonotypes containing at least ten cells are shown. - The plot was generated by adding

-
MIN_CELLS=10 PLOT="clono.svg,pre->blue,post->red"
-LEGEND=blue,"pre-vaccination cell",
-       red,"post-vaccination cell"
-

to the enclone command line, yielding the image shown here as the file - clono.svg.

-

For more information about honeycomb plots, - see here.

-
- -
-
- -

-

The power of enclone

- -

There are many ways to use 10x Genomics data to study immunobiology.

-

Response to an antigen or vaccine: enclone is a great tool for studying responses to a -vaccine. For example, in the previous section, the red clonotypes may represent responses to -antigens in the vaccine.

-

Vaccine and therapeutic antibody development: For certain infectious agents, e.g. COVID-19, -a vaccine does not currently exist; different approaches may be employed in pursuit of this goal. One such -approach is to identify patient and survivor B cell clonotypes that expand in response to the infectious -disease. These define antibodies that can be used to design passive or active vaccines.

-

Additional power is added by mapping antigen specificity to multiple antigens directly via Feature -Barcoding (LIBRA-seq). These data -are easy to display in enclone. Candidates can be selected directly for vaccine or therapeutic -development by picking large clonotypes with high antigen counts and single or multiple antigen specificities.

-

We are actively working on further functionality that will make this process even more effective.

-

See this vignette to learn how to generate phylogenetic trees using -enclone. -

Another example use of enclone is the detection of -illusory clonotypes.

- -
- -

-

Questions

- -

-Please contact us with your questions and comments! We look forward to hearing your feedback and ideas to -further evolve enclone. -

- -

-Our address is enclone@10xgenomics.com. -

- -

-To send us enclone output, please simply cut and paste text, rather than send a -screenshot, except when necessary. Please send both the command you used and the output. -

- -

-enclone is provided as a tool for use by the community. -enclone is beta software and thus a work in progress. We are actively making many changes and may -be unable to respond promptly to your particular request. -

- -
- -

-

Where am I?

- - -

bit.ly/enclone

- - - diff --git a/install.sh b/install.sh deleted file mode 100755 index fe02c3ab5..000000000 --- a/install.sh +++ /dev/null @@ -1,340 +0,0 @@ -#!/bin/bash - -# This is the installation and update script for enclone. For instructions on how to -# run it, please see bit.ly/enclone. A few more details are here. - -# ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -# This reuses code from the installation script for the rust language. -# [TO ACKNOWLEDGE ELSEWHERE.] -# -# This script expects a single argument, which is small, medium or large, depending on how much -# data is to be downloaded. -# -# If you run it a second time and forget the size, it will use the same size as last time, -# and state that. -# -# Note that version14 is hardcoded! - -# ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -# Get command line arguments. The second argument is for testing. - -size=$1 -if ! [ -z "$2" ]; then - HOME=$2 -fi - -# ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -main() { - - # ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - # 1. Set up; test for existence of needed system commands. - # - # We require only one of curl or wget. The reason for not requiring curl is that at - # the time of writing this script, the standard Ubuntu install did not include curl, - # and so it is possible that someone would not have curl. - # - # We do not use svn, because it is no longer available by default on MacOS. - - need_cmd date - STARTTIME=$(date +%s) - # force failure if error - set -e - need_cmd uname - need_cmd mkdir - need_cmd chmod - need_cmd awk - need_cmd zcat - need_cmd grep - local _have_curl - _have_curl=false - if check_cmd curl; then - _have_curl=true - fi - if ! $_have_curl && ! 
check_cmd wget; then - printf "\nenclone installation failed because neither the command curl nor the\n" - printf "command wget could be found. This is strange and unexpected.\n" - printf "If you're stuck please ask for help by emailing enclone@10xgenomics.com.\n\n" - exit 1 - fi - - # ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - # 2. Determine if this is a Linux or Mac box; fail if it is not one of these two. - - local _ostype - _ostype="$(uname -s)" - if [ "$_ostype" != Linux ] && [ "$_ostype" != Darwin ]; then - echo - echo "enclone install script fails because operating system type ${_ostype}" \ - "is unknown." - echo "If you're stuck please ask for help by emailing enclone@10xgenomics.com." - echo - exit 1 - fi - - # ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - # 3. Get requested size. - - if [ "$size" != small ] && [ "$size" != medium ] && [ "$size" != large ]; then - printf "\nTo install or update enclone, please supply the single argument SIZE to the\n" - printf "curl command shown on bit.ly/enclone. The argument SIZE can be small, medium " - printf "or large.\n" - echo "If you're stuck please ask for help by emailing enclone@10xgenomics.com." - echo - exit 1 - fi - - # ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - # 4. Determine if datasets are current. - # - # Because there has been only one release of the large dataset collection, if it - # was downloaded, then it is current. 
- - local _datasets_small_current _datasets_medium_current _datasets_large_current - local _datasets_small_checksum_master _datasets_small_checksum_local - local _datasets_medium_checksum_master _datasets_medium_checksum_local - _datasets_small_current=false - _datasets_medium_current=false - _datasets_large_current=false - raw_repo=https://raw.githubusercontent.com/10XGenomics/enclone - if [ "$size" = small ]; then - if $_have_curl; then - _datasets_small_checksum_master=$(curl -s $raw_repo/master/datasets_small_checksum) - else - _datasets_small_checksum_master=$(wget -q $raw_repo/master/datasets_small_checksum -O -) - if ! [ "$?" -eq "0" ]; then - printf "\nfailed: wget -q $raw_repo/master/datasets_small_checksum\n" - printf "This is strange and unexpected.\n" - echo "If you're stuck please ask for help by emailing enclone@10xgenomics.com." - echo - exit 1 - fi - fi - fi - if test -f "$HOME/enclone/datasets_small_checksum"; then - _datasets_small_checksum_local=$(cat $HOME/enclone/datasets_small_checksum) - if [ "$_datasets_small_checksum_local" = "$_datasets_small_checksum_master" ]; then - _datasets_small_current=true - fi - fi - if [ "$size" = medium ] || [ "$size" = large ]; then - raw_master=$raw_repo/master - if $_have_curl; then - _datasets_medium_checksum_master=$(curl -s $raw_master/datasets_medium_checksum) - else - _datasets_medium_checksum_master=$(wget -q $raw_master/datasets_medium_checksum -O -) - if ! [ "$?" -eq "0" ]; then - printf "\nfailed: wget -q $raw_repo/master/datasets_medium_checksum\n" - printf "This is strange and unexpected.\n" - echo "If you're stuck please ask for help by emailing enclone@10xgenomics.com." 
- echo - exit 1 - fi - fi - fi - if test -f "$HOME/enclone/datasets_medium_checksum"; then - _datasets_medium_checksum_local=$(cat $HOME/enclone/datasets_medium_checksum) - if [ "$_datasets_medium_checksum_local" = "$_datasets_medium_checksum_master" ]; then - _datasets_medium_current=true - fi - fi - if test -f "$HOME/enclone/datasets2/download_complete"; then - _datasets_large_current=true - fi - - # ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - # 5. Determine if the local enclone executable is current. - # - # This is quite hideous. There must be a better way. - - - local _current_version _enclone_is_current _is_update - repo=https://github.com/10XGenomics/enclone - if $_have_curl; then - _current_version=$(curl -sI $repo/releases/latest/download/enclone_linux | \ - grep "^location:" | tr '/' ' ' | cut -d ' ' -f9) - else - _current_version=$(wget --server-response --max-redirect=0 \ - $repo/releases/latest/download/enclone_linux |& \ - grep " location:" | tr '/' ' ' | cut -d ' ' -f11) - fi - _enclone_is_current=false - if test -f "$HOME/bin/enclone"; then - _is_update=true - local _local_version - if test -f "$HOME/enclone/version"; then - _local_version=$(cat $HOME/enclone/version) - if [ "$_local_version" == "$_current_version" ]; then - printf "\nThe local version of enclone is current so not downloading executable.\n" - _enclone_is_current=true - fi - fi - fi - - # ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - # 6. Make directory ~/bin if needed and download the appropriate enclone executable into it. 
- - cd $HOME - mkdir -p bin - mkdir -p enclone - if [ "$_enclone_is_current" = false ]; then - cd bin - if [ "$_ostype" = Linux ]; then - printf "\nDownloading the Linux version of the latest enclone executable.\n\n" - if $_have_curl; then - curl -s -L $repo/releases/latest/download/enclone_linux --output enclone - else - wget -q $repo/releases/latest/download/enclone_linux -O enclone - fi - fi - if [ "$_ostype" = Darwin ]; then - printf "\nDownloading the Mac version of the latest enclone executable.\n\n" - if $_have_curl; then - curl -s -L $repo/releases/latest/download/enclone_macos --output enclone - else - wget -q $repo/releases/latest/download/enclone_macos -O enclone - fi - fi - echo "Done downloading the enclone executable." - # set execute permission on the enclone executable - chmod +x enclone - cd .. - # record local version - echo "$_current_version" > enclone/version - fi - - # ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - # 7. Add ~/bin to path if needed. - # - # This does nothing if you already have ~/bin in your path. - # - # This is complicated because some versions of Linux use the file .bash_profile, - # and some use .profile. - # If the instructions here don't work, this post may be helpful: - # https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path. - - if [[ ":$PATH:" != *":$HOME/bin:"* ]]; then - test -r .bash_profile && echo 'PATH=~/bin:$PATH' >> .bash_profile || \ - echo 'PATH=~/bin:$PATH' >> .profile - fi - if test -f .zshrc; then - echo -n 'export PATH=~/bin:$PATH' >> .zshrc - fi - - # ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - - # 8. Download data. - # - # For the medium case, this is not optimal, because if anything changed, - # all the files get re-downloaded. 
- - raw_data_repo=https://raw.githubusercontent.com/10XGenomics/enclone-data - if [ "$size" = small ]; then - if [ "$_datasets_small_current" = false ]; then - printf "\nDownloading small version of datasets.\n" - printf "This seems to take roughly five seconds, even over home wireless,\n" - printf "however, you might have a slower connection.\n\n" - mkdir -p enclone/datasets - rm -rf enclone/datasets/123085 - cd enclone/datasets - mkdir -p 123085/outs - cd 123085/outs - json="all_contig_annotations.json.lz4" - url="$raw_data_repo/big_inputs/version14/123085/outs/$json" - if $_have_curl; then - curl -s $url -O - else - wget -q $url - fi - cd ../../../.. - echo "$_datasets_small_checksum_master" > enclone/datasets_small_checksum - printf "Done with that download.\n" - else - printf "\nSmall version of datasets already current so not downloading.\n" - fi - fi - if [ "$size" = medium ] || [ "$size" = large ]; then - if [ "$_datasets_medium_current" = false ]; then - echo - if [ "$size" = medium ]; then - echo "Downloading medium version of datasets." - fi - if [ "$size" = large ]; then - echo "Downloading medium version of datasets (as part of large)." - fi - printf "This seems to take roughly thirty seconds, even over home wireless,\n" - printf "however, you might have a slower connection.\n\n" - rm -rf enclone/datasets enclone/version14 - cd enclone - git clone --depth=1 https://github.com/10XGenomics/enclone-data.git - mv enclone-data/big_inputs/version14 datasets - rm -rf enclone-data - cd .. - echo "$_datasets_medium_checksum_master" > enclone/datasets_medium_checksum - printf "Done with that download.\n" - # Remove a funny-looking directory, which is used by enclone only to test if - # weird unicode characters in a path will break it. 
- rm -rf enclone/datasets/█≈ΠΠΠ≈█ - else - printf "\nMedium version of datasets already current so not downloading them.\n" - fi - fi - if [ "$size" = large ]; then - if [ "$_datasets_large_current" = false ]; then - printf "\nDownloading large version of datasets.\n" - printf "This seems to take roughly one to three minutes, even over home wireless,\n" - printf "however, you might have a slower connection.\n\n" - cd enclone - rm -rf datasets2 - aws=https://s3-us-west-2.amazonaws.com - if $_have_curl; then - curl -s $aws/10x.files/supp/cell-vdj/enclone_data_1.0.tar.gz -O - else - wget -q $aws/10x.files/supp/cell-vdj/enclone_data_1.0.tar.gz - fi - cat enclone_data_1.0.tar.gz | zcat | tar xf - - rm enclone_data_1.0.tar.gz - mv enclone_data_1.0 datasets2 - cd .. - touch enclone/datasets2/download_complete - printf "Done with that download.\n" - else - printf "\nLarge version of datasets already current so not downloading them.\n" - fi - fi - ENDTIME=$(date +%s) - echo - if [ "$is_update" = true ]; then - echo "enclone update took $(($ENDTIME - $STARTTIME)) seconds." - else - echo "enclone installation took $(($ENDTIME - $STARTTIME)) seconds." - fi - printf "\nAll done, have a lovely day!\n\n" - -} - -# ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - -need_cmd() { - if ! check_cmd "$1"; then - printf "\nenclone installation faileds because the command $1 was not found.\n" - printf "If you're stuck please ask for help by emailing enclone@10xgenomics.com.\n" - printf "It is possible that we can rewrite the script to not use $1.\n\n" - exit 1 - fi -} - -check_cmd() { - command -v "$1" > /dev/null 2>&1 -} - -main diff --git a/local_view b/local_view deleted file mode 100755 index ef6ceb189..000000000 --- a/local_view +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/csh - -# Copy website files to ~/public_html. -# -# This wipes out a directory, so please read before using! 
- -rm -rf ~/public_html/enclone - -mkdir -p ~/public_html/enclone/src - -cp -p index.html ~/public_html/enclone -cp -p README.md ~/public_html/enclone -cp -p LICENSE.txt ~/public_html/enclone -cp -p fonts/google_font_test.html ~/public_html/enclone -cp -pr {pages,fonts,img} ~/public_html/enclone diff --git a/master.toml b/master.toml deleted file mode 100755 index e461ea64d..000000000 --- a/master.toml +++ /dev/null @@ -1,52 +0,0 @@ -# This file contains master definitions for Cargo.toml files in this workspace. -# -# If you want to change the version of a crate that's used, edit this file, and not one -# of the Cargo.toml files in the crates within this repo. Then use sync_to_master (which -# is in the enclone crate) to propagate the change. When you run "cargo t", this is enforced. - -amino = "0.1.1" -ansi_escape = "0.1.0" -assert_cmd = "0.12.0" -attohttpc = { version = "0.12", default-features = false, features = ["compress", "tls-rustls"] } -bio = "0.31.0" -byteorder = "1.3.2" -bytes = "0.5.5" -chrono = "0.4.11" -debruijn = "0.3.2" -dirs = "2.0.2" -enclone_core = { path = "../enclone_core" } -enclone_help = { path = "../enclone_help" } -enclone = { path = "../enclone" } -enclone_print = { path = "../enclone_print" } -enclone_proto = { path = "../enclone_proto" } -enclone_tail = { path = "../enclone_tail" } -equiv = "0.1.1" -failure = "0.1.5" -file-lock = "1.1.20" -flate2 = "1.0.16" -git = "https://github.com/pmarks/hdf5-rs.git" -graph_simple = "0.1.1" -io_utils = "0.2" -itertools = "0.9.0" -mirror_sparse_matrix = "0.1.4" -ndarray = "0.13" -pager = "0.15.0" -perf_stats = "0.1.2" -permutation = "0.2.5" -petgraph = "0.4.13" -pretty_trace = "0.3.2" -prost = "0.6.1" -prost-build = "0.6.1" -rayon = "1.0.2" -regex = "1.3.1" -serde = "1.0.90" -serde_derive = "1.0.102" -sha2 = "0.9.1" -stats_utils = "0.1.1" -stirling_numbers = "0.1.2" -string_utils = "0.1.1" -tables = "0.1.2" -tar = "0.4.29" -tilde-expand = "0.1.1" -vdj_ann = { git = 
"https://github.com/10XGenomics/rust-toolbox.git", rev="183e2d657e6436494072a32cf8da4f7b753d1e69" } -vector_utils = "0.1.3" diff --git a/pages/README b/pages/README deleted file mode 100644 index 0db03a541..000000000 --- a/pages/README +++ /dev/null @@ -1,3 +0,0 @@ -NOTES -1. Files here should end with .html.src, not .html. -2. Use #enclone for most instances of enclone. But not in enclone commands. diff --git a/pages/auto/DO_NOT_MANUALLY_EDIT_THESE_FILES b/pages/auto/DO_NOT_MANUALLY_EDIT_THESE_FILES deleted file mode 100644 index e69de29bb..000000000 diff --git a/pages/auto/clonotype_with_gex.html b/pages/auto/clonotype_with_gex.html deleted file mode 100644 index 5aa826536..000000000 --- a/pages/auto/clonotype_with_gex.html +++ /dev/null @@ -1,59 +0,0 @@ - - - - - - -enclone example with gex - - - -
[1] GROUP = 1 CLONOTYPES = 54 CELLS
-
-[1.1] CLONOTYPE = 54 CELLS
-┌───────────────────────────────────┬──────────────────────────────────────┬───────────────────────────────┐
-│                                   │  CHAIN 1                             │  CHAIN 2                      │
-│                                   │  144.1.2|IGHV3-49 ◆ 53|IGHJ3         │  282|IGKV3-11 ◆ 218|IGKJ5     │
-│                                   ├──────────────────────────────────────┼───────────────────────────────┤
-│                                   │   1 11111111111111111 1              │    1111111111111              │
-│                                   │  51 11112222222222333 4              │  6 0001111111111              │
-│                                   │  53 67890123456789012 1              │  4 7890123456789              │
-│                                   │     ═══════CDR3══════                │    ═════CDR3════              │
-│reference                          │  VV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W SR CQQ◦◦◦◦◦◦◦◦◦◦              │
-│donor ref                          │  FV ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W SR CQQ◦◦◦◦◦◦◦◦◦◦              │
-├───────────────────────────────────┼──────────────────────────────────────┼───────────────────────────────┤
-│#   n    gex  IGHV3-49_g  CD19_ab    .x ................. x    u  const    x .x...........      u  const│
-│1  48   9743         301       72  │  FV CTRDRDLRGATDAFDIW S  101  IGHG1  │  R CQQRSNWPPSITF   3454  IGKC │
-│2   3   9224         544       53  │  FM CTRDRDLRGATDAFDIW S   89  IGHG1  │  R CHQRSNWPPSITF   7663  IGKC │
-│3   1  15850        1530       78  │  FV CTRDRDLRGATDAFDIW S  172  IGHG1  │  S CQQRSNWPPSITF  12603  IGKC │
-│4   1   1519          11       21  │  FV CTRDRDLRGATDAFDIW S   33  IGHG1  │  R CQQRSNWPPSITF    116  IGKC │
-│5   1   5347         142       23  │                                      │  R CQQRSNWPPSITF   3471  IGKC │
-└───────────────────────────────────┴──────────────────────────────────────┴───────────────────────────────┘
-
- - - diff --git a/pages/auto/compile.html b/pages/auto/compile.html deleted file mode 100644 index df99bf19a..000000000 --- a/pages/auto/compile.html +++ /dev/null @@ -1,105 +0,0 @@ - - - - - - -enclone compilation - - - - - - - - - - - -
-enclone banner - -

Compilation

-

You should only have to do this if you want to -experimentally modify the enclone software.

-
    -
  1. -

    For now, you can run on an x86-64 Linux server or a Mac.

    -
  2. -
  3. -

    You need to have the Rust compiler installed. Detailed instructions on how to do this -can be found here. You can -confirm that you -have successfully installed the Rust compiler by running rustc --version.

    -
  4. - -
  5. -

    Clone the enclone repository and build enclone using Cargo (which -comes with Rust) by running:

    -
    git clone --depth=1 git@github.com:10XGenomics/enclone.git
    -cd enclone
    -cargo b
    - -(The --depth=1 part avoids downloading history, which is faster, but if you want -the history, exclude that argument.) - -

    and then add the full path of enclone/target/debug to your PATH. - -

    -Compilation takes 8-10 minutes on a 2017 MacBook Pro with a dual-core i7 and 5-7 minutes on a -similar Linux machine. -

    - -
  6. - -
- -
    -
  1. -

    Copy the directory enclone/test/inputs to somewhere you can point to, or just leave it -where it is. These are test data you can play with; you can also supply your own output -from a Cell Ranger immune profiling run (so long as there is an -all_contig_annotations.json output). -When you read the documentation at step 6, you'll get to a place where you put -PRE=enclone/test/inputs -or instead with the path where your copied data reside. But you need to supply -PRE with a path that makes sense relative to your working directory.

    -
  2. -
  3. -

    Type enclone help, and read the terminal setup instructions there.

    -
  4. -
  5. -

    Type enclone and study the documentation shown there.

    -
  6. -
  7. -

    If you want to run the built-in tests, type

    - -
    cargo t
    - -
  8. -
- -

If you have problems, please write to us at -enclone@10xgenomics.com.

- - - diff --git a/pages/auto/dang_i_cannot_install.html b/pages/auto/dang_i_cannot_install.html deleted file mode 100644 index e034c84e9..000000000 --- a/pages/auto/dang_i_cannot_install.html +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - -enclone installation problems - - - - - - - - - - - -
-enclone banner - -

enclone installation troubleshooting

- -

The purpose of this page is to provide guidance in case the installation script fails.

- -

We know of one case where the script might fail.

- -
    -
  1. The command curl is not installed on your computer. In that case, -you should have the command wget, and can use that instead. To do so, run the -following command: -
    wget -nv bit.ly/enclone_install -O - | sh -s SIZE
    -where SIZE -is as described on the main enclone page. -
  2. -
- -

If you have a different problem, please let us know by writing to us at -enclone@10xgenomics.com.

- - - diff --git a/pages/auto/expanded.html b/pages/auto/expanded.html deleted file mode 100644 index 1c408b105..000000000 --- a/pages/auto/expanded.html +++ /dev/null @@ -1,314 +0,0 @@ - - - - - - -illusory clonotype expansions - - - - - - - - - - - -
-enclone banner - -

Detecting illusory clonotype expansions

- -

-Please read this! This page was written before we added two major filtering steps, based on -UMI counts, which completely annihilate the particular illusory expansion described here. The -reason we left the page here is that the approach used to analyze the expansion may have -utility for other datasets. To reproduce the actual results shown here, you will need to add -to each enclone command the arguments NUMI and NUMI_RATIO that turn off -the added filters.

- -

This page explains the origin of certain illusory clonotype expansions, and exhibits one example -of how to detect them.

-

These expansions are known to occur occasionally (see below for one possible mechanism), and -we hypothesize that they arise when an individual cell disintegrates or leaks. This leaves -fragments that seed multiple GEM partitions, producing a clonotype that appears larger than -its true size.

-

We believe that events of this type usually originate from plasma or plasmablast B cells. We -thus focus on B cells in this vignette. However with obvious changes, the same methods also apply -to T cells.

-

Disintegration might occur during or after preparation of the sample. One -way to document such an event would be to create two libraries from a single tube of cells. If -the clonotype is large and appears in only one of two libraries, one could be reasonably certain -that a disintegration event occurred during or after cells were drawn from the tube. This method -could not be used to detect disintegration events occurring prior to that point.

-

Here we show that with the aid of gene expression data, illusory clonotype expansions can -generally be detected, even if only a single library was made. The easier case would be a sample -consisting of pure B cells. The case where one has a mix of cell types is more challenging because -a GEM can contain both a B cell fragment and a cell of a different type, and thus appear to have -a normal level of gene expression, and no evidence of mixing from the VDJ assay either. We -therefore focus on the case of samples that contain a mixture of cell types. - -
-cell bits -
- -To that end, we show an example, using two libraries obtained from a single tube of PBMC cells, -obtained from a healthy human donor. The two libraries contain 7287 and 9559 cells, respectively, -of which ~12% are B cells. All the data shown here are part of the large dataset -package described in the -download section of the main enclone page.

- -
enclone BCR=128037,128040 NCROSS
-
-

The NCROSS option instructs enclone to not filter out expanded clonotypes -that appear in only one dataset arising from the same sample (and which based on their sizes are -highly improbable). Normally one would want this filtering, but these clonotypes are exactly what -we wish to see now! Here is the top clonotype:

- -
[1.1] CLONOTYPE = 122 CELLS
-┌──────────────────┬───────────────────────────────────────┬─────────────────────────────────────┐
-│                  │  CHAIN 1                              │  CHAIN 2                            │
-│                  │  146.1.1|IGHV3-53 ◆ 55|IGHJ4          │  299|IGKV4-1 ◆ 217|IGKJ4            │
-│                  ├───────────────────────────────────────┼─────────────────────────────────────┤
-│                  │              1111111111111            │              11111111111 1          │
-│                  │  12257777789 1111111222222            │  11345778899 11111112222 2          │
-│                  │  35831234686 3456789012345            │  78291346825 34567890123 7          │
-│                  │              ═════CDR3════            │              ════CDR3═══            │
-│reference         │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦            │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T          │
-│donor ref         │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦            │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T          │
-├──────────────────┼───────────────────────────────────────┼─────────────────────────────────────┤
-│#  datasets    n    ........... .............  u  const    ........... ........... .  u  const│
-│1  128040    114  │  LSNNGDGNYFV CARGGTTTYFISW  6  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T  5  IGKC │
-│2  128040      6  │  LSNNGDGNYFV CARGGTTTYFISW  4  IGHA1  │                                     │
-│3  128040      2  │                                       │  TNAFSLYRTSE CQQYCDTPLTF T  6  IGKC │
-└──────────────────┴───────────────────────────────────────┴─────────────────────────────────────┘
-
- -

If we do not use the NCROSS option, and search for the clonotype using the heavy -chain CDR3 sequence, we see just one cell (the others having been filtered out):

-
enclone BCR=128037,128040 CDR3=CARGGTTTYFISW
-
- -
[1.1] CLONOTYPE = 1 CELLS
-┌────────────────┬──────────────────────────────────────────┬────────────────────────────────────────┐
-│                │  CHAIN 1                                 │  CHAIN 2                               │
-│                │  146.1.1|IGHV3-53 ◆ 55|IGHJ4             │  299|IGKV4-1 ◆ 217|IGKJ4               │
-│                ├──────────────────────────────────────────┼────────────────────────────────────────┤
-│                │              1111111111111               │              11111111111 1             │
-│                │  12257777789 1111111222222               │  11345778899 11111112222 2             │
-│                │  35831234686 3456789012345               │  78291346825 34567890123 7             │
-│                │              ═════CDR3════               │              ════CDR3═══               │
-│reference       │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
-│donor ref       │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
-├────────────────┼──────────────────────────────────────────┼────────────────────────────────────────┤
-│#  datasets  n    ........... .............     u  const    ........... ........... .     u  const│
-│1  128040    1  │  LSNNGDGNYFV CARGGTTTYFISW  1725  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T  6300  IGKC │
-└────────────────┴──────────────────────────────────────────┴────────────────────────────────────────┘
-
- -

This is a good answer, but only works if libraries were made from two separate draws of cells. -Now suppose that both a VDJ and a GEX library have been made, from a single draw of cells. (And -we henceforth ignore the data made from the other draw of cells, useful though it is.)

-
enclone BCR=128040 GEX=127801 CDR3=CARGGTTTYFISW
-
- -
[1.1] CLONOTYPE = 44 CELLS
-┌───────────┬───────────────────────────────────────┬─────────────────────────────────────┐
-│           │  CHAIN 1                              │  CHAIN 2                            │
-│           │  146.1.2|IGHV3-53 ◆ 55|IGHJ4          │  299|IGKV4-1 ◆ 217|IGKJ4            │
-│           ├───────────────────────────────────────┼─────────────────────────────────────┤
-│           │              1111111111111            │              11111111111 1          │
-│           │  12257777789 1111111222222            │  11345778899 11111112222 2          │
-│           │  35831234686 3456789012345            │  78291346825 34567890123 7          │
-│           │              ═════CDR3════            │              ════CDR3═══            │
-│reference  │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦            │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T          │
-│donor ref  │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦            │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T          │
-├───────────┼───────────────────────────────────────┼─────────────────────────────────────┤
-│#   n        ........... .............  u  const    ........... ........... .  u  const│
-│1  38      │  LSNNGDGNYFV CARGGTTTYFISW  4  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T  3  IGKC │
-│2   5      │  LSNNGDGNYFV CARGGTTTYFISW  4  IGHA1  │                                     │
-│3   1      │                                       │  TNAFSLYRTSE CQQYCDTPLTF T  6  IGKC │
-└───────────┴───────────────────────────────────────┴─────────────────────────────────────┘
-
- -

Now we see fewer cells. This is because the default behavior of enclone is to filter out -cells called by the VDJ pipeline that are not also called by the GEX pipeline. Most of these -would have consisted of "nearly empty drops", GEMs containing just a B cell fragment.

-

Now we add the option PER_CELL, causing data for each cell to be displayed, and we -also add two -fields to the display. One is gex, the normalized count of gene expression UMIs, -and the other is a field cred (short for "credibility"), that is more complicated. We -will also hide the onesie (single chain) cells.

-
enclone BCR=128040 GEX=127801 CDR3=CARGGTTTYFISW PER_CELL LVARSP=gex,cred CHAINS_EXACT=2
-
- -
[1.1] CLONOTYPE = 38 CELLS
-┌────────────────────────────────────────┬──────────────────────────────────────────┬────────────────────────────────────────┐
-│                                        │  CHAIN 1                                 │  CHAIN 2                               │
-│                                        │  146.1.2|IGHV3-53 ◆ 55|IGHJ4             │  299|IGKV4-1 ◆ 217|IGKJ4               │
-│                                        ├──────────────────────────────────────────┼────────────────────────────────────────┤
-│                                        │              1111111111111               │              11111111111 1             │
-│                                        │  12257777789 1111111222222               │  11345778899 11111112222 2             │
-│                                        │  35831234686 3456789012345               │  78291346825 34567890123 7             │
-│                                        │              ═════CDR3════               │              ════CDR3═══               │
-│reference                               │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
-│donor ref                               │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
-├────────────────────────────────────────┼──────────────────────────────────────────┼────────────────────────────────────────┤
-│#  barcode              n    gex  cred    ........... .............     u  const    ........... ........... .     u  const│
-│1                      38   4986   0.8  │  LSNNGDGNYFV CARGGTTTYFISW     4  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T     3  IGKC │
-│   AAATGCCCACTGAAGG-1       7142   0.7                                  2                                         7       │
-│   AACCATGCAAAGAATC-1       4583   0.8                                  3                                         2       │
-│   AACTGGTGTCGAACAG-1       4252   0.5                                  8                                         7       │
-│   ACGGGTCGTCGCGGTT-1       2544   0.7                                  2                                         3       │
-│   AGACGTTAGAGTAAGG-1       5198   0.9                                  6                                         3       │
-│   AGCATACGTTTCCACC-1       5852   0.8                                  5                                         1       │
-│   AGTGTCAAGTAGTGCG-1       3173   0.8                                 10                                        17       │
-│   ATCCGAAAGGACTGGT-1        842   2.8                                  1                                         2       │
-│   ATCTACTTCAGTTAGC-1       1662   0.5                                  5                                         2       │
-│   ATCTGCCGTTACGACT-1       6078   1.0                                  2                                         2       │
-│   CAAGTTGAGTTACGGG-1       4586   0.5                                  2                                         3       │
-│   CAGAGAGAGATGGGTC-1       6870   0.8                                  4                                         1       │
-│   CATATTCTCCGCTGTT-1       4944   0.7                                  7                                         2       │
-│   CGATTGATCCACGCAG-1       3952   0.3                                  7                                        11       │
-│   CGGCTAGGTCAACTGT-1       5499   0.7                                  2                                         2       │
-│   CGTAGGCCAAACTGTC-1       1320   1.8                                  2                                         1       │
-│   CTAGTGACACGGTTTA-1       3896   0.8                                  1                                         3       │
-│   CTCTAATAGCCGATTT-1       2151   1.6                                  2                                         1       │
-│   CTGGTCTAGCTGCCCA-1      19984  15.0                               1725                                      6300       │
-│   CTTCTCTAGATGCCAG-1       6228   1.0                                  5                                         5       │
-│   GAAGCAGTCGTTACAG-1       5434   1.0                                  3                                         1       │
-│   GACGTTATCTACCAGA-1       3898   0.7                                  2                                         2       │
-│   GAGTCCGTCGGTCTAA-1      11095  10.0                                  3                                         1       │
-│   GATGAGGAGATCTGCT-1       7510   1.1                                  4                                         1       │
-│   GCATACATCGACAGCC-1       1646   1.0                                  3                                         2       │
-│   GGAATAAGTTTGACAC-1       8007   1.4                                  3                                         1       │
-│   GGCTGGTCAGTGGGAT-1       9681   0.9                                 16                                         6       │
-│   GGGAGATTCCGCATAA-1       4633   1.0                                  5                                         4       │
-│   GTACTCCAGGTGTGGT-1       4575   0.5                                  5                                         3       │
-│   GTTAAGCCACATTAGC-1       7601   0.9                                  4                                         2       │
-│   TAGTGGTTCGGCGCTA-1       4986   0.8                                 11                                        14       │
-│   TCAGGATCAAGTTCTG-1       7352   0.5                                  2                                         3       │
-│   TCAGGATGTTGCCTCT-1       3496   0.5                                  2                                         2       │
-│   TCCCGATTCTATCCCG-1       5962   0.9                                  3                                         6       │
-│   TGCGCAGCAAATCCGT-1       5736   0.9                                  8                                         5       │
-│   TTCCCAGCAAGTTAAG-1       5860   0.7                                 11                                        15       │
-│   TTGAACGTCCATTCTA-1       4682   0.6                                  3                                         2       │
-│   TTTGCGCCACACAGAG-1       4958   0.8                                  4                                         3       │
-└────────────────────────────────────────┴──────────────────────────────────────────┴────────────────────────────────────────┘
-
- -

The field cred is a measure of the extent to which cells having gene expression -similar to a -given putative B cell are themselves B cells. In more detail, first for any dataset, let n be -the number of VDJ cells that are also GEX cells. Now for a given cell, we find the n GEX cells -that are closest to it in PCA space, and report the percentage of those that are also VDJ cells.
-This is cred. The closer this number is to 100, the more the given cell looks like a -typical B cell. Conversely, a very low number makes the given cell appear suspect.

-

The values of cred vary considerably from dataset to dataset, requiring somewhat -different interpretation. We show the distribution for this one dataset:

- -

-cred_gex_dist -

- -

Thus the values of the cells in the reported clonotype are very low indeed, and almost all -highly suspect. Probably the clonotype originated from a single cell, which broke up into one -major piece (the one for barcode CTGGTCTAGCTGCCCA-1), and many smaller pieces. These -smaller pieces reside in GEMs that may or may not contain an actual intact cell. In fact, many of -the cells are detected as T cells (using TCR data 128024 from the same cell draw). We -can mark these cells in the same display using the command - -

enclone BCR=128040 GEX=127801 BC=128024_cells.csv CDR3=CARGGTTTYFISW PER_CELL LVARSP=gex,cred,T CHAINS_EXACT=2
-
- -where the file 128024_cells.csv is a CSV file with header barcode,T -and having one line for each barcode in -128024/outs/cell_barcodes.json, e.g. AAACGGGAGAGAACAG-1,◯. -(We used the character ◯ as a value just because we liked it.) - -
[1.1] CLONOTYPE = 38 CELLS
-┌───────────────────────────────────────────┬──────────────────────────────────────────┬────────────────────────────────────────┐
-│                                           │  CHAIN 1                                 │  CHAIN 2                               │
-│                                           │  146.1.2|IGHV3-53 ◆ 55|IGHJ4             │  299|IGKV4-1 ◆ 217|IGKJ4               │
-│                                           ├──────────────────────────────────────────┼────────────────────────────────────────┤
-│                                           │              1111111111111               │              11111111111 1             │
-│                                           │  12257777789 1111111222222               │  11345778899 11111112222 2             │
-│                                           │  35831234686 3456789012345               │  78291346825 34567890123 7             │
-│                                           │              ═════CDR3════               │              ════CDR3═══               │
-│reference                                  │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
-│donor ref                                  │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
-├───────────────────────────────────────────┼──────────────────────────────────────────┼────────────────────────────────────────┤
-│#  barcode              n    gex  cred  T    ........... .............     u  const    ........... ........... .     u  const│
-│1                      38   4986   0.8     │  LSNNGDGNYFV CARGGTTTYFISW     4  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T     3  IGKC │
-│   AAATGCCCACTGAAGG-1       7142   0.7  ◯                                  2                                         7       │
-│   AACCATGCAAAGAATC-1       4583   0.8  ◯                                  3                                         2       │
-│   AACTGGTGTCGAACAG-1       4252   0.5  ◯                                  8                                         7       │
-│   ACGGGTCGTCGCGGTT-1       2544   0.7                                     2                                         3       │
-│   AGACGTTAGAGTAAGG-1       5198   0.9  ◯                                  6                                         3       │
-│   AGCATACGTTTCCACC-1       5852   0.8  ◯                                  5                                         1       │
-│   AGTGTCAAGTAGTGCG-1       3173   0.8                                    10                                        17       │
-│   ATCCGAAAGGACTGGT-1        842   2.8                                     1                                         2       │
-│   ATCTACTTCAGTTAGC-1       1662   0.5  ◯                                  5                                         2       │
-│   ATCTGCCGTTACGACT-1       6078   1.0  ◯                                  2                                         2       │
-│   CAAGTTGAGTTACGGG-1       4586   0.5                                     2                                         3       │
-│   CAGAGAGAGATGGGTC-1       6870   0.8  ◯                                  4                                         1       │
-│   CATATTCTCCGCTGTT-1       4944   0.7                                     7                                         2       │
-│   CGATTGATCCACGCAG-1       3952   0.3                                     7                                        11       │
-│   CGGCTAGGTCAACTGT-1       5499   0.7  ◯                                  2                                         2       │
-│   CGTAGGCCAAACTGTC-1       1320   1.8                                     2                                         1       │
-│   CTAGTGACACGGTTTA-1       3896   0.8                                     1                                         3       │
-│   CTCTAATAGCCGATTT-1       2151   1.6                                     2                                         1       │
-│   CTGGTCTAGCTGCCCA-1      19984  15.0                                  1725                                      6300       │
-│   CTTCTCTAGATGCCAG-1       6228   1.0  ◯                                  5                                         5       │
-│   GAAGCAGTCGTTACAG-1       5434   1.0  ◯                                  3                                         1       │
-│   GACGTTATCTACCAGA-1       3898   0.7                                     2                                         2       │
-│   GAGTCCGTCGGTCTAA-1      11095  10.0                                     3                                         1       │
-│   GATGAGGAGATCTGCT-1       7510   1.1  ◯                                  4                                         1       │
-│   GCATACATCGACAGCC-1       1646   1.0                                     3                                         2       │
-│   GGAATAAGTTTGACAC-1       8007   1.4                                     3                                         1       │
-│   GGCTGGTCAGTGGGAT-1       9681   0.9  ◯                                 16                                         6       │
-│   GGGAGATTCCGCATAA-1       4633   1.0  ◯                                  5                                         4       │
-│   GTACTCCAGGTGTGGT-1       4575   0.5  ◯                                  5                                         3       │
-│   GTTAAGCCACATTAGC-1       7601   0.9  ◯                                  4                                         2       │
-│   TAGTGGTTCGGCGCTA-1       4986   0.8  ◯                                 11                                        14       │
-│   TCAGGATCAAGTTCTG-1       7352   0.5                                     2                                         3       │
-│   TCAGGATGTTGCCTCT-1       3496   0.5  ◯                                  2                                         2       │
-│   TCCCGATTCTATCCCG-1       5962   0.9  ◯                                  3                                         6       │
-│   TGCGCAGCAAATCCGT-1       5736   0.9  ◯                                  8                                         5       │
-│   TTCCCAGCAAGTTAAG-1       5860   0.7  ◯                                 11                                        15       │
-│   TTGAACGTCCATTCTA-1       4682   0.6  ◯                                  3                                         2       │
-│   TTTGCGCCACACAGAG-1       4958   0.8  ◯                                  4                                         3       │
-└───────────────────────────────────────────┴──────────────────────────────────────────┴────────────────────────────────────────┘
-
- -

We thus conclude in this case that the clonotype is likely contaminated with many cells that -are not B cells, and in fact that the entire clonotype probably arose from a single true B cell. -In other examples we have looked at, there appear to be a few true B -cells, along with many that are not, either corresponding to other cell types or nearly empty GEMs.

-

Overall conclusion: illusory clonotypes are rare, and can generally be detected, either with -the aid of a second library made from the same lot of cells, or with gene expression data.

- - - diff --git a/pages/auto/help.all.html b/pages/auto/help.all.html deleted file mode 100644 index 3b26a2023..000000000 --- a/pages/auto/help.all.html +++ /dev/null @@ -1,1624 +0,0 @@ - - - - - - -enclone help developer - - - - - - - -
-enclone banner -

-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone main help page (what you get by typing "enclone")
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-The mission of enclone is to:
-
-  Find and display the clonotypes within single cell VDJ datasets:
-  groups of cells having the same fully rearranged common ancestor.
-
-enclone is part of the 10x Genomics immune profiling tools, including Cell Ranger and Loupe. 
-enclone uses output from Cell Ranger version ≥ 3.1.
-
-The complete enclone documentation is at bit.ly/enclone.  This page catalogs the subset of those
-pages that are directly accessible from the enclone command line.  These pages can be viewed in a
-100 wide x 56 high window, except for those labeled "long" or "wide".
-
-┌─────────────────────────┬─────────────────────────────────────────────────────────────────────┐
-│command                  │  what it provides                                                   │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help             │  help to test for correct setup                                     │
-│enclone                  │  what you see here: guide to all the doc                            │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help quick       │  quick guide to getting started                                     │
-│enclone help how         │  how enclone works (long)                                           │
-│enclone help command     │  info about enclone command line argument processing                │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help glossary    │  glossary of terms used by enclone, and conventions                 │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help example1    │  explanation of an example                                          │
-│enclone help example2    │  example showing gene expression and feature barcodes (wide)        │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help input       │  how to provide input to enclone (long)                             │
-│enclone help input_tech  │  how to provide input to enclone (technical notes)                  │
-│enclone help parseable   │  parseable output (long)                                            │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help filter      │  clonotype filtering options, scanning for feature enrichment (long)│
-│enclone help special     │  special filtering options (long)                                   │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help lvars       │  lead column options (long)                                         │
-│enclone help cvars       │  per chain column options (long)                                    │
-│enclone help amino       │  per chain column options for amino acids                           │
-│enclone help display     │  other clonotype display options                                    │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help indels      │  insertion and deletion handling                                    │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help color       │  how enclone uses color, and related things                         │
-│enclone help faq         │  frequently asked questions (long)                                  │
-│enclone help developer   │  a few things for developers                                        │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help all         │  concatenation of all the help pages (long, wide)                   │
-│                         │  ███ USE THIS TO SEARCH ALL THE HELP PAGES! ███                     │
-└─────────────────────────┴─────────────────────────────────────────────────────────────────────┘
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone setup page (for one time use, what you get by typing "enclone help")
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-
-Welcome to enclone!
-
-The purpose of this first page is to help you make sure that you're set up properly
-to run enclone.  PLEASE READ!
-
-(for the main help page, please type instead: enclone)
-
-Here we go through several setup tests.  If you have any problem that you can't
-resolve, please email us at enclone@10xgenomics.com.
-
-
-1. Are you using a fixed width font?
-Look at this:
-A FAT BROWN CAT JUMPED OVER THE WALL
-||||||||||||||||||||||||||||||||||||
-Do those two lines end at the same position?  If not, you need to switch your font.
-
-2. Is your terminal window wide enough to see the help pages?
-Your terminal needs to be at least 100 columns wide.  Look at this:
-0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
-Does it appear as a single line?  If not, please widen your window.
-
-3. Can your terminal display box characters?
-Look at this:
-┌────────┬─────────┐
-│banana  │  peel   │
-├────────┼─────────┤
-│oops    │  slipped│
-└────────┴─────────┘
-Do you see a neat rectangle composed of four rectangles with words inside them?  Are the vertical
-lines contiguous?  If not, something is wrong with your terminal!  You may need to change the
-terminal font.  For example, Menlo works, but Courier does not.
-
-4. Can your terminal correctly display ANSI escape sequences?
-The following word should be bold.  The following word should be blue.
-If that doesn't make sense, or is messed up, something is wrong, and you have two options:
-(a) seek help to fix your terminal window
-(b) turn off escape sequences by adding PLAIN to every enclone command, or set
-the environment variable ENCLONE_PLAIN.
-But that should be only a last resort.
-
-5. Can your terminal correctly display unicode characters?
-Do you see a centered dot here • ?
-If not, your terminal has a problem!
-
-6. Does this entire help page appear at once in your terminal window?
-If not, please increase the number of rows in your window to 56.
-
-
-If you go through all those tests and everything worked, you should be good to go!
-
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help quick
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-quick guide to getting started
-
-Just type this:
-
-enclone BCR=p
-
-where p is the path to your Cell Ranger VDJ directory.
-
-Substitute TCR if that's what you've got.
-
-This will show you all the clonotypes, in descending order by number of cells.
-
-You'll need to make your window wide enough so that lines are not folded.  This depends on the
-dataset.
-
-Only one page of output is shown at a time.  To navigate within the full output, use the space bar
-to go forward and the b key to go backward.
-
-See enclone help example1 for a detailed guide to how to read the enclone output.  A few key
-things you should know:
-
-1. You'll see numbers near the top.  These are amino acid position numbers, and
-   they read downwards.  Numbering starts at the start codon, numbered zero.
-
-2. Each numbered line represents an exact subclonotype: cells having identical V(D)J transcripts.
-
-3. By default, you'll see data in amino acid space.  Only "interesting" amino acids are shown.
-
-Please read on to learn more!
-
-navigation in enclone
-
-enclone automatically sends its output through the program "less".  This allows you to navigate
-within the output, using the following keys (and many more, not shown, and which you don't need to
-know):
-• space: causes output to page forward
-• b: causes output to page backward
-• /string: finds instances of "string" in the output
-• n: having done the previous, jump to the next instance
-• q: quit, to return to the command line.
-
-When enclone uses less, it passes the argument -R, which causes certain characters to be hidden,
-namely escape codes that color or bold text.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help how
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-information about how enclone works
-
-The goal of enclone is to find and display the clonotypes within single cell VDJ datasets: groups
-of cells having the same fully rearranged common ancestor.
-
-enclone provides the foundation for fully understanding each cell's antigen affinity and the
-evolutionary relationship between cells within one or more datasets.  This starts with, for each
-cell, the full length sequence of all its VDJ receptor chains.  Such data may be obtained using
-the 10x Genomics immune profiling platform.
-
-See also the heuristics page at bit.ly/enclone.
-
-For this, there are fundamental challenges:
-
-┌──────────────────────────────────────────────────────────────────────────────────────────────────┐
-│1. It is extremely easy to get false positives: the incorrect appearance that two cells have a    │
-│common ancestor.                                                                                  │
-│                                                                                                  │
-│2. Because of somatic hypermutation in B cells, it can be difficult to know that two B cells share│
-│a common ancestor.                                                                                │
-│                                                                                                  │
-│3. There is always some background noise, e.g. from ambient mRNA.  When building large clonotypes,│
-│this noise tends to pile up, yielding ectopic chains, i.e. chains within a clonotype that are     │
-│artifacts and do not represent true biology.                                                      │
-└──────────────────────────────────────────────────────────────────────────────────────────────────┘
-
-To address these challenges, the enclone algorithm has several steps, which we outline:
-
-1.  Input data.  enclone gets its information from the file all_contig_annotations.json that is
-produced by Cell Ranger.  Only productive contigs are used.  Each has an annotated V and J
-segment.  The V segment alignment may have a single indel whose length is divisible by three, and
-in that case, the V reference sequence is edited either to delete or insert sequence.  In the
-insertion case, the bases are taken from the contig.  These indels are noted in the enclone
-output.
-
-2.  Exact subclonotypes.  enclone groups cells into exact subclonotypes, provided that they have
-the same number of chains, identical V..J sequences, identical C segment assignments, and the same
-distance between the J stop and the C start (which is usually zero).
-
-3.  Finding the germline sequences.  For datasets from a given donor, enclone derives "donor
-reference sequences" for the V chains present in the donor's genome.  This is powerful, even
-though based on imperfect information.  V segments vary in their expression frequency and thus the
-more cells which are present, the more complete the information will be.  It is also not possible
-to accurately determine the terminal bases in a V chain from transcript data alone because these
-bases mutate during recombination and because of non-templated nucleotide addition.
-
-The idea for how this is done is roughly the following: for each V segment, we choose one cell
-from each clonotype (although these have not actually been computed yet, so it's an
-approximation).  Next for each position on the V segment, excluding the last 15 bases, we
-determine the distribution of bases that occur within these selected cells.  We only consider
-those positions where a non-reference base occurs at least four times and is at least 25% of the
-total.  Then each cell has a footprint relative to these positions; we require that these
-footprints satisfy similar evidence criteria.  Each such non-reference footprint then defines an
-"alternate allele".  We do not restrict the number of alternate alleles because they may arise
-from duplicated gene copies.
-
-A similar approach was attempted for J segments but at the time of testing did not appear to
-enhance clonotyping specificity.  This could be revisited later and might be of interest even if
-it does not improve specificity.
-
-4.  What joins are tested.  Pairs of exact subclonotypes are considered for joining, as described
-below.  This process only considers exact subclonotypes that have two or three chains.  There is some
-separate joining for the case of one chain.  Exact subclonotypes having four chains are not joined
-at present.  These cases are clearly harder because these exact subclonotypes are highly enriched
-for cell doublets, which we discard if we can identify as such.
-
-5.  Initial grouping.  For each pair of exact subclonotypes, and for each pair of chains in each
-of the two exact subclonotypes, for which V..J has the same length for the corresponding chains,
-and the CDR3 segments have the same length for the corresponding chains, enclone considers joining
-the exact subclonotypes into the same clonotype.
-
-6.  Error bounding.  To proceed, as a minimum requirement, there must be at most 50 total
-mismatches between the two exact subclonotypes, within the given two V..J segments.
-This can be changed by setting MAX_DIFFS=n on the command line.
-
-7.  Shared mutations.  enclone next finds shared mutations between exact subclonotypes, that is,
-for two exact subclonotypes, common mutations from the reference sequence, using the donor
-reference for the V segments and the universal reference for the J segments.  Shared mutations are
-supposed to be somatic hypermutations, that would be evidence of common ancestry.  By using the
-donor reference sequences, most shared germline mutations are excluded, and this is critical for
-the algorithm's success.
-
-8.  Are there enough shared mutations?  We find the probability p that “the shared mutations occur
-by chance”.  More specifically, given d shared mutations, and k total mutations (across the two
-cells), we compute the probability p that a sample with replacement of k items from a set whose
-size is the total number of bases in the V..J segments, yields at most k – d distinct elements. 
-The probability is an approximation, for the method please see
-https://docs.rs/stirling_numbers/0.1.0/stirling_numbers.
-
-9.  Are there too many CDR3 mutations?  Next, let N be "the number of DNA sequences that differ
-from the given CDR3 sequences by at most the number of observed differences".  More specifically,
-if cd is the number of differences between the given CDR3 nucleotide sequences, and n is the total
-length in nucleotides of the CDR3 sequences (for the two chains), we compute the total number N of
-strings of length n that are obtainable by perturbing a given string of length n, which is
-sum( choose(n,m), m = 0..=cd ).  We also require that cd is at most 10 (and this bound is
-adjustable via the command-line argument MAX_CDR3_DIFFS).
-
-10.  Key join criteria.  Two cells sharing sufficiently many shared differences and sufficiently
-few CDR3 differences are deemed to be in the same clonotype.  That is, the lower p is, and the
-lower N is, the more likely it is that the shared mutations represent bona fide shared ancestry. 
-Accordingly, the smaller p*N is, the more likely it is that two cells lie in the same true
-clonotype.  To join two cells into the same clonotype, we require that the bound p*N ≤ C is
-satisfied, where C is the constant 1,000,000 (and adjustable via the command-line argument
-MAX_LOG_SCORE, the log10 of this, with default value 6).  This constant was arrived at by
-empirically balancing sensitivity and specificity across a large collection of datasets.  See
-discussion of performance below.
-
-11.  Other join criteria.  We do not join two clonotypes which were assigned different reference
-sequences unless those reference sequences differ by at most 3 positions.  This value can be
-controlled using the command-line argument MAX_DEGRADATION.  There is an additional restriction
-imposed when creating two-cell clonotypes: we require that cd ≤ d, where cd is the number of
-CDR3 differences and d is the number of shared mutations, as above.  This filter may be turned off
-using the command-line argument EASY.
-
-12.  Junk.  Spurious chains are filtered out based on frequency and connections. See "enclone help
-special" for a description of the filters.
-
-We are actively working to improve the algorithm.  To test the performance of the current version,
-we combined data from 443 BCR libraries from 30 donors, which yielded 9573 clonotypes having at
-least two cells each, of which 15 (0.16%) contained data from multiple donors.  These are errors.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help command
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-information about enclone command-line argument processing
-
-1. Order of processing
-
-• Before processing its command line, enclone first checks for environment
-variables of the form ENCLONE_<x>.  These are converted into command-line arguments.  You can set
-any command-line argument this way.  The reason why you might want to use this feature is if you
-find yourself using the same command-line option over and over, and it is more convenient to set
-it once as an environment variable.
-• For example, setting the environment variable ENCLONE_PRE to /Users/me/enclone_data is
-equivalent to providing the command-line argument PRE=/Users/me/enclone_data.
-• After checking environment variables, arguments on the command line are read from left to right;
-if an argument name is repeated, only the rightmost value is used, except as noted specifically in
-the documentation.
-
-2. Color
-
-enclone uses ANSI escape codes for color and bolding, frivolously, for emphasis, and more
-importantly for amino acids, to represent different codons.  This is done automatically but you
-can turn it off....
-
-PLEASE READ THIS:
-
-You can turn off escape codes by adding PLAIN to any command.  Use this if you want to peruse
-output using a text editor which does not grok the escape codes.  However some things will not
-make sense without color.
-
-3. Paging
-
-• enclone automatically pipes its output to less -R -F -X.
-• The effect of this will be that you'll see only the first screen of output.  You can then use
-the spacebar to go forward, b to go backward, and q to quit.  The -R option causes escape
-characters to be correctly displayed, the -F option causes an automatic exit if output fits on a
-single screen, and the -X option prevents output from being sent to the "alternate screen" under
-certain platform/version combinations.
-• Type man less if you need more information.
-• If for whatever reason you need to turn off output paging, add the argument NOPAGER to the
-enclone command.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help glossary
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-glossary of terms used by enclone
-
-┌────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
-│V..J                │  the full sequence of a V(D)J transcript, from the beginning of the V       │
-│                    │  segment to the end of the J segment; this sequence begins with a start     │
-│                    │  codon and ends with a partial codon (its first base)                       │
-│CDR3                │  The terms CDR3 and junction are commonly mistaken and often used           │
-│                    │  interchangeably.  In enclone's nomenclature, "CDR3" actually refers to the │
-│                    │  junction (the CDR3 loop plus the canonical C and W/F at the N and C termini│
-│                    │  respectively).                                                             │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│clonotype           │  all the cells descended from a single fully rearranged T or B cell         │
-│                    │  (approximated computationally)                                             │
-│exact subclonotype  │  all cells having identical transcripts                                    │
-│                    │  (every clonotype is a union of exact subclonotypes)                        │
-│clone               │  a cell in a clonotype, or in an exact subclonotype                         │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│onesie              │  a clonotype or exact subclonotype having exactly one chain                 │
-│twosie              │  a clonotype or exact subclonotype having exactly two chains                │
-│threesie            │  a clonotype or exact subclonotype having exactly three chains;             │
-│                    │  these frequently represent true biological events, arising from expression │
-│                    │  of both alleles                                                            │
-│foursie             │  a clonotype or exact subclonotype having exactly four chains;              │
-│                    │  these very rarely represent true biological events                         │
-│moresie             │  a clonotype having more than four chains;                                  │
-│                    │  these sad clonotypes do not represent true biological events               │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│donor               │  an individual from whom datasets of an origin are obtained                 │
-│origin              │  a tube of cells from a donor, from a particular tissue at a                │
-│                    │  particular point in time, and possibly enriched for particular cells       │
-│cell group          │  an aliquot from an origin, presumed to be a random draw                    │
-│dataset             │  all sequencing data obtained from a particular library type                │
-│                    │  (e.g. TCR or BCR or GEX or FB), from one cell group, processed by running  │
-│                    │  through the Cell Ranger pipeline                                           │
-└────────────────────┴─────────────────────────────────────────────────────────────────────────────┘
-
- The exact requirements for being in the same exact subclonotype are that cells:
-• have the same number of productive contigs identified
-• that these have identical bases within V..J
-• that they are assigned the same constant region reference sequences
-• and that the difference between the J stop and the C start is the same
-  (noting that this difference is nearly always zero).
-Note that we allow mutations within the 5'-UTR and constant regions.
-
-conventions
-
-• When we refer to "V segments", we always include the leader segment.
-• Zero or one?  We number exact subclonotypes as 1, 2, ... and likewise with
-chains within a clonotype, however DNA and amino-acid positions are numbered starting at zero.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help example1
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-Shown below is the output of the command:
-
-enclone BCR=123089 CDR3=CARRYFGVVADAFDIW
-
-[1] GROUP = 1 CLONOTYPES = 12 CELLS
-
-[1.1] CLONOTYPE = 12 CELLS
-┌───────────┬───────────────────────────────────────────────┬──────────────────────────────┐
-│           │  CHAIN 1                                      │  CHAIN 2                     │
-│           │  181.1.1|IGHV4-30-2 ◆ 53|IGHJ3                │  254|IGKV1D-39 ◆ 218|IGKJ5   │
-│           ├───────────────────────────────────────────────┼──────────────────────────────┤
-│           │              1 1111111111111111               │  1 111111111111              │
-│           │  2224556788990 1111122222222223               │  0 011111111112              │
-│           │  0571380317346 5678901234567890               │  6 901234567890              │
-│           │                ══════CDR3══════               │    ════CDR3════              │
-│reference  │  LSSASRPHPVRST ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W               │  T CQQ◦◦◦◦◦◦◦◦◦              │
-│donor ref  │  VSPTYRHYPVTST ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W               │  T CQQ◦◦◦◦◦◦◦◦◦              │
-├───────────┼───────────────────────────────────────────────┼──────────────────────────────┤
-│#   n      │  ........x.... ..............x.     u  const  │  x ......x.....      u  const│
-│1  10      │  VSPTYRHYPVTST CARRYFGVVADAFDIW  4285  IGHM   │  T CQQSYSTPPITF  11793  IGKC │
-│2   2      │  VSPTYRHYSVTST CARRYFGVVADAFDIW  4383  IGHM   │  A CQQSYSPPPITF  13922  IGKC │
-└───────────┴───────────────────────────────────────────────┴──────────────────────────────┘
-
-This shows an invocation of enclone that takes one dataset as input and exhibits
-all clonotypes for which some chain has the given CDR3 sequence.
-
-What you see here is a compressed view of the entire information encoded in the
-full length transcripts of the 12 cells comprising this clonotype: every base!
-There is a lot to explain about the compression, so please read carefully.
-
-• Clonotypes are grouped.  Here we see just one group having one clonotype in it.
-• This clonotype has two exact subclonotypes in it, the first of which has 10 cells.
-• This clonotype has two chains.  The reference segments for them are shown at the top.
-• The notation 181.1.1 says that this V reference sequence is an alternate allele
-  derived from the universal reference sequence (contig in the reference file)
-  numbered 181, that is from donor 1 ("181.1") and is alternate allele 1 for that donor.
-• Sometimes chains are missing from exact subclonotypes.
-• Amino acids are assigned different colors depending on which codon they represent.
-• Numbered columns show the state of particular amino acids, e.g. the first column is for amino
-  acid 20 in chain 1 (where 0 is the start codon).  The numbers read vertically, downward!
-• Universal ref: state for the contig in the reference file.
-• Donor ref: state for the inferred donor germline sequence.
-• ◦s are "holes" in the recombined region where the reference doesn't make sense.
-• The "dot and x" line has xs where there's a difference *within* the clonotype.
-• Amino acids are shown if they differ from the universal reference or are in the CDR3.
-• u = median UMI count for a chain in the exact subclonotype.
-• const = const region name for a chain in the exact subclonotype.
-
-The view you see here is configurable: see the documentation at enclone help lvars and enclone
-help cvars.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help example2
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-Shown below is the output of the command:
-
-enclone BCR=123085 GEX=123749 LVARSP=gex,IGHV2-5_g_μ,CD4_ab_μ CDR3=CALMGTYCSGDNCYSWFDPW
-
-[1] GROUP = 1 CLONOTYPES = 5 CELLS
-
-[1.1] CLONOTYPE = 5 CELLS
-┌────────────────────────────────────┬───────────────────────────────────────┬─────────────────────────────┐
-│                                    │  CHAIN 1                              │  CHAIN 2                    │
-│                                    │  98|IGHV2-5 ◆ 57|IGHJ5                │  352|IGLV3-1 ◆ 314|IGLJ2    │
-│                                    ├───────────────────────────────────────┼─────────────────────────────┤
-│                                    │    11111111111111111111               │    11111111111              │
-│                                    │  8 11111222222222233333               │  6 00000111111              │
-│                                    │  5 56789012345678901234               │  2 56789012345              │
-│                                    │    ════════CDR3════════               │    ════CDR3═══              │
-│reference                           │  S ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W               │  V CQAWD◦◦◦◦◦◦              │
-│donor ref                           │  S ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦W               │  V CQAWD◦◦◦◦◦◦              │
-├────────────────────────────────────┼───────────────────────────────────────┼─────────────────────────────┤
-│#  n    gex  IGHV2-5_g_μ  CD4_ab_μ  │  x ....................     u  const  │  . ...........      u  const│
-│1  3   8852         1850        79  │  S CALMGTYCSGDNCYSWFDPW   592  IGHM   │  V CQAWDSSVVVF   2995  IGLC2│
-│2  1  29657         6515        36  │  S CALMGTYCSGDNCYSWFDPW  6112  IGHG1  │  V CQAWDSSVVVF  15203  IGLC2│
-│3  1  14886         3326        42  │  T CALMGTYCSGDNCYSWFDPW  4045  IGHG1  │  V CQAWDSSVVVF   7025  IGLC2│
-└────────────────────────────────────┴───────────────────────────────────────┴─────────────────────────────┘
-
-This shows an invocation of enclone that takes VDJ, gene expression and feature barcode data as
-input, and exhibits all clonotypes for which some chain has the given CDR3 sequence.  As well the
-command requests UMI (molecule) counts for one hand-selected gene and one antibody.  You can use
-any gene(s) you like and any antibodies for which you have feature barcodes.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help input
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-enclone has two mechanisms for specifying input datasets: either directly on the command line or
-via a supplementary metadata file. Only one mechanism may be used at a time.
-
-In both cases, you will need to provide paths to directories where the outputs of the Cell Ranger
-pipeline may be found.  enclone uses only some of the pipeline output files, so it is enough that
-those files are present in the given directory, and the particular files that are needed may be found
-by typing enclone help input_tech.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃If you use the argument PRE=p then p/ will be prepended to all pipeline paths.  A comma-separated┃
-┃list is also allowed PRE=p1,...,pn, in which case these directories are searched from left to    ┃
-┃right, until one works, and if all fail, the path is used without prepending anything.  Lastly,  ┃
-┃(see enclone help command), you can avoid putting PRE on the command line by setting the         ┃
-┃environment variable ENCLONE_PRE to the desired value.  The default value for PRE is             ┃
-┃~/enclone/datasets,~/enclone/datasets2.                                                          ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-Both input forms involve abbreviated names (discussed below), which should be as short as
-possible, as longer abbreviations will increase the width of the clonotype displays.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃enclone can use gene expression and feature barcode data, as represented by a feature matrix.     ┃
-┃Cell Ranger stores this matrix in an hdf5 file, which while generally very efficient, is not      ┃
-┃optimized for interactive use.  Therefore enclone provides an alternate file structure, which     ┃
-┃speeds up enclone overall by up to 50%.  To use this, add the argument NH5 to the enclone command ┃
-┃line.  This will work so long as you have write permission on input directories.  The first time  ┃
-┃you run enclone (using given inputs), an alternate file feature_barcode_matrix.bin will be        ┃
-┃written; then subsequent invocations will be faster.  Once the file has been created, it will     ┃
-┃always be used, regardless of whether NH5 is used.  However, we may occasionally change the format┃
-┃of the alternate file.  If we do that and you have previously generated the file, then it will    ┃
-┃be rewritten when you invoke enclone for that dataset.  Like with other enclone command-line      ┃
-┃options, if you want NH5 on all the time, you can set the environment variable ENCLONE_NH5.       ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-█ 1 █ To point directly at input files on the command line, use e.g.
-TCR=/home/jdoe/runs/dataset345
-or likewise for BCR.  A more complicated syntax is allowed in which commas, colons and semicolons
-act as delimiters.  Commas go between datasets from the same origin, colons between datasets from
-the same donor, and semicolons separate donors.  If semicolons are used, the value must be quoted.
-
-enclone uses the distinction between datasets, origins and donors in the following ways:
-1. If two datasets come from the same origin, then enclone can filter to remove certain artifacts,
-unless you specify the option NCROSS.
-See also the illusory clonotype expansion page at bit.ly/enclone.
-2. If two cells came from different donors, then enclone will not put them in the same clonotype,
-unless you specify the option MIX_DONORS.
-More information may be found at `enclone help special`.  In addition, this is enclone's way of
-keeping datasets organized and affects the output of fields like origin, etc.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃Naming.  Using this input system, each dataset is assigned an abbreviated name, which is         ┃
-┃everything after the final slash in the directory name (e.g. dataset345 in the above example), or┃
-┃the entire name if there is no slash; origins and donors are assigned identifiers s1,... and     ┃
-┃d1,..., respectively; numbering of origins restarts with each new donor.  To specify origins     ┃
-┃and donors, use the second input form, and see in particular abbr:path.                          ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-Examples:
-TCR=p1,p2   -- input data from two libraries from the same origin
-TCR=p1,p2:q -- input data as above plus another from a different origin from the same donor
-TCR="a;b"   -- input one library from each of two donors.
-
-Matching gene expression and/or feature barcode data may also be supplied using an argument GEX=...,
-whose right side must have the exact same structure as the TCR or BCR argument.  Specification of
-both TCR and BCR is not allowed.
-
-In addition, barcode-level data may be specified using BC=..., whose right side is a list of paths
-having the same structure as the TCR or BCR argument.  Each such path must be for a CSV file,
-which must include the field barcode, may include special fields origin, donor, tag and color, and
-may also include arbitrary other fields.  The origin and donor fields allow a particular origin
-and donor to be associated to a given barcode.  A use case for this is genetic demultiplexing. 
-The tag field is intended to be used with tag demultiplexing.  The color field is used by the PLOT
-option.  All other fields are treated as lead variables, but values are only displayed in PER_CELL
-mode, or for parseable output using PCELL.  These fields should not include existing lead variable
-names.  Use of BC automatically turns on the MIX_DONORS option.
-
-█ 2 █ To specify a metadata file, use the command line argument
-META=filename
-This file should be a CSV (comma-separated values) file, with one line per cell group.  After the
-first line, lines starting with # are ignored.  There must be a field tcr or bcr, and some other
-fields are allowed:
-┌────────┬───────────────┬──────────────────────────────────────────────────────────────┐
-│field   │  default      │  meaning                                                     │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│tcr     │  (required!)  │  path to dataset, or abbr:path, where abbr is an abbreviated │
-│or bcr  │               │  name for the dataset; exactly one of tcr or bcr must be used│
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│gex     │  null         │  path to GEX dataset, which may include or consist entirely  │
-│        │               │  of FB data                                                  │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│origin  │  s1           │  abbreviated name of origin                                  │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│donor   │  d1           │  abbreviated name of donor                                   │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│color   │  null         │  color to associate to this dataset (for PLOT option)        │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│bc      │  null         │  name of CSV file as in the BC option                        │
-└────────┴───────────────┴──────────────────────────────────────────────────────────────┘
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help input_tech
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-information about providing input to enclone (technical notes)
-
-enclone only uses certain files, which are all in the outs subdirectory of a Cell Ranger pipeline
-directory:
-
-┌─────────────────────────────────────────────┬──────────┐
-│file                                         │  pipeline│
-├─────────────────────────────────────────────┼──────────┤
-│all_contig_annotations.json                  │  VDJ     │
-├─────────────────────────────────────────────┼──────────┤
-│vdj_reference/fasta/regions.fa               │  VDJ     │
-├─────────────────────────────────────────────┼──────────┤
-│metrics_summary.csv                          │  GEX     │
-├─────────────────────────────────────────────┼──────────┤
-│raw_feature_bc_matrix.h5                     │  GEX     │
-├─────────────────────────────────────────────┼──────────┤
-│analysis/clustering/graphclust/clusters.csv  │  GEX     │
-├─────────────────────────────────────────────┼──────────┤
-│analysis/pca/10_components/projection.csv    │  GEX     │
-└─────────────────────────────────────────────┴──────────┘
-
-The first file is required, and the second should be supplied if Cell Ranger version 4.0 or
-greater was used.  The others are required, in the indicated structure, if GEX or META/gex
-arguments are provided.  The exact files that are used could be changed in the future.
-
-Note that the VDJ outs directories must be from Cell Ranger version ≥ 3.1.  There is a workaround
-for earlier versions (which you will be informed of if you try), but it is much slower and the
-results may not be as good.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help parseable
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-parseable output
-
-The standard output of enclone is designed to be read by humans, but is not readily parseable by
-computers.  We supplement this with parseable output that can be easily read by computers.
-
-The default behavior for this is to generate a CSV file having "every possible" field (over a
-hundred).  We also provide an option to print only selected fields, and some options which enable
-inspection, short of generating a separate CSV file.
-
-Parseable output is targeted primarily at R and Python users, because of the ease of wrangling CSV
-files with these languages.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃Parseable output is invoked by using the argument                                                ┃
-┃POUT=filename                                                                                    ┃
-┃specifying the name of the file that is to be written to.                                        ┃
-┃  The filename "stdout" may be used for a preview; in that case parseable output is generated    ┃
-┃  separately for each clonotype and the two output types are integrated.  There is also          ┃
-┃  "stdouth", which is similar, but uses spaces instead of commas, and lines things up in columns.┃
-┃By default, we show four chains for each clonotype, regardless of how many chains it             ┃
-┃has, filling in with null entries.  One may instead specify n chains using the argument          ┃
-┃PCHAINS=n                                                                                        ┃
-┃The parseable output fields may be specified using                                               ┃
-┃PCOLS=x1,...,xn                                                                                  ┃
-┃where each xi is one of the field names shown below.                                             ┃
-┃This option reduces run time and memory usage, and prevents voluminous output.  Please use it!   ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-Over time additional fields may be added and the order of fields may change.
-
-There is an alternate parseable output mode in which one line is emitted for each cell, rather
-than each exact subclonotype.  This mode is enabled by adding the argument PCELL to the command
-line.  Each exact subclonotype then yields a sequence of output lines that are identical except as
-noted below.
-
-If you want to completely suppress the generation of visual clonotypes, add NOPRINT to the enclone
-command line.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃FASTA output.  This is a separate feature.  To generate nucleotide FASTA output for each chain in ┃
-┃each exact subclonotype, use the argument FASTA=filename.  The special case stdout will cause the ┃
-┃FASTA records to be shown as part of standard output.  The FASTA records that are generated are of┃
-┃the form V(D)JC, where V is the full V segment (including the leader) and C is the full constant  ┃
-┃region, copied verbatim from the reference.  If a particular chain in a particular exact          ┃
-┃subclonotype is not assigned a constant region, then we use the constant region that was assigned ┃
-┃to the clonotype.  If no constant region at all was assigned, then the FASTA record is omitted.   ┃
-┃Similarly, FASTA_AA=filename may be used to generate a matching amino acid FASTA file.            ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-───────────────────────
-parseable output fields
-───────────────────────
-
-1. per clonotype group fields
-
-┌──────────────┬──────────────────────────────────────────┐
-│group_id      │  identifier of clonotype group - 0,1, ...│
-├──────────────┼──────────────────────────────────────────┤
-│group_ncells  │  total number of cells in the group      │
-└──────────────┴──────────────────────────────────────────┘
-
-2. per clonotype fields
-
-┌──────────────────┬────────────────────────────────────────────────────────────────┐
-│clonotype_id      │  identifier of clonotype within the clonotype group = 0, 1, ...│
-├──────────────────┼────────────────────────────────────────────────────────────────┤
-│clonotype_ncells  │  total number of cells in the clonotype                        │
-├──────────────────┼────────────────────────────────────────────────────────────────┤
-│nchains           │  total number of chains in the clonotype                       │
-└──────────────────┴────────────────────────────────────────────────────────────────┘
-
-3. per chain fields, where <i> is 1,2,... (see above)
-each of these has the same value for each exact clonotype
-
-┌──────────────────────┬───────────────────────────────────────────────────────────────────────┐
-│v_name<i>             │  name of V segment                                                    │
-│d_name<i>             │  name of D segment (or null)                                          │
-│j_name<i>             │  name of J segment                                                    │
-├──────────────────────┼───────────────────────────────────────────────────────────────────────┤
-│v_id<i>               │  id of V segment                                                      │
-│d_id<i>               │  id of D segment (or null)                                            │
-│j_id<i>               │  id of J segment                                                      │
-├──────────────────────┼───────────────────────────────────────────────────────────────────────┤
-│var_indices_dna<i>    │  DNA positions in chain that vary across the clonotype                │
-│var_indices_aa<i>     │  amino acid positions in chain that vary across the clonotype         │
-│share_indices_dna<i>  │  DNA positions in chain that are constant across the clonotype,       │
-│                      │  but differ from the donor ref                                        │
-│share_indices_aa<i>   │  amino acid positions in chain that are constant across the clonotype,│
-│                      │  but differ from the donor ref                                        │
-│                      │  all of these are comma-separated lists                               │
-└──────────────────────┴───────────────────────────────────────────────────────────────────────┘
-
-4. per exact subclonotype fields
-
-┌───────────────────────┬─────────────────────────────────────────────────────────────────────────┐
-│exact_subclonotype_id  │  identifier of exact subclonotype = 1, 2, ...                           │
-├───────────────────────┼─────────────────────────────────────────────────────────────────────────┤
-│barcodes               │  comma-separated list of barcodes for the exact subclonotype            │
-│<dataset>_barcodes     │  like "barcodes", but restricted to the dataset with the given name     │
-│barcode                │  if PCELL is specified, barcode for one cell                            │
-│<dataset>_barcode      │  if PCELL is specified, barcode for one cell, or null, if the barcode is│
-│                       │  not from the given dataset                                             │
-├───────────────────────┴─────────────────────────────────────────────────────────────────────────┤
-│In addition, every lead variable may be specified as a field.  See "enclone help lvars".         │
-└─────────────────────────────────────────────────────────────────────────────────────────────────┘
-
-5. per chain, per exact subclonotype fields, where <i> is 1,2,... (see above)
-
-[all apply to chain i of a particular exact clonotype]
-
-┌───────────────┬──────────────────────────────────────────────────────────────────────────┐
-│vj_seq<i>      │  DNA sequence of V..J                                                    │
-│seq<i>         │  full DNA sequence                                                       │
-│q<n>_<i>       │  special option to display a comma-separated list of the quality         │
-│               │  scores for chain i, at zero-based position n, numbered starting at the  │
-│               │  beginning of the V segment, for each cell in the exact subclonotype     │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│v_start<i>     │  start of V segment on full DNA sequence                                 │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│const_id<i>    │  numerical identifier of constant region (or null, if not known)         │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│utr_id<i>      │  numerical identifier of 5'-UTR region (or null, if not known)           │
-│utr_name<i>    │  name of 5'-UTR region (or null, if not known)                           │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│cdr3_start<i>  │  base position start of CDR3 sequence on full contig                     │
-│cdr3_aa<i>     │  amino acid sequence of CDR3                                             │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│var_aa<i>      │  amino acids that vary across the clonotype (synonymous changes included)│
-├───────────────┴──────────────────────────────────────────────────────────────────────────┤
-│In addition, every chain variable, after suffixing by <i>, may be used as a field.        │
-│See "enclone help cvars".                                                                 │
-└──────────────────────────────────────────────────────────────────────────────────────────┘
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help filter
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-clonotype filtering options
-
-enclone provides filtering by cell, by exact subclonotype, and by clonotype.  This page describes
-filtering by clonotype.  These options cause only certain clonotypes to be printed.  See also
-"enclone help special", which describes other filtering options.  This page also describes
-scanning for feature enrichment.
-
-┌─────────────────────┬────────────────────────────────────────────────────────────────────────┐
-│MIN_CELLS=n          │  only show clonotypes having at least n cells                          │
-│MAX_CELLS=n          │  only show clonotypes having at most n cells                           │
-│CELLS=n              │  only show clonotypes having exactly n cells                           │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MIN_UMIS=n           │  only show clonotypes having ≳ n UMIs on some chain on some cell       │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MIN_CHAINS=n         │  only show clonotypes having at least n chains                         │
-│MAX_CHAINS=n         │  only show clonotypes having at most n chains                          │
-│CHAINS=n             │  only show clonotypes having exactly n chains                          │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│CDR3=<pattern>       │  only show clonotypes having a CDR3 amino acid seq that matches        │
-│                     │  the given pattern (regular expression)*, from beginning to end        │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│SEG="s_1|...|s_n"    │  only show clonotypes using one of the given reference segment names   │
-│SEGN="s_1|...|s_n"   │  only show clonotypes using one of the given reference segment numbers │
-│                     │  both: looks for V, D, J and C segments; double quote only             │
-│                     │  needed if n > 1                                                       │
-│                     │  For both SEG and SEGN, multiple instances are allowed, and their      │
-│                     │  effects are cumulative.                                               │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MIN_EXACTS=n         │  only show clonotypes having at least n exact subclonotypes            │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│VJ=seq               │  only show clonotypes using exactly the given V..J sequence            │
-│                     │  (string in alphabet ACGT)                                             │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MIN_DATASETS=n       │  only show clonotypes containing cells from at least n datasets        │
-│MAX_DATASETS=n       │  only show clonotypes containing cells from at most n datasets         │
-│MIN_DATASET_RATIO=n  │  only show clonotypes having at least n cells and for which the ratio  │
-│                     │  of the number of cells in the most abundant dataset to the next most  │
-│                     │  abundant one is at least n                                            │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│CDIFF                │  only show clonotypes having a difference in constant region with the  │
-│                     │  universal reference                                                   │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│DEL                  │  only show clonotypes exhibiting a deletion                            │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│BARCODE=bc1,...,bcn  │  only show clonotypes that use one of the given barcodes               │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│INKT                 │  only show clonotypes for which some exact subclonotype is annotated as│
-│                     │  having some iNKT evidence, see bit.ly/enclone for details             │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MAIT                 │  only show clonotypes for which some exact subclonotype is annotated as│
-│                     │  having some MAIT evidence, see bit.ly/enclone for details             │
-└─────────────────────┴────────────────────────────────────────────────────────────────────────┘
-
-* Examples of how to specify CDR3:
-
-┌────────────────────────────────────────┬────────────────────────────────────────────────┐
-│CDR3=CARPKSDYIIDAFDIW                   │  have exactly this sequence as a CDR3          │
-│CDR3="CARPKSDYIIDAFDIW|CQVWDSSSDHPYVF"  │  have at least one of these sequences as a CDR3│
-│CDR3=".*DYIID.*"                        │  have a CDR3 that contains DYIID inside it     │
-└────────────────────────────────────────┴────────────────────────────────────────────────┘
-
-Note that double quotes should be used if the pattern contains characters other than letters.
-
-A gentle introduction to regular expressions may be found at
-https://en.wikipedia.org/wiki/Regular_expression#Basic_concepts, and a precise
-specification for the regular expression version used by enclone may be found at
-https://docs.rs/regex.
-
-linear conditions
-
-enclone understands linear conditions of the form
-c1*v1 ± ... ± cn*vn > d
-where each ci is a constant, "ci*" may be omitted, each vi is a variable, and d is a constant. 
-Blank spaces are ignored.  The > sign may be replaced by >= or ≥ or < or <= or ≤.  Each vi is a
-lead variable (see "enclone help lvars") that represents an origin/donor/tag count or gene/feature
-barcode UMI count.  In evaluating the condition, each vi is replaced by the mean of its values
-across all cells in the clonotype.  Because the minus sign - doubles as a hyphen and is used in
-some feature names, we allow parentheses around variable names to prevent erroneous parsing, like
-this (IGHV3-7_g) >= 1.
-
-filtering by linear conditions
-
-enclone has the capability to filter by bounding certain lead variables, using the command-line
-argument:
-F="L"
-where L is a linear condition (as defined above).  Currently this is limited to the case where the
-lead variables have been selected using LVARS or LVARSP!  Multiple bounds may be imposed by using
-multiple instances of F=... .
-
-feature scanning
-
-If gene expression and/or feature barcode data have been generated, enclone can scan all features
-to find those that are enriched in certain clonotypes relative to certain other clonotypes.  This
-feature is turned on using the command line argument
-SCAN="test,control,threshold"
-where each of test, control and threshold are linear conditions as defined above.  Blank spaces
-are ignored.  The test condition defines the "test clonotypes" and the control condition defines
-the "control clonotypes".  Currently, the lead variables in test and control must be specified by
-LVARS or LVARSP!  The threshold condition is special: it may use only the variables "t" and "c"
-that represent the raw UMI count for a particular gene or feature, for the test (t) or control (c)
-clonotypes.  To get a meaningful result, you should specify MIN_CELLS appropriately and manually
-examine the test and control clonotypes to make sure that they make sense.
-
-an example
-
-Suppose that your data are comprised of two origins with datasets
-named pre and post, representing time points relative to some event.  Then
-SCAN="n_post - 10*n_pre >= 0, n_pre - 0.5*n_post >= 0, t - 2*c >= 0.1"
-would define the test clonotypes to be those satisfying n_post >= 10*n_pre (so having far more
-post cells than pre cells), the control clonotypes to be those satisfying n_pre >= 0.5*n_post (so
-having lots of pre cells), and thresholding on t >= 2*c + 0.1, so that the feature must have a bit
-more than twice as many UMIs in the test than the control.  The 0.1 is there to exclude noise from
-features having very low UMI counts.
-
-Feature scanning is not a proper statistical test.  It is a tool for generating a list of feature
-candidates that may then be examined in more detail by rerunning enclone using some of the
-detected features as lead variables (appropriately suffixed).  Ultimately the power of the scan is
-determined by having "enough" cells in both the test and control sets, and in having those sets
-cleanly defined.
-
-Currently feature scanning requires that each dataset have identical features.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help special
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-special filtering options
-
-This page documents some options, most of which allow noise filters to be turned off, and which
-normally should not be invoked.  The last two options can be used to simplify the view of a
-clonotype.
-
-┌────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
-│NALL                │  Turn off all the noise filters shown below.  This may yield quite a mess.  │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NCELL               │  Use contigs found by Cell Ranger even if they were not in a called cell,   │
-│                    │  or not called high confidence.                                             │
-│NALL_CELL           │  turn off all the noise filters except for the cell filter                  │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NGEX                │  If gene expression and/or feature barcode data are provided, if a barcode  │
-│                    │  is called a cell by the VDJ part of the Cell Ranger pipeline, but not      │
-│                    │  called a cell by the gene expression and/or feature barcode part, then the │
-│                    │  default behavior of enclone is to remove such cells from clonotypes.  This │
-│                    │  option disables that behavior.                                             │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NCROSS              │  If you specify that two or more libraries arose from the same origin (i.e. │
-│                    │  cells from the same tube or tissue), then by default enclone will          │
-│                    │  "cross filter" so as to remove expanded exact subclonotypes that are       │
-│                    │  present in one library but not another, in a fashion that would be highly  │
-│                    │  improbable, assuming random draws of cells from the tube.  These are       │
-│                    │  believed to arise when a plasma or plasmablast cell breaks up during       │
-│                    │  or after pipetting from the tube, and the resulting fragments seed GEMs,   │
-│                    │  yielding expanded 'fake' clonotypes that are residues of real single plasma│
-│                    │  cells.  The NCROSS option turns off this filter, which could be useful so  │
-│                    │  long as you interpret the restored clonotypes as representing what are     │
-│                    │  probably single cells.  There may also be other situations where the filter│
-│                    │  should be turned off, and in particular the filter can do weird things if  │
-│                    │  inputs are somehow mis-specified to enclone.  Note that for purposes of    │
-│                    │  this option, enclone defines an origin by the pair                         │
-│                    │  (origin name, donor name).                                                 │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NUMI                │  Filter out B cells based on low BCR UMI counts.  The heuristics for this   │
-│                    │  are described on the enclone site at bit.ly/enclone.                       │
-│NUMI_RATIO          │  Filter out B cells based on low BCR UMI counts relative to another cell    │
-│                    │  in a given clonotype.  The heuristics for this                             │
-│                    │  are described on the enclone site at bit.ly/enclone.                       │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NGRAPH_FILTER       │  By default, enclone filters to remove exact subclonotypes that by virtue of│
-│                    │  their relationship to other exact subclonotypes, appear to arise from      │
-│                    │  background mRNA or a phenotypically similar phenomenon.  The               │
-│                    │  NGRAPH_FILTER option turns off this filtering.                             │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NQUAL               │  By default, enclone filters out exact subclonotypes having a base in V..J  │
-│                    │  that looks like it might be wrong.  More specifically, enclone finds bases │
-│                    │  which are not Q60 for a barcode, not Q40 for two barcodes, are not         │
-│                    │  supported by other exact subclonotypes, are variant within the clonotype,  │
-│                    │  and which disagree with the donor reference.  NQUAL turns this off.        │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NWEAK_CHAINS        │  By default, enclone filters chains from clonotypes that are                │
-│                    │  weak and appear to be artifacts, perhaps arising from a stray mRNA molecule│
-│                    │  that floated into a GEM.  The NWEAK_CHAINS option turns off this filter.   │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NWEAK_ONESIES       │  By default, enclone filters out onesie clonotypes having a single exact    │
-│                    │  subclonotype, and that are light chain or TRA, and whose number of cells is│
-│                    │  greater than one but less than 0.1% of the total number of cells.          │
-│                    │  This filter reduces the likelihood of creating clonotypes containing cells │
-│                    │  that arose from different recombination events.                            │
-│                    │  NWEAK_ONESIES turns this filter off.                                       │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NFOURSIE_KILL       │  By default, if enclone finds a foursie exact subclonotype that             │
-│                    │  contains a twosie exact subclonotype having at least ten cells, it kills   │
-│                    │  the foursie exact subclonotype, no matter how many cells it has.  The      │
-│                    │  foursies that are killed are believed to be rare oddball artifacts arising │
-│                    │  from repeated cell doublets or GEMs that contain two cells and multiple gel│
-│                    │  beads.  The argument NFOURSIE_KILL turns off this filtering.               │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NWHITEF             │  By default, enclone filters out rare artifacts arising from contamination  │
-│                    │  of oligos on gel beads.  The NWHITEF option turns off this filter.         │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NBC_DUP             │  By default, enclone filters out duplicated barcodes within an exact        │
-│                    │  subclonotype.  The NBC_DUP option turns off this filter.                   │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│MIX_DONORS          │  By default, enclone will prevent cells from different donors from being    │
-│                    │  placed in the same clonotype.  The MIX_DONORS option turns off this        │
-│                    │  behavior, thus allowing cells from different donors to be placed in the    │
-│                    │  same clonotype.  The main use of this option is for specificity testing, in│
-│                    │  which data from different donors are deliberately combined in an attempt   │
-│                    │  to find errors.  Use of the bc field for META input specification          │
-│                    │  automatically turns on this option.                                        │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│KEEP_IMPROPER       │  An exact subclonotype is improper if it does not have one chain            │
-│                    │  of each type.  This option causes all improper exact subclonotypes to be   │
-│                    │  retained, although they may be removed by other filters.                   │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│MIN_CHAINS_EXACT=n  │  Delete any exact subclonotype having less than n chains.  You can use this │
-│                    │  to "purify" a clonotype so as to display only exact subclonotypes having   │
-│                    │  all their chains.                                                          │
-│CHAINS_EXACT=n      │  Delete any exact subclonotype not having exactly n chains.                 │
-│MIN_CELLS_EXACT=n   │  Delete any exact subclonotype having less than n cells.  You might want    │
-│                    │  to use this if you have a very large and complex expanded clonotype,       │
-│                    │  for which you would like to see a simplified view.                         │
-│COMPLETE            │  delete any exact subclonotype that has less chains than the clonotype      │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│FCELL=var=value     │  Supposing that var has been specified as a field using the BC option       │
-│                    │  (or equivalently, using bc, via META), see "enclone help input", this      │
-│                    │  option filters out all barcodes that do not satisfy the given constraint.  │
-│                    │  Note that for purposes of testing the constraint, if the value for a       │
-│                    │  particular barcode has not been specified via BC or bc, then its value is  │
-│                    │  taken to be null.  Also multiple instances of FCELL may be used to impose  │
-│                    │  multiple filters.                                                          │
-└────────────────────┴─────────────────────────────────────────────────────────────────────────────┘
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help lvars
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-lead column options
-
-These options define lead variables, which correspond to columns that appear once in each
-clonotype, on the left side, and have one entry for each exact subclonotype row.
-
-Lead variables are specified using LVARS=x1,...,xn where each xi is one of:
-
-┌───────────────┬──────────────────────────────────────────────────────────────────────────────────┐
-│datasets       │  dataset identifiers                                                             │
-│origin         │  origin identifiers                                                              │
-│donors         │  donor identifiers                                                               │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│n              │  number of cells                                                                 │
-│n_<name>       │  number of cells associated to the given name, which can be a dataset            │
-│               │  or origin or donor or tag short name; may name only one such category           │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│nd<k>          │  For k a positive integer, this creates k+1 fields, that are specific to each    │
-│               │  clonotype.  The first field is n_<d1>, where d1 is the name of the dataset      │
-│               │  having the most cells in the clonotype.  If k ≥ 2, then you'll get a            │
-│               │  "runner-up" field n_<d2>, etc.  Finally you get a field n_other, however        │
-│               │  fields will be elided if they represent no cells.                               │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│near           │  Hamming distance of V..J DNA sequence to nearest neighbor                       │
-│far            │  Hamming distance of V..J DNA sequence to farthest neighbor                      │
-│               │  both compare to cells having chains in the same columns of the clonotype,       │
-│               │  with - shown if there is no other exact subclonotype to compare to              │
-│dref           │  Hamming distance of V..J DNA sequence to donor reference, excluding             │
-│               │  region of recombination                                                         │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│inkt           │  A string showing the extent to which the T cells in an exact subclonotype       │
-│               │  have evidence for being an iNKT cell.  The most evidence is denoted 𝝰gj𝝱gj,     │
-│               │  representing both gene name and junction sequence (CDR3) requirements for       │
-│               │  both chains.  See bit.ly/enclone for details on the requirements.               │
-│mait           │  Same as with inkt but for MAIT cells instead.                                   │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│g<d>           │  Here d is a nonnegative integer.  Then all the exact subclonotypes are          │
-│               │  grouped according to the Hamming distance of their V..J sequences.  Those       │
-│               │  within distance d are defined to be in the same group, and this is              │
-│               │  extended transitively.  The group identifier 1, 2, ... is shown.  The           │
-│               │  ordering of these identifiers is arbitrary.  This option is best applied        │
-│               │  to cases where all exact subclonotypes have a complete set of chains.           │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│gex            │   median gene expression UMI count                                              │
-│n_gex          │   number of cells reported by GEX                                               │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│<gene>_g       │   all five feature types: look for a declared feature of the given type         │
-│<antibody>_ab  │  with the given id or name; report the median UMI count for it; we allow         │
-│<antigen>_ag   │  the form e.g. <abbr>:<gene>_g where abbr is an abbreviation to be shown;        │
-│<crispr>_cr    │  we also allow <regular expression>_g where g can be replaced by ab, ag, cr      │
-│<custom>_cu    │  or cu; this represents a sum of UMI counts across the matching features. ●      │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│sec            │  for human or mouse BCR, number of GEX UMIs that are characterized as secreted   │
-│mem            │  for human or mouse BCR, number of GEX UMIs that are characterized as membrane   │
-│               │  For both of these, the algorithm looks for reads that are aligned through the   │
-│               │  right end of a constant region CH3 exon, and then read into a CH3-CHS or        │
-│               │  CH4-CHS exon, in the secreted case, or a M, M1 or M2 exon, in the membrane case.│
-│               │  This choice is determined by sequence tables in the code, and we cannot be      │
-│               │  absolutely certain that these tables are complete.                              │
-│               │  These fields require the presence of the files possorted_genome_bam.bam         │
-│               │  and possorted_genome_bam.bam.bai.                                               │
-│               │  These fields also require that you have samtools in your path.                  │
-│               │  Note that these counts tend to be low.                                          │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│cred           │  Short for credibility.  It is a measure of the extent to which cells            │
-│               │  having gene expression similar to a given putative B cell are themselves        │
-│               │  B cells.  (Or similarly for T cells.)  For the actual definition, let n         │
-│               │  be the number of VDJ cells that are also GEX cells.  For a given cell,          │
-│               │  find the n GEX cells that are closest to it in PCA space, and report the        │
-│               │  percent of those that are also VDJ cells.  For multiple datasets, it would      │
-│               │  be better to "aggr" the data, however that is not currently supported.          │
-│               │  The computation is also inefficient, so let us know if it's causing             │
-│               │  problems for you.  And cred makes much better sense for datasets that           │
-│               │  consist of mixed cell types, rather than consisting of pure B or T cells.       │
-└───────────────┴──────────────────────────────────────────────────────────────────────────────────┘
-For gene expression and feature barcode stats, such data must be provided as input to enclone.
-
-● Example: IG.*_g matches all genes that begin with IG, and TR(A|B).*_g matches all genes that
-begin with TRA or TRB.  Double quotes as in LVARS="..." may be needed.  The regular expression
-must be in the alphabet A-Za-z0-9+_-.[]()|* and is only interpreted as a regular expression if it
-contains a character in []()|*.  See "enclone help filter" for more information about regular
-expressions.
-
-   These variables have some alternate versions, as shown in the table below:
-  
-  ┌──────────┬───────────────────────────────┬──────────┬──────────────┬─────────────┬────────────┐
-  │variable  │  semantics                    │  visual  │  visual      │  parseable  │  parseable │
-  │          │                               │          │  (one cell)  │             │  (one cell)│
-  ├──────────┼───────────────────────────────┼──────────┼──────────────┼─────────────┼────────────┤
-  │x         │  median over cells            │  yes     │  this cell   │  yes        │  yes       │
-  │x_mean    │  mean over cells              │  yes     │  null        │  yes        │  yes       │
-  │x_μ       │  (same as above)              │  yes     │  null        │  yes        │  yes       │
-  │x_sum     │  sum over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_Σ       │  (same as above)              │  yes     │  null        │  yes        │  yes       │
-  │x_min     │  min over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_max     │  max over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_%       │  % of total GEX (genes only)  │  yes     │  this cell   │  yes        │  yes       │
-  │x_cell    │  this cell                    │  no      │  no          │  no         │  this cell │
-  └──────────┴───────────────────────────────┴──────────┴──────────────┴─────────────┴────────────┘
-  Some explanation is required.  If you use enclone without certain options, you get the "visual"
-  column.
-  • Add the option PER_CELL (see "enclone help display") and then you get visual output with extra
-  lines for each cell within an exact subclonotype, and each of those extra lines is described by
-  the "visual (one cell)" column.
-  • If you generate parseable output (see "enclone help parseable"), then you get the "parseable"
-  column for that output, unless you specify PCELL, and then you get the last column.
-  • For the forms with μ and Σ, the Greek letters are only used in column headings for visual output
-  (to save space), and optionally, in names of fields on the command line.
-   If you try out these features, you'll see exactly what happens! 
-
- Similar to the above but simpler: n_gex is just a count of cells, visual (one cell) shows 0 or
-1, n_gex_cell is defined for parseable (one cell), and the x_mean etc. forms do not apply.
-
-The default is datasets,n, except that datasets is suppressed if there is only one dataset.
-
-LVARSP=x1,...,xn is like LVARS but appends to the list.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help cvars
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-per-chain column options: These options define per-chain variables, which correspond to columns
-that appear once for each chain in each clonotype, and have one entry for each exact subclonotype.
-
-Per-column variables are specified using
-CVARS=x1,...,xn
-where each xi is one of:
-
-┌────────────┬──────────────────────────────────────────────────────────────────────────────┐
-│var         │  bases at positions in chain that vary across the clonotype                  │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│u           │   VDJ UMI count for each exact subclonotype, median across cells            │
-│r           │   VDJ read count for each exact subclonotype, median across cells           │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│const       │  constant region name                                                        │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│edit        │  a string that defines the edit of the reference V(D)J concatenation versus  │
-│            │  the contig, from the beginning of the CDR3 to the end of the J segment;     │
-│            │  this uses a coordinate system in which 0 is the first base of the J ref     │
-│            │  segment (or the first base of the D ref segment for IGH and TRB); for       │
-│            │  example D-4:4 denotes the deletion of the last 4 bases of the V segment,    │
-│            │  I0:2 denotes an insertion of 2 bases after the V                            │
-│            │  and I0:2•S5 denotes that plus a substitution at position 5; in computing    │
-│            │  "edit", for IGH and TRB, we always test every possible D segment,           │
-│            │  regardless of whether one is annotated, and pick the best one; for this     │
-│            │  reason, "edit" may be slow                                                  │
-│comp        │  a measure of CDR3 complexity, which is the total number of S, D and I       │
-│            │  symbols in "edit" as defined above                                          │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│cdr3_dna    │  the CDR3_DNA sequence                                                       │
-│cdr3_len    │  number of amino acids in the CDR3 sequence                                  │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│vjlen       │  number of bases from the start of the V region to the end of the J region   │
-│clen        │  length of observed constant region (usually truncated at primer start)      │
-│ulen        │  length of observed 5'-UTR sequence;                                         │
-│            │  note however that what we report is just the start of the V segment         │
-│            │  on the contig, and thus the length may include junk before the UTR          │
-│cdiff       │  differences with universal reference constant region, shown in the          │
-│            │  abbreviated form e.g. 22T (ref changed to T at base 22) or 22T+10           │
-│            │  (same but contig has 10 additional bases beyond end of ref C region).       │
-│            │  At most five differences are shown, and if there are more, ... is appended. │
-│udiff       │  like cdiff, but for the 5'-UTR                                              │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│notes       │  optional note if there is an insertion or the end of J does not exactly abut│
-│            │  the beginning of C; elided if empty; also single base overlaps between      │
-│            │  J and C are not shown unless you use the special option JC1; we do this     │
-│            │  because with some VDJ references, one nearly always has such an overlap     │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│ndiff<n>vj  │  number of base differences within V..J between this exact subclonotype and  │
-│            │  exact subclonotype n                                                        │
-│d_univ      │  distance from universal reference, more specifically,                       │
-│            │  number of base differences within V..J between this exact                   │
-│            │  subclonotype and universal reference, exclusive of indels, the last 15      │
-│            │  bases of the V and the first 15 bases of the J                              │
-│d_donor     │  distance from donor reference,                                              │
-│            │  as above but computed using donor reference                                 │
-└────────────┴──────────────────────────────────────────────────────────────────────────────┘
-
-   These variables have some alternate versions, as shown in the table below:
-  
-  ┌──────────┬───────────────────────────────┬──────────┬──────────────┬─────────────┬────────────┐
-  │variable  │  semantics                    │  visual  │  visual      │  parseable  │  parseable │
-  │          │                               │          │  (one cell)  │             │  (one cell)│
-  ├──────────┼───────────────────────────────┼──────────┼──────────────┼─────────────┼────────────┤
-  │x         │  median over cells            │  yes     │  this cell   │  yes        │  yes       │
-  │x_mean    │  mean over cells              │  yes     │  null        │  yes        │  yes       │
-  │x_μ       │  (same as above)              │  yes     │  null        │  yes        │  yes       │
-  │x_sum     │  sum over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_Σ       │  (same as above)              │  yes     │  null        │  yes        │  yes       │
-  │x_min     │  min over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_max     │  max over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_%       │  % of total GEX (genes only)  │  yes     │  this cell   │  yes        │  yes       │
-  │x_cell    │  this cell                    │  no      │  no          │  no         │  this cell │
-  └──────────┴───────────────────────────────┴──────────┴──────────────┴─────────────┴────────────┘
-  Some explanation is required.  If you use enclone without certain options, you get the "visual"
-  column.
-  • Add the option PER_CELL (see "enclone help display") and then you get visual output with extra
-  lines for each cell within an exact subclonotype, and each of those extra lines is described by
-  the "visual (one cell)" column.
-  • If you generate parseable output (see "enclone help parseable"), then you get the "parseable"
-  column for that output, unless you specify PCELL, and then you get the last column.
-  • For the forms with μ and Σ, the Greek letters are only used in column headings for visual output
-  (to save space), and optionally, in names of fields on the command line.
-   If you try out these features, you'll see exactly what happens! 
-
-At least one variable must be listed.  The default is u,const,notes.  CVARSP: same as CVARS but
-appends.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help amino
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-There is a complex per-chain column to the left of other per-chain columns, defined by
-AMINO=x1,...,xn: display amino acid columns for the given categories, in one combined ordered
-group, where each xi is one of:
-
-┌────────┬───────────────────────────────────────────────────────────────────────────────────┐
-│cdr3    │  CDR3 sequence                                                                    │
-├────────┼───────────────────────────────────────────────────────────────────────────────────┤
-│var     │  positions in chain that vary across the clonotype                                │
-│share   │  positions in chain that differ consistently from the donor reference             │
-├────────┼───────────────────────────────────────────────────────────────────────────────────┤
-│donor   │  positions in chain where the donor reference differs from the universal reference│
-├────────┼───────────────────────────────────────────────────────────────────────────────────┤
-│donorn  │  positions in chain where the donor reference differs nonsynonymously             │
-│        │  from the universal reference                                                     │
-├────────┼───────────────────────────────────────────────────────────────────────────────────┤
-│a-b     │  amino acids numbered a through b (zero-based, inclusive)                         │
-└────────┴───────────────────────────────────────────────────────────────────────────────────┘
-
-Note that we compute positions in base space, and then divide by three to get positions in amino
-acid space.  Thus it can happen that a position in amino acid space is shown for both var and share.
-
-The default value for AMINO is cdr3,var,share,donor.  Note that we only report amino acids that
-are strictly within V..J, thus specifically excluding the codon bridging J and C.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help display
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-other options that control clonotype display
-
-┌───────────┬───────────────────────────────────────────────────────────────────────────────┐
-│PER_CELL   │  expand out each exact subclonotype line, showing one line per cell,          │
-│           │  for each such line, displaying the barcode name, the number of UMIs assigned,│
-│           │  and the gene expression UMI count, if applicable, under gex_med              │
-├───────────┼───────────────────────────────────────────────────────────────────────────────┤
-│BARCODES   │  print list of all barcodes of the cells in each clonotype, in a              │
-│           │  single line near the top of the printout for a given clonotype               │
-├───────────┼───────────────────────────────────────────────────────────────────────────────┤
-│SEQC       │  print V..J sequence for each chain in the first exact subclonotype, near     │
-│           │  the top of the printout for a given clonotype                                │
-├───────────┼───────────────────────────────────────────────────────────────────────────────┤
-│FULL_SEQC  │  print full sequence for each chain in the first exact subclonotype,          │
-│           │  near the top of the printout for a given clonotype                           │
-├───────────┼───────────────────────────────────────────────────────────────────────────────┤
-│SUM        │  print sum row for each clonotype                                             │
-│MEAN       │  print mean row for each clonotype                                            │
-└───────────┴───────────────────────────────────────────────────────────────────────────────┘
-
-options that control clonotype grouping
-
-By default, enclone organizes clonotypes into groups, and each group contains just one clonotype! 
-If you prefer not to see the grouping messages, you can turn them off by adding the option NGROUP
-to the enclone command line.  We intend to add useful versions of grouping to a future version of
-enclone, that are reflective of functional (antigen-binding) differences.  For now there are the
-following "toy" options:
-
-┌─────────────────────────┬─────────────────────────────────────────────────────────────────────┐
-│GROUP_HEAVY_CDR3         │  group by perfect identity of CDR3 amino acid sequence of IGH or TRB│
-│GROUP_VJ_REFNAME         │  group by sharing identical V and J reference gene names,           │
-│GROUP_VJ_REFNAME_STRONG  │  same but also require identical length V..J sequences              │
-│                         │  (after correction for indels) and identical length CDR3 sequences, │
-│                         │  but ignores foursies and moresies                                  │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│MIN_GROUP                │  minimum number of clonotypes in group to print (default = 1)       │
-└─────────────────────────┴─────────────────────────────────────────────────────────────────────┘
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help indels
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-handling of insertions and deletions
-
-enclone can recognize and display a single insertion or deletion in a contig relative to the
-reference, so long as its length is divisible by three, is relatively short, and occurs within the
-V segment, not too close to its right end.
-
-These indels could be germline, however most such events are already captured in a reference
-sequence.  Currently the donor reference code in enclone does not recognize indels.
-
-SHM deletions are rare, and SHM insertions are even more rare.
-
-Deletions are displayed using hyphens (-).  If you use the var option for cvars, the hyphens will
-be displayed in base space, where they are initially observed.  For the AMINO option, the deletion
-is first shifted by up to two bases, so that the deletion starts at a base position that is
-divisible by three.  Then the deleted amino acids are shown as hyphens.
-
-Insertions are shown only in amino acid space, in a special per-chain column called notes that
-appears if there is an insertion.  Colored amino acids are shown for the insertion, and the
-position of the insertion is shown.  The position is the position of the amino acid after which
-the insertion appears, where the first amino acid (start codon) is numbered 0.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help color
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-Here is the color palette that enclone uses for amino acids:
-
-       
-
-When enclone shows amino acids, it uses one of two coloring schemes.  The first scheme (the
-default, or using the argument COLOR=codon), colors amino acids by codon, according to the
-following scheme:
-
-Alanine        A  GCT GCC GCA GCG
-Arginine       R  CGT CGC CGA CGG AGA AGG
-Asparagine     N  AAT AAC
-Aspartic Acid  D  GAT GAC
-Cysteine       C  TGT TGC
-Glutamine      Q  CAA CAG
-Glutamic Acid  E  GAA GAG
-Glycine        G  GGT GGC GGA GGG
-Histidine      H  CAT CAC
-Isoleucine     I  ATT ATC ATA
-Leucine        L  TTA TTG CTT CTC CTA CTG
-Lysine         K  AAA AAG
-Methionine     M  ATG
-Phenylalanine  F  TTT TTC
-Proline        P  CCT CCC CCA CCG
-Serine         S  TCT TCC TCA TCG AGT AGC
-Threonine      T  ACT ACC ACA ACG
-Tryptophan     W  TGG
-Tyrosine       Y  TAT TAC
-Valine         V  GTT GTC GTA GTG
-
-Colored amino acids enable the compact display of all the information in a clonotype.
-
-The second scheme for coloring amino acids, COLOR=property, colors amino acids by their
-properties, according to the following scheme:
-
-1. Aliphatic: A G I L P V
-2. Aromatic: F W Y
-3. Acidic: D E
-4. Basic: R H K
-5. Hydroxylic: S T
-6. Sulfurous: C M
-7. Amidic: N Q
-
-In both cases, the coloring is done using special characters, called ANSI escape characters. 
-Color is used occasionally elsewhere by enclone, and there is also some bolding, accomplished
-using the same mechanism.
-
-Correct display of colors and bolding depends on having a terminal window that is properly set up.
- As far as we know, this may always be the case, but it is possible that there are exceptions.  In
-addition, in general, text editors do not correctly interpret escape characters.
-
-For both of these reasons, you may wish to turn off the "special effects", either some or all of
-the time.  You can do this by adding the argument
-PLAIN
-to any enclone command.
-
-We know of two methods to get enclone output into another document, along with colors:
-1. Take a screenshot.
-2. Open a new terminal window, type the enclone command, and then convert the terminal window into
-a pdf.  See enclone help faq for related instructions.
-
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help faq
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-Frequently Asked Questions
-
-We're sorry you're having difficulty!  Please see the answers below, check out the other help
-guides, and if you're still stuck, write to us at enclone@10xgenomics.com.
-
-1. Why is my enclone output garbled?
-
-We can think of two possibilities:
-
-A. The escape characters that enclone emits for color and bolding are not getting
-translated.  You have some options:
-(a) Turn off escape character generation by adding PLAIN to your enclone commands.
-This will work but you'll lose some information.
-(b) If your terminal window is not translating escape characters, ask someone
-with appropriate expertise to help you.  We have not observed this phenomenon,
-but it should be fixable.
-(c) If you're trying to view enclone output, with escape characters, using an editor,
-that's probably not going to work well.
-
-B. Perhaps enclone is emitting very wide lines.  Here are things you can do about this:
-(a) Make your terminal window wider or reduce the font size.
-(b) Identify the field that is very wide and use the column controls to remove that
-field.  See the help for lvars and cvars.  For example,
-AMINO=cdr3
-may help, or even
-AMINO=
-
-2. Can I convert the enclone visual output into other forms?
-
-Yes, there are choices:
-A. On a Mac, you can screenshot from a terminal window.
-B. Add the argument HTML to the enclone command line.  Then the output will be presented as html,
-with title "enclone output".  If you want to set the title, use HTML="...".
-C. You can then convert the html to pdf.  The best way on a Mac is to open Safari, which is the
-best browser for this particular purpose, select the file where you've saved the html, and then
-export as pdf.  Do not convert to pdf via printing, which produces a less readable file, and also
-distorts colors.  (We do not know why the colors are distorted.)
-D. If you want to put enclone output in a Google Doc, you can do it via approach A, although then
-you won't be able to select text within the copied region.  Alternatively, if you open the html
-file in a browser, you can then select text (including clonotype box text) and paste into a Google
-Doc.  It will be pretty ugly, but will capture color and correctly render the box structure,
-provided that you use an appropriate fixed-width font for that part of the Doc.  We found that
-Courier New works, with line spacing set to 0.88.  You may have to reduce the font size.
-
-3. Why is enclone slow for me?
-
-On a single VDJ dataset, it typically runs for us in a few seconds, on a Mac or Linux server. 
-Runs where we combine several hundred datasets execute in a couple minutes (on a server).  Your
-mileage could vary, and we are interested in cases where it is underperforming.  Let us know.  We
-are aware of several things that could be done to speed up enclone.
-
-4. How does enclone fit into the 10x Genomics software ecosystem?
-
-There are several parts to the answer:
-• enclone is a standalone executable that by default produces human-readable output.
-• You can also run enclone to produce parseable output (see enclone help parseable), and that
-output can be digested using code that you write (for example, in R).
-• When you run Cell Ranger to process 10x single cell immune profiling data, it in effect calls
-enclone with a special option that yields only an output file for the 10x visualization tool
-Loupe.
-• Clonotypes may then be viewed using Loupe.  The view of a clonotype provided by Loupe is
-different than the view provided by enclone.  Loupe shows a continuous expanse of bases across
-each chain, which you can scroll across, rather than the compressed view of "important" bases or
-amino acids that enclone shows.
-
-5. What platforms does enclone run on?
-
-1. Linux/x86-64 (that's most servers)
-2. Mac.
-
-However, we have not and cannot test every possible configuration of these platforms.  Please let
-us know if you encounter problems!
-
-6. How can I print out all the donor reference sequences?
-
-Add the argument DONOR_REF_FILE=filename to your enclone command, and fasta for the donor
-reference sequences will be dumped there.
-
-7. How does enclone know what VDJ reference sequences I'm using?
-
-If you used Cell Ranger version 4.0 or greater, then the VDJ reference file was included in the
-outs directory, and so enclone knows the reference sequence from that.
-
-For outs from older Cell Ranger versions, enclone has to guess which VDJ reference sequences were
-used, and may or may not do so correctly.  As part of this, if you have mouse data from older Cell
-Ranger versions, you need to supply the argument MOUSE on the command line.
-
-It is also possible to set the reference sequence directly by adding REF=f to your
-command line, where f is the name of your VDJ reference fasta file, but if that is different than
-the reference supplied to Cell Ranger, then you will have to add the additional argument RE to
-recompute annotations, and that will slow down enclone somewhat.
-
-8. Can I provide data from more than one donor?
-
-Yes.  Type enclone help input for details.  The default behavior of enclone is to prevent cells
-from different donors from being placed in the same clonotype.  The MIX_DONORS option may be used
-to turn off this behavior.  If you employ this option, then clonotypes containing cells from more
-than one donor will be flagged as errors, unless you use the NWARN option to turn off those
-warnings.  The primary reason for allowing entry of data from multiple donors is to allow
-estimation of enclone's error rate.
-
-9. Why are some command line argument values quoted?
-
-Command line argument values that contain any of these characters ;|* need to be quoted like so
-TCR="a;b"
-to prevent the shell from interpreting them for a purpose completely unrelated to enclone.  This
-is a trap, because forgetting to add the quotes can result in nonsensical and confusing behavior!
-
-10. If enclone fails, does it return nonzero exit status?
-
-Yes, unless output of enclone is going to a terminal.  In that case, you'll always get zero.
-
-11. Could a cell be missing from an enclone clonotype?
-
-Yes, some cells are deliberately deleted.  The cell might have been deleted by one of the filters
-described in enclone help special, and which you can turn off.  We also delete cells for which
-more than four chains were found.
-
-12. Can enclone print summary stats?
-
-Yes, if you add the option SUMMARY, then some summary stats will be printed.  If you only want to
-see the summary stats, then also add the option NOPRINT.
-
-13. What is the notes column?
-
-The notes column appears if one of two relatively rare events occurs:
-
-1. An insertion is detected in a chain sequence, relative to the reference.
-
-2. The end of the J segment on a chain sequence does not exactly coincide with
-   the beginning of the C segment.
-The latter could correspond to one of several phenomena:
-a. A transcript has an insertion between its J and C segments.
-   This can happen.  See e.g. Behlke MA, Loh DY.
-   Alternative splicing of murine T-cell receptor beta-chain transcripts.
-   Nature 322(1986), 379-382.
-b. There is an error in a reference sequence segment.
-   We have tried to eliminate all such errors from the built-in references for
-   human and mouse.
-c. A cell produced a nonstandard transcript and also standard ones, and the
-   Cell Ranger pipeline just happened to pick a nonstandard one.
-d. There was a technical artifact and the sequence does not actually represent
-   an mRNA molecule.
-
-Overlaps of length exactly one between J and C segments are not shown unless you specify the
-option JC1.  The reason for this is that certain reference sequences (notably those from IMGT and
-those supplied with Cell Ranger 3.1) often have an extra base at the beginning of their C
-segments, resulting in annoying overlap notes for a large fraction of clonotypes.
-
-14. Can I cap the number of threads used by enclone?
-
-You can use the command-line argument MAX_CORES=n to cap the number of cores used in parallel
-loops.  The number of threads used is typically one higher.
-
-15. Can I use enclone if I have only gene expression data?
-
-Possibly.  In some cases this works very well, but in other cases it does not.  Success depends on
-dataset characteristics that have not been carefully investigated.  To attempt this, you need to
-invoke Cell Ranger on the GEX dataset as if it was a VDJ dataset, and you need to specify to Cell
-Ranger that the run is to be treated as BCR or TCR.  Two separate invocations can be used to get
-both.  Note also that Cell Ranger has been only minimally tested for this configuration and that
-this is not an officially supported Cell Ranger configuration.
-
-16. How can I cite enclone?
-
-This version of enclone has been provided under a non-disclosure agreement,
-however once enclone has officially launched, you will be able to cite this version as:
-10x Genomics, https://github.com/10XGenomics/enclone,
-(your enclone version information will be printed here).
-At some point subsequent to that, there will be a white paper to which you can refer, in addition
-to a DOI minted at Zenodo.  In the spirit of reproducibility, you should provide the arguments
-that you used when you ran enclone and indicate the version of Cell Ranger that you used to
-generate the input data.
-
-17. Can I print the enclone version?
-
-Yes, type "enclone version".
-
-18. Can enclone ingest multiple datasets from the same library?
-
-If enclone detects significant (≥ 25%) barcode reuse between datasets, it will exit.  This
-behavior can be overridden using the argument ACCEPT_REUSE.
-19. Can I turn off all the filters used in joining clonotypes?
-
-Pretty much.  You can run with the following arguments:
-MAX_CDR3_DIFFS=100
-MAX_LOG_SCORE=100
-EASY
-MAX_DIFFS=200
-MAX_DEGRADATION=150,
-however this will in general be very slow and not produce useful results.  Depending on what your
-goal is, you may find it helpful to use some of these arguments, and with lower values.  You can
-see the meaning of the arguments and their default values by typing "enclone help how".
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-enclone help developer
-▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
-
-a few options for developers
-
-For instructions on how to compile, please see
-bit.ly/enclone.
-
-┌───────────┬──────────────────────────────────────────────────────────────────────────────────┐
-│COMP       │  report computational performance stats; use this with NOPRINT if you            │
-│           │  only want to see the computational performance stats, and with NOPAGER if you   │
-│           │  want output to be unbuffered                                                    │
-│COMP2      │  like COMP, but adds more detailed lines that are prefixed with --               │
-├───────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│LONG_HELP  │  allow long lines in help pages, which will otherwise trigger an assert          │
-├───────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│CTRLC      │  upon CTRL-C, emit a traceback and then exit; can be used as a primitive         │
-│           │  but easy profiling method, to know what the code is doing if it seems to be     │
-│           │  very slow                                                                       │
-├───────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│HAPS=n     │  Interrupt code n times, at one second intervals, get a traceback, and then tally│
-│           │  the tracebacks.  This only works if the n tracebacks can be obtained before     │
-│           │  enclone terminates.  Interrupts that occur in the allocator are ignored, and    │
-│           │  in some cases, this accounts for most interrupts, resulting in confusing        │
-│           │  output.  In such cases, consider using CTRLC or a more sophisticated tool       │
-│           │  like perf.  Also HAPS only reports on the master thread, so to get useful       │
-│           │  information, you probably need to change an instance in the code of             │
-│           │  par_iter_mut to iter_mut, to turn off parallelization for a strategically       │
-│           │  selected section.                                                               │
-└───────────┴──────────────────────────────────────────────────────────────────────────────────┘
-
-
- - diff --git a/pages/auto/help.amino.html b/pages/auto/help.amino.html deleted file mode 100644 index a25dd52f9..000000000 --- a/pages/auto/help.amino.html +++ /dev/null @@ -1,73 +0,0 @@ - - - - - - -enclone help amino - - - - - - - -
-enclone banner -

-There is a complex per-chain column to the left of other per-chain columns, defined by
-AMINO=x1,...,xn: display amino acid columns for the given categories, in one combined ordered
-group, where each xi is one of:
-
-┌────────┬───────────────────────────────────────────────────────────────────────────────────┐
-│cdr3    │  CDR3 sequence                                                                    │
-├────────┼───────────────────────────────────────────────────────────────────────────────────┤
-│var     │  positions in chain that vary across the clonotype                                │
-│share   │  positions in chain that differ consistently from the donor reference             │
-├────────┼───────────────────────────────────────────────────────────────────────────────────┤
-│donor   │  positions in chain where the donor reference differs from the universal reference│
-├────────┼───────────────────────────────────────────────────────────────────────────────────┤
-│donorn  │  positions in chain where the donor reference differs nonsynonymously             │
-│        │  from the universal reference                                                     │
-├────────┼───────────────────────────────────────────────────────────────────────────────────┤
-│a-b     │  amino acids numbered a through b (zero-based, inclusive)                         │
-└────────┴───────────────────────────────────────────────────────────────────────────────────┘
-
-Note that we compute positions in base space, and then divide by three to get positions in amino
-acid space.  Thus it can happen that a position in amino acid space is shown for both var and share.
-
-The default value for AMINO is cdr3,var,share,donor.  Note that we only report amino acids that
-are strictly within V..J, thus specifically excluding the codon bridging J and C.
-
-
- - diff --git a/pages/auto/help.color.html b/pages/auto/help.color.html deleted file mode 100644 index e84cb6ad7..000000000 --- a/pages/auto/help.color.html +++ /dev/null @@ -1,109 +0,0 @@ - - - - - - -enclone help color - - - - - - - -
-enclone banner -

-Here is the color palette that enclone uses for amino acids:
-
-       
-
-When enclone shows amino acids, it uses one of two coloring schemes.  The first scheme (the
-default, or using the argument COLOR=codon), colors amino acids by codon, according to the
-following scheme:
-
-Alanine        A  GCT GCC GCA GCG
-Arginine       R  CGT CGC CGA CGG AGA AGG
-Asparagine     N  AAT AAC
-Aspartic Acid  D  GAT GAC
-Cysteine       C  TGT TGC
-Glutamine      Q  CAA CAG
-Glutamic Acid  E  GAA GAG
-Glycine        G  GGT GGC GGA GGG
-Histidine      H  CAT CAC
-Isoleucine     I  ATT ATC ATA
-Leucine        L  TTA TTG CTT CTC CTA CTG
-Lysine         K  AAA AAG
-Methionine     M  ATG
-Phenylalanine  F  TTT TTC
-Proline        P  CCT CCC CCA CCG
-Serine         S  TCT TCC TCA TCG AGT AGC
-Threonine      T  ACT ACC ACA ACG
-Tryptophan     W  TGG
-Tyrosine       Y  TAT TAC
-Valine         V  GTT GTC GTA GTG
-
-Colored amino acids enable the compact display of all the information in a clonotype.
-
-The second scheme for coloring amino acids, COLOR=property, colors amino acids by their
-properties, according to the following scheme:
-
-1. Aliphatic: A G I L P V
-2. Aromatic: F W Y
-3. Acidic: D E
-4. Basic: R H K
-5. Hydroxylic: S T
-6. Sulfurous: C M
-7. Amidic: N Q
-
-In both cases, the coloring is done using special characters, called ANSI escape characters. 
-Color is used occasionally elsewhere by enclone, and there is also some bolding, accomplished
-using the same mechanism.
-
-Correct display of colors and bolding depends on having a terminal window that is properly set up.
- As far as we know, this may always be the case, but it is possible that there are exceptions.  In
-addition, in general, text editors do not correctly interpret escape characters.
-
-For both of these reasons, you may wish to turn off the "special effects", either some or all of
-the time.  You can do this by adding the argument
-PLAIN
-to any enclone command.
-
-We know of two methods to get enclone output into another document, along with colors:
-1. Take a screenshot.
-2. Open a new terminal window, type the enclone command, and then convert the terminal window into
-a pdf.  See enclone help faq for related instructions.
-
-
- - diff --git a/pages/auto/help.command.html b/pages/auto/help.command.html deleted file mode 100644 index ceb57f588..000000000 --- a/pages/auto/help.command.html +++ /dev/null @@ -1,88 +0,0 @@ - - - - - - -enclone help command - - - - - - - -
-enclone banner -

-information about enclone command-line argument processing
-
-1. Order of processing
-
-• Before processing its command line, enclone first checks for environment
-variables of the form ENCLONE_<x>.  These are converted into command-line arguments.  You can set
-any command-line argument this way.  The reason why you might want to use this feature is if you
-find yourself using the same command-line option over and over, and it is more convenient to set
-it once as an environment variable.
-• For example, setting the environment variable ENCLONE_PRE to /Users/me/enclone_data is
-equivalent to providing the command-line argument PRE=/Users/me/enclone_data.
-• After checking environment variables, arguments on the command line are read from left to right;
-if an argument name is repeated, only the rightmost value is used, except as noted specifically in
-the documentation.
-
-2. Color
-
-enclone uses ANSI escape codes for color and bolding, frivolously, for emphasis, and more
-importantly for amino acids, to represent different codons.  This is done automatically but you
-can turn it off....
-
-PLEASE READ THIS:
-
-You can turn off escape codes by adding PLAIN to any command.  Use this if you want to peruse
-output using a text editor which does not grok the escape codes.  However some things will not
-make sense without color.
-
-3. Paging
-
-• enclone automatically pipes its output to less -R -F -X.
-• The effect of this will be that you'll see only the first screen of output.  You can then use
-the spacebar to go forward, b to go backward, and q to quit.  The -R option causes escape
-characters to be correctly displayed, the -F option causes an automatic exit if output fits on a
-single screen, and the -X option prevents output from being sent to the "alternate screen" under
-certain platform/version combinations.
-• Type man less if you need more information.
-• If for whatever reason you need to turn off output paging, add the argument NOPAGER to the
-enclone command.
-
-
- - diff --git a/pages/auto/help.cvars.html b/pages/auto/help.cvars.html deleted file mode 100644 index 7d57ced36..000000000 --- a/pages/auto/help.cvars.html +++ /dev/null @@ -1,136 +0,0 @@ - - - - - - -enclone help cvars - - - - - - - -
-enclone banner -

-per-chain column options: These options define per-chain variables, which correspond to columns
-that appear once for each chain in each clonotype, and have one entry for each exact subclonotype.
-
-Per-chain variables are specified using
-CVARS=x1,...,xn
-where each xi is one of:
-
-┌────────────┬──────────────────────────────────────────────────────────────────────────────┐
-│var         │  bases at positions in chain that vary across the clonotype                  │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│u           │   VDJ UMI count for each exact subclonotype, median across cells            │
-│r           │   VDJ read count for each exact subclonotype, median across cells           │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│const       │  constant region name                                                        │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│edit        │  a string that defines the edit of the reference V(D)J concatenation versus  │
-│            │  the contig, from the beginning of the CDR3 to the end of the J segment;     │
-│            │  this uses a coordinate system in which 0 is the first base of the J ref     │
-│            │  segment (or the first base of the D ref segment for IGH and TRB); for       │
-│            │  example D-4:4 denotes the deletion of the last 4 bases of the V segment,    │
-│            │  I0:2 denotes an insertion of 2 bases after the V                            │
-│            │  and I0:2•S5 denotes that plus a substitution at position 5; in computing    │
-│            │  "edit", for IGH and TRB, we always test every possible D segment,           │
-│            │  regardless of whether one is annotated, and pick the best one; for this     │
-│            │  reason, "edit" may be slow                                                  │
-│comp        │  a measure of CDR3 complexity, which is the total number of S, D and I       │
-│            │  symbols in "edit" as defined above                                          │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│cdr3_dna    │  the CDR3_DNA sequence                                                       │
-│cdr3_len    │  number of amino acids in the CDR3 sequence                                  │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│vjlen       │  number of bases from the start of the V region to the end of the J region   │
-│clen        │  length of observed constant region (usually truncated at primer start)      │
-│ulen        │  length of observed 5'-UTR sequence;                                         │
-│            │  note however that what we report is just the start of the V segment         │
-│            │  on the contig, and thus the length may include junk before the UTR          │
-│cdiff       │  differences with universal reference constant region, shown in the          │
-│            │  abbreviated form e.g. 22T (ref changed to T at base 22) or 22T+10           │
-│            │  (same but contig has 10 additional bases beyond end of ref C region).       │
-│            │  At most five differences are shown, and if there are more, ... is appended. │
-│udiff       │  like cdiff, but for the 5'-UTR                                              │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│notes       │  optional note if there is an insertion or the end of J does not exactly abut│
-│            │  the beginning of C; elided if empty; also single base overlaps between      │
-│            │  J and C are not shown unless you use the special option JC1; we do this     │
-│            │  because with some VDJ references, one nearly always has such an overlap     │
-├────────────┼──────────────────────────────────────────────────────────────────────────────┤
-│ndiff<n>vj  │  number of base differences within V..J between this exact subclonotype and  │
-│            │  exact subclonotype n                                                        │
-│d_univ      │  distance from universal reference, more specifically,                       │
-│            │  number of base differences within V..J between this exact                   │
-│            │  clonotype and universal reference, exclusive of indels, the last 15         │
-│            │  bases of the V and the first 15 bases of the J                              │
-│d_donor     │  distance from donor reference,                                              │
-│            │  as above but computed using donor reference                                 │
-└────────────┴──────────────────────────────────────────────────────────────────────────────┘
-
-   These variables have some alternate versions, as shown in the table below:
-  
-  ┌──────────┬───────────────────────────────┬──────────┬──────────────┬─────────────┬────────────┐
-  │variable  │  semantics                    │  visual  │  visual      │  parseable  │  parseable │
-  │          │                               │          │  (one cell)  │             │  (one cell)│
-  ├──────────┼───────────────────────────────┼──────────┼──────────────┼─────────────┼────────────┤
-  │x         │  median over cells            │  yes     │  this cell   │  yes        │  yes       │
-  │x_mean    │  mean over cells              │  yes     │  null        │  yes        │  yes       │
-  │x_μ       │  (same as above)              │  yes     │  null        │  yes        │  yes       │
-  │x_sum     │  sum over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_Σ       │  (same as above)              │  yes     │  null        │  yes        │  yes       │
-  │x_min     │  min over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_max     │  max over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_%       │  % of total GEX (genes only)  │  yes     │  this cell   │  yes        │  yes       │
-  │x_cell    │  this cell                    │  no      │  no          │  no         │  this cell │
-  └──────────┴───────────────────────────────┴──────────┴──────────────┴─────────────┴────────────┘
-  Some explanation is required.  If you use enclone without certain options, you get the "visual"
-  column.
-  • Add the option PER_CELL (see "enclone help display") and then you get visual output with extra
-  lines for each cell within an exact subclonotype, and each of those extra lines is described by
-  the "visual (one cell)" column.
-  • If you generate parseable output (see "enclone help parseable"), then you get the "parseable"
-  column for that output, unless you specify PCELL, and then you get the last column.
-  • For the forms with μ and Σ, the Greek letters are only used in column headings for visual output
-  (to save space), and optionally, in names of fields on the command line.
-   If you try out these features, you'll see exactly what happens! 
-
-At least one variable must be listed.  The default is u,const,notes.  CVARSP: same as CVARS but
-appends.
-
-
- - diff --git a/pages/auto/help.developer.html b/pages/auto/help.developer.html deleted file mode 100644 index d5dd91901..000000000 --- a/pages/auto/help.developer.html +++ /dev/null @@ -1,77 +0,0 @@ - - - - - - -enclone help developer - - - - - - - -
-enclone banner -

-a few options for developers
-
-For instructions on how to compile, please see
-bit.ly/enclone.
-
-┌───────────┬──────────────────────────────────────────────────────────────────────────────────┐
-│COMP       │  report computational performance stats; use this with NOPRINT if you            │
-│           │  only want to see the computational performance stats, and with NOPAGER if you   │
-│           │  want output to be unbuffered                                                    │
-│COMP2      │  like COMP, but adds more detailed lines that are prefixed with --               │
-├───────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│LONG_HELP  │  allow long lines in help pages, which will otherwise trigger an assert          │
-├───────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│CTRLC      │  upon CTRL-C, emit a traceback and then exit; can be used as a primitive         │
-│           │  but easy profiling method, to know what the code is doing if it seems to be     │
-│           │  very slow                                                                       │
-├───────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│HAPS=n     │  Interrupt code n times, at one second intervals, get a traceback, and then tally│
-│           │  the tracebacks.  This only works if the n tracebacks can be obtained before     │
-│           │  enclone terminates.  Interrupts that occur in the allocator are ignored, and    │
-│           │  in some cases, this accounts for most interrupts, resulting in confusing        │
-│           │  output.  In such cases, consider using CTRLC or a more sophisticated tool       │
-│           │  like perf.  Also HAPS only reports on the master thread, so to get useful       │
-│           │  information, you probably need to change an instance in the code of             │
-│           │  par_iter_mut to iter_mut, to turn off parallelization for a strategically       │
-│           │  selected section.                                                               │
-└───────────┴──────────────────────────────────────────────────────────────────────────────────┘
-
-
- - diff --git a/pages/auto/help.display.html b/pages/auto/help.display.html deleted file mode 100644 index 411c2f0ea..000000000 --- a/pages/auto/help.display.html +++ /dev/null @@ -1,87 +0,0 @@ - - - - - - -enclone help display - - - - - - - -
-enclone banner -

-other options that control clonotype display
-
-┌───────────┬───────────────────────────────────────────────────────────────────────────────┐
-│PER_CELL   │  expand out each exact clonotype line, showing one line per cell,             │
-│           │  for each such line, displaying the barcode name, the number of UMIs assigned,│
-│           │  and the gene expression UMI count, if applicable, under gex_med              │
-├───────────┼───────────────────────────────────────────────────────────────────────────────┤
-│BARCODES   │  print list of all barcodes of the cells in each clonotype, in a              │
-│           │  single line near the top of the printout for a given clonotype               │
-├───────────┼───────────────────────────────────────────────────────────────────────────────┤
-│SEQC       │  print V..J sequence for each chain in the first exact subclonotype, near     │
-│           │  the top of the printout for a given clonotype                                │
-├───────────┼───────────────────────────────────────────────────────────────────────────────┤
-│FULL_SEQC  │  print full sequence for each chain in the first exact subclonotype,          │
-│           │  near the top of the printout for a given clonotype                           │
-├───────────┼───────────────────────────────────────────────────────────────────────────────┤
-│SUM        │  print sum row for each clonotype                                             │
-│MEAN       │  print mean row for each clonotype                                            │
-└───────────┴───────────────────────────────────────────────────────────────────────────────┘
-
-options that control clonotype grouping
-
-By default, enclone organizes clonotypes into groups, and each group contains just one clonotype! 
-If you prefer not to see the grouping messages, you can turn them off by adding the option NGROUP
-to the enclone command line.  We intend to add useful versions of grouping to a future version of
-enclone, that are reflective of functional (antigen-binding) differences.  For now there are the
-following "toy" options:
-
-┌─────────────────────────┬─────────────────────────────────────────────────────────────────────┐
-│GROUP_HEAVY_CDR3         │  group by perfect identity of CDR3 amino acid sequence of IGH or TRB│
-│GROUP_VJ_REFNAME         │  group by sharing identical V and J reference gene names,           │
-│GROUP_VJ_REFNAME_STRONG  │  same but also require identical length V..J sequences              │
-│                         │  (after correction for indels) and identical length CDR3 sequences, │
-│                         │  but ignores foursies and moresies                                  │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│MIN_GROUP                │  minimum number of clonotypes in group to print (default = 1)       │
-└─────────────────────────┴─────────────────────────────────────────────────────────────────────┘
-
-
- - diff --git a/pages/auto/help.example1.html b/pages/auto/help.example1.html deleted file mode 100644 index 38d7933aa..000000000 --- a/pages/auto/help.example1.html +++ /dev/null @@ -1,100 +0,0 @@ - - - - - - -enclone help example1 - - - - - - - -
-enclone banner -

-Shown below is the output of the command:
-
-enclone BCR=123089 CDR3=CARRYFGVVADAFDIW
-
-[1] GROUP = 1 CLONOTYPES = 12 CELLS
-
-[1.1] CLONOTYPE = 12 CELLS
-┌───────────┬───────────────────────────────────────────────┬──────────────────────────────┐
-│           │  CHAIN 1                                      │  CHAIN 2                     │
-│           │  181.1.1|IGHV4-30-2 ◆ 53|IGHJ3                │  254|IGKV1D-39 ◆ 218|IGKJ5   │
-│           ├───────────────────────────────────────────────┼──────────────────────────────┤
-│           │              1 1111111111111111               │  1 111111111111              │
-│           │  2224556788990 1111122222222223               │  0 011111111112              │
-│           │  0571380317346 5678901234567890               │  6 901234567890              │
-│           │                ══════CDR3══════               │    ════CDR3════              │
-│reference  │  LSSASRPHPVRST ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦WT CQQ◦◦◦◦◦◦◦◦◦              │
-│donor ref  │  VSPTYRHYPVTST ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦WT CQQ◦◦◦◦◦◦◦◦◦              │
-├───────────┼───────────────────────────────────────────────┼──────────────────────────────┤
-│#   n        ........x.... ..............x.     u  const    x ......x.....      u  const│
-│1  10      │  VSPTYRHYPVTST CARRYFGVVADAFDIW  4285  IGHM   │  T CQQSYSTPPITF  11793  IGKC │
-│2   2      │  VSPTYRHYSVTST CARRYFGVVADAFDIW  4383  IGHM   │  A CQQSYSPPPITF  13922  IGKC │
-└───────────┴───────────────────────────────────────────────┴──────────────────────────────┘
-
-This shows an invocation of enclone that takes one dataset as input and exhibits
-all clonotypes for which some chain has the given CDR3 sequence.
-
-What you see here is a compressed view of the entire information encoded in the
-full length transcripts of the 12 cells comprising this clonotype: every base!
-There is a lot to explain about the compression, so please read carefully.
-
-• Clonotypes are grouped.  Here we see just one group having one clonotype in it.
-• This clonotype has two exact subclonotypes in it, the first of which has 10 cells.
-• This clonotype has two chains.  The reference segments for them are shown at the top.
-• The notation 181.1.1 says that this V reference sequence is an alternate allele
-  derived from the universal reference sequence (contig in the reference file)
-  numbered 181, that is from donor 1 ("181.1") and is alternate allele 1 for that donor.
-• Sometimes chains are missing from exact subclonotypes.
-• Amino acids are assigned different colors depending on which codon they represent.
-• Numbered columns show the state of particular amino acids, e.g. the first column is for amino
-  acid 20 in chain 1 (where 0 is the start codon).  The numbers read vertically, downward!
-• Universal ref: state for the contig in the reference file.
-• Donor ref: state for the inferred donor germline sequence.
-• ◦s are "holes" in the recombined region where the reference doesn't make sense.
-• The "dot and x" line has xs where there's a difference *within* the clonotype.
-• Amino acids are shown if they differ from the universal reference or are in the CDR3.
-• u = median UMI count for a chain in the exact subclonotype.
-• const = const region name for a chain in the exact subclonotype.
-
-The view you see here is configurable: see the documentation at enclone help lvars and enclone
-help cvars.
-
-
- - diff --git a/pages/auto/help.example2.html b/pages/auto/help.example2.html deleted file mode 100644 index 346cdb9fd..000000000 --- a/pages/auto/help.example2.html +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - -enclone help example2 - - - - - - - -
-enclone banner -

-Shown below is the output of the command:
-
-enclone BCR=123085 GEX=123749 LVARSP=gex,IGHV2-5_g_μ,CD4_ab_μ CDR3=CALMGTYCSGDNCYSWFDPW
-
-[1] GROUP = 1 CLONOTYPES = 5 CELLS
-
-[1.1] CLONOTYPE = 5 CELLS
-┌────────────────────────────────────┬───────────────────────────────────────┬─────────────────────────────┐
-│                                    │  CHAIN 1                              │  CHAIN 2                    │
-│                                    │  98|IGHV2-5 ◆ 57|IGHJ5                │  352|IGLV3-1 ◆ 314|IGLJ2    │
-│                                    ├───────────────────────────────────────┼─────────────────────────────┤
-│                                    │    11111111111111111111               │    11111111111              │
-│                                    │  8 11111222222222233333               │  6 00000111111              │
-│                                    │  5 56789012345678901234               │  2 56789012345              │
-│                                    │    ════════CDR3════════               │    ════CDR3═══              │
-│reference                           │  S ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦WV CQAWD◦◦◦◦◦◦              │
-│donor ref                           │  S ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦WV CQAWD◦◦◦◦◦◦              │
-├────────────────────────────────────┼───────────────────────────────────────┼─────────────────────────────┤
-│#  n    gex  IGHV2-5_g_μ  CD4_ab_μ    x ....................     u  const    . ...........      u  const│
-│1  3   8852         1850        79  │  S CALMGTYCSGDNCYSWFDPW   592  IGHM   │  V CQAWDSSVVVF   2995  IGLC2│
-│2  1  29657         6515        36  │  S CALMGTYCSGDNCYSWFDPW  6112  IGHG1  │  V CQAWDSSVVVF  15203  IGLC2│
-│3  1  14886         3326        42  │  T CALMGTYCSGDNCYSWFDPW  4045  IGHG1  │  V CQAWDSSVVVF   7025  IGLC2│
-└────────────────────────────────────┴───────────────────────────────────────┴─────────────────────────────┘
-
-This shows an invocation of enclone that takes VDJ, gene expression and feature barcode data as
-input, and exhibits all clonotypes for which some chain has the given CDR3 sequence.  As well the
-command requests UMI (molecule) counts for one hand-selected gene and one antibody.  You can use
-any gene(s) you like and any antibodies for which you have feature barcodes.
-
-
- - diff --git a/pages/auto/help.faq.html b/pages/auto/help.faq.html deleted file mode 100644 index 95001c0c8..000000000 --- a/pages/auto/help.faq.html +++ /dev/null @@ -1,242 +0,0 @@ - - - - - - -enclone help faq - - - - - - - -
-enclone banner -

-Frequently Asked Questions
-
-We're sorry you're having difficulty!  Please see the answers below, check out the other help
-guides, and if you're still stuck, write to us at enclone@10xgenomics.com.
-
-1. Why is my enclone output garbled?
-
-We can think of two possibilities:
-
-A. The escape characters that enclone emits for color and bolding are not getting
-translated.  You have some options:
-(a) Turn off escape character generation by adding PLAIN to your enclone commands.
-This will work but you'll lose some information.
-(b) If your terminal window is not translating escape characters, ask someone
-with appropriate expertise to help you.  We have not observed this phenomenon,
-but it should be fixable.
-(c) If you're trying to view enclone output, with escape characters, using an editor,
-that's probably not going to work well.
-
-B. Perhaps enclone is emitting very wide lines.  Here are things you can do about this:
-(a) Make your terminal window wider or reduce the font size.
-(b) Identify the field that is very wide and use the column controls to remove that
-field.  See the help for lvars and cvars.  For example,
-AMINO=cdr3
-may help, or even
-AMINO=
-
-2. Can I convert the enclone visual output into other forms?
-
-Yes, there are choices:
-A. On a Mac, you can screenshot from a terminal window.
-B. Add the argument HTML to the enclone command line.  Then the output will be presented as html,
-with title "enclone output".  If you want to set the title, use HTML="...".
-C. You can then convert the html to pdf.  The best way on a Mac is to open Safari, which is the
-best browser for this particular purpose, select the file where you've saved the html, and then
-export as pdf.  Do not convert to pdf via printing, which produces a less readable file, and also
-distorts colors.  (We do not know why the colors are distorted.)
-D. If you want to put enclone output in a Google Doc, you can do it via approach A, although then
-you won't be able to select text within the copied region.  Alternatively, if you open the html
-file in a browser, you can then select text (including clonotype box text) and paste into a Google
-Doc.  It will be pretty ugly, but will capture color and correctly render the box structure,
-provided that you use an appropriate fixed-width font for that part of the Doc.  We found that
-Courier New works, with line spacing set to 0.88.  You may have to reduce the font size.
-
-3. Why is enclone slow for me?
-
-On a single VDJ dataset, it typically runs for us in a few seconds, on a Mac or Linux server. 
-Runs where we combine several hundred datasets execute in a couple minutes (on a server).  Your
-mileage could vary, and we are interested in cases where it is underperforming.  Let us know.  We
-are aware of several things that could be done to speed up enclone.
-
-4. How does enclone fit into the 10x Genomics software ecosystem?
-
-There are several parts to the answer:
-• enclone is a standalone executable that by default produces human-readable output.
-• You can also run enclone to produce parseable output (see enclone help parseable), and that
-output can be digested using code that you write (for example, in R).
-• When you run Cell Ranger to process 10x single cell immune profiling data, it in effect calls
-enclone with a special option that yields only an output file for the 10x visualization tool
-Loupe.
-• Clonotypes may then be viewed using Loupe.  The view of a clonotype provided by Loupe is
-different than the view provided by enclone.  Loupe shows a continuous expanse of bases across
-each chain, which you can scroll across, rather than the compressed view of "important" bases or
-amino acids that enclone shows.
-
-5. What platforms does enclone run on?
-
-1. Linux/x86-64 (that's most servers)
-2. Mac.
-
-However, we have not and cannot test every possible configuration of these platforms.  Please let
-us know if you encounter problems!
-
-6. How can I print out all the donor reference sequences?
-
-Add the argument DONOR_REF_FILE=filename to your enclone command, and fasta for the donor
-reference sequences will be dumped there.
-
-7. How does enclone know what VDJ reference sequences I'm using?
-
-If you used Cell Ranger version 4.0 or greater, then the VDJ reference file was included in the
-outs directory, and so enclone knows the reference sequence from that.
-
-For outs from older Cell Ranger versions, enclone has to guess which VDJ reference sequences were
-used, and may or may not do so correctly.  As part of this, if you have mouse data from older Cell
-Ranger versions, you need to supply the argument MOUSE on the command line.
-
-It is also possible to set the reference sequence directly by adding REF=f to your
-command line, where f is the name of your VDJ reference fasta file, but if that is different than
-the reference supplied to Cell Ranger, then you will have to add the additional argument RE to
-recompute annotations, and that will slow down enclone somewhat.
-
-8. Can I provide data from more than one donor?
-
-Yes.  Type enclone help input for details.  The default behavior of enclone is to prevent cells
-from different donors from being placed in the same clonotype.  The MIX_DONORS option may be used
-to turn off this behavior.  If you employ this option, then clonotypes containing cells from more
-than one donor will be flagged as errors, unless you use the NWARN option to turn off those
-warnings.  The primary reason for allowing entry of data from multiple donors is to allow
-estimation of enclone's error rate.
-
-9. Why are some command line argument values quoted?
-
-Command line argument values that contain any of these characters ;|* need to be quoted like so
-TCR="a;b"
-to prevent the shell from interpreting them for a purpose completely unrelated to enclone.  This
-is a trap, because forgetting to add the quotes can result in nonsensical and confusing behavior!
-
-10. If enclone fails, does it return nonzero exit status?
-
-Yes, unless output of enclone is going to a terminal.  In that case, you'll always get zero.
-
-11. Could a cell be missing from an enclone clonotype?
-
-Yes, some cells are deliberately deleted.  The cell might have been deleted by one of the filters
-described in enclone help special, and which you can turn off.  We also delete cells for which
-more than four chains were found.
-
-12. Can enclone print summary stats?
-
-Yes, if you add the option SUMMARY, then some summary stats will be printed.  If you only want to
-see the summary stats, then also add the option NOPRINT.
-
-13. What is the notes column?
-
-The notes column appears if one of two relatively rare events occurs:
-
-1. An insertion is detected in a chain sequence, relative to the reference.
-
-2. The end of the J segment on a chain sequence does not exactly coincide with
-   the beginning of the C segment.
-The latter could correspond to one of several phenomena:
-a. A transcript has an insertion between its J and C segments.
-   This can happen.  See e.g. Behlke MA, Loh DY.
-   Alternative splicing of murine T-cell receptor beta-chain transcripts.
-   Nature 322(1986), 379-382.
-b. There is an error in a reference sequence segment.
-   We have tried to eliminate all such errors from the built-in references for
-   human and mouse.
-c. A cell produced a nonstandard transcript and also standard ones, and the
-   Cell Ranger pipeline just happened to pick a nonstandard one.
-d. There was a technical artifact and the sequence does not actually represent
-   an mRNA molecule.
-
-Overlaps of length exactly one between J and C segments are not shown unless you specify the
-option JC1.  The reason for this is that certain reference sequences (notably those from IMGT and
-those supplied with Cell Ranger 3.1) often have an extra base at the beginning of their C
-segments, resulting in annoying overlap notes for a large fraction of clonotypes.
-
-14. Can I cap the number of threads used by enclone?
-
-You can use the command-line argument MAX_CORES=n to cap the number of cores used in parallel
-loops.  The number of threads used is typically one higher.
-
-15. Can I use enclone if I have only gene expression data?
-
-Possibly.  In some cases this works very well, but in other cases it does not.  Success depends on
-dataset characteristics that have not been carefully investigated.  To attempt this, you need to
-invoke Cell Ranger on the GEX dataset as if it was a VDJ dataset, and you need to specify to Cell
-Ranger that the run is to be treated as BCR or TCR.  Two separate invocations can be used to get
-both.  Note also that Cell Ranger has been only minimally tested for this configuration and that
-this is not an officially supported Cell Ranger configuration.
-
-16. How can I cite enclone?
-
-This version of enclone has been provided under a non-disclosure agreement,
-however once enclone has officially launched, you will be able to cite this version as:
-10x Genomics, https://github.com/10XGenomics/enclone,
-(your enclone version information will be printed here).
-At some point subsequent to that, there will be a white paper to which you can refer, in addition
-to a DOI minted at Zenodo.  In the spirit of reproducibility, you should provide the arguments
-that you used when you ran enclone and indicate the version of Cell Ranger that you used to
-generate the input data.
-
-17. Can I print the enclone version?
-
-Yes, type "enclone version".
-
-18. Can enclone ingest multiple datasets from the same library?
-
-If enclone detects significant (≥ 25%) barcode reuse between datasets, it will exit.  This
-behavior can be overridden using the argument ACCEPT_REUSE.
-19. Can I turn off all the filters used in joining clonotypes?
-
-Pretty much.  You can run with the following arguments:
-MAX_CDR3_DIFFS=100
-MAX_LOG_SCORE=100
-EASY
-MAX_DIFFS=200
-MAX_DEGRADATION=150,
-however this will in general be very slow and not produce useful results.  Depending on what your
-goal is, you may find it helpful to use some of these arguments, and with lower values.  You can
-see the meaning of the arguments and their default values by typing "enclone help how".
-
- - diff --git a/pages/auto/help.filter.html b/pages/auto/help.filter.html deleted file mode 100644 index 36032eed3..000000000 --- a/pages/auto/help.filter.html +++ /dev/null @@ -1,171 +0,0 @@ - - - - - - -enclone help filter - - - - - - - -
-enclone banner -

-clonotype filtering options
-
-enclone provides filtering by cell, by exact subclonotype, and by clonotype.  This page describes
-filtering by clonotype.  These options cause only certain clonotypes to be printed.  See also
-"enclone help special", which describes other filtering options.  This page also describes
-scanning for feature enrichment.
-
-┌─────────────────────┬────────────────────────────────────────────────────────────────────────┐
-│MIN_CELLS=n          │  only show clonotypes having at least n cells                          │
-│MAX_CELLS=n          │  only show clonotypes having at most n cells                           │
-│CELLS=n              │  only show clonotypes having exactly n cells                           │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MIN_UMIS=n           │  only show clonotypes having ≳ n UMIs on some chain on some cell       │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MIN_CHAINS=n         │  only show clonotypes having at least n chains                         │
-│MAX_CHAINS=n         │  only show clonotypes having at most n chains                          │
-│CHAINS=n             │  only show clonotypes having exactly n chains                          │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│CDR3=<pattern>       │  only show clonotypes having a CDR3 amino acid seq that matches        │
-│                     │  the given pattern (regular expression)*, from beginning to end        │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│SEG="s_1|...|s_n"    │  only show clonotypes using one of the given reference segment names   │
-│SEGN="s_1|...|s_n"   │  only show clonotypes using one of the given reference segment numbers │
-│                     │  both: looks for V, D, J and C segments; double quote only             │
-│                     │  needed if n > 1                                                       │
-│                     │  For both SEG and SEGN, multiple instances are allowed, and their      │
-│                     │  effects are cumulative.                                               │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MIN_EXACTS=n         │  only show clonotypes having at least n exact subclonotypes            │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│VJ=seq               │  only show clonotypes using exactly the given V..J sequence            │
-│                     │  (string in alphabet ACGT)                                             │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MIN_DATASETS=n       │  only show clonotypes containing cells from at least n datasets        │
-│MAX_DATASETS=n       │  only show clonotypes containing cells from at most n datasets         │
-│MIN_DATASET_RATIO=n  │  only show clonotypes having at least n cells and for which the ratio  │
-│                     │  of the number of cells in the most abundant dataset to the next most  │
-│                     │  abundant one is at least n                                            │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│CDIFF                │  only show clonotypes having a difference in constant region with the  │
-│                     │  universal reference                                                   │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│DEL                  │  only show clonotypes exhibiting a deletion                            │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│BARCODE=bc1,...,bcn  │  only show clonotypes that use one of the given barcodes               │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│INKT                 │  only show clonotypes for which some exact subclonotype is annotated as│
-│                     │  having some iNKT evidence, see bit.ly/enclone for details             │
-├─────────────────────┼────────────────────────────────────────────────────────────────────────┤
-│MAIT                 │  only show clonotypes for which some exact subclonotype is annotated as│
-│                     │  having some MAIT evidence, see bit.ly/enclone for details             │
-└─────────────────────┴────────────────────────────────────────────────────────────────────────┘
-
-* Examples of how to specify CDR3:
-
-┌────────────────────────────────────────┬────────────────────────────────────────────────┐
-│CDR3=CARPKSDYIIDAFDIW                   │  have exactly this sequence as a CDR3          │
-│CDR3="CARPKSDYIIDAFDIW|CQVWDSSSDHPYVF"  │  have at least one of these sequences as a CDR3│
-│CDR3=".*DYIID.*"                        │  have a CDR3 that contains DYIID inside it     │
-└────────────────────────────────────────┴────────────────────────────────────────────────┘
-
-Note that double quotes should be used if the pattern contains characters other than letters.
-
-A gentle introduction to regular expressions may be found at
-https://en.wikipedia.org/wiki/Regular_expression#Basic_concepts, and a precise
-specification for the regular expression version used by enclone may be found at
-https://docs.rs/regex.
-
-linear conditions
-
-enclone understands linear conditions of the form
-c1*v1 ± ... ± cn*vn > d
-where each ci is a constant, "ci*" may be omitted, each vi is a variable, and d is a constant. 
-Blank spaces are ignored.  The > sign may be replaced by >= or ≥ or < or <= or ≤.  Each vi is a
-lead variable (see "enclone help lvars") that represents an origin/donor/tag count or gene/feature
-barcode UMI count.  In evaluating the condition, each vi is replaced by the mean of its values
-across all cells in the clonotype.  Because the minus sign - doubles as a hyphen and is used in
-some feature names, we allow parentheses around variable names to prevent erroneous parsing, like
-this (IGHV3-7_g) >= 1.
-
-filtering by linear conditions
-
-enclone has the capability to filter by bounding certain lead variables, using the command-line
-argument:
-F="L"
-where L is a linear condition (as defined above).  Currently this is limited to the case where the
-lead variables have been selected using LVARS or LVARSP!  Multiple bounds may be imposed by using
-multiple instances of F=... .
-
-feature scanning
-
-If gene expression and/or feature barcode data have been generated, enclone can scan all features
-to find those that are enriched in certain clonotypes relative to certain other clonotypes.  This
-feature is turned on using the command line argument
-SCAN="test,control,threshold"
-where each of test, control and threshold are linear conditions as defined above.  Blank spaces
-are ignored.  The test condition defines the "test clonotypes" and the control condition defines
-the "control clonotypes".  Currently, the lead variables in test and control must be specified by
-LVARS or LVARSP!  The threshold condition is special: it may use only the variables "t" and "c"
-that represent the raw UMI count for a particular gene or feature, for the test (t) or control (c)
-clonotypes.  To get a meaningful result, you should specify MIN_CELLS appropriately and manually
-examine the test and control clonotypes to make sure that they make sense.
-
-an example
-
-Suppose that your data are comprised of two origins with datasets
-            named pre and post, representing time points relative to some event.  Then
-SCAN="n_post - 10*n_pre >= 0, n_pre - 0.5*n_post >= 0, t - 2*c >= 0.1"
-would define the test clonotypes to be those satisfying n_post >= 10*n_pre (so having far more
-post cells than pre cells), the control clonotypes to be those satisfying n_pre >= 0.5*n_post (so
-having lots of pre cells), and thresholding on t >= 2*c + 0.1, so that the feature must have a bit
-more than twice as many UMIs in the test than the control.  The 0.1 is there to exclude noise from
-features having very low UMI counts.
-
-Feature scanning is not a proper statistical test.  It is a tool for generating a list of feature
-candidates that may then be examined in more detail by rerunning enclone using some of the
-detected features as lead variables (appropriately suffixed).  Ultimately the power of the scan is
-determined by having "enough" cells in both the test and control sets, and in having those sets
-cleanly defined.
-
-Currently feature scanning requires that each dataset have identical features.
-
-
- - diff --git a/pages/auto/help.glossary.html b/pages/auto/help.glossary.html deleted file mode 100644 index 3b8c5c3d6..000000000 --- a/pages/auto/help.glossary.html +++ /dev/null @@ -1,99 +0,0 @@ - - - - - - -enclone help glossary - - - - - - - -
-enclone banner -

-glossary of terms used by enclone
-
-┌────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
-│V..J                │  the full sequence of a V(D)J transcript, from the beginning of the V       │
-│                    │  segment to the end of the J segment; this sequence begins with a start     │
-│                    │  codon and ends with a partial codon (its first base)                       │
-│CDR3                │  The terms CDR3 and junction are commonly mistaken and often used           │
-│                    │  interchangeably.  In enclone's nomenclature, "CDR3" actually refers to the │
-│                    │  junction (the CDR3 loop plus the canonical C and W/F at the N and C termini│
-│                    │  respectively).                                                             │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│clonotype           │  all the cells descended from a single fully rearranged T or B cell         │
-│                    │  (approximated computationally)                                             │
-│exact subclonotype  │  all cells having identical transcripts                                    │
-│                    │  (every clonotype is a union of exact subclonotypes)                        │
-│clone               │  a cell in a clonotype, or in an exact subclonotype                         │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│onesie              │  a clonotype or exact subclonotype having exactly one chain                 │
-│twosie              │  a clonotype or exact subclonotype having exactly two chains                │
-│threesie            │  a clonotype or exact subclonotype having exactly three chains;             │
-│                    │  these frequently represent true biological events, arising from expression │
-│                    │  of both alleles                                                            │
-│foursie             │  a clonotype or exact subclonotype having exactly four chains;              │
-│                    │  these very rarely represent true biological events                         │
-│moresie             │  a clonotype having more than four chains;                                  │
-│                    │  these sad clonotypes do not represent true biological events               │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│donor               │  an individual from whom datasets of an origin are obtained                 │
-│origin              │  a tube of cells from a donor, from a particular tissue at a                │
-│                    │  particular point in time, and possibly enriched for particular cells       │
-│cell group          │  an aliquot from an origin, presumed to be a random draw                    │
-│dataset             │  all sequencing data obtained from a particular library type                │
-│                    │  (e.g. TCR or BCR or GEX or FB), from one cell group, processed by running  │
-│                    │  through the Cell Ranger pipeline                                           │
-└────────────────────┴─────────────────────────────────────────────────────────────────────────────┘
-
- The exact requirements for being in the same exact subclonotype are that cells:
-• have the same number of productive contigs identified
-• that these have identical bases within V..J
-• that they are assigned the same constant region reference sequences
-• and that the difference between the J stop and the C start is the same
-  (noting that this difference is nearly always zero).
-Note that we allow mutations within the 5'-UTR and constant regions.
-
-conventions
-
-• When we refer to "V segments", we always include the leader segment.
-• Zero or one?  We number exact subclonotypes as 1, 2, ... and likewise with
-chains within a clonotype, however DNA and amino-acid positions are numbered starting at zero.
-
-
- - diff --git a/pages/auto/help.how.html b/pages/auto/help.how.html deleted file mode 100644 index 901d66134..000000000 --- a/pages/auto/help.how.html +++ /dev/null @@ -1,170 +0,0 @@ - - - - - - -enclone help how - - - - - - - -
-enclone banner -

-information about how enclone works
-
-The goal of enclone is to find and display the clonotypes within single cell VDJ datasets: groups
-of cells having the same fully rearranged common ancestor.
-
-enclone provides the foundation for fully understanding each cell's antigen affinity and the
-evolutionary relationship between cells within one or more datasets.  This starts with, for each
-cell, the full length sequence of all its VDJ receptor chains.  Such data may be obtained using
-the 10x Genomics immune profiling platform.
-
-See also the heuristics page at bit.ly/enclone.
-
-For this, there are fundamental challenges:
-
-┌──────────────────────────────────────────────────────────────────────────────────────────────────┐
-│1. It is extremely easy to get false positives: the incorrect appearance that two cells have a    │
-│common ancestor.                                                                                  │
-│                                                                                                  │
-│2. Because of somatic hypermutation in B cells, it can be difficult to know that two B cells share│
-│a common ancestor.                                                                                │
-│                                                                                                  │
-│3. There is always some background noise, e.g. from ambient mRNA.  When building large clonotypes,│
-│this noise tends to pile up, yielding ectopic chains, i.e. chains within a clonotype that are     │
-│artifacts and do not represent true biology.                                                      │
-└──────────────────────────────────────────────────────────────────────────────────────────────────┘
-
-To address these challenges, the enclone algorithm has several steps, which we outline:
-
-1.  Input data.  enclone gets its information from the file all_contig_annotations.json that is
-produced by Cell Ranger.  Only productive contigs are used.  Each has an annotated V and J
-segment.  The V segment alignment may have a single indel whose length is divisible by three, and
-in that case, the V reference sequence is edited either to delete or insert sequence.  In the
-insertion case, the bases are taken from the contig.  These indels are noted in the enclone
-output.
-
-2.  Exact subclonotypes.  enclone groups cells into exact subclonotypes, provided that they have
-the same number of chains, identical V..J sequences, identical C segment assignments, and the same
-distance between the J stop and the C start (which is usually zero).
-
-3.  Finding the germline sequences.  For datasets from a given donor, enclone derives "donor
-reference sequences" for the V chains present in the donor's genome.  This is powerful, even
-though based on imperfect information.  V segments vary in their expression frequency and thus the
-more cells which are present, the more complete the information will be.  It is also not possible
-to accurately determine the terminal bases in a V chain from transcript data alone because these
-bases mutate during recombination and because of non-templated nucleotide addition.
-
-The idea for how this is done is roughly the following: for each V segment, we choose one cell
-from each clonotype (although these have not actually been computed yet, so it's an
-approximation).  Next for each position on the V segment, excluding the last 15 bases, we
-determine the distribution of bases that occur within these selected cells.  We only consider
-those positions where a non-reference base occurs at least four times and is at least 25% of the
-total.  Then each cell has a footprint relative to these positions; we require that these
-footprints satisfy similar evidence criteria.  Each such non-reference footprint then defines an
-"alternate allele".  We do not restrict the number of alternate alleles because they may arise
-from duplicated gene copies.
-
-A similar approach was attempted for J segments but at the time of testing did not appear to
-enhance clonotyping specificity.  This could be revisited later and might be of interest even if
-it does not improve specificity.
-
-4.  What joins are tested.  Pairs of exact subclonotypes are considered for joining, as described
-below.  This process only considers exact subclonotypes that have two or three chains.  There is some
-separate joining for the case of one chain.  Exact subclonotypes having four chains are not joined
-at present.  These cases are clearly harder because these exact subclonotypes are highly enriched
-for cell doublets, which we discard if we can identify them as such.
-
-5.  Initial grouping.  For each pair of exact subclonotypes, and for each pair of chains in each
-of the two exact subclonotypes, for which V..J has the same length for the corresponding chains,
-and the CDR3 segments have the same length for the corresponding chains, enclone considers joining
-the exact subclonotypes into the same clonotype.
-
-6.  Error bounding.  To proceed, as a minimum requirement, there must be at most 50 total
-mismatches between the two exact subclonotypes, within the given two V..J segments.
-This can be changed by setting MAX_DIFFS=n on the command line.
-
-7.  Shared mutations.  enclone next finds shared mutations between exact subclonotypes, that is,
-for two exact subclonotypes, common mutations from the reference sequence, using the donor
-reference for the V segments and the universal reference for the J segments.  Shared mutations are
-supposed to be somatic hypermutations, that would be evidence of common ancestry.  By using the
-donor reference sequences, most shared germline mutations are excluded, and this is critical for
-the algorithm's success.
-
-8.  Are there enough shared mutations?  We find the probability p that “the shared mutations occur
-by chance”.  More specifically, given d shared mutations, and k total mutations (across the two
-cells), we compute the probability p that a sample with replacement of k items from a set whose
-size is the total number of bases in the V..J segments, yields at most k – d distinct elements. 
-The probability is an approximation, for the method please see
-https://docs.rs/stirling_numbers/0.1.0/stirling_numbers.
-
-9.  Are there too many CDR3 mutations?  Next, let N be "the number of DNA sequences that differ
-from the given CDR3 sequences by at most the number of observed differences".  More specifically,
-if cd is the number of differences between the given CDR3 nucleotide sequences, and n is the total
-length in nucleotides of the CDR3 sequences (for the two chains), we compute the total number N of
-strings of length n that are obtainable by perturbing a given string of length n, which is
-sum( choose(n,m), m = 0..=cd ).  We also require that cd is at most 10 (and this bound is
-adjustable via the command-line argument MAX_CDR3_DIFFS).
-
-10.  Key join criteria.  Two cells sharing sufficiently many shared differences and sufficiently
-few CDR3 differences are deemed to be in the same clonotype.  That is, the lower p is, and the
-lower N is, the more likely it is that the shared mutations represent bona fide shared ancestry. 
-Accordingly, the smaller p*N is, the more likely it is that two cells lie in the same true
-clonotype.  To join two cells into the same clonotype, we require that the bound p*N ≤ C is
-satisfied, where C is the constant 1,000,000 (and adjustable via the command-line argument
-MAX_LOG_SCORE, the log10 of this, with default value 6).  This constant was arrived at by
-empirically balancing sensitivity and specificity across a large collection of datasets.  See
-discussion of performance below.
-
-11.  Other join criteria.  We do not join two clonotypes which were assigned different reference
-sequences unless those reference sequences differ by at most 3 positions.  This value can be
-controlled using the command-line argument MAX_DEGRADATION.  There is an additional restriction
-imposed when creating two-cell clonotypes: we require that cd ≤ d, where cd is the number of
-CDR3 differences and d is the number of shared mutations, as above.  This filter may be turned off
-using the command-line argument EASY.
-
-12.  Junk.  Spurious chains are filtered out based on frequency and connections. See "enclone help
-special" for a description of the filters.
-
-We are actively working to improve the algorithm.  To test the performance of the current version,
-we combined data from 443 BCR libraries from 30 donors, which yielded 9573 clonotypes having at
-least two cells each, of which 15 (0.16%) contained data from multiple donors.  These are errors.
-
-
- - diff --git a/pages/auto/help.indels.html b/pages/auto/help.indels.html deleted file mode 100644 index 48f1ab695..000000000 --- a/pages/auto/help.indels.html +++ /dev/null @@ -1,70 +0,0 @@ - - - - - - -enclone help indels - - - - - - - -
-enclone banner -

-handling of insertions and deletions
-
-enclone can recognize and display a single insertion or deletion in a contig relative to the
-reference, so long as its length is divisible by three, is relatively short, and occurs within the
-V segment, not too close to its right end.
-
-These indels could be germline, however most such events are already captured in a reference
-sequence.  Currently the donor reference code in enclone does not recognize indels.
-
-SHM deletions are rare, and SHM insertions are even more rare.
-
-Deletions are displayed using hyphens (-).  If you use the var option for cvars, the hyphens will
-be displayed in base space, where they are initially observed.  For the AMINO option, the deletion
-is first shifted by up to two bases, so that the deletion starts at a base position that is
-divisible by three.  Then the deleted amino acids are shown as hyphens.
-
-Insertions are shown only in amino acid space, in a special per-chain column called notes that
-appears if there is an insertion.  Colored amino acids are shown for the insertion, and the
-position of the insertion is shown.  The position is the position of the amino acid after which
-the insertion appears, where the first amino acid (start codon) is numbered 0.
-
-
- - diff --git a/pages/auto/help.input.html b/pages/auto/help.input.html deleted file mode 100644 index 5100b50cd..000000000 --- a/pages/auto/help.input.html +++ /dev/null @@ -1,148 +0,0 @@ - - - - - - -enclone help input - - - - - - - -
-enclone banner -

-enclone has two mechanisms for specifying input datasets: either directly on the command line or
-via a supplementary metadata file. Only one mechanism may be used at a time.
-
-In both cases, you will need to provide paths to directories where the outputs of the Cell Ranger
-pipeline may be found.  enclone uses only some of the pipeline output files, so it is enough that
-those files are present in the given directory, and the particular files that are needed may be found
-by typing enclone help input_tech.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃If you use the argument PRE=p then p/ will be prepended to all pipeline paths.  A comma-separated┃
-┃list is also allowed PRE=p1,...,pn, in which case these directories are searched from left to    ┃
-┃right, until one works, and if all fail, the path is used without prepending anything.  Lastly,  ┃
-┃(see enclone help command), you can avoid putting PRE on the command line by setting the         ┃
-┃environment variable ENCLONE_PRE to the desired value.  The default value for PRE is             ┃
-┃~/enclone/datasets,~/enclone/datasets2.                                                          ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-Both input forms involve abbreviated names (discussed below), which should be as short as
-possible, as longer abbreviations will increase the width of the clonotype displays.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃enclone can use gene expression and feature barcode data, as represented by a feature matrix.     ┃
-┃Cell Ranger stores this matrix in an hdf5 file, which while generally very efficient, is not      ┃
-┃optimized for interactive use.  Therefore enclone provides an alternate file structure, which     ┃
-┃speeds up enclone overall by up to 50%.  To use this, add the argument NH5 to the enclone command ┃
-┃line.  This will work so long as you have write permission on input directories.  The first time  ┃
-┃you run enclone (using given inputs), an alternate file feature_barcode_matrix.bin will be        ┃
-┃written; then subsequent invocations will be faster.  Once the file has been created, it will     ┃
-┃always be used, regardless of whether NH5 is used.  However, we may occasionally change the format┃
-┃of the alternate file.  If we do that and you have previously generated the file, then it will    ┃
-┃be rewritten when you invoke enclone for that dataset.  Like with other enclone command-line      ┃
-┃options, if you want NH5 on all the time, you can set the environment variable ENCLONE_NH5.       ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-█ 1 █ To point directly at input files on the command line, use e.g.
-TCR=/home/jdoe/runs/dataset345
-or likewise for BCR.  A more complicated syntax is allowed in which commas, colons and semicolons
-act as delimiters.  Commas go between datasets from the same origin, colons between datasets from
-the same donor, and semicolons separate donors.  If semicolons are used, the value must be quoted.
-
-enclone uses the distinction between datasets, origins and donors in the following ways:
-1. If two datasets come from the same origin, then enclone can filter to remove certain artifacts,
-unless you specify the option NCROSS.
-See also the illusory clonotype expansion page at bit.ly/enclone.
-2. If two cells came from different donors, then enclone will not put them in the same clonotype,
-unless you specify the option MIX_DONORS.
-More information may be found at `enclone help special`.  In addition, this is enclone's way of
-keeping datasets organized and affects the output of fields like origin, etc.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃Naming.  Using this input system, each dataset is assigned an abbreviated name, which is         ┃
-┃everything after the final slash in the directory name (e.g. dataset345 in the above example), or┃
-┃the entire name if there is no slash; origins and donors are assigned identifiers s1,... and     ┃
-┃d1,..., respectively; numbering of origins restarts with each new donor.  To specify origins     ┃
-┃and donors, use the second input form, and see in particular abbr:path.                          ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-Examples:
-TCR=p1,p2   -- input data from two libraries from the same origin
-TCR=p1,p2:q -- input data as above plus another from a different origin from the same donor
-TCR="a;b"   -- input one library from each of two donors.
-
-Matching gene expression and/or feature barcode data may also be supplied using an argument GEX=...,
-whose right side must have the exact same structure as the TCR or BCR argument.  Specification of
-both TCR and BCR is not allowed.
-
-In addition, barcode-level data may be specified using BC=..., whose right side is a list of paths
-having the same structure as the TCR or BCR argument.  Each such path must be for a CSV file,
-which must include the field barcode, may include special fields origin, donor, tag and color, and
-may also include arbitrary other fields.  The origin and donor fields allow a particular origin
-and donor to be associated to a given barcode.  A use case for this is genetic demultiplexing. 
-The tag field is intended to be used with tag demultiplexing.  The color field is used by the PLOT
-option.  All other fields are treated as lead variables, but values are only displayed in PER_CELL
-mode, or for parseable output using PCELL.  These fields should not include existing lead variable
-names.  Use of BC automatically turns on the MIX_DONORS option.
-
-█ 2 █ To specify a metadata file, use the command line argument
-META=filename
-This file should be a CSV (comma-separated values) file, with one line per cell group.  After the
-first line, lines starting with # are ignored.  There must be a field tcr or bcr, and some other
-fields are allowed:
-┌────────┬───────────────┬──────────────────────────────────────────────────────────────┐
-│field   │  default      │  meaning                                                     │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│tcr     │  (required!)  │  path to dataset, or abbr:path, where abbr is an abbreviated │
-│or bcr  │               │  name for the dataset; exactly one of tcr or bcr must be used│
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│gex     │  null         │  path to GEX dataset, which may include or consist entirely  │
-│        │               │  of FB data                                                  │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│origin  │  s1           │  abbreviated name of origin                                  │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│donor   │  d1           │  abbreviated name of donor                                   │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│color   │  null         │  color to associate to this dataset (for PLOT option)        │
-├────────┼───────────────┼──────────────────────────────────────────────────────────────┤
-│bc      │  null         │  name of CSV file as in the BC option                        │
-└────────┴───────────────┴──────────────────────────────────────────────────────────────┘
-
-
- - diff --git a/pages/auto/help.input_tech.html b/pages/auto/help.input_tech.html deleted file mode 100644 index 37ae4738a..000000000 --- a/pages/auto/help.input_tech.html +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - -enclone help input_tech - - - - - - - -
-enclone banner -

-information about providing input to enclone (technical notes)
-
-enclone only uses certain files, which are all in the outs subdirectory of a Cell Ranger pipeline
-directory:
-
-┌─────────────────────────────────────────────┬──────────┐
-│file                                         │  pipeline│
-├─────────────────────────────────────────────┼──────────┤
-│all_contig_annotations.json                  │  VDJ     │
-├─────────────────────────────────────────────┼──────────┤
-│vdj_reference/fasta/regions.fa               │  VDJ     │
-├─────────────────────────────────────────────┼──────────┤
-│metrics_summary.csv                          │  GEX     │
-├─────────────────────────────────────────────┼──────────┤
-│raw_feature_bc_matrix.h5                     │  GEX     │
-├─────────────────────────────────────────────┼──────────┤
-│analysis/clustering/graphclust/clusters.csv  │  GEX     │
-├─────────────────────────────────────────────┼──────────┤
-│analysis/pca/10_components/projection.csv    │  GEX     │
-└─────────────────────────────────────────────┴──────────┘
-
-The first file is required, and the second should be supplied if Cell Ranger version 4.0 or
-greater was used.  The others are required, in the indicated structure, if GEX or META/gex
-arguments are provided.  The exact files that are used could be changed in the future.
-
-Note that the VDJ outs directories must be from Cell Ranger version ≥ 3.1.  There is a workaround
-for earlier versions (which you will be informed of if you try), but it is much slower and the
-results may not be as good.
-
-
- - diff --git a/pages/auto/help.lvars.html b/pages/auto/help.lvars.html deleted file mode 100644 index eb8102496..000000000 --- a/pages/auto/help.lvars.html +++ /dev/null @@ -1,165 +0,0 @@ - - - - - - -enclone help lvars - - - - - - - -
-enclone banner -

-lead column options
-
-These options define lead variables, which correspond to columns that appear once in each
-clonotype, on the left side, and have one entry for each exact subclonotype row.
-
-Lead variables are specified using LVARS=x1,...,xn where each xi is one of:
-
-┌───────────────┬──────────────────────────────────────────────────────────────────────────────────┐
-│datasets       │  dataset identifiers                                                             │
-│origin         │  origin identifiers                                                              │
-│donors         │  donor identifiers                                                               │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│n              │  number of cells                                                                 │
-│n_<name>       │  number of cells associated to the given name, which can be a dataset            │
-│               │  or origin or donor or tag short name; may name only one such category           │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│nd<k>          │  For k a positive integer, this creates k+1 fields, that are specific to each    │
-│               │  clonotype.  The first field is n_<d1>, where d1 is the name of the dataset      │
-│               │  having the most cells in the clonotype.  If k ≥ 2, then you'll get a            │
-│               │  "runner-up" field n_<d2>, etc.  Finally you get a field n_other, however        │
-│               │  fields will be elided if they represent no cells.                               │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│near           │  Hamming distance of V..J DNA sequence to nearest neighbor                       │
-│far            │  Hamming distance of V..J DNA sequence to farthest neighbor                      │
-│               │  both compare to cells having chains in the same columns of the clonotype,       │
-│               │  with - shown if there is no other exact subclonotype to compare to              │
-│dref           │  Hamming distance of V..J DNA sequence to donor reference, excluding             │
-│               │  region of recombination                                                         │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│inkt           │  A string showing the extent to which the T cells in an exact subclonotype       │
-│               │  have evidence for being an iNKT cell.  The most evidence is denoted 𝝰gj𝝱gj,     │
-│               │  representing both gene name and junction sequence (CDR3) requirements for       │
-│               │  both chains.  See bit.ly/enclone for details on the requirements.               │
-│mait           │  Same as with inkt but for MAIT cells instead.                                   │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│g<d>           │  Here d is a nonnegative integer.  Then all the exact subclonotypes are          │
-│               │  grouped according to the Hamming distance of their V..J sequences.  Those       │
-│               │  within distance d are defined to be in the same group, and this is              │
-│               │  extended transitively.  The group identifier 1, 2, ... is shown.  The           │
-│               │  ordering of these identifiers is arbitrary.  This option is best applied        │
-│               │  to cases where all exact subclonotypes have a complete set of chains.           │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│gex            │   median gene expression UMI count                                              │
-│n_gex          │   number of cells reported by GEX                                               │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│<gene>_g       │   all five feature types: look for a declared feature of the given type         │
-│<antibody>_ab  │  with the given id or name; report the median UMI count for it; we allow         │
-│<antigen>_ag   │  the form e.g. <abbr>:<gene>_g where abbr is an abbreviation to be shown;        │
-│<crispr>_cr    │  we also allow <regular expression>_g where g can be replaced by ab, ag, cr      │
-│<custom>_cu    │  or cu; this represents a sum of UMI counts across the matching features. ●      │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│sec            │  for human or mouse BCR, number of GEX UMIs that are characterized as secreted   │
-│mem            │  for human or mouse BCR, number of GEX UMIs that are characterized as membrane   │
-│               │  For both of these, the algorithm looks for reads that are aligned through the   │
-│               │  right end of a constant region CH3 exon, and then read into a CH3-CHS or        │
-│               │  CH4-CHS exon, in the secreted case, or a M, M1 or M2 exon, in the membrane case.│
-│               │  This choice is determined by sequence tables in the code, and we cannot be      │
-│               │  absolutely certain that these tables are complete.                              │
-│               │  These fields require the presence of the files possorted_genome_bam.bam         │
-│               │  and possorted_genome_bam.bam.bai.                                               │
-│               │  These fields also require that you have samtools in your path.                  │
-│               │  Note that these counts tend to be low.                                          │
-├───────────────┼──────────────────────────────────────────────────────────────────────────────────┤
-│cred           │  Short for credibility.  It is a measure of the extent to which cells            │
-│               │  having gene expression similar to a given putative B cell are themselves        │
-│               │  B cells.  (Or similarly for T cells.)  For the actual definition, let n         │
-│               │  be the number of VDJ cells that are also GEX cells.  For a given cell,          │
-│               │  find the n GEX cells that are closest to it in PCA space, and report the        │
-│               │  percent of those that are also VDJ cells.  For multiple datasets, it would      │
-│               │  be better to "aggr" the data, however that is not currently supported.          │
-│               │  The computation is also inefficient, so let us know if it's causing             │
-│               │  problems for you.  And cred makes much better sense for datasets that           │
-│               │  consist of mixed cell types, rather than consisting of pure B or T cells.       │
-└───────────────┴──────────────────────────────────────────────────────────────────────────────────┘
-For gene expression and feature barcode stats, such data must be provided as input to enclone.
-
-● Example: IG.*_g matches all genes that begin with IG, and TR(A|B).*_g matches all genes that
-begin with TRA or TRB.  Double quotes as in LVARS="..." may be needed.  The regular expression
-must be in the alphabet A-Za-z0-9+_-.[]()|* and is only interpreted as a regular expression if it
-contains a character in []()|*.  See "enclone help filter" for more information about regular
-expressions.
-
-   These variables have some alternate versions, as shown in the table below:
-  
-  ┌──────────┬───────────────────────────────┬──────────┬──────────────┬─────────────┬────────────┐
-  │variable  │  semantics                    │  visual  │  visual      │  parseable  │  parseable │
-  │          │                               │          │  (one cell)  │             │  (one cell)│
-  ├──────────┼───────────────────────────────┼──────────┼──────────────┼─────────────┼────────────┤
-  │x         │  median over cells            │  yes     │  this cell   │  yes        │  yes       │
-  │x_mean    │  mean over cells              │  yes     │  null        │  yes        │  yes       │
-  │x_μ       │  (same as above)              │  yes     │  null        │  yes        │  yes       │
-  │x_sum     │  sum over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_Σ       │  (same as above)              │  yes     │  null        │  yes        │  yes       │
-  │x_min     │  min over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_max     │  max over cells               │  yes     │  null        │  yes        │  yes       │
-  │x_%       │  % of total GEX (genes only)  │  yes     │  this cell   │  yes        │  yes       │
-  │x_cell    │  this cell                    │  no      │  no          │  no         │  this cell │
-  └──────────┴───────────────────────────────┴──────────┴──────────────┴─────────────┴────────────┘
-  Some explanation is required.  If you use enclone without certain options, you get the "visual"
-  column.
-  • Add the option PER_CELL (see "enclone help display") and then you get visual output with extra
-  lines for each cell within an exact subclonotype, and each of those extra lines is described by
-  the "visual (one cell)" column.
-  • If you generate parseable output (see "enclone help parseable"), then you get the "parseable"
-  column for that output, unless you specify PCELL, and then you get the last column.
-  • For the forms with μ and Σ, the Greek letters are only used in column headings for visual output
-  (to save space), and optionally, in names of fields on the command line.
-   If you try out these features, you'll see exactly what happens! 
-
- Similar to the above but simpler: n_gex is just a count of cells, visual (one cell) shows 0 or
-1, n_gex_cell is defined for parseable (one cell), and the x_mean etc. forms do not apply.
-
-The default is datasets,n, except that datasets is suppressed if there is only one dataset.
-
-LVARSP=x1,...,xn is like LVARS but appends to the list.
-
-
- - diff --git a/pages/auto/help.main.html b/pages/auto/help.main.html deleted file mode 100644 index e40fa30d1..000000000 --- a/pages/auto/help.main.html +++ /dev/null @@ -1,97 +0,0 @@ - - - - - - -enclone help - - - - - - - -
-enclone banner -

-The mission of enclone is to:
-
-  Find and display the clonotypes within single cell VDJ datasets:
-  groups of cells having the same fully rearranged common ancestor.
-
-enclone is part of the 10x Genomics immune profiling tools, including Cell Ranger and Loupe. 
-enclone uses output from Cell Ranger version ≥ 3.1.
-
-The complete enclone documentation is at bit.ly/enclone.  This page catalogs the subset of those
-pages that are directly accessible from the enclone command line.  These pages can be viewed in a
-100 wide x 56 high window, except for those labeled "long" or "wide".
-
-┌─────────────────────────┬─────────────────────────────────────────────────────────────────────┐
-│command                  │  what it provides                                                   │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help             │  help to test for correct setup                                     │
-│enclone                  │  what you see here: guide to all the doc                            │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help quick       │  quick guide to getting started                                     │
-│enclone help how         │  how enclone works (long)                                           │
-│enclone help command     │  info about enclone command line argument processing                │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help glossary    │  glossary of terms used by enclone, and conventions                 │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help example1    │  explanation of an example                                          │
-│enclone help example2    │  example showing gene expression and feature barcodes (wide)        │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help input       │  how to provide input to enclone (long)                             │
-│enclone help input_tech  │  how to provide input to enclone (technical notes)                  │
-│enclone help parseable   │  parseable output (long)                                            │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help filter      │  clonotype filtering options, scanning for feature enrichment (long)│
-│enclone help special     │  special filtering options (long)                                   │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help lvars       │  lead column options (long)                                         │
-│enclone help cvars       │  per chain column options (long)                                    │
-│enclone help amino       │  per chain column options for amino acids                           │
-│enclone help display     │  other clonotype display options                                    │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help indels      │  insertion and deletion handling                                    │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help color       │  how enclone uses color, and related things                         │
-│enclone help faq         │  frequently asked questions (long)                                  │
-│enclone help developer   │  a few things for developers                                        │
-├─────────────────────────┼─────────────────────────────────────────────────────────────────────┤
-│enclone help all         │  concatenation of all the help pages (long, wide)                   │
-│                         │  ███ USE THIS TO SEARCH ALL THE HELP PAGES! ███                     │
-└─────────────────────────┴─────────────────────────────────────────────────────────────────────┘
-
- - diff --git a/pages/auto/help.parseable.html b/pages/auto/help.parseable.html deleted file mode 100644 index 835e08183..000000000 --- a/pages/auto/help.parseable.html +++ /dev/null @@ -1,182 +0,0 @@ - - - - - - -enclone help parseable - - - - - - - -
-enclone banner -

-parseable output
-
-The standard output of enclone is designed to be read by humans, but is not readily parseable by
-computers.  We supplement this with parseable output that can be easily read by computers.
-
-The default behavior for this is to generate a CSV file having "every possible" field (over a
-hundred).  We also provide an option to print only selected fields, and some options which enable
-inspection, short of generating a separate CSV file.
-
-Parseable output is targeted primarily at R and Python users, because of the ease of wrangling CSV
-files with these languages.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃Parseable output is invoked by using the argument                                                ┃
-┃POUT=filename                                                                                    ┃
-┃specifying the name of the file that is to be written to.                                        ┃
-┃  The filename "stdout" may be used for a preview; in that case parseable output is generated    ┃
-┃  separately for each clonotype and the two output types are integrated.  There is also          ┃
-┃  "stdouth", which is similar, but uses spaces instead of commas, and lines things up in columns.┃
-┃By default, we show four chains for each clonotype, regardless of how many chains it             ┃
-┃has, filling in with null entries.  One may instead specify n chains using the argument          ┃
-┃PCHAINS=n                                                                                        ┃
-┃The parseable output fields may be specified using                                               ┃
-┃PCOLS=x1,...,xn                                                                                  ┃
-┃where each xi is one of the field names shown below.                                             ┃
-┃This option reduces run time and memory usage, and prevents voluminous output.  Please use it!   ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-Over time additional fields may be added and the order of fields may change.
-
-There is an alternate parseable output mode in which one line is emitted for each cell, rather
-than each exact subclonotype.  This mode is enabled by adding the argument PCELL to the command
-line.  Each exact subclonotype then yields a sequence of output lines that are identical except as
-noted below.
-
-If you want to completely suppress the generation of visual clonotypes, add NOPRINT to the enclone
-command line.
-
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃FASTA output.  This is a separate feature.  To generate nucleotide FASTA output for each chain in ┃
-┃each exact subclonotype, use the argument FASTA=filename.  The special case stdout will cause the ┃
-┃FASTA records to be shown as part of standard output.  The FASTA records that are generated are of┃
-┃the form V(D)JC, where V is the full V segment (including the leader) and C is the full constant  ┃
-┃region, copied verbatim from the reference.  If a particular chain in a particular exact          ┃
-┃subclonotype is not assigned a constant region, then we use the constant region that was assigned ┃
-┃to the clonotype.  If no constant region at all was assigned, then the FASTA record is omitted.   ┃
-┃Similarly, FASTA_AA=filename may be used to generate a matching amino acid FASTA file.            ┃
-┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
-
-───────────────────────
-parseable output fields
-───────────────────────
-
-1. per clonotype group fields
-
-┌──────────────┬──────────────────────────────────────────┐
-│group_id      │  identifier of clonotype group - 0,1, ...│
-├──────────────┼──────────────────────────────────────────┤
-│group_ncells  │  total number of cells in the group      │
-└──────────────┴──────────────────────────────────────────┘
-
-2. per clonotype fields
-
-┌──────────────────┬────────────────────────────────────────────────────────────────┐
-│clonotype_id      │  identifier of clonotype within the clonotype group = 0, 1, ...│
-├──────────────────┼────────────────────────────────────────────────────────────────┤
-│clonotype_ncells  │  total number of cells in the clonotype                        │
-├──────────────────┼────────────────────────────────────────────────────────────────┤
-│nchains           │  total number of chains in the clonotype                       │
-└──────────────────┴────────────────────────────────────────────────────────────────┘
-
-3. per chain fields, where <i> is 1,2,... (see above)
-each of these has the same value for each exact clonotype
-
-┌──────────────────────┬───────────────────────────────────────────────────────────────────────┐
-│v_name<i>             │  name of V segment                                                    │
-│d_name<i>             │  name of D segment (or null)                                          │
-│j_name<i>             │  name of J segment                                                    │
-├──────────────────────┼───────────────────────────────────────────────────────────────────────┤
-│v_id<i>               │  id of V segment                                                      │
-│d_id<i>               │  id of D segment (or null)                                            │
-│j_id<i>               │  id of J segment                                                      │
-├──────────────────────┼───────────────────────────────────────────────────────────────────────┤
-│var_indices_dna<i>    │  DNA positions in chain that vary across the clonotype                │
-│var_indices_aa<i>     │  amino acid positions in chain that vary across the clonotype         │
-│share_indices_dna<i>  │  DNA positions in chain that are constant across the clonotype,       │
-│                      │  but differ from the donor ref                                        │
-│share_indices_aa<i>   │  amino acid positions in chain that are constant across the clonotype,│
-│                      │  but differ from the donor ref                                        │
-│                      │  all of these are comma-separated lists                               │
-└──────────────────────┴───────────────────────────────────────────────────────────────────────┘
-
-4. per exact subclonotype fields
-
-┌───────────────────────┬─────────────────────────────────────────────────────────────────────────┐
-│exact_subclonotype_id  │  identifier of exact subclonotype = 1, 2, ...                           │
-├───────────────────────┼─────────────────────────────────────────────────────────────────────────┤
-│barcodes               │  comma-separated list of barcodes for the exact subclonotype            │
-│<dataset>_barcodes     │  like "barcodes", but restricted to the dataset with the given name     │
-│barcode                │  if PCELL is specified, barcode for one cell                            │
-│<dataset>_barcode      │  if PCELL is specified, barcode for one cell, or null, if the barcode is│
-│                       │  not from the given dataset                                             │
-├───────────────────────┴─────────────────────────────────────────────────────────────────────────┤
-│In addition, every lead variable may be specified as a field.  See "enclone help lvars".         │
-└─────────────────────────────────────────────────────────────────────────────────────────────────┘
-
-5. per chain, per exact subclonotype fields, where <i> is 1,2,... (see above)
-
-[all apply to chain i of a particular exact clonotype]
-
-┌───────────────┬──────────────────────────────────────────────────────────────────────────┐
-│vj_seq<i>      │  DNA sequence of V..J                                                    │
-│seq<i>         │  full DNA sequence                                                       │
-│q<n>_<i>       │  special option to display a comma-separated list of the quality         │
-│               │  scores for chain i, at zero-based position n, numbered starting at the  │
-│               │  beginning of the V segment, for each cell in the exact subclonotype     │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│v_start<i>     │  start of V segment on full DNA sequence                                 │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│const_id<i>    │  numerical identifier of constant region (or null, if not known)         │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│utr_id<i>      │  numerical identifier of 5'-UTR region (or null, if not known)           │
-│utr_name<i>    │  name of 5'-UTR region (or null, if not known)                           │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│cdr3_start<i>  │  base position start of CDR3 sequence on full contig                     │
-│cdr3_aa<i>     │  amino acid sequence of CDR3                                             │
-├───────────────┼──────────────────────────────────────────────────────────────────────────┤
-│var_aa<i>      │  amino acids that vary across the clonotype (synonymous changes included)│
-├───────────────┴──────────────────────────────────────────────────────────────────────────┤
-│In addition, every chain variable, after suffixing by <i>, may be used as a field.        │
-│See "enclone help cvars".                                                                 │
-└──────────────────────────────────────────────────────────────────────────────────────────┘
-
-
- - diff --git a/pages/auto/help.quick.html b/pages/auto/help.quick.html deleted file mode 100644 index c01ac20b8..000000000 --- a/pages/auto/help.quick.html +++ /dev/null @@ -1,93 +0,0 @@ - - - - - - -enclone help quick - - - - - - - -
-enclone banner -

-quick guide to getting started
-
-Just type this:
-
-enclone BCR=p
-
-where p is the path to your Cell Ranger VDJ directory.
-
-Substitute TCR if that's what you've got.
-
-This will show you all the clonotypes, in descending order by number of cells.
-
-You'll need to make your window wide enough so that lines are not folded.  This depends on the
-dataset.
-
-Only one page of output is shown at a time.  To navigate within the full output, use the space bar
-to go forward and the b key to go backward.
-
-See enclone help example1 for a detailed guide to how to read the enclone output.  A few key
-things you should know:
-
-1. You'll see numbers near the top.  These are amino acid position numbers, and
-   they read downwards.  Numbering starts at the start codon, numbered zero.
-
-2. Each numbered line represents an exact subclonotype: cells having identical V(D)J transcripts.
-
-3. By default, you'll see data in amino acid space.  Only "interesting" amino acids are shown.
-
-Please read on to learn more!
-
-navigation in enclone
-
-enclone automatically sends its output through the program "less".  This allows you to navigate
-within the output, using the following keys (and many more, not shown, and which you don't need to
-know):
-• space: causes output to page forward
-• b: causes output to page backward
-• /string: finds instances of "string" in the output
-• n: having done the previous, jump to the next instance
-• q: quit, to return to the command line.
-
-When enclone uses less, it passes the argument -R, which causes certain characters to be hidden,
-namely escape codes that color or bold text.
-
-
- - diff --git a/pages/auto/help.setup.html b/pages/auto/help.setup.html deleted file mode 100644 index afce2693e..000000000 --- a/pages/auto/help.setup.html +++ /dev/null @@ -1,102 +0,0 @@ - - - - - - -enclone help setup - - - - - - - -
-enclone banner -

-
-Welcome to enclone!
-
-The purpose of this first page is to help you make sure that you're set up properly
-to run enclone.  PLEASE READ!
-
-(for the main help page, please type instead: enclone)
-
-Here we go through several setup tests.  If you have any problem that you can't
-resolve, please email us at enclone@10xgenomics.com.
-
-
-1. Are you using a fixed width font?
-Look at this:
-A FAT BROWN CAT JUMPED OVER THE WALL
-||||||||||||||||||||||||||||||||||||
-Do those two lines end at the same position?  If not, you need to switch your font.
-
-2. Is your terminal window wide enough to see the help pages?
-Your terminal needs to be at least 100 columns wide.  Look at this:
-0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
-Does it appear as a single line?  If not, please widen your window.
-
-3. Can your terminal display box characters?
-Look at this:
-┌────────┬─────────┐
-│banana  │  peel   │
-├────────┼─────────┤
-│oops    │  slipped│
-└────────┴─────────┘
-Do you see a neat rectangle composed of four rectangles with words inside them?  Are the vertical
-lines contiguous?  If not, something is wrong with your terminal!  You may need to change the
-terminal font.  For example, Menlo works, but Courier does not.
-
-4. Can your terminal correctly display ANSI escape sequences?
-The following word should be bold.  The following word should be blue.
-If that doesn't make sense, or is messed up, something is wrong, and you have two options:
-(a) seek help to fix your terminal window
-(b) turn off escape sequences by adding PLAIN to every enclone command, or set
-the environment variable ENCLONE_PLAIN.
-But that should be only a last resort.
-
-5. Can your terminal correctly display unicode characters?
-Do you see a centered dot here • ?
-If not, your terminal has a problem!
-
-6. Does this entire help page appear at once in your terminal window?
-If not, please increase the number of rows in your window to 56.
-
-
-If you go through all those tests and everything worked, you should be good to go!
-
-
-
- - diff --git a/pages/auto/help.special.html b/pages/auto/help.special.html deleted file mode 100644 index 629c0cda1..000000000 --- a/pages/auto/help.special.html +++ /dev/null @@ -1,155 +0,0 @@ - - - - - - -enclone help special - - - - - - - -
-enclone banner -

-special filtering options
-
-This page documents some options, most of which allow noise filters to be turned off, and which
-normally should not be invoked.  The last two options can be used to simplify the view of a
-clonotype.
-
-┌────────────────────┬─────────────────────────────────────────────────────────────────────────────┐
-│NALL                │  Turn off all the noise filters shown below.  This may yield quite a mess.  │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NCELL               │  Use contigs found by Cell Ranger even if they were not in a called cell,   │
-│                    │  or not called high confidence.                                             │
-│NALL_CELL           │  turn off all the noise filters except for the cell filter                  │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NGEX                │  If gene expression and/or feature barcode data are provided, if a barcode  │
-│                    │  is called a cell by the VDJ part of the Cell Ranger pipeline, but not      │
-│                    │  called a cell by the gene expression and/or feature barcode part, then the │
-│                    │  default behavior of enclone is to remove such cells from clonotypes.  This │
-│                    │  option disables that behavior.                                             │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NCROSS              │  If you specify that two or more libraries arose from the same origin (i.e. │
-│                    │  cells from the same tube or tissue), then by default enclone will          │
-│                    │  "cross filter" so as to remove expanded exact subclonotypes that are       │
-│                    │  present in one library but not another, in a fashion that would be highly  │
-│                    │  improbable, assuming random draws of cells from the tube.  These are       │
-│                    │  believed to arise when a plasma or plasmablast cell breaks up during       │
-│                    │  or after pipetting from the tube, and the resulting fragments seed GEMs,   │
-│                    │  yielding expanded 'fake' clonotypes that are residues of real single plasma│
-│                    │  cells.  The NCROSS option turns off this filter, which could be useful so  │
-│                    │  long as you interpret the restored clonotypes as representing what are     │
-│                    │  probably single cells.  There may also be other situations where the filter│
-│                    │  should be turned off, and in particular the filter can do weird things if  │
-│                    │  inputs are somehow mis-specified to enclone.  Note that for purposes of    │
-│                    │  this option, enclone defines an origin by the pair                         │
-│                    │  (origin name, donor name).                                                 │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NUMI                │  Turn off filtering of B cells based on low BCR UMI counts.  The heuristics │
-│                    │  for this are described on the enclone site at bit.ly/enclone.              │
-│NUMI_RATIO          │  Turn off filtering of B cells based on low BCR UMI counts relative to      │
-│                    │  another cell in a given clonotype.  The heuristics for this                │
-│                    │  are described on the enclone site at bit.ly/enclone.                       │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NGRAPH_FILTER       │  By default, enclone filters to remove exact subclonotypes that by virtue of│
-│                    │  their relationship to other exact subclonotypes, appear to arise from      │
-│                    │  background mRNA or a phenotypically similar phenomenon.  The               │
-│                    │  NGRAPH_FILTER option turns off this filtering.                             │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NQUAL               │  By default, enclone filters out exact subclonotypes having a base in V..J  │
-│                    │  that looks like it might be wrong.  More specifically, enclone finds bases │
-│                    │  which are not Q60 for a barcode, not Q40 for two barcodes, are not         │
-│                    │  supported by other exact subclonotypes, are variant within the clonotype,  │
-│                    │  and which disagree with the donor reference.  NQUAL turns this off.        │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NWEAK_CHAINS        │  By default, enclone filters chains from clonotypes that are                │
-│                    │  weak and appear to be artifacts, perhaps arising from a stray mRNA molecule│
-│                    │  that floated into a GEM.  The NWEAK_CHAINS option turns off this filter.   │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NWEAK_ONESIES       │  By default, enclone filters out onesie clonotypes having a single exact    │
-│                    │  subclonotype, and that are light chain or TRA, and whose number of cells is│
-│                    │  greater than one but less than 0.1% of the total number of cells.          │
-│                    │  This filter reduces the likelihood of creating clonotypes containing cells │
-│                    │  that arose from different recombination events.                            │
-│                    │  NWEAK_ONESIES turns this filter off.                                       │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NFOURSIE_KILL       │  By default, if enclone finds a foursie exact subclonotype that             │
-│                    │  contains a twosie exact subclonotype having at least ten cells, it kills   │
-│                    │  the foursie exact subclonotype, no matter how many cells it has.  The      │
-│                    │  foursies that are killed are believed to be rare oddball artifacts arising │
-│                    │  from repeated cell doublets or GEMs that contain two cells and multiple gel│
-│                    │  beads.  The argument NFOURSIE_KILL turns off this filtering.               │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NWHITEF             │  By default, enclone filters out rare artifacts arising from contamination  │
-│                    │  of oligos on gel beads.  The NWHITEF option turns off this filter.         │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│NBC_DUP             │  By default, enclone filters out duplicated barcodes within an exact        │
-│                    │  subclonotype.  The NBC_DUP option turns off this filter.                   │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│MIX_DONORS          │  By default, enclone will prevent cells from different donors from being    │
-│                    │  placed in the same clonotype.  The MIX_DONORS option turns off this        │
-│                    │  behavior, thus allowing cells from different donors to be placed in the    │
-│                    │  same clonotype.  The main use of this option is for specificity testing, in│
-│                    │  which data from different donors are deliberately combined in an attempt   │
-│                    │  to find errors.  Use of the bc field for META input specification          │
-│                    │  automatically turns on this option.                                        │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│KEEP_IMPROPER       │  An exact subclonotype is improper if it does not have one chain            │
-│                    │  of each type.  This option causes all improper exact subclonotypes to be   │
-│                    │  retained, although they may be removed by other filters.                   │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│MIN_CHAINS_EXACT=n  │  Delete any exact subclonotype having less than n chains.  You can use this │
-│                    │  to "purify" a clonotype so as to display only exact subclonotypes having   │
-│                    │  all their chains.                                                          │
-│CHAINS_EXACT=n      │  Delete any exact subclonotype not having exactly n chains.                 │
-│MIN_CELLS_EXACT=n   │  Delete any exact subclonotype having less than n cells.  You might want    │
-│                    │  to use this if you have a very large and complex expanded clonotype,       │
-│                    │  for which you would like to see a simplified view.                         │
-│COMPLETE            │  delete any exact subclonotype that has less chains than the clonotype      │
-├────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
-│FCELL=var=value     │  Supposing that var has been specified as a field using the BC option       │
-│                    │  (or equivalently, using bc, via META), see "enclone help input", this      │
-│                    │  option filters out all barcodes that do not satisfy the given constraint.  │
-│                    │  Note that for purposes of testing the constraint, if the value for a       │
-│                    │  particular barcode has not been specified via BC or bc, then its value is  │
-│                    │  taken to be null.  Also multiple instances of FCELL may be used to impose  │
-│                    │  multiple filters.                                                          │
-└────────────────────┴─────────────────────────────────────────────────────────────────────────────┘
-
-
- - diff --git a/pages/auto/heuristics.html b/pages/auto/heuristics.html deleted file mode 100644 index 13eeae636..000000000 --- a/pages/auto/heuristics.html +++ /dev/null @@ -1,117 +0,0 @@ - - - - - - -enclone heuristics - - - - - - - - - - - -
-enclone banner - -

enclone heuristics

- -

This page is a start in describing the heuristics that enclone uses. It will be gradually -expanded. See also enclone help how. The content here -is geeky and technical. -

- -
- -

UMI filtering. enclone filters out B cells having low UMI counts, relative to a baseline -that is determined for each dataset, according to a -heuristic described here, unless the argument NUMI is supplied, to turn off that -filter. - -

The motivation for this filter is to mitigate illusory clonotype expansions arising from -fragmentation of plasma cells or other physical processes (not all fully understood). These -processes all result in "cells" having low UMI counts, many of which do not correspond to intact -real cells. Illusory clonotype expansions are generally infrequent, but occasionally cluster -in individual datasets.

- -

Nomenclature: for any cell, find the maximum UMI count for its zero or more heavy chains, -and the maximum for its light chains. The sum of these two maxima is -denoted umitot.

- -

The algorithm for this filter first establishes a baseline for the expected value of -umitot, for each dataset taken individually. To do this, all clonotypes having -exactly one cell and exactly one heavy and light chain each are examined. If there are fewer than -20 such cells, the filter is not applied to cells in that dataset. Otherwise, -let n_50% denote the median of the umitot values for the dataset, and let -n_10% denote the 10th percentile. Let -

umin = min( n_10%, n_50% - 4 * sqrt(n_50%) ).
-This is the baseline low value for umitot. The reason for having the second -part of the min is to prevent filtering in cases where UMI counts are sufficiently -low that Poisson variability could cause a real cell to appear fake.

- -

Next we scan each clonotype having at least two cells, and delete every cell having -umitot < umin, with the following qualifications: -

    -
  • Let k be the number of cells to be deleted in a clonotype having n -cells. Then we require that for a binomial distribution having p = 0.1, the -probability of observing k or more events in a sample of size n is -less than 0.01. The more cells are flagged in a clonotype, the more likely this -test is satisfied, which is the point of the test. -
  • -
  • If every cell in a clonotype would be deleted, then we find its exact subclonotype -having the highest sum for umitot, summing across its cells. Then we protect from -deletion the cell in this exact subclonotype having the highest umitot value. We -do this because in general even if a clonotype expansion did not occur, there was probably at -least a single bona fide cell that gave rise to it. -
  • -
- -A better test could probably be devised that started from the expected distribution of UMI counts. -The test would trigger based on the number and improbability of low UMI counts. The current test -only considers the number of counts that fall below a threshold, and not their particular values. - -

This UMI filter is carried out before most of the other filters.

- -
- -

UMI ratio filtering. enclone filters out B cells having low UMI counts, relative to -other UMI counts in a given clonotype, according to a -heuristic described here, unless the argument NUMI_RATIO is supplied, to turn off that -filter. - -

First we mark a cell for possible deletion, if the VDJ UMI count for some chain of some other -cell is at least 500 times greater than the total VDJ UMI count for the given cell.

- -

Then we scan each clonotype having at least two cells, and delete every cell marked as above, -with the following qualification. -Let k be the number of cells to be deleted in a clonotype having n -cells. Then we require that for a binomial distribution having p = 0.1, the -probability of observing k or more events in a sample of size n is -less than 0.01.

- - - diff --git a/pages/auto/history.html b/pages/auto/history.html deleted file mode 100644 index eb5101e81..000000000 --- a/pages/auto/history.html +++ /dev/null @@ -1,178 +0,0 @@ - - - - - - -enclone history - - - - - - - - - - - -
-enclone banner - -

History

- -

This page provides a selective history of what was changed in enclone and when.

- -

-We show changes that affect users like new features, changes to results, and the like. This -log starts with initial public availability. The complete history may be seen by cloning the -enclone repo and typing git log. -

- -

Breaking changes are shown in red.

- -

-Please be aware that our workflow when we make changes is to automatically update the GitHub -site, including all the website pages and this page too. This happens in advance of -actually making a release (which might follow in a couple days). This means that the website -may describe features that are not yet available in a release (although they will be in the -source code that's available). We apologize for this asynchrony! Note however that the -command-line help that comes with your copy of enclone will always match its behavior.

- -
- -

-8/10/20: tweak the definition of the weak onesies filter so that it does not -delete single-cell clonotypes. -

- -

-8/7/20: add the ability to analyze alternate splicing using GEX data to characterize -UMIs as secreted or membrane, and display this information using lvars "sec" and "mem". See -enclone help lvars for limitations. -

- -

-8/5/20: -

    -
  1. A reference file is now required as part of the Cell Ranger outs directory, -if Cell Ranger version 4.0 or greater was used.
  2. -
  3. Deprecate MAX_SCORE and replace by MAX_LOG_SCORE, the - base 10 logarithm of it.
  4. -
  5. Add cvar cdr3_len
  6. -
  7. Add and document knobs that allow nearly all clonotype join filtering to be turned off. Please - see "enclone help how" and the end of "enclone help faq".
  8. -

    - -

    -6/24/20: -

      -
    1. Add support for iNKT and MAIT cells.
    2. -
    3. Make enclone faster. This is most noticeable in cases where many GEX datasets are - provided as input.
    4. -
    -

    - -

    -6/19/20: -

      -
    1. We now use a data hierarchy of donor (top), origin, dataset (bottom), where an -origin is a set of 1 or more datasets from the same source (tube of cells, tissue, timepoint, etc.). -(This breaks previous invocations of META.) -
    2. -
    3. Improve the alternate (faster) internal storage structure for the GEX matrix -created using the option NH5 as described using the command -enclone help input. -This will speed things up, particularly for the case where several datasets are combined. If -you have already used the NH5 option for a given dataset, then the next time you -run enclone on it, the file will be automatically rewritten. This would also apply to some -datasets obtained as part of the large download. -
    4. -
    -

    - -

    -6/17/20: -

      -
    1. Now TREE=const can be used to show a tree with heavy chain constant region - names attached to the leaves.
    2. -
    3. SEG and SEGN are now cumulative, so that multiple instances may - be used to progressively filter.
    4. -
    5. The clonotype joining heuristic parameters MAX_SCORE and - MAX_CDR3_DIFFS are now accessible.
    6. -
    -

    - -

    -6/10/20: -

      -
    1. Add a complex of features for generating phylogenetic trees from clonotypes, see - here.
    2. -
    3. New "single button" installation procedure.
    4. -
    5. Change the default value for PRE - to ~/enclone/datasets,~/enclone/datasets2.
    6. -
    7. Add argument NALL to turn off all filters.
    8. -
    9. Add new lead variables nd<k> that display the number of cells in the - top datasets for a given clonotype.
    10. -
    11. Add new lead variable dref that shows the distance of V..J from the reference - outside the region of recombination. -
    12. Add argument COMPLETE to remove exact subclonotypes that do not have all - chains.
    13. -
    14. Test for consistency between VDJ and GEX barcodes, and exit if this is not the case.
    15. -
    16. Add option COLOR=property to color amino acids by their properties.
    17. -
    18. Add option FCELL to allow filtering by cells.
    19. -
    -

    - -

    -5/29/20: -Add new "UMI ratio" filter that further reduces noise in certain cases. This can be turned -off using the argument NUMI_RATIO. -

    - -

    -5/22/20: -Add major new "UMI" filter that greatly reduces noise in certain cases. This can be turned -off using the argument NUMI. -

    - -

    -5/12/20: -Add PLOT_BY_ISOTYPE to generate honeycomb plots colored by isotype. -

    - -

    -5/1/20: -Change the definition of the fields "edit" and "comp" to be based on -alignment from the beginning of the CDR3 up to the end of the J, rather than stopping at -the end of the CDR3. The intention is to capture the full region of recombination, which -may not have been done before. -

    - -

    -4/30/20: -First release. -

    - - - diff --git a/pages/auto/illusory1.html b/pages/auto/illusory1.html deleted file mode 100644 index e64244a7b..000000000 --- a/pages/auto/illusory1.html +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - -illusory clonotype expansion 1 - - - -
    [1.1] CLONOTYPE = 122 CELLS
    -┌──────────────────┬───────────────────────────────────────┬─────────────────────────────────────┐
    -│                  │  CHAIN 1                              │  CHAIN 2                            │
    -│                  │  146.1.1|IGHV3-53 ◆ 55|IGHJ4          │  299|IGKV4-1 ◆ 217|IGKJ4            │
    -│                  ├───────────────────────────────────────┼─────────────────────────────────────┤
    -│                  │              1111111111111            │              11111111111 1          │
    -│                  │  12257777789 1111111222222            │  11345778899 11111112222 2          │
    -│                  │  35831234686 3456789012345            │  78291346825 34567890123 7          │
    -│                  │              ═════CDR3════            │              ════CDR3═══            │
    -│reference         │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦            │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T          │
    -│donor ref         │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦            │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T          │
    -├──────────────────┼───────────────────────────────────────┼─────────────────────────────────────┤
    -│#  datasets    n    ........... .............  u  const    ........... ........... .  u  const│
    -│1  128040    114  │  LSNNGDGNYFV CARGGTTTYFISW  6  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T  5  IGKC │
    -│2  128040      6  │  LSNNGDGNYFV CARGGTTTYFISW  4  IGHA1  │                                     │
    -│3  128040      2  │                                       │  TNAFSLYRTSE CQQYCDTPLTF T  6  IGKC │
    -└──────────────────┴───────────────────────────────────────┴─────────────────────────────────────┘
    -
    - - - diff --git a/pages/auto/illusory2.html b/pages/auto/illusory2.html deleted file mode 100644 index 24cdbdafd..000000000 --- a/pages/auto/illusory2.html +++ /dev/null @@ -1,53 +0,0 @@ - - - - - - -illusory clonotype expansion 2 - - - -
    [1.1] CLONOTYPE = 1 CELLS
    -┌────────────────┬──────────────────────────────────────────┬────────────────────────────────────────┐
    -│                │  CHAIN 1                                 │  CHAIN 2                               │
    -│                │  146.1.1|IGHV3-53 ◆ 55|IGHJ4             │  299|IGKV4-1 ◆ 217|IGKJ4               │
    -│                ├──────────────────────────────────────────┼────────────────────────────────────────┤
    -│                │              1111111111111               │              11111111111 1             │
    -│                │  12257777789 1111111222222               │  11345778899 11111112222 2             │
    -│                │  35831234686 3456789012345               │  78291346825 34567890123 7             │
    -│                │              ═════CDR3════               │              ════CDR3═══               │
    -│reference       │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
    -│donor ref       │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
    -├────────────────┼──────────────────────────────────────────┼────────────────────────────────────────┤
    -│#  datasets  n    ........... .............     u  const    ........... ........... .     u  const│
    -│1  128040    1  │  LSNNGDGNYFV CARGGTTTYFISW  1725  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T  6300  IGKC │
    -└────────────────┴──────────────────────────────────────────┴────────────────────────────────────────┘
    -
    - - - diff --git a/pages/auto/illusory3.html b/pages/auto/illusory3.html deleted file mode 100644 index bbf80fad7..000000000 --- a/pages/auto/illusory3.html +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - -illusory clonotype expansion 3 - - - -
    [1.1] CLONOTYPE = 44 CELLS
    -┌───────────┬───────────────────────────────────────┬─────────────────────────────────────┐
    -│           │  CHAIN 1                              │  CHAIN 2                            │
    -│           │  146.1.2|IGHV3-53 ◆ 55|IGHJ4          │  299|IGKV4-1 ◆ 217|IGKJ4            │
    -│           ├───────────────────────────────────────┼─────────────────────────────────────┤
    -│           │              1111111111111            │              11111111111 1          │
    -│           │  12257777789 1111111222222            │  11345778899 11111112222 2          │
    -│           │  35831234686 3456789012345            │  78291346825 34567890123 7          │
    -│           │              ═════CDR3════            │              ════CDR3═══            │
    -│reference  │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦            │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T          │
    -│donor ref  │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦            │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T          │
    -├───────────┼───────────────────────────────────────┼─────────────────────────────────────┤
    -│#   n        ........... .............  u  const    ........... ........... .  u  const│
    -│1  38      │  LSNNGDGNYFV CARGGTTTYFISW  4  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T  3  IGKC │
    -│2   5      │  LSNNGDGNYFV CARGGTTTYFISW  4  IGHA1  │                                     │
    -│3   1      │                                       │  TNAFSLYRTSE CQQYCDTPLTF T  6  IGKC │
    -└───────────┴───────────────────────────────────────┴─────────────────────────────────────┘
    -
    - - - diff --git a/pages/auto/illusory4.html b/pages/auto/illusory4.html deleted file mode 100644 index 0f3fa0389..000000000 --- a/pages/auto/illusory4.html +++ /dev/null @@ -1,91 +0,0 @@ - - - - - - -illusory clonotype expansion 4 - - - -
    [1.1] CLONOTYPE = 38 CELLS
    -┌────────────────────────────────────────┬──────────────────────────────────────────┬────────────────────────────────────────┐
    -│                                        │  CHAIN 1                                 │  CHAIN 2                               │
    -│                                        │  146.1.2|IGHV3-53 ◆ 55|IGHJ4             │  299|IGKV4-1 ◆ 217|IGKJ4               │
    -│                                        ├──────────────────────────────────────────┼────────────────────────────────────────┤
    -│                                        │              1111111111111               │              11111111111 1             │
    -│                                        │  12257777789 1111111222222               │  11345778899 11111112222 2             │
    -│                                        │  35831234686 3456789012345               │  78291346825 34567890123 7             │
    -│                                        │              ═════CDR3════               │              ════CDR3═══               │
    -│reference                               │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
    -│donor ref                               │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
    -├────────────────────────────────────────┼──────────────────────────────────────────┼────────────────────────────────────────┤
    -│#  barcode              n    gex  cred    ........... .............     u  const    ........... ........... .     u  const│
    -│1                      38   4986   0.8  │  LSNNGDGNYFV CARGGTTTYFISW     4  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T     3  IGKC │
    -│   AAATGCCCACTGAAGG-1       7142   0.7                                  2                                         7       │
    -│   AACCATGCAAAGAATC-1       4583   0.8                                  3                                         2       │
    -│   AACTGGTGTCGAACAG-1       4252   0.5                                  8                                         7       │
    -│   ACGGGTCGTCGCGGTT-1       2544   0.7                                  2                                         3       │
    -│   AGACGTTAGAGTAAGG-1       5198   0.9                                  6                                         3       │
    -│   AGCATACGTTTCCACC-1       5852   0.8                                  5                                         1       │
    -│   AGTGTCAAGTAGTGCG-1       3173   0.8                                 10                                        17       │
    -│   ATCCGAAAGGACTGGT-1        842   2.8                                  1                                         2       │
    -│   ATCTACTTCAGTTAGC-1       1662   0.5                                  5                                         2       │
    -│   ATCTGCCGTTACGACT-1       6078   1.0                                  2                                         2       │
    -│   CAAGTTGAGTTACGGG-1       4586   0.5                                  2                                         3       │
    -│   CAGAGAGAGATGGGTC-1       6870   0.8                                  4                                         1       │
    -│   CATATTCTCCGCTGTT-1       4944   0.7                                  7                                         2       │
    -│   CGATTGATCCACGCAG-1       3952   0.3                                  7                                        11       │
    -│   CGGCTAGGTCAACTGT-1       5499   0.7                                  2                                         2       │
    -│   CGTAGGCCAAACTGTC-1       1320   1.8                                  2                                         1       │
    -│   CTAGTGACACGGTTTA-1       3896   0.8                                  1                                         3       │
    -│   CTCTAATAGCCGATTT-1       2151   1.6                                  2                                         1       │
    -│   CTGGTCTAGCTGCCCA-1      19984  15.0                               1725                                      6300       │
    -│   CTTCTCTAGATGCCAG-1       6228   1.0                                  5                                         5       │
    -│   GAAGCAGTCGTTACAG-1       5434   1.0                                  3                                         1       │
    -│   GACGTTATCTACCAGA-1       3898   0.7                                  2                                         2       │
    -│   GAGTCCGTCGGTCTAA-1      11095  10.0                                  3                                         1       │
    -│   GATGAGGAGATCTGCT-1       7510   1.1                                  4                                         1       │
    -│   GCATACATCGACAGCC-1       1646   1.0                                  3                                         2       │
    -│   GGAATAAGTTTGACAC-1       8007   1.4                                  3                                         1       │
    -│   GGCTGGTCAGTGGGAT-1       9681   0.9                                 16                                         6       │
    -│   GGGAGATTCCGCATAA-1       4633   1.0                                  5                                         4       │
    -│   GTACTCCAGGTGTGGT-1       4575   0.5                                  5                                         3       │
    -│   GTTAAGCCACATTAGC-1       7601   0.9                                  4                                         2       │
    -│   TAGTGGTTCGGCGCTA-1       4986   0.8                                 11                                        14       │
    -│   TCAGGATCAAGTTCTG-1       7352   0.5                                  2                                         3       │
    -│   TCAGGATGTTGCCTCT-1       3496   0.5                                  2                                         2       │
    -│   TCCCGATTCTATCCCG-1       5962   0.9                                  3                                         6       │
    -│   TGCGCAGCAAATCCGT-1       5736   0.9                                  8                                         5       │
    -│   TTCCCAGCAAGTTAAG-1       5860   0.7                                 11                                        15       │
    -│   TTGAACGTCCATTCTA-1       4682   0.6                                  3                                         2       │
    -│   TTTGCGCCACACAGAG-1       4958   0.8                                  4                                         3       │
    -└────────────────────────────────────────┴──────────────────────────────────────────┴────────────────────────────────────────┘
    -
    - - - diff --git a/pages/auto/illusory5.html b/pages/auto/illusory5.html deleted file mode 100644 index e61c1d41d..000000000 --- a/pages/auto/illusory5.html +++ /dev/null @@ -1,91 +0,0 @@ - - - - - - -illusory clonotype expansion 5 - - - -
    [1.1] CLONOTYPE = 38 CELLS
    -┌───────────────────────────────────────────┬──────────────────────────────────────────┬────────────────────────────────────────┐
    -│                                           │  CHAIN 1                                 │  CHAIN 2                               │
    -│                                           │  146.1.2|IGHV3-53 ◆ 55|IGHJ4             │  299|IGKV4-1 ◆ 217|IGKJ4               │
    -│                                           ├──────────────────────────────────────────┼────────────────────────────────────────┤
    -│                                           │              1111111111111               │              11111111111 1             │
    -│                                           │  12257777789 1111111222222               │  11345778899 11111112222 2             │
    -│                                           │  35831234686 3456789012345               │  78291346825 34567890123 7             │
    -│                                           │              ═════CDR3════               │              ════CDR3═══               │
    -│reference                                  │  STGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
    -│donor ref                                  │  LSGSSGGSYSL ◦◦◦◦◦◦◦◦◦◦◦◦◦               │  AYVLSIYRSSD CQQ◦◦◦◦◦◦◦◦ T             │
    -├───────────────────────────────────────────┼──────────────────────────────────────────┼────────────────────────────────────────┤
    -│#  barcode              n    gex  cred  T    ........... .............     u  const    ........... ........... .     u  const│
    -│1                      38   4986   0.8     │  LSNNGDGNYFV CARGGTTTYFISW     4  IGHA1  │  TNAFSLYRTSE CQQYCDTPLTF T     3  IGKC │
    -│   AAATGCCCACTGAAGG-1       7142   0.7  ◯                                  2                                         7       │
    -│   AACCATGCAAAGAATC-1       4583   0.8  ◯                                  3                                         2       │
    -│   AACTGGTGTCGAACAG-1       4252   0.5  ◯                                  8                                         7       │
    -│   ACGGGTCGTCGCGGTT-1       2544   0.7                                     2                                         3       │
    -│   AGACGTTAGAGTAAGG-1       5198   0.9  ◯                                  6                                         3       │
    -│   AGCATACGTTTCCACC-1       5852   0.8  ◯                                  5                                         1       │
    -│   AGTGTCAAGTAGTGCG-1       3173   0.8                                    10                                        17       │
    -│   ATCCGAAAGGACTGGT-1        842   2.8                                     1                                         2       │
    -│   ATCTACTTCAGTTAGC-1       1662   0.5  ◯                                  5                                         2       │
    -│   ATCTGCCGTTACGACT-1       6078   1.0  ◯                                  2                                         2       │
    -│   CAAGTTGAGTTACGGG-1       4586   0.5                                     2                                         3       │
    -│   CAGAGAGAGATGGGTC-1       6870   0.8  ◯                                  4                                         1       │
    -│   CATATTCTCCGCTGTT-1       4944   0.7                                     7                                         2       │
    -│   CGATTGATCCACGCAG-1       3952   0.3                                     7                                        11       │
    -│   CGGCTAGGTCAACTGT-1       5499   0.7  ◯                                  2                                         2       │
    -│   CGTAGGCCAAACTGTC-1       1320   1.8                                     2                                         1       │
    -│   CTAGTGACACGGTTTA-1       3896   0.8                                     1                                         3       │
    -│   CTCTAATAGCCGATTT-1       2151   1.6                                     2                                         1       │
    -│   CTGGTCTAGCTGCCCA-1      19984  15.0                                  1725                                      6300       │
    -│   CTTCTCTAGATGCCAG-1       6228   1.0  ◯                                  5                                         5       │
    -│   GAAGCAGTCGTTACAG-1       5434   1.0  ◯                                  3                                         1       │
    -│   GACGTTATCTACCAGA-1       3898   0.7                                     2                                         2       │
    -│   GAGTCCGTCGGTCTAA-1      11095  10.0                                     3                                         1       │
    -│   GATGAGGAGATCTGCT-1       7510   1.1  ◯                                  4                                         1       │
    -│   GCATACATCGACAGCC-1       1646   1.0                                     3                                         2       │
    -│   GGAATAAGTTTGACAC-1       8007   1.4                                     3                                         1       │
    -│   GGCTGGTCAGTGGGAT-1       9681   0.9  ◯                                 16                                         6       │
    -│   GGGAGATTCCGCATAA-1       4633   1.0  ◯                                  5                                         4       │
    -│   GTACTCCAGGTGTGGT-1       4575   0.5  ◯                                  5                                         3       │
    -│   GTTAAGCCACATTAGC-1       7601   0.9  ◯                                  4                                         2       │
    -│   TAGTGGTTCGGCGCTA-1       4986   0.8  ◯                                 11                                        14       │
    -│   TCAGGATCAAGTTCTG-1       7352   0.5                                     2                                         3       │
    -│   TCAGGATGTTGCCTCT-1       3496   0.5  ◯                                  2                                         2       │
    -│   TCCCGATTCTATCCCG-1       5962   0.9  ◯                                  3                                         6       │
    -│   TGCGCAGCAAATCCGT-1       5736   0.9  ◯                                  8                                         5       │
    -│   TTCCCAGCAAGTTAAG-1       5860   0.7  ◯                                 11                                        15       │
    -│   TTGAACGTCCATTCTA-1       4682   0.6  ◯                                  3                                         2       │
    -│   TTTGCGCCACACAGAG-1       4958   0.8  ◯                                  4                                         3       │
    -└───────────────────────────────────────────┴──────────────────────────────────────────┴────────────────────────────────────────┘
    -
    - - - diff --git a/pages/auto/innate.html b/pages/auto/innate.html deleted file mode 100644 index 94bb401e5..000000000 --- a/pages/auto/innate.html +++ /dev/null @@ -1,200 +0,0 @@ - - - - - - -iNKT and MAIT cells - - - - - - - - - - - -
    -enclone banner - -

    iNKT and MAIT cells

    - -

    -enclone can look for evidence that T cells are iNKT or MAIT cells. This evidence may be -displayed e.g. with LVARSP=inkt or LVARSP=mait, see -enclone help lvars. One may also filter to show -only cells with some evidence, using the INKT and MAIT options, see -enclone help filter. -

    - -
    - -

    -enclone looks for both gene and junction (CDR3) evidence. The rules for this are -likely -to be tweaked over time. Junction evidence is discussed later. For gene evidence, the following -rules are used currently: -

      -
    • Human iNKT: α chain -- use of both TRAV10 and TRAJ18; - β chain: use of TRBV25-1
    • -
    • Human MAIT: α chain -- use of TRAV1-2 and any of - TRAJ33/TRAJ20/TRAJ12; β chain -- use of any member of the - TRBV20 or TRBV6 families
    • -
    • Mouse MAIT: α chain -- use of both TRAV1 and TRAJ33; β chain -- use - of either TRBV19 or TRBV13
    • -
    • Mouse iNKT: α chain -- use of both TRAV11 and TRAJ18; β chain -- use - of TRBV13-2 or TRBV1 or TRBV29.
    • -
    -

    - -

    As an example, -

    enclone TCR=101287 LVARSP=mait CDR3=CSAGQGDTEAFF
    - -
    [1] GROUP = 1 CLONOTYPES = 1 CELLS
    -
    -[1.1] CLONOTYPE = 1 CELLS
    -┌─────────────┬──────────────────────────────┬──────────────────────────────┐
    -│             │  CHAIN 1                     │  CHAIN 2                     │
    -│             │  600|TRBV20-1 ◆ 540|TRBJ1-1  │  458.1.1|TRAV1-2 ◆ 423|TRAJ33│
    -│             ├──────────────────────────────┼──────────────────────────────┤
    -│             │  111111111111                │    111111111111              │
    -│             │  000111111111                │  5 000000111111              │
    -│             │  789012345678                │  9 456789012345              │
    -│             │  ════CDR3════                │    ════CDR3════              │
    -│reference    │  ◦◦◦◦◦◦◦◦◦◦◦FP ◦◦◦◦◦◦◦◦◦LIW              │
    -│donor ref    │  ◦◦◦◦◦◦◦◦◦◦◦FP ◦◦◦◦◦◦◦◦◦LIW              │
    -├─────────────┼──────────────────────────────┼──────────────────────────────┤
    -│#  n   mait    ............  u  const        . ............  u  const    │
    -│1  1  𝝰gj𝝱g  │  CSAGQGDTEAFF  5  TRBC1      │  P CAVMDSNYQLIW  1  TRAC     │
    -└─────────────┴──────────────────────────────┴──────────────────────────────┘
    -
    - -displays a single cell showing evidence for the cell being a MAIT cell, encoded using -the string 𝝰gj𝝱g. This stands for "gene and junction evidence on the alpha -chain, and gene evidence on the beta chain" (i.e. 𝝰gj = alpha gene and junction). - -

    Please be aware that detection of junction evidence is limited by the completeness of -the lists on which they are based (see below), and these lists are incomplete! They are also -likely to change over time as more data studying iNKT and MAIT cells emerge. Note that while -iNKT and MAIT cells are described as "invariant", a more accurate description would be -"semi-invariant", -or "limited variant" as V and J gene recombinations that generate iNKT and MAIT TCRs can still -generate considerable nucleotide and amino acid diversity within the CDR3.

    - -
    - -

    For junction evidence, enclone tests for presence of a given chain's CDR3 amino acid -sequence in a fixed list. Here is the list for human iNKT: -

      -
    • CASARGVNEQYF
    • -
    • CASRGQGLGEQYF
    • -
    • CASRYYSVQGRTDTQYF
    • -
    • CASSAMDTEAFF
    • -
    • CASSAPLAGHYEQYF
    • -
    • CASSAWDGYEQYF
    • -
    • CASSDGFTDTQYF
    • -
    • CASSDLGLAGVIEQFF
    • -
    • CASSDLMGPDNYEQYF
    • -
    • CASSDLPETQYF
    • -
    • CASSDQNTEAFF
    • -
    • CASSDRANEQFF
    • -
    • CASSDRLAGDTQYF
    • -
    • CASSDRRQGAHQPQHF
    • -
    • CASSEAGSGEKLFF
    • -
    • CASSEALILFF
    • -
    • CASSEAPWRDSGNTIYF
    • -
    • CASSEEGALKESVGTQYF
    • -
    • CASSEFDGGQETQYF
    • -
    • CASSEFGGTERTQETQYF
    • -
    • CASSEFGQSADEQFF
    • -
    • CASSEGGQDYEQYF
    • -
    • CASSEGTAGTDTQYF
    • -
    • CASSEGTGPNSPLHF
    • -
    • CASSEGWEQYF
    • -
    • CASSELLRGQGRTGELFF
    • -
    • CASSELTDTQYF
    • -
    • CASSELYTGGDEQFF
    • -
    • CASSEMGQGVYTF
    • -
    • CASSENSGTGRIYEQYF
    • -
    • CASSEPSSGNTIYF
    • -
    • CASSEPTGLGTDTQYF
    • -
    • CASSESATGFSPLHF
    • -
    • CASSESGGSTEAFF
    • -
    • CASSESLAGGYNEQFF
    • -
    • CASSESVETQYF
    • -
    • CASSEWAGGQETQYF
    • -
    • CASSEWEDITDTQYF
    • -
    • CASSEWGRTQETQYF
    • -
    • CASSEWGTNEKLFF
    • -
    • CASSEYESTNEKLFF
    • -
    • CASSEYFAGFNEQYF
    • -
    • CASSEYGTLQETYF
    • -
    • CASSEYMEAGIPTDTQYF
    • -
    • CASSEYMEGGEKLFF
    • -
    • CASSEYRLQETQYF
    • -
    • CASSEYRRRSGEKLFF
    • -
    • CASSFGGETQYF
    • -
    • CASSGDRRQGAHQPQHF
    • -
    • CASSGLLTGPDTQYF
    • -
    • CASSGLRDRGLYEQYF
    • -
    • CASSGTGGAFDEQFF
    • -
    • CASSGTVTEAFF
    • -
    • CASSGYQGGGETQYF
    • -
    • CASSPIGGHGYEQYF
    • -
    • CASSPRDRWHEQYF
    • -
    • CASSRGGFDEQYF
    • -
    • CASSRGGGTEAFF
    • -
    • CASSRGGYTEAFF
    • -
    • CASSTGGADEKLFF
    • -
    • CASSVPLRDYEQYF
    • -
    • CASTGASGTYEQYF
    • -
    • CASTPRKGTDVGNTIYF
    • -
    • CASTPSGGWSSDTQYF
    • -
    • CASTSLETSQYF.
    • -
    -and here is the list for human MAIT: -
      -
    • CAALDSNYQLIW
    • -
    • CAAMDSNYQLIW
    • -
    • CARSDSNYQLIW
    • -
    • CASMDSNYQLIW
    • -
    • CASSDSGESGTEAFF
    • -
    • CASSPSGGDYNEQFF
    • -
    • CASSQIAGGQQETQYF
    • -
    • CAVLDSNYQLIW
    • -
    • CAVMDSNYQLIW
    • -
    • CAVNGDDYKLSF
    • -
    • CAVRDGDYKLSF
    • -
    • CAVRDSDYKLSF
    • -
    • CAVRDSNYQLIQW
    • -
    • CAVRDSNYQLIW
    • -
    • CAVSDSNYQLIW
    • -
    • CAVSLQDYKLSF
    • -
    • CSARQGAESREQYF
    • -
    -At present we do not have lists for mouse. -

    - - - diff --git a/pages/auto/installation_details.html b/pages/auto/installation_details.html deleted file mode 100644 index 4036554db..000000000 --- a/pages/auto/installation_details.html +++ /dev/null @@ -1,88 +0,0 @@ - - - - - - -enclone installation details - - - - - - - - - - - -
    -enclone banner - -

    enclone installation details

    - -

    The purpose of this page is to provide some information about what the enclone installation -command does, in case you're curious.

    - -

    The command is:

    - -

    -curl -sSf -L bit.ly/enclone_install | sh -s SIZE - where SIZE is -small, medium or large -

    - -

    1. First, bit.ly/enclone_install is a redirect to -https://10xgenomics.github.io/enclone/install.sh, as you can see if you type -bit.ly/enclone_install+ (the + is the way that bit.ly -provides for seeing what a redirect does).

    - -

    2. The -sSf option to curl causes it to run quietly if it -is successful, print an error message if it fails, and importantly, not pass logging or error -messages to sh.

    - -

    3. The overall command just causes the script install.sh to be executed.

    - -

    4. On a first invocation, the script downloads the enclone executable and datasets.

    - -

    5. On subsequent invocations, the script checks to see if the local copies are current, -and if not, redownloads them. In principle, the executable could be downloaded as a compressed -file, which would be more efficient. For the case where -SIZE is medium, the action is -also inefficient, as it downloads everything if anything has changed.

    - -

    6. The script puts the executable in ~/bin and the datasets in -~/enclone. These directories are created if they don't already exist.

    - -

    7. The following step makes it so you don't have to type -~/bin/enclone every time you want to run it, and can instead type just -enclone. To enable this, -if ~/bin is not in your path, the script adds a line to -.bash_profile or .profile that makes ~/bin first in -your path. (Which file is used depends on the version of Linux that you're using.) If you -want, when the script is done, you can manually tidy up the file to make it more readable.

    - -

    Questions? You can email us at -enclone@10xgenomics.com.

    - - - diff --git a/pages/auto/mait_example.html b/pages/auto/mait_example.html deleted file mode 100644 index 4171ae7db..000000000 --- a/pages/auto/mait_example.html +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - -enclone output - - - -
    [1] GROUP = 1 CLONOTYPES = 1 CELLS
    -
    -[1.1] CLONOTYPE = 1 CELLS
    -┌─────────────┬──────────────────────────────┬──────────────────────────────┐
    -│             │  CHAIN 1                     │  CHAIN 2                     │
    -│             │  600|TRBV20-1 ◆ 540|TRBJ1-1  │  458.1.1|TRAV1-2 ◆ 423|TRAJ33│
    -│             ├──────────────────────────────┼──────────────────────────────┤
    -│             │  111111111111                │    111111111111              │
    -│             │  000111111111                │  5 000000111111              │
    -│             │  789012345678                │  9 456789012345              │
    -│             │  ════CDR3════                │    ════CDR3════              │
    -│reference    │  ◦◦◦◦◦◦◦◦◦◦◦FP ◦◦◦◦◦◦◦◦◦LIW              │
    -│donor ref    │  ◦◦◦◦◦◦◦◦◦◦◦FP ◦◦◦◦◦◦◦◦◦LIW              │
    -├─────────────┼──────────────────────────────┼──────────────────────────────┤
    -│#  n   mait    ............  u  const        . ............  u  const    │
    -│1  1  𝝰gj𝝱g  │  CSAGQGDTEAFF  5  TRBC1      │  P CAVMDSNYQLIW  1  TRAC     │
    -└─────────────┴──────────────────────────────┴──────────────────────────────┘
    -
    - - - diff --git a/pages/auto/plot.html b/pages/auto/plot.html deleted file mode 100644 index 37f9f6336..000000000 --- a/pages/auto/plot.html +++ /dev/null @@ -1,149 +0,0 @@ - - - - - - -honeycomb plots - - - - - - - - - - - -
    -enclone banner - -

    honeycomb plots

    - -

    -enclone can create a "honeycomb" plot showing each clonotype as a cluster of dots, one per cell. -

    - -

    enclone provides four ways to assign colors in such a plot. We describe them in order of -precedence, i.e. color data for the first will be used if provided, etc.

    - -

    -Hint. -Use the MIN_CELLS option -(see enclone help filter) to exclude tiny clonotypes, -which would otherwise crowd the image and make plotting very slow. -

    - -
    - -
    1. The first way is to use the argument -
    PLOT="filename,sample1->color1,...,samplen->colorn"
    -which creates an svg file of the given name, and assigns the given colors to the given samples. -Unspecified samples will be black. - -

    Example: -enclone BCR=123085:123089 MIN_CELLS=10 PLOT="plot.svg,s1->blue,s2->red" NOPRINT -LEGEND=blue,123085,red,123089

    - -

    Note the colon between 123085 and 123089. This tells enclone -that the two datasets are different samples from the same donor. This is not actually true, as -the two datasets are from the same sample, but is needed to plot in this way.

    - -samples honeycomb plot - -

    - -There is another example on the main enclone page, based on pre- and post-vaccination -samples. - -

    - -
    - -
    2. The second way is to provide simply -
    PLOT=filename
    -on the command line, and then provide the color field in the CSV defined by the META -option. This assigns a color to each dataset. - -

    - -
    - -
    3. The third way is to use the simple -PLOT specification, and assign a color to -each barcode using the bc field for META. - -

    - -
    - -
    4. The fourth way is -PLOT_BY_ISOTYPE=filename. This plots by heavy chain -constant region name and -labels accordingly. (This only makes sense for BCR.) Some cells may be labeled "unassigned", for -one of three reasons: (1) no heavy chain was found; (2) no constant region was found; (3) two -heavy chains were found and had conflicting constant region names. Running with -MIN_CHAINS_EXACT=2 -is usually a good idea to avoid noise coming from cells for which only a light chain was detected. - Currently a maximum of 12 constant region names is allowed. Let us know if you have more and we -will fix this. Note that PLOT_BY_ISOTYPE cannot be used with PLOT or -LEGEND. - -

    Example: enclone BCR=123085,123089 MIN_CELLS=5 MIN_CHAINS_EXACT=2 NOPRINT -PLOT_BY_ISOTYPE=plot.svg

    - -isotype honeycomb plot - -

    - -
    - -

    -The colors should be valid colors for use in an svg file. They can be named colors like red or -blue (see here for a full list) -or a hex specification like #00FFFF for aqua. The full color description for svg is -here. -

    - -

    -enclone also recognizes the color abbreviations @1, ..., @6, which refer -to enclone's color blind friendly palette -(see enclone help color). -

    - -

    -Each cell is shown as a disk having the given color, and each clonotype is shown as a cluster of -these disks, which are positioned at random. The filename argument may be "stdout". -

    - -

    -To add a legend to the graph, add the argument LEGEND to your command line. This -will give you an -auto-generated legend. You can also customize the legend by adding an argument of the form -LEGEND=color1,"text1",...,colorn,"textn" -to the command line. -

    - - - diff --git a/pages/auto/tree.html b/pages/auto/tree.html deleted file mode 100644 index 792f7623e..000000000 --- a/pages/auto/tree.html +++ /dev/null @@ -1,189 +0,0 @@ - - - - - - -making phylogenetic trees - - - - - - - - - - - -
    -enclone banner - -

    Making phylogenetic trees

    - -

    enclone provides several mechanisms for creating, displaying, and exporting a phylogenetic -tree for each -clonotype. These are initial mechanisms, which are likely to be expanded and/or improved over -time in response to feedback. The initial implementation is inspired by the Levenshtein-NJ method described by Yermanos et al. 2017. For all of these mechanisms, we recommend using the argument -COMPLETE to remove exact subclonotypes that are missing 1 or more chains.

    - -
    - -

    Method 1. -This method is invoked using the argument TREE, or TREE=const, to -label leaves by heavy chain constant region names (N.B. If more than one heavy chain is present, -we separate their constant region names by "+"). -The method first defines the distance between any two exact subclonotypes to be their -Levenshtein distance. We then add a root "virtual" exact subclonotype which equals the donor -reference away from the recombination region and which is undefined within that region (i.e. a -germline-reverted exact clonotype without the junction). The distance from the root to any -actual exact subclonotype is the Levenshtein distance, away from the region of recombination. -

    - -

    -Next a tree is created from these data using the -neighbor joining algorithm. -This sometimes yields negative distances, which we change to zero. We have only observed -such negative distances on the edge emanating from the root. -

    - -

    Note that for a given clonotype, the neighbor joining algorithm is -O(n^3), where n is the number of exact subclonotypes in the clonotype. Thus -for sample types having highly complex clonotypes (e.g. with ~1000 subclonotypes), the -algorithm will be very slow. We have only observed this in a lymphoma sample, and of -course for such cases, the tree would be so large that it would be difficult to do anything with it. -

    - -

    Finally, the tree is visualized using plain text, as shown in the example below. The -added field dref shows the distance of each exact subclonotype from the -donor reference, away from the recombination region.

    - -enclone BCR=123085 TREE COMPLETE CDR3=CARDQNFDESSGYDAFDIW LVARSP=dref - -
    [1] GROUP = 1 CLONOTYPES = 42 CELLS
    -
    -[1.1] CLONOTYPE = 42 CELLS
    -┌──────────────┬────────────────────────────────────────────────────────────┬───────────────────────────────────────┐
    -│              │  CHAIN 1                                                   │  CHAIN 2                              │
    -│              │  159|IGHV3-7 ◆ 53|IGHJ3                                    │  379|IGLV5-37 ◆ 316|IGLJ3             │
    -│              ├────────────────────────────────────────────────────────────┼───────────────────────────────────────┤
    -│              │                       11 1111111111111111111               │           11 11111111111              │
    -│              │  23344445667777788999901 1111112222222222333               │  23556679911 11111122222              │
    -│              │  22324893380156725357903 4567890123456789012               │  17068902403 45678901234              │
    -│              │                          ════════CDR3═══════               │              ════CDR3═══              │
    -│reference     │  LPGAGSSSLNKQEKYVRANLLQ◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦WVRSYLYYAAAY CMIW◦◦◦◦◦◦◦              │
    -│donor ref     │  LPGAGSSSLNKQEKYVRANLLQ◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦WVRSYLYYAAAY CMIW◦◦◦◦◦◦◦              │
    -├──────────────┼────────────────────────────────────────────────────────────┼───────────────────────────────────────┤
    -│#    n  dref    xxxxxxxxxxxxxxxxxxxxxxx .x......x........x.     u  const    xxxxxxxxxxx ...........      u  const│
    -│1   10     8  │  LPGAGSSSPNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW  2795  IGHG1  │  VRGYLYYAAAY CMIWPSNAWVF  14994  IGLC2│
    -│2    8     9  │  LPGAGSSSLNKEEKYMRANLLQY CARDQNFDESSGYDAFDIW  3944  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF  12199  IGLC2│
    -│3    7     9  │  LPGAKSNSLNKEQKYVRANLLQY CARDQNFDESSGYDAFDIW  2314  IGHG1  │  VRSYLYYTAAY CMIWPSNAWVF  10274  IGLC2│
    -│4    5     4  │  LPGAGSSSLNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW    11  IGHG1  │  VRSYLYYAGAY CMIWPSNAWVF     26  IGLC2│
    -│5    2     8  │  LPGAGSSSLNKEEKYMRANLLQY CARDQNFDESSGYDAFDIW  1290  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF   6825  IGLC2│
    -│6    1     7  │  LPGAGSNSLNKEEIYVRANLLEY CTRDQNFDESSGYDAFDIW  4407  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF  14507  IGLC2│
    -│7    1     3  │  LPGAGSSSLNKEEKYVRANLLQY CARDQNFDDSSGYDAFDIW  3893  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF  15622  IGLC2│
    -│8    1     3  │  LPGAGSSSLNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW  3302  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF   5256  IGLC2│
    -│9    1     9  │  LPGAGSSSPNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW  3067  IGHG1  │  VRGYLYYAAAY CMIWPSNAWVF   6429  IGLC2│
    -│10   1    13  │  LPGAGRNSLNKEEKYVRGNLLQY CARDQNFDESSGYDAFDIW  2724  IGHG3  │  VRGYLYYAAAY CMIWPSNAWVF   5775  IGLC2│
    -│11   1     9  │  LPGAGSSSLNKEEKYMRANLLQY CARDQNFDESSGYDAFDIW  2504  IGHA1  │  VRSYLYYAAAY CMIWPSNAWVF  14551  IGLC2│
    -│12   1     7  │  LPGAGSSSLNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW   404  IGHG1  │  VRGYLYYAAAY CMIWPSNAWVF   3456  IGLC2│
    -│13   1     1  │  LPGAGSSSLNKQEKYVRANLLQY CARDQNFDESSGYDAFDIW   136  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF   1023  IGLC2│
    -│14   1     9  │  LPGAGSNSLNKEEIYVRANLLEY CTRDQNFDESSGYDAFDIW    96  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF   1762  IGLC2│
    -│15   1     6  │  LPGAGSSSPNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW    27  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF    153  IGLC2│
    -└──────────────┴────────────────────────────────────────────────────────────┴───────────────────────────────────────┘
    -
    -•
    -╚═ • [0.00]
    -   ╠════════ 13 [1.33]
    -   ╚════ • [0.67]
    -         ╠══ • [0.33]
    -         ║   ╠═════════════════════════ • [4.40]
    -         ║   ║                          ╠═══════ • [1.28]
    -         ║   ║                          ║        ╠════ • [0.61]
    -         ║   ║                          ║        ║     ╠═══ 1 [0.50]
    -         ║   ║                          ║        ║     ╚═════════ 9 [1.50]
    -         ║   ║                          ║        ╚══ 12 [0.39]
    -         ║   ║                          ╚════ 15 [0.72]
    -         ║   ╚═══════════ • [1.97]
    -         ║                ╠══════ 4 [1.00]
    -         ║                ╚═ 8 [0.00]
    -         ╚═ • [0.14]
    -            ╠═══════════════ 7 [2.61]
    -            ╚═══ • [0.59]
    -                 ╠═════════════════════ • [3.60]
    -                 ║                      ╠═══════════════════════ • [3.91]
    -                 ║                      ║                        ╠════ • [0.77]
    -                 ║                      ║                        ║     ╠═ 2 [0.00]
    -                 ║                      ║                        ║     ╚═ 11 [0.00]
    -                 ║                      ║                        ╚═ 5 [0.23]
    -                 ║                      ╚═══════════════════════════════════════════════ 10 [8.09]
    -                 ╚═ • [0.15]
    -                    ╠════════════════════════════════════════════ 3 [7.68]
    -                    ╚═════════════════════════════════════ • [6.32]
    -                                                           ╠═ 6 [0.00]
    -                                                           ╚════════════ 14 [2.00]
    -
    - -
    - -

    Method 2. -This method is invoked using the argument NEWICK, and is exactly like -method 1, except that it outputs the resulting tree in -Newick format.

    - -

    For example, running enclone BCR=123085 NEWICK COMPLETE CDR3=CARDSWYSSGRNTPNWFDPW -will generate the following Newick tree for the largest clonotype:

    - -

    ((((3:0.00,20:4.00)I5:1.00,11:0.00)I7:0.69,(5:0.82,((13:0.00,14:3.00)I4:4.89,(((((((2:0.00,19:0.00)I1:0.95,4:0.05)I2:0.50,18:0.50)I3:5.96,(7:0.00,12:0.00)I8:0.54)I12:0.32,((6:0.83,9:0.17)I6:0.94,10:0.06)I9:0.80) -I15:0.13,((16:0.95,17:0.05)I10:0.03,21:2.98)I11:0.93)I17:0.04,(1:0.00,(8:1.00,15:0.00)I20:0.00)I19:0.02)I18:0.10)I16:0.11)I14:0.18)I13:0.00)0;

    - -

    This tree can be copied and pasted or otherwise exported to be viewed in tools such as iTOL.

    - -
    - -

    Method 3. -This method is invoked using the argument CLUSTAL_DNA=filename or -CLUSTAL_AA=filename, where -filename can be stdout, and otherwise must have the extension -".tar". It does not generate a tree, but instead -generates a CLUSTALW alignment for each clonotype (using either bases or -amino acids), with one sequence for each exact subclonotype. -This sequence is the concatenation of the per-chain sequences, with the appropriate number -of gap (-) characters shown if a chain is missing. As above, we recommend using the -COMPLETE option to avoid this happening.

    - -

    If filename is stdout, then the alignments are printed out -after each clonotype picture. Otherwise, a tar file is generated, which if untarred yields -one file per clonotype. To avoid confusion, it would be best for filename to have -the suffix .tar. We also recommend using MIN_CELLS=... or some other -argument to restrict the number of files that would be generated upon untarring.

    - -

    This method can be used to provide input to another program that will generate a tree.

    - -
    - -

    Method 4. -This method is invoked using the argument PHYLIP_DNA=filename or -PHYLIP_AA=filename, and is just like method 3, except for the output format.

    - - - diff --git a/pages/auto/tree_example.html b/pages/auto/tree_example.html deleted file mode 100644 index 570cdc0bc..000000000 --- a/pages/auto/tree_example.html +++ /dev/null @@ -1,100 +0,0 @@ - - - - - - -enclone output - - - -

    [1] GROUP = 1 CLONOTYPES = 42 CELLS
    -
    -[1.1] CLONOTYPE = 42 CELLS
    -┌──────────────┬────────────────────────────────────────────────────────────┬───────────────────────────────────────┐
    -│              │  CHAIN 1                                                   │  CHAIN 2                              │
    -│              │  159|IGHV3-7 ◆ 53|IGHJ3                                    │  379|IGLV5-37 ◆ 316|IGLJ3             │
    -│              ├────────────────────────────────────────────────────────────┼───────────────────────────────────────┤
    -│              │                       11 1111111111111111111               │           11 11111111111              │
    -│              │  23344445667777788999901 1111112222222222333               │  23556679911 11111122222              │
    -│              │  22324893380156725357903 4567890123456789012               │  17068902403 45678901234              │
    -│              │                          ════════CDR3═══════               │              ════CDR3═══              │
    -│reference     │  LPGAGSSSLNKQEKYVRANLLQ◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦WVRSYLYYAAAY CMIW◦◦◦◦◦◦◦              │
    -│donor ref     │  LPGAGSSSLNKQEKYVRANLLQ◦ ◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦WVRSYLYYAAAY CMIW◦◦◦◦◦◦◦              │
    -├──────────────┼────────────────────────────────────────────────────────────┼───────────────────────────────────────┤
    -│#    n  dref    xxxxxxxxxxxxxxxxxxxxxxx .x......x........x.     u  const    xxxxxxxxxxx ...........      u  const│
    -│1   10     8  │  LPGAGSSSPNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW  2795  IGHG1  │  VRGYLYYAAAY CMIWPSNAWVF  14994  IGLC2│
    -│2    8     9  │  LPGAGSSSLNKEEKYMRANLLQY CARDQNFDESSGYDAFDIW  3944  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF  12199  IGLC2│
    -│3    7     9  │  LPGAKSNSLNKEQKYVRANLLQY CARDQNFDESSGYDAFDIW  2314  IGHG1  │  VRSYLYYTAAY CMIWPSNAWVF  10274  IGLC2│
    -│4    5     4  │  LPGAGSSSLNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW    11  IGHG1  │  VRSYLYYAGAY CMIWPSNAWVF     26  IGLC2│
    -│5    2     8  │  LPGAGSSSLNKEEKYMRANLLQY CARDQNFDESSGYDAFDIW  1290  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF   6825  IGLC2│
    -│6    1     7  │  LPGAGSNSLNKEEIYVRANLLEY CTRDQNFDESSGYDAFDIW  4407  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF  14507  IGLC2│
    -│7    1     3  │  LPGAGSSSLNKEEKYVRANLLQY CARDQNFDDSSGYDAFDIW  3893  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF  15622  IGLC2│
    -│8    1     3  │  LPGAGSSSLNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW  3302  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF   5256  IGLC2│
    -│9    1     9  │  LPGAGSSSPNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW  3067  IGHG1  │  VRGYLYYAAAY CMIWPSNAWVF   6429  IGLC2│
    -│10   1    13  │  LPGAGRNSLNKEEKYVRGNLLQY CARDQNFDESSGYDAFDIW  2724  IGHG3  │  VRGYLYYAAAY CMIWPSNAWVF   5775  IGLC2│
    -│11   1     9  │  LPGAGSSSLNKEEKYMRANLLQY CARDQNFDESSGYDAFDIW  2504  IGHA1  │  VRSYLYYAAAY CMIWPSNAWVF  14551  IGLC2│
    -│12   1     7  │  LPGAGSSSLNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW   404  IGHG1  │  VRGYLYYAAAY CMIWPSNAWVF   3456  IGLC2│
    -│13   1     1  │  LPGAGSSSLNKQEKYVRANLLQY CARDQNFDESSGYDAFDIW   136  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF   1023  IGLC2│
    -│14   1     9  │  LPGAGSNSLNKEEIYVRANLLEY CTRDQNFDESSGYDAFDIW    96  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF   1762  IGLC2│
    -│15   1     6  │  LPGAGSSSPNKEEKYVRANLLQY CARDQNFDESSGYDAFDIW    27  IGHG1  │  VRSYLYYAAAY CMIWPSNAWVF    153  IGLC2│
    -└──────────────┴────────────────────────────────────────────────────────────┴───────────────────────────────────────┘
    -
    -•
    -╚═ • [0.00]
    -   ╠════════ 13 [1.33]
    -   ╚════ • [0.67]
    -         ╠══ • [0.33]
    -         ║   ╠═════════════════════════ • [4.40]
    -         ║   ║                          ╠═══════ • [1.28]
    -         ║   ║                          ║        ╠════ • [0.61]
    -         ║   ║                          ║        ║     ╠═══ 1 [0.50]
    -         ║   ║                          ║        ║     ╚═════════ 9 [1.50]
    -         ║   ║                          ║        ╚══ 12 [0.39]
    -         ║   ║                          ╚════ 15 [0.72]
    -         ║   ╚═══════════ • [1.97]
    -         ║                ╠══════ 4 [1.00]
    -         ║                ╚═ 8 [0.00]
    -         ╚═ • [0.14]
    -            ╠═══════════════ 7 [2.61]
    -            ╚═══ • [0.59]
    -                 ╠═════════════════════ • [3.60]
    -                 ║                      ╠═══════════════════════ • [3.91]
    -                 ║                      ║                        ╠════ • [0.77]
    -                 ║                      ║                        ║     ╠═ 2 [0.00]
    -                 ║                      ║                        ║     ╚═ 11 [0.00]
    -                 ║                      ║                        ╚═ 5 [0.23]
    -                 ║                      ╚═══════════════════════════════════════════════ 10 [8.09]
    -                 ╚═ • [0.15]
    -                    ╠════════════════════════════════════════════ 3 [7.68]
    -                    ╚═════════════════════════════════════ • [6.32]
    -                                                           ╠═ 6 [0.00]
    -                                                           ╚════════════ 14 [2.00]
    -
    - - - diff --git a/pages/auto/windows.html b/pages/auto/windows.html deleted file mode 100644 index 26321b910..000000000 --- a/pages/auto/windows.html +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - -enclone on Windows computers - - - - - - - - - - - -
    -enclone banner - -

    Using enclone on Windows computers

    - -We are attempting to build enclone for Windows. This may take some time. -In the meantime, we can suggest two workarounds: - -

    -1. Use Windows Subsystem for Linux (WSL) on your Windows computer. You can get Ubuntu Linux for -free here, and we're told that current PCs come with -this preinstalled. - -

    -2. Set up your Windows computer for Linux dual boot. This is similar but generally much less -satisfactory, since the computer has to be rebooted between uses of Windows and Linux. - -

    -Importantly, we have not tested either solution ourselves, but are relaying what has been -suggested to us by others. - -

    -In either case, while you are using Linux on your Windows computer, you will have access to -both the files on the Linux side and the files on the Windows side. The files on the -Windows side may be found in the directory /mnt/c, and you can explore what is -there by typing ls /mnt/c and then enlarging the path. - - - diff --git a/pages/compile.html.src b/pages/compile.html.src deleted file mode 100644 index f4f51de10..000000000 --- a/pages/compile.html.src +++ /dev/null @@ -1,72 +0,0 @@ -enclone compilation - - - -
    -enclone banner - -

    Compilation

    -

    You should only have to do this if you want to -experimentally modify the #enclone software.

    -
      -
    1. -

      For now, you can run on an x86-64 linux server or a Mac.

      -
    2. -
    3. -

      You need to have the Rust compiler installed. Detailed instructions on how to do this -can be found here. You can -confirm that you -have successfully installed the Rust compiler by running rustc --version.

      -
    4. - -
    5. -

      Clone the enclone repository and build enclone using Cargo (which -comes with Rust) by running:

      -
      git clone --depth=1 git@github.com:10XGenomics/enclone.git
      -cd enclone
      -cargo b
      - -(The --depth=1 part avoids downloading history, which is faster, but if you want -the history, exclude that argument.) - -

      and then add the full path of enclone/target/debug to your PATH. - -

      -Compilation takes 8-10 minutes on a 2017 MacBook Pro with a dual-core i7 and 5-7 minutes on a -similar Linux machine. -

      - -
    6. - -
    - -
      -
    1. -

      Copy the directory enclone/test/inputs to somewhere you can point to, or just leave it -where it is. These are test data you can play with; you can also supply your own output -from a Cell Ranger immune profiling run (so long as there is an -all_contig_annotations.json output). -When you read the documentation at step 6, you'll get to a place where you put -PRE=enclone/test/inputs -or instead the path where your copied data reside. But you need to supply -PRE with a path that makes sense relative to your working directory.

      -
    2. -
    3. -

      Type enclone help, and read the terminal setup instructions there.

      -
    4. -
    5. -

      Type enclone and study the documentation shown there.

      -
    6. -
    7. -

      If you want to run the built-in tests, type

      - -
      cargo t
      - -
    8. -
    - -

    If you have problems, please write to us at -#enclone@10xgenomics.com.

    - - - diff --git a/pages/dang_i_cannot_install.html.src b/pages/dang_i_cannot_install.html.src deleted file mode 100644 index 70ddbfa83..000000000 --- a/pages/dang_i_cannot_install.html.src +++ /dev/null @@ -1,28 +0,0 @@ -enclone installation problems - - - -
    -enclone banner - -

    #enclone installation troubleshooting

    - -

    The purpose of this page is to provide guidance in case the installation script fails.

    - -

    We know of one case where the script might fail.

    - -
      -
    1. The command curl is not installed on your computer. In that case, -you should have the command wget, and can use that instead. To do so, run the -following command: -
      wget -nv bit.ly/enclone_install -O - | sh -s SIZE
      -where SIZE -is as described on the main #enclone page. -
    2. -
    - -

    If you have a different problem, please let us know by writing to us at -#enclone@10xgenomics.com.

    - - - diff --git a/pages/enclone.css b/pages/enclone.css deleted file mode 100644 index 22873ce33..000000000 --- a/pages/enclone.css +++ /dev/null @@ -1,119 +0,0 @@ -/* - DejaVuSansMono font (see https://dejavu-fonts.github.io) - We use this font because it works well for our clonotype box character diagrams and it's free. - The name is a bit of a misnomer because we try to use Menlo first. - Fallback scheme: - 1. Menlo as local; pretty but likely available only on Macs - 2. woff2 from CDN; good because woff2 is compact; why is it stored in deathbeds? - 3. truetype from CDN - 4. truetype as part of site; LGC is a subset but includes all that we use - 5. Courier New as local; poorly suited but widely available -*/ - -@font-face { - font-family: "DejaVuSansMono"; - src: - local("Menlo Regular"), - url('https://cdn.jsdelivr.net/npm/@deathbeds/jupyterlab-font-dejavu-sans-mono@1.0.0/style/fonts/DejaVuSansMono.woff2') - format("woff2"), - url('https://cdn.jsdelivr.net/npm/dejavu-fonts-ttf@2.37.3/ttf/DejaVuSansMono.ttf') - format("truetype"), - url('../fonts/DejaVuLGCSansMono.ttf') format("truetype"), - local("Courier New"); -} - -@font-face { - font-family: "DejaVuSansMono"; - font-weight: bold; - src: - local("Menlo Bold"), - url('https://cdn.jsdelivr.net/npm/@deathbeds/jupyterlab-font-dejavu-sans-mono@1.0.0/style/fonts/DejaVuSansMono-Bold.woff2') - format("woff2"), - url('https://cdn.jsdelivr.net/npm/dejavu-fonts-ttf@2.37.3/ttf/DejaVuSansMono-Bold.ttf') - format("truetype"), - url('../fonts/DejaVuLGCSansMono-Bold.ttf') format("truetype"), - local("Courier New Bold"); -} - -code { - font-family: "DejaVuSansMono"; - font-size: 90%; - background: rgb(92%,92%,92%); - padding-left: 5px; - padding-right: 5px; - padding-top: 1px; - padding-bottom: 1px; -} - -html { - font-size: 95%; - font-family: "Arial"; - line-height: 140%; -} - -h1 { - color: black; - line-height: 30px; -} - -body { - max-width: 1080px; - color: rgb(36, 41, 46); - margin: 0 auto 50px; - padding: 0 
25px; - box-sizing: border-box; -} - -hr { - height: .25em; - padding: 0; - margin: 0px 0; - background-color: #e1e4e8; - border: 0; -} - -p { - margin-top: 20px; - margin-bottom: 20px; -} - -ul { - display: block; - list-style-type: disc; - margin-top: 10px; - margin-bottom: 10px; - margin-left: 0; - margin-right: 0; - padding-left: 20px; -} - -ol { - display: block; - list-style-type: decimal; - margin-top: 10px; - margin-bottom: 10px; - margin-left: 0; - margin-right: 0; - padding-left: 20px; -} - -li { - margin: 5px 0; -} - -/* Links are shown in the same blue color, whether visited or not. There is no underlining at all. - We agreed that this was optimal. Don't know if the second link definition is redundant. -*/ - -a:link { - text-decoration: none; - color: #0071D9; -} - -a:link { - color: #0071D9; -} - -a:visited { - color: #0071D9; -} diff --git a/pages/enclone_css_v2.css b/pages/enclone_css_v2.css deleted file mode 100644 index 22873ce33..000000000 --- a/pages/enclone_css_v2.css +++ /dev/null @@ -1,119 +0,0 @@ -/* - DejaVuSansMono font (see https://dejavu-fonts.github.io) - We use this font because it works well for our clonotype box character diagrams and it's free. - The name is a bit of a misnomer because we try to use Menlo first. - Fallback scheme: - 1. Menlo as local; pretty but likely available only on Macs - 2. woff2 from CDN; good because woff2 is compact; why is it stored in deathbeds? - 3. truetype from CDN - 4. truetype as part of site; LGC is a subset but includes all that we use - 5. 
Courier New as local; poorly suited but widely available -*/ - -@font-face { - font-family: "DejaVuSansMono"; - src: - local("Menlo Regular"), - url('https://cdn.jsdelivr.net/npm/@deathbeds/jupyterlab-font-dejavu-sans-mono@1.0.0/style/fonts/DejaVuSansMono.woff2') - format("woff2"), - url('https://cdn.jsdelivr.net/npm/dejavu-fonts-ttf@2.37.3/ttf/DejaVuSansMono.ttf') - format("truetype"), - url('../fonts/DejaVuLGCSansMono.ttf') format("truetype"), - local("Courier New"); -} - -@font-face { - font-family: "DejaVuSansMono"; - font-weight: bold; - src: - local("Menlo Bold"), - url('https://cdn.jsdelivr.net/npm/@deathbeds/jupyterlab-font-dejavu-sans-mono@1.0.0/style/fonts/DejaVuSansMono-Bold.woff2') - format("woff2"), - url('https://cdn.jsdelivr.net/npm/dejavu-fonts-ttf@2.37.3/ttf/DejaVuSansMono-Bold.ttf') - format("truetype"), - url('../fonts/DejaVuLGCSansMono-Bold.ttf') format("truetype"), - local("Courier New Bold"); -} - -code { - font-family: "DejaVuSansMono"; - font-size: 90%; - background: rgb(92%,92%,92%); - padding-left: 5px; - padding-right: 5px; - padding-top: 1px; - padding-bottom: 1px; -} - -html { - font-size: 95%; - font-family: "Arial"; - line-height: 140%; -} - -h1 { - color: black; - line-height: 30px; -} - -body { - max-width: 1080px; - color: rgb(36, 41, 46); - margin: 0 auto 50px; - padding: 0 25px; - box-sizing: border-box; -} - -hr { - height: .25em; - padding: 0; - margin: 0px 0; - background-color: #e1e4e8; - border: 0; -} - -p { - margin-top: 20px; - margin-bottom: 20px; -} - -ul { - display: block; - list-style-type: disc; - margin-top: 10px; - margin-bottom: 10px; - margin-left: 0; - margin-right: 0; - padding-left: 20px; -} - -ol { - display: block; - list-style-type: decimal; - margin-top: 10px; - margin-bottom: 10px; - margin-left: 0; - margin-right: 0; - padding-left: 20px; -} - -li { - margin: 5px 0; -} - -/* Links are shown in the same blue color, whether visited or not. There is no underlining at all. - We agreed that this was optimal. 
Don't know if the second link definition is redundant. -*/ - -a:link { - text-decoration: none; - color: #0071D9; -} - -a:link { - color: #0071D9; -} - -a:visited { - color: #0071D9; -} diff --git a/pages/expanded.html.src b/pages/expanded.html.src deleted file mode 100644 index 798ff13d1..000000000 --- a/pages/expanded.html.src +++ /dev/null @@ -1,126 +0,0 @@ -illusory clonotype expansions - - - -
    -enclone banner - -

    Detecting illusory clonotype expansions

    - -

    -Please read this! This page was written before we added two major filtering steps, based on -UMI counts, which completely annihilate the particular illusory expansion described here. The -reason we left the page here is that the approach used to analyze the expansion may have -utility for other datasets. To reproduce the actual results shown here, you will need to add -to each #enclone command the arguments NUMI and NUMI_RATIO that turn off -the added filter.

    - -

    This page explains the origin of certain illusory clonotype expansions, and exhibits one example -of how to detect them.

    -

    These expansions are known to occur occasionally (see below for one possible mechanism), and -we hypothesize that they arise when an individual cell disintegrates or leaks. This leaves -fragments that seed multiple GEM partitions, producing a clonotype that appears larger than -its true size.

    -

    We believe that events of this type usually originate from plasma or plasmablast B cells. We -thus focus on B cells in this vignette. However with obvious changes, the same methods also apply -to T cells.

    -

    Disintegration might occur during or after preparation of the sample. One -way to document such an event would be to create two libraries from a single tube of cells. If -the clonotype is large and appears in only one of two libraries, one could be reasonably certain -that a disintegration event occurred during or after cells were drawn from the tube. This method -could not be used to detect disintegration events occurring prior to that point.

    -

    Here we show that with the aid of gene expression data, illusory clonotype expansions can -generally be detected, even if only a single library was made. The easier case would be a sample -consisting of pure B cells. The case where one has a mix of cell types is more challenging because -a GEM can contain both a B cell fragment, plus a cell of a different type, and thus appear to have -a normal level of gene expression, and no evidence of mixing from the VDJ assay either. We -therefore focus on the case of samples that contain a mixture of cell types. - -
    -cell bits -
    - -To that end, we show an example, using two libraries obtained from a single tube of PBMC cells, -obtained from a healthy human donor. The two libraries contain 7287 and 9559 cells, respectively, -of which ~12% are B cells. All the data shown here are part of the large dataset -package described in the -download section of the main #enclone page.

    - -
    enclone BCR=128037,128040 NCROSS
    -
    -

    The NCROSS option instructs #enclone to not filter out expanded clonotypes -that appear in only one dataset arising from the same sample (and which based on their sizes are -highly improbable). Normally one would want this filtering, but these clonotypes are exactly what -we wish to see now! Here is the top clonotype:

    - -#include pages/auto/illusory1.html - -

    If we do not use the NCROSS option, and search for the clonotype using the heavy -chain CDR3 sequence, we see just one cell (the others having been filtered out):

    -
    enclone BCR=128037,128040 CDR3=CARGGTTTYFISW
    -
    - -#include pages/auto/illusory2.html - -

    This is a good answer, but only works if libraries were made from two separate draws of cells. -Now suppose that both a VDJ and a GEX library have been made, from a single draw of cells. (And -we henceforth ignore the data made from the other draw of cells, useful though it is.)

    -
    enclone BCR=128040 GEX=127801 CDR3=CARGGTTTYFISW
    -
    - -#include pages/auto/illusory3.html - -

    Now we see fewer cells. This is because the default behavior of #enclone is to filter out -cells called by the VDJ pipeline that are not also called by the GEX pipeline. Most of these -would have consisted of "nearly empty drops", GEMs containing just a B cell fragment.

    -

    Now we add the option PER_CELL, causing data for each cell to be displayed, and we -also add two -fields to the display. One is gex, the normalized count of gene expression UMIs, -and the other is a field cred (short for "credibility"), that is more complicated. We -will also hide the onesie (single chain) cells.

    -
    enclone BCR=128040 GEX=127801 CDR3=CARGGTTTYFISW PER_CELL LVARSP=gex,cred CHAINS_EXACT=2
    -
    - -#include pages/auto/illusory4.html - -

    The field cred is a measure of the extent to which cells having gene expression -similar to a -given putative B cell are themselves B cells. In more detail, first for any datasets, let n be -the number of VDJ cells that are also GEX cells. Now for a given cell, we find the n GEX cells -that are closest to it in PCA space, and report the percent of those that are also VDJ cells.
    -This is cred. The closer this number is to 100, the more the given cell looks like a -typical B cell. Conversely, a very low number makes the given cell appear suspect.

    -

    The values of cred vary considerably from dataset to dataset, requiring somewhat -different interpretation. We show the distribution for this one dataset:

    - -

    -cred_gex_dist -

    - -

    Thus the values of the cells in the reported clonotype are very low indeed, and almost all -highly suspect. Probably the clonotype originated from a single cell, which broke up into one -major piece (the one for barcode CTGGTCTAGCTGCCCA-1), and many smaller pieces. These -smaller pieces reside in GEMs that may or may not contain an actual intact cell. In fact, many of -the cells are detected as T cells (using TCR data 128024 from the same cell draw). We -can mark these cells in the same display using the command - -

    enclone BCR=128040 GEX=127801 BC=128024_cells.csv CDR3=CARGGTTTYFISW PER_CELL LVARSP=gex,cred,T CHAINS_EXACT=2
    -
    - -where the file 128024_cells.csv is a CSV file with header barcode,T -and having one line for each barcode in -128024/outs/cell_barcodes.json, e.g. AAACGGGAGAGAACAG-1,◯. -(We used the character ◯ as a value just because we liked it.) - -#include pages/auto/illusory5.html - -

    We thus conclude in this case that the clonotype is likely contaminated with many cells that -are not B cells, and in fact that the entire clonotype probably arose from a single true B cell. -In other examples we have looked at, there appear to be a few true B -cells, along with many that are not, either corresponding to other cell types or nearly empty GEMs.

    -

    Overall conclusion: illusory clonotypes are rare, and can generally be detected, either with -the aid of a second library made from the same lot of cells, or with gene expression data.

    - - - diff --git a/pages/fetching_test_datasets.html.keeping_but_not_using_now b/pages/fetching_test_datasets.html.keeping_but_not_using_now deleted file mode 100644 index ac71d8766..000000000 --- a/pages/fetching_test_datasets.html.keeping_but_not_using_now +++ /dev/null @@ -1,187 +0,0 @@ -fetching datasets for enclone - - - -
    -enclone banner - -

    Fetching test datasets

    - -

    Here are three methods that should do the same thing. First cd so that you're in your -home directory. Then do one of the following:

    -
      -
    1. -svn export https://github.com/10XGenomics/enclone/trunk enclone -
    2. -
    3. -curl -L https://github.com/10XGenomics/enclone/archive/master.tar.gz | tar zx enclone-master/test; mv enclone-master enclone -
    4. -
    5. -wget -O- https://github.com/10XGenomics/enclone/archive/master.tar.gz | tar zx enclone-master; mv enclone-master enclone -
    6. -
    - -

    Installing curl

    - -

    If, perchance, neither svn nor curl nor wget is installed on your computer, you will need -to install one of them. There are a few ways to do this depending on which flavor of Unix you use. Here are some ways to get curl onto your machine.

    - -

    Debian/Ubuntu

    - -

    For these machines, use one of the following:

    -
      -
    1. -apt-get install curl -
    2. -
    3. -sudo apt-get install curl -
    4. -
    5. -sudo apt install curl -
    6. -
    - -

    Mac OS X

    - -

    To get the latest curl on OS X, you will likely want to use the latest Homebrew:

    -
      -
    1. /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)"
    2. -
    3. brew install curl
    4. -
    - -

    CentOS/Fedora/RHEL

    - -

    For these machines, use the following:

    -

    yum install curl

    - -

    Other flavors

    - -

    If you're using another flavor of Unix, these may be helpful:

    -
      -
    1. -OpenSUSE: zypper install curl -
    2. -
    3. -ArchLinux: pacman -Sy curl -
    4. -
    - -

    Installing wget

    - -

    Have something against curl but don't know how to install wget? This section is for you.

    - -

    Debian/Ubuntu

    - -

    For these machines, use one of the following:

    -
      -
    1. -apt-get install wget -
    2. -
    3. -sudo apt-get install wget -
    4. -
    5. -sudo apt install wget -
    6. -
    - -

    Mac OS X

    - -

    To get the latest wget on OS X, you will likely want to use the latest Homebrew:

    -
      -
    1. /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)"
    2. -
    3. brew install wget
    4. -
    - -

    CentOS/Fedora/RHEL

    - -

    For these machines, use the following:

    -

    yum install wget

    - -

    Other flavors

    - -

    If you're using another flavor of Unix, these may be helpful:

    -
      -
    1. -OpenSUSE: zypper install wget -
    2. -
    3. -ArchLinux: pacman -Sy wget -
    4. -
    - -

    Installing svn

    - -

    If you'd rather use svn instead of wget or curl, feel free! If you don't know how to install svn, this section -should help you do so.

    - -

    Ubuntu

    - -

    For these machines, run the following commands in order:

    -
      -
    1. -apt-get install subversion -
    2. -
    3. -apt-get install libapache2-svn -
    4. -
    - -

    Debian

    - -

    For these machines, run the following commands in order:

    -
      -
    1. -apt-get install subversion -
    2. -
    3. -apt-get install libapache2-mod-svn -
    4. -
    - -

    Mac OS X

    - -

    To get the latest svn on OS X, you will likely want to use the latest Homebrew:

    -
      -
    1. /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)"
    2. -
    3. brew install subversion
    4. -
    - -

    Fedora

    - -

    For Fedora machines, run yum install subversion.

    - -

    CentOS/RHEL

    - -

    For these machines, run the following commands in order:

    -
      -
    1. -yum install subversion -
    2. -
    3. -yum install mod_dav_svn -
    4. -
    - -

    Other flavors

    - -

    If you're using another flavor of Unix, these may be helpful:

    -
      -
    1. -OpenSUSE: zypper install subversion and then zypper install subversion-server -
    2. -
    3. -ArchLinux: pacman -Sy subversion -
    4. -
    - -

    I still can't install curl, wget, or svn!

    - -

    If all else fails, maybe these instructions from xkcd will help:

    - -universal install script - -

    Well seriously, if you're really stuck, please write to us at enclone@10xgenomics.com.

    - - - diff --git a/pages/heuristics.html.src b/pages/heuristics.html.src deleted file mode 100644 index b4cf06490..000000000 --- a/pages/heuristics.html.src +++ /dev/null @@ -1,84 +0,0 @@ -enclone heuristics - - - -
    -enclone banner - -

    enclone heuristics

    - -

    This page is a start in describing the heuristics that #enclone uses. It will be gradually -expanded. See also enclone help how. The content here -is geeky and technical. -

    - -
    - -

    UMI filtering. #enclone filters out B cells having low UMI counts, relative to a baseline -that is determined for each dataset, according to a -heuristic described here, unless the argument NUMI is supplied, to turn off that -filter. - -

    The motivation for this filter is to mitigate illusory clonotype expansions arising from -fragmentation of plasma cells or other physical processes (not all fully understood). These -processes all result in "cells" having low UMI counts, many of which do not correspond to intact -real cells. Illusory clonotype expansions are generally infrequent, but occasionally cluster -in individual datasets.

    - -

    Nomenclature: for any cell, find the maximum UMI count for its zero or more heavy chains, -and the maximum for its light chains. The sum of these two maxima is -denoted umitot.

    - -

    The algorithm for this filter first establishes a baseline for the expected value of -umitot, for each dataset taken individually. To do this, all clonotypes having -exactly one cell and exactly one heavy and light chain each are examined. If there are fewer than -20 such cells, the filter is not applied to cells in that dataset. Otherwise, -let n_50% denote the median of the umitot values for the dataset, and let -n_10% denote the 10th percentile. Let -

    umin = min( n_10%, n_50% - 4 * sqrt(n_50%) ).
    -This is the baseline low value for umitot. The reason for having the second -part of the min is to prevent filtering in cases where UMI counts are sufficiently -low that Poisson variability could cause a real cell to appear fake.

    - -

    Next we scan each clonotype having at least two cells, and delete every cell having -umitot < umin, with the following qualifications: -

      -
    • Let k be the number of cells to be deleted in a clonotype having n -cells. Then we require that for a binomial distribution having p = 0.1, the -probability of observing k or more events in a sample of size n is -less than 0.01. The more cells are flagged in a clonotype, the more likely this -test is satisfied, which is the point of the test. -
    • -
    • If every cell in a clonotype would be deleted, then we find its exact subclonotype -having the highest sum for umitot, summing across its cells. Then we protect from -deletion the cell in this exact subclonotype having the highest umitot value. We -do this because in general even if a clonotype expansion did not occur, there was probably at -least a single bona fide cell that gave rise to it. -
    • -
    - -A better test could probably be devised that started from the expected distribution of UMI counts. -The test would trigger based on the number and improbability of low UMI counts. The current test -only considers the number of counts that fall below a threshold, and not their particular values. - -

    This UMI filter is carried out before most of the other filters.

    - -
    - -

    UMI ratio filtering. #enclone filters out B cells having low UMI counts, relative to -other UMI counts in a given clonotype, according to a -heuristic described here, unless the argument NUMI_RATIO is supplied, to turn off that -filter. - -

    First we mark a cell for possible deletion, if the VDJ UMI count for some chain of some other -cell is at least 500 times greater than the total VDJ UMI count for the given cell.

    - -

    Then we scan each clonotype having at least two cells, and delete every cell marked as above, -with the following qualification. -Let k be the number of cells to be deleted in a clonotype having n -cells. Then we require that for a binomial distribution having p = 0.1, the -probability of observing k or more events in a sample of size n is -less than 0.01.

    - - - diff --git a/pages/history.html.src b/pages/history.html.src deleted file mode 100644 index 6ba1eed86..000000000 --- a/pages/history.html.src +++ /dev/null @@ -1,145 +0,0 @@ -enclone history - - - -
    -enclone banner - -

    History

    - -

    This page provides a selective history of what was changed in #enclone and when.

    - -

    -We show changes that affect users like new features, changes to results, and the like. This -log starts with initial public availability. The complete history may be seen by cloning the -#enclone repo and typing git log. -

    - -

    Breaking changes are shown in red.

    - -

    -Please be aware that our workflow when we make changes is to automatically update the GitHub -site, including all the website pages and this page too. This happens in advance of -actually making a release (which might follow in a couple days). This means that the website -may describe features that are not yet available in a release (although they will be in the -source code that's available). We apologize for this asynchrony! Note however that the -command-line help that comes with your copy of #enclone will always match its behavior.

    - -
    - -

    -8/10/20: tweak the definition of the weak onesies filter so that it does not -delete single-cell clonotypes. -

    - -

    -8/7/20: add the ability to analyze alternate splicing using GEX data to characterize -UMIs as secreted or membrane, and display this information using lvars "sec" and "mem". See -enclone help lvars for limitations. -

    - -

    -8/5/20: -

      -
    1. A reference file is now required as part of the Cell Ranger outs directory, -if Cell Ranger version 4.0 or greater was used.
    2. -
    3. Deprecate MAX_SCORE and replace by MAX_LOG_SCORE, the - base 10 logarithm of it.
    4. -
    5. Add cvar cdr3_len
    6. -
    7. Add and document knobs that allow nearly all clonotype join filtering to be turned off. Please - see "enclone help how" and the end of "enclone help faq".
    8. -

      - -

      -6/24/20: -

        -
      1. Add support for iNKT and MAIT cells.
      2. -
      3. Make #enclone faster. This is most noticeable in cases where many GEX datasets are - provided as input.
      4. -
      -

      - -

      -6/19/20: -

        -
      1. We now use a data hierarchy of donor (top), origin, dataset (bottom), where an -origin is a set of 1 or more datasets from the same source (tube of cells, tissue, timepoint, etc.). -(This breaks previous invocations of META.) -
      2. -
      3. Improve the alternate (faster) internal storage structure for the GEX matrix -created using the option NH5 as described using the command -enclone help input. -This will speed things up, particularly for the case where several datasets are combined. If -you have already used the NH5 option for a given dataset, then the next time you -run #enclone on it, the file will be automatically rewritten. This would also apply to some -datasets obtained as part of the large download. -
      4. -
      -

      - -

      -6/17/20: -

        -
      1. Now TREE=const can be used to show a tree with heavy chain constant region - names attached to the leaves.
      2. -
      3. SEG and SEGN are now cumulative, so that multiple instances may - be used to progressively filter.
      4. -
      5. The clonotype joining heuristic parameters MAX_SCORE and - MAX_CDR3_DIFFS are now accessible.
      6. -
      -

      - -

      -6/10/20: -

        -
      1. Add a complex of features for generating phylogenetic trees from clonotypes, see - here.
      2. -
      3. New "single button" installation procedure.
      4. -
      5. Change the default value for PRE - to ~/enclone/datasets,~/enclone/datasets2.
      6. -
      7. Add argument NALL to turn off all filters.
      8. -
      9. Add new lead variables nd<k> that display the number of cells in the - top datasets for a given clonotype.
      10. -
      11. Add new lead variable dref that shows the distance of V..J from the reference - outside the region of recombination. -
      12. Add argument COMPLETE to remove exact subclonotypes that do not have all - chains.
      13. -
      14. Test for consistency between VDJ and GEX barcodes, and exit if this is not the case.
      15. -
      16. Add option COLOR=property to color amino acids by their properties.
      17. -
      18. Add option FCELL to allow filtering by cells.
      19. -
      -

      - -

      -5/29/20: -Add new "UMI ratio" filter that further reduces noise in certain cases. This can be turned -off using the argument NUMI_RATIO. -

      - -

      -5/22/20: -Add major new "UMI" filter that greatly reduces noise in certain cases. This can be turned -off using the argument NUMI. -

      - -

      -5/12/20: -Add PLOT_BY_ISOTYPE to generate honeycomb plots colored by isotype. -

      - -

      -5/1/20: -Change the definition of the fields "edit" and "comp" to be based on -alignment from the beginning of the CDR3 up to the end of the J, rather than stopping at -the end of the CDR3. The intention is to capture the full region of recombination, which -may not have been done before. -

      - -

      -4/30/20: -First release. -

      - - - diff --git a/pages/index.html.src b/pages/index.html.src deleted file mode 100644 index 4b98231f6..000000000 --- a/pages/index.html.src +++ /dev/null @@ -1,698 +0,0 @@ -enclone (bit.ly/enclone) - - - -
      -enclone banner - -
      -

      #enclone (beta)

      -

      Accurate and user-friendly computational tool for clonal grouping to study the adaptive immune system

      -

      - -

      10x Genomics Chromium Single Cell V(D)J data - containing B cell - receptor (BCR) and T cell receptor (TCR) RNA sequences are entered as input data to - #enclone. Based on the - input, #enclone finds and organizes cells arising from the same progenitors into groups - (clonotypes) and - compactly displays each clonotype along with its salient features, including mutated amino - acids.

      -

      -#enclone (beta) is provided as a tool for use by the community to accelerate immunology research. -#enclone is only supported via -#enclone@10xgenomics.com. -The clonotype assignment algorithm that is part of #enclone will be integrated into a future -release of Cell Ranger. -

      - -

      #enclone has been designed for immunologists but anyone can download and experiment with it.

      - -

      Background: when you get sick, your body mounts an immune response by selectively -amplifying immune cells and mutations within these selected cells. #enclone allows you to see the -history of -single immune cells within a biological sample (such as a blood draw or biopsy). This history -reflects how the cognate receptors of these cells evolved in response to antigens, including -viruses, bacteria, and tumors.

      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       1. Introduction  8. Help 
       2. Objective  9. Understanding #enclone output 
       3. Why #enclone?  10. Combining multiomic data 
       4. Data input  11. Visualizing multiple clonotypes at once 
       5. Software  12. The power of #enclone 
       6. Installing #enclone  13. Questions 
       7. Running #enclone  14. Where am I? 
      - -
      - -
      - -

      -

      Introduction

      -

      The body defends itself from antigens, like viruses, bacteria, and tumors, by recognizing the -antigens and mounting an immune response through selective amplification of immune cells and -mutations within selected cells. #enclone enables profiling of the history of single immune -cells within a biological sample (such as a blood draw or a biopsy) by mapping the evolution of the -cognate BCRs and TCRs of those cells responding to antigen exposure. This history reflects how the -cognate receptors of these cells evolved in response to various antigens.

      - -

      -

      Objective

      - - -

      Using #enclone to profile B and T cell receptors for any sample using Chromium Single Cell -V(D)J as input enables you to make the best use of your data. You can explore the biology of these -cells without help from a computational expert!

      - -

      The objective of #enclone is to:

      - - - - - - -
      -

      - Find and display clonotypes: - groups of T and B cells sharing the same fully rearranged common ancestor. -

      -
      -

      Find: - It is easy to mistakenly put unrelated cells in - the same clonotype, or "pollute" a clonotype with extraneous chains. - #enclone's algorithms make finding accurate.

      -

      Display: - It is challenging to compactly represent a - large repertoire of data. #enclone enables compact, easy-to-grasp data - display.

      -
      - -

      The diversity of BCR and TCR chains, containing various combinations of V, D, and/or J segments, -broadens the immune repertoire to protect against a wide variety of pathogens. The figure below -illustrates the concept of a BCR clonotype. A similar concept applies to TCRs but without -somatic hypermutations.

      - - -what is a clonotype - -

      Each cell in a clonotype is typically represented by two or three chains, and this information is -present and directly observable in single cell V(D)J data. #enclone computationally approximates -the clonotypes -from the data with high accuracy (see below). The methods of #enclone are described -briefly in the online documentation for #enclone, and will -also be described separately in more detail. -

      - - - -

      -Clonotyping performance. To test the performance of #enclone, we combined data from 443 BCR -enriched libraries from 30 donors. #enclone detected a total of -381,506 -clonotypes. Of the detected clonotypes, 9,573 contained at least -two cells, -of which 15 (0.16%) of the clonotypes -contained receptors from multiple donors, and thus were erroneous. -The low error rate is a consequence of the paired chain data and the #enclone -algorithm, which minimizes the placement of unrelated cells in the same clonotype. -

      - -
      - -

      -

      Why use #enclone?

      -

      -#enclone has unique features! -

      -

      -Unique insights into 10x Genomics data: #enclone has been designed and tested -extensively to -gain in-depth insight and perspective regarding 10x Genomics single cell V(D)J datasets. Other -similar tools -may be used, but frequently, #enclone will provide a different answer, which in turn may affect -the biological interpretation of the data. -

      -

      -Speed: #enclone is very fast, allowing analysis of datasets in seconds. -

      -

      -Easy installation: The software is easy to install and to use. -

      - -
      - -

      -

      Inputs to #enclone

      - -

      10x Genomics single cell 5' data

      - -

      -BCR or TCR RNA sequences generated using the 10x Genomics -Chromium Single Cell Immune Profiling Solution and Cell Ranger 3.1 or higher are the inputs to -#enclone. #enclone can also process and display gene expression and Feature Barcode data -from the same cells. The latter can be used to quantify cell surface proteins, antigen binding, CRISPR -sgRNA, and other cellular features. You can see a list of publications that use 10x VDJ data -here. - -

      - -
      - -

      -

      The #enclone software

      - -

      #enclone is beta software†† released under this license. -Binary executables for Linux and Mac can be directly downloaded from this page, as can sample 10x -Genomics datasets. -#enclone can be run on a laptop, desktop, or server. -

      -

      -To use #enclone, basic knowledge of the command line is necessary. The command line is easy to -learn, and a -colleague may be able to help you if you are unfamiliar. Additional skills, like programming, -are not required. The command line can be dynamically changed to select specific clonotypes and -fields you wish to -see. #enclone is fast, typically responding in seconds (if run on a single dataset). -

      - -

      -#enclone, in addition to Cell Ranger and -Loupe -(into which the core algorithm of #enclone will be integrated at a later point in time), -supports the -analysis of V(D)J and other data from the -Chromium Single Cell Immune Profiling -solution. -

      - -

      - ††beta software implies that it is still being actively developed, with -features being added/modified, -and on rare occasions may involve breaking syntax that previously worked. See -this page for the history of changes. -

      - -
      - -

      -

      Installing #enclone

      - -

      -You can run #enclone directly from a Linux or Mac terminal window; see -here for Windows options; -see here if you have a problem. -

      - - - - - - - -
      Type this  -
      curl -sSf -L bit.ly/enclone_install | sh -s SIZE
      -
      - where SIZE is -small, medium or large, according to: -
      - -
      - - - - - - - - - - - - - - - - - - - - - -
      -

      - small -

      -
      -

      - load small dataset collection (one dataset, 123085) -

      -
      -

      - 30 MB -

      -
      -

      - do this if your internet connection is very slow -

      -
      -

      - medium -

      -
      -

      - load medium dataset collection -

      -
      -

      - 350 MB -

      -
      -

      - do this for a moderate number of datasets (~15) -

      -
      -

      - large -

      -
      -

      - load large dataset collection -

      -
      -

      - 2600 MB -

      -
      -

      - do this for a large number of datasets (~120) -

      -
      - -

      The command does three things:

      -
        -
      1. Puts the #enclone executable (for Linux or Mac as appropriate) in ~/bin.
      2. -
      3. If needed, adds a line to your bash initialization file so that ~/bin is included. -
      4. -
      5. Puts #enclone datasets in ~/enclone.
      6. -
      -Additional details can be found here. - -Restart your terminal session; you can now run #enclone.

      - -

      To update, type the same command! - -Only required files will be downloaded. -See history for what has changed.

      - -
      - -

      -

      Running #enclone

      - -

      Running #enclone can be as simple as typing e.g.

      -
      enclone BCR=/home/my_name/experiment_123
      -
      -

      where the path is where your Cell Ranger outputs live, but there are many options to learn -about. For example, if you want to combine many datasets, you can do that, but you probably -need to provide a metadata file that describes the datasets. You can find most of the #enclone -documentation within its online menus. To get started you should:

      -
        -
      1. -

        Type enclone help, to make sure your terminal window works for -#enclone.

        -
      2. -
      3. -

        Type enclone to get to the main #enclone help menu.

        -
      4. -
      -
      - -

      -

      Help

      - -

      #enclone help is split between material on this site and pages that are invocable from the -#enclone command line. All the latter pages are also listed here:

      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      commandwhat it provides
      enclone helphelp to test for correct setup
      enclonewhat you see here; guide to all the docs
      enclone help quickquick guide to getting started
      enclone help howoutline of how #enclone works, see also heuristics page, below
      enclone help commandinfo about #enclone command line argument processing
      enclone help glossaryglossary of terms used by #enclone, and conventions
      enclone help example1explanation of an example
      enclone help example2example showing gene expression and feature barcodes
      enclone help inputhow to provide input to #enclone
      enclone help input_techhow to provide input to #enclone (technical notes)
      enclone help parseableparseable output
      enclone help filterclonotype filtering options, scanning for feature enrichment
      enclone help specialspecial filtering options
      enclone help lvarslead column options
      enclone help cvarsper chain column options
      enclone help aminoper chain column options for amino acids
      enclone help displayother clonotype display options
      enclone help indelsinsertion and deletion handling
      enclone help colorhow #enclone uses color, and related things
      enclone help faqfrequently asked questions
      enclone help developera few things for developers
      enclone help allconcatenation of all the help pages (USE THIS TO SEARCH ALL THE HELP PAGES)
      - -
      - -For completeness, here are all the other pages on this site: - -

      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      pageaudience
      history of changeseveryone
      detecting illusory clonotypeseveryone
      how to compile #enclonepeople who want to contribute code
      licenseeveryone
      Windowspeople using Windows computers
      notes on heuristics - (only a little bit now) - people who want to know details on how the algorithm works
      honeycomb plotseveryone
      making phylogenetic treeseveryone
      installation troubleshooting - if you have trouble installing
      installation details - if you're curious about what the install command does
      iNKT and MAIT cells - people interested in iNKT and MAIT cells
      - -
      - -
      - -

      -

      Understanding #enclone output

      - -

      The example below shows how #enclone displays clonotypes. Understanding this display is -important for using #enclone. Consult the available #enclone documentation and use -the sample datasets to understand #enclone features and output.

      - -enclone annotated example - -Notice the compression in two directions: -
        -
      1. Vertically to group cells into a single line if they have identical V(D)J - transcripts - (instead of showing one line for every cell).
      2. -
      3. Horizontally, a flexible concept, to show by default all positions exhibiting a - difference from the reference and all positions in the CDR3 (instead of showing - all transcript positions, only "interesting" positions are shown).
      4. -
      - -

      The same exact output would be obtained by typing:

      -
      enclone BCR=123085 CDR3=CQQRSNWPPSITF
      -
      -

      The directory 123085 is in the directory ~/enclone/datasets and -contains some files from a Cell Ranger run, obtained from a human ovarian cancer sample.

      - -

      -How does #enclone find my data? -It uses a search path called PRE that is preset to -~/enclone/datasets,~/enclone/datasets2, and which can be set to any value, either -by setting PRE=... on the command line, or by setting the environment variable -ENCLONE_PRE. To find your data, #enclone prepends PRE to the value of -BCR or TCR given on the command line. -For example, all of the following argument combinations do the same thing: -
      1. BCR=123085 (using the default value of PRE) -
      2. PRE=~/enclone/datasets BCR=123085 -
      3. PRE=~/enclone BCR=datasets/123085 -
      4. BCR=~/enclone/datasets/123085. -
      There is also an argument META that is convenient for specifying multiple -datasets. See here for how. -
      -
      -Please note that while paths can have non-Latin characters, best practice is to not have -blanks, tabs, etc. in path names. #enclone can be made to work with such characters by double -quoting the paths, but it makes things harder, and other programs you might use may break. -

      - -

      The argument CDR3=CQQRSNWPPSITF causes #enclone to display only clonotypes in which -the given CDR3 sequence occurs. Many other filters are provided. In the absence of filters, all -clonotypes are shown. Clonotypes are shown from largest to smallest, and the output is -automatically paged, so you can scroll through it.

      -

      By default, #enclone prints clonotypes in this human-readable form. You can also instruct -#enclone -to print clonotypes in machine-readable forms that are suitable for input to other programs.

      -
      - -

      -

      Combining multiomic data

      - -

      Gene expression and Feature Barcode data can be displayed simultaneously alongside VDJ data. For -example, here we add columns for the same clonotype, showing the median number of UMIs detected -for all genes, a particular gene, and a particular antibody:

      - -#include pages/auto/clonotype_with_gex.html - -

      To obtain this, we added the extra arguments -GEX=123749 LVARSP=gex,IGHV3-49_g,CD19_ab -to the previous command. The GEX part points to the directory containing gene -expression and feature barcode data. The LVARSP part defines the additional columns -to be displayed.

      -

      Other types of data can be brought in via Feature Barcoding. For example, the response to -multiple antigens can be measured using -LIBRA-seq -and these data can be displayed as additional columns.

      -
      - -

      -

      Visualizing multiple clonotypes

      - - - - - - - - -
      - honeycomb plot - -

      After selecting multiple clonotypes in #enclone, you can display them using - a "honeycomb" plot.

      -

      In this instance, pre- and post-vaccination samples were collected from four individuals, - many datasets were generated for each sample, and these were combined in a single call - to #enclone. Clonotypes containing at least ten cells are shown. - The plot was generated by adding

      -
      MIN_CELLS=10 PLOT="clono.svg,pre->blue,post->red"
      -LEGEND=blue,"pre-vaccination cell",
      -       red,"post-vaccination cell"
      -

      to the #enclone command line, yielding the image shown here as the file - clono.svg.

      -

      For more information about honeycomb plots, - see here.

      -
      - -
      -
      - -

      -

      The power of #enclone

      - -

      There are many ways to use 10x Genomics data to study immunobiology.

      -

      Response to an antigen or vaccine: #enclone is a great tool for studying responses to a -vaccine. For example, in the previous section, the red clonotypes may represent responses to -antigens in the vaccine.

      -

      Vaccine and therapeutic antibody development: For certain infectious agents e.g. COVID-19, -a vaccine does not currently exist; different approaches may be employed in pursuit of this goal. One such -approach is to identify patient and survivor B cell clonotypes that expand in response to the infectious -disease. These define antibodies that can be used to design passive or active vaccines.

      -

      Additional power is added by mapping antigen specificity to multiple antigens directly via Feature -Barcoding (LIBRA-seq). These data -are easy to display in #enclone. Candidates can be selected directly for vaccine or therapeutic -development by picking large clonotypes with high antigen counts and single or multiple antigen specificities.

      -

      We are actively working on further functionality that will make this process even more effective.

      -

      See this vignette to learn how to generate phylogenetic trees using -#enclone. -

      Another example use of #enclone is the detection of -illusory clonotypes.

      - -
      - -

      -

      Questions

      - -

      -Please contact us with your questions and comments! We look forward to hearing your feedback and ideas to -further evolve #enclone. -

      - -

      -Our address is #enclone@10xgenomics.com. -

      - -

      -To send us #enclone output, please simply cut and paste text, rather than send a -screenshot, except when necessary. Please send both the command you used and the output. -

      - -

      -#enclone is provided as a tool for use by the community. -#enclone is beta software and thus a work in progress. We are actively making many changes and may -be unable to respond promptly to your particular request. -

      - -
      - -

      -

      Where am I?

      - - -

      bit.ly/#enclone

      - - - diff --git a/pages/innate.html.src b/pages/innate.html.src deleted file mode 100644 index 7e3e0c608..000000000 --- a/pages/innate.html.src +++ /dev/null @@ -1,150 +0,0 @@ -iNKT and MAIT cells - - - -
      -enclone banner - -

      iNKT and MAIT cells

      - -

      -#enclone can look for evidence that T cells are iNKT or MAIT cells. This evidence may be -displayed e.g. with LVARSP=inkt or LVARSP=mait, see -enclone help lvars. One may also filter to show -only cells with some evidence, using the INKT and MAIT options, see -enclone help filter. -

      - -
      - -

      -#enclone looks for both gene and junction (CDR3) evidence. The rules for this are -likely -to be tweaked over time. Junction evidence is discussed later. For gene evidence, the following -rules are used currently: -

        -
      • Human iNKT: α chain -- use of both TRAV10 and TRAJ18; - β chain -- use of TRBV25-1
      • -
      • Human MAIT: α chain -- use of TRAV1-2 and any of - TRAJ33/TRAJ20/TRAJ12; β chain -- use of any member of the - TRBV20 or TRBV6 families
      • -
      • Mouse MAIT: α chain -- use of both TRAV1 and TRAJ33; β chain -- use - of either TRBV19 or TRBV13
      • -
      • Mouse iNKT: α chain -- use of both TRAV11 and TRAJ18; β chain -- use - of TRBV13-2 or TRBV1 or TRBV29.
      • -
      -

      - -

      As an example, -

      enclone TCR=101287 LVARSP=mait CDR3=CSAGQGDTEAFF
      - -#include pages/auto/mait_example.html - -displays a single cell showing evidence for the cell being a MAIT cell, encoded using -the string 𝝰gj𝝱g. This stands for "gene and junction evidence on the alpha -chain, and gene evidence on the beta chain" (i.e. 𝝰gj = alpha gene and junction). - -

      Please be aware that detection of junction evidence is limited by the completeness of -the lists on which they are based (see below), and these lists are incomplete! They are also -likely to change over time as more data studying iNKT and MAIT cells emerge. Note that while -iNKT and MAIT cells are described as "invariant", a more accurate description would be -"semi-invariant", -or "limited variant" as V and J gene recombinations that generate iNKT and MAIT TCRs can still -generate considerable nucleotide and amino acid diversity within the CDR3.

      - -
      - -

      For junction evidence, #enclone tests for presence of a given chain's CDR3 amino acid -sequence in a fixed list. Here is the list for human iNKT: -

        -
      • CASARGVNEQYF
      • -
      • CASRGQGLGEQYF
      • -
      • CASRYYSVQGRTDTQYF
      • -
      • CASSAMDTEAFF
      • -
      • CASSAPLAGHYEQYF
      • -
      • CASSAWDGYEQYF
      • -
      • CASSDGFTDTQYF
      • -
      • CASSDLGLAGVIEQFF
      • -
      • CASSDLMGPDNYEQYF
      • -
      • CASSDLPETQYF
      • -
      • CASSDQNTEAFF
      • -
      • CASSDRANEQFF
      • -
      • CASSDRLAGDTQYF
      • -
      • CASSDRRQGAHQPQHF
      • -
      • CASSEAGSGEKLFF
      • -
      • CASSEALILFF
      • -
      • CASSEAPWRDSGNTIYF
      • -
      • CASSEEGALKESVGTQYF
      • -
      • CASSEFDGGQETQYF
      • -
      • CASSEFGGTERTQETQYF
      • -
      • CASSEFGQSADEQFF
      • -
      • CASSEGGQDYEQYF
      • -
      • CASSEGTAGTDTQYF
      • -
      • CASSEGTGPNSPLHF
      • -
      • CASSEGWEQYF
      • -
      • CASSELLRGQGRTGELFF
      • -
      • CASSELTDTQYF
      • -
      • CASSELYTGGDEQFF
      • -
      • CASSEMGQGVYTF
      • -
      • CASSENSGTGRIYEQYF
      • -
      • CASSEPSSGNTIYF
      • -
      • CASSEPTGLGTDTQYF
      • -
      • CASSESATGFSPLHF
      • -
      • CASSESGGSTEAFF
      • -
      • CASSESLAGGYNEQFF
      • -
      • CASSESVETQYF
      • -
      • CASSEWAGGQETQYF
      • -
      • CASSEWEDITDTQYF
      • -
      • CASSEWGRTQETQYF
      • -
      • CASSEWGTNEKLFF
      • -
      • CASSEYESTNEKLFF
      • -
      • CASSEYFAGFNEQYF
      • -
      • CASSEYGTLQETYF
      • -
      • CASSEYMEAGIPTDTQYF
      • -
      • CASSEYMEGGEKLFF
      • -
      • CASSEYRLQETQYF
      • -
      • CASSEYRRRSGEKLFF
      • -
      • CASSFGGETQYF
      • -
      • CASSGDRRQGAHQPQHF
      • -
      • CASSGLLTGPDTQYF
      • -
      • CASSGLRDRGLYEQYF
      • -
      • CASSGTGGAFDEQFF
      • -
      • CASSGTVTEAFF
      • -
      • CASSGYQGGGETQYF
      • -
      • CASSPIGGHGYEQYF
      • -
      • CASSPRDRWHEQYF
      • -
      • CASSRGGFDEQYF
      • -
      • CASSRGGGTEAFF
      • -
      • CASSRGGYTEAFF
      • -
      • CASSTGGADEKLFF
      • -
      • CASSVPLRDYEQYF
      • -
      • CASTGASGTYEQYF
      • -
      • CASTPRKGTDVGNTIYF
      • -
      • CASTPSGGWSSDTQYF
      • -
      • CASTSLETSQYF.
      • -
      -and here is the list for human MAIT: -
        -
      • CAALDSNYQLIW
      • -
      • CAAMDSNYQLIW
      • -
      • CARSDSNYQLIW
      • -
      • CASMDSNYQLIW
      • -
      • CASSDSGESGTEAFF
      • -
      • CASSPSGGDYNEQFF
      • -
      • CASSQIAGGQQETQYF
      • -
      • CAVLDSNYQLIW
      • -
      • CAVMDSNYQLIW
      • -
      • CAVNGDDYKLSF
      • -
      • CAVRDGDYKLSF
      • -
      • CAVRDSDYKLSF
      • -
      • CAVRDSNYQLIQW
      • -
      • CAVRDSNYQLIW
      • -
      • CAVSDSNYQLIW
      • -
      • CAVSLQDYKLSF
      • -
      • CSARQGAESREQYF
      • -
      -At present we do not have lists for mouse. -

      - - - diff --git a/pages/installation_details.html.src b/pages/installation_details.html.src deleted file mode 100644 index 3a564c481..000000000 --- a/pages/installation_details.html.src +++ /dev/null @@ -1,55 +0,0 @@ -enclone installation details - - - -
      -enclone banner - -

      #enclone installation details

      - -

      The purpose of this page is to provide some information about what the #enclone installation -command does, in case you're curious.

      - -

      The command is:

      - -

      -curl -sSf -L bit.ly/enclone_install | sh -s SIZE - where SIZE is -small, medium or large -

      - -

      1. First, bit.ly/enclone_install is a redirect to -https://10xgenomics.github.io/enclone/install.sh, as you can see if you type -bit.ly/enclone_install+ (the + is the way that bit.ly -provides for seeing what a redirect does).

      - -

      2. The -sSf option to curl causes it to run quietly if it -is successful, print an error message if it fails, and importantly, not pass logging or error -messages to sh.

      - -

      3. The overall command just causes the script install.sh to be executed.

      - -

      4. On a first invocation, the script downloads the #enclone executable and datasets.

      - -

      5. On subsequent invocations, the script checks to see if the local copies are current, -and if not, redownloads them. In principle, the executable could be downloaded as a compressed -file, which would be more efficient. For the case where -SIZE is medium, the action is -also inefficient, as it downloads everything if anything has changed.

      - -

      6. The script puts the executable in ~/bin and the datasets in -~/enclone. These directories are created if they don't already exist.

      - -

      7. The following step makes it so you don't have to type -~/bin/enclone every time you want to run it, and can instead type just -enclone. To enable this, -if ~/bin is not in your path, the script adds a line to -.bash_profile or .profile that makes ~/bin first in -your path. (Which file is used depends on the version of Linux that you're using.) If you -want, when the script is done, you can manually tidy up the file to make it more readable.

      - -

      Questions? You can email us at -#enclone@10xgenomics.com.

      - - - diff --git a/pages/old_install.html.keeping_but_not_using_now b/pages/old_install.html.keeping_but_not_using_now deleted file mode 100644 index 3ff1ffd7c..000000000 --- a/pages/old_install.html.keeping_but_not_using_now +++ /dev/null @@ -1,102 +0,0 @@ -enclone (bit.ly/enclone) - - - -
      -
      -
      - -
      - -

      -

      Installing enclone

      - -

      1. Download enclone. -Open a terminal window and type the following to download the enclone executable:

      - - - - - - - - - - - - - - -
      -

      - Linux -

      -
      -
      mkdir -p ~/bin; cd ~/bin
      -wget https://github.com/10XGenomics/enclone/releases/latest/download/enclone_linux -O enclone; chmod +x enclone
      -
      -

      - Mac -

      -
      -
      mkdir -p ~/bin; cd ~/bin
      -curl -L https://github.com/10XGenomics/enclone/releases/latest/download/enclone_macos --output enclone; chmod +x enclone
      -
      -

      - Windows -

      -
      -

      - not currently supported, let us know if this is important to you, and see - this page -

      -
      - -

      This gets you the absolute latest version of enclone. You can repeat this step if you ever -want to update.

      -

      It is not necessary to compile enclone, unless you want to contribute -to the enclone codebase. -Please see compilation.

      -

      2. Download test data. Type the following to download the enclone test datasets -(plus source code, because it's easier to fetch everything):

      -
      cd
      -svn export https://github.com/10XGenomics/enclone/trunk enclone
      -
      -

      (See here if this doesn't work for you.) At this point -~/enclone/datasets will contain the datasets that are prepackaged with enclone. If you -subsequently want to update this, delete the directory and repeat the command.

      - -

      -🐌 If you have a very slow internet connection, you may prefer to download -just one dataset (123085), like this:

      - -
      cd; mkdir -p enclone/datasets; cd enclone/datasets
      -svn export https://github.com/10XGenomics/enclone/trunk/test/inputs/version14/123085
      -
      - -

      The test datasets were selected because they happened to be useful as part of algorithmic -regression tests (included in the codebase). - -A much larger collection of datasets (including most of those above) is also provided and can be -downloaded by: - -

      wget https://s3-us-west-2.amazonaws.com/10x.files/supp/cell-vdj/enclone_data_1.0.tar.gz
      -zcat enclone_data_1.0.tar.gz | tar xf -
      -mv enclone_data_1.0 ~/enclone/datasets2
      - -On a Mac, you may need to use curl instead. - -

      3. Update your path. Edit your shell initialization file to prepend -~/bin: to your PATH. -Close and reopen your terminal window to refresh your path. Then you're good to go!

      -

      4. Periodically update or check the history to see if -anything has changed that might affect you.

      - -
      - -
      -
      -
      - - - diff --git a/pages/plot.html.src b/pages/plot.html.src deleted file mode 100644 index 31e676f27..000000000 --- a/pages/plot.html.src +++ /dev/null @@ -1,116 +0,0 @@ -honeycomb plots - - - -
      -enclone banner - -

      honeycomb plots

      - -

      -#enclone can create a "honeycomb" plot showing each clonotype as a cluster of dots, one per cell. -

      - -

      #enclone provides four ways to assign colors in such a plot. We describe them in order of -precedence, i.e. color data for the first will be used if provided, etc.

      - -

      -Hint. -Use the MIN_CELLS option -(see enclone help filter) to exclude tiny clonotypes, -which would otherwise crowd the image and make plotting very slow. -

      - -
      - -
      1. The first way is to use the argument -
      PLOT="filename,sample1->color1,...,samplen->colorn"
      -which creates an svg file of the given name, and assigns the given colors to the given samples. -Unspecified samples will be black. - -

      Example: -enclone BCR=123085:123089 MIN_CELLS=10 PLOT="plot.svg,s1->blue,s2->red" NOPRINT -LEGEND=blue,123085,red,123089

      - -

      Note the colon between 123085 and 123089. This tells #enclone -that the two datasets are different samples from the same donor. This is not actually true, as -the two datasets are from the same sample, but is needed to plot in this way.

      - -samples honeycomb plot - -

      - -There is another example on the main #enclone page, based on pre- and post-vaccination -samples. - -

      - -
      - -
      2. The second way is to provide simply -
      PLOT=filename
      -on the command line, and then provide the color field in the CSV defined by the META -option. This assigns a color to each dataset. - -

      - -
      - -
      3. The third way is to use the simple -PLOT specification, and assign a color to -each barcode using the bc field for META. - -

      - -
      - -
      4. The fourth way is -PLOT_BY_ISOTYPE=filename. This plots by heavy chain -constant region name and -labels accordingly. (This only makes sense for BCR.) Some cells may be labeled "unassigned", for -one of three reasons: (1) no heavy chain was found; (2) no constant region was found; (3) two -heavy chains were found and had conflicting constant region names. Running with -MIN_CHAINS_EXACT=2 -is usually a good idea to avoid noise coming from cells for which only a light chain was detected. - Currently a maximum of 12 constant region names is allowed. Let us know if you have more and we -will fix this. Note that PLOT_BY_ISOTYPE cannot be used with PLOT or -LEGEND. - -

      Example: enclone BCR=123085,123089 MIN_CELLS=5 MIN_CHAINS_EXACT=2 NOPRINT -PLOT_BY_ISOTYPE=plot.svg

      - -isotype honeycomb plot - -

      - -
      - -

      -The colors should be valid colors for use in an svg file. They can be named colors like red or -blue (see here for a full list) -or a hex specification like #00FFFF for aqua. The full color description for svg is -here. -

      - -

      -#enclone also recognizes the color abbreviations @1, ..., @6, which refer -to #enclone's color blind friendly palette -(see enclone help color). -

      - -

      -Each cell is shown as a disk having the given color, and each clonotype is shown as a cluster of -these disks, which are positioned at random. The filename argument may be "stdout". -

      - -

      -To add a legend to the graph, add the argument LEGEND to your command line. This -will give you an -auto-generated legend. You can also customize the legend by adding an argument of the form -LEGEND=color1,"text1",...,colorn,"textn" -to the command line. -

      - - - diff --git a/pages/tree.html.src b/pages/tree.html.src deleted file mode 100644 index 9ccfb5db1..000000000 --- a/pages/tree.html.src +++ /dev/null @@ -1,94 +0,0 @@ -making phylogenetic trees - - - -
      -enclone banner - -

      Making phylogenetic trees

      - -

      #enclone provides several mechanisms for creating, displaying, and exporting a phylogenetic -tree for each -clonotype. These are initial mechanisms, which are likely to be expanded and/or improved over -time in response to feedback. The initial implementation is inspired by the Levenshtein-NJ method described by Yermanos et al. 2017. For all of these mechanisms, we recommend using the argument -COMPLETE to remove exact subclonotypes that are missing 1 or more chains.

      - -
      - -

      Method 1. -This method is invoked using the argument TREE, or TREE=const, to -label leaves by heavy chain constant region names (N.B. If more than one heavy chain is present, -we separate their constant region names by "+"). -The method first defines the distance between any two exact subclonotypes to be their -Levenshtein distance. We then add a root "virtual" exact subclonotype which equals the donor -reference away from the recombination region and which is undefined within that region (i.e. a -germline-reverted exact clonotype without the junction). The distance from the root to any -actual exact subclonotype is the Levenshtein distance, away from the region of recombination. -

      - -

      -Next a tree is created from these data using the -neighbor joining algorithm. -This sometimes yields negative distances, which we change to zero. We have only observed -such negative distances on the edge emanating from the root. -

      - -

      Note that for a given clonotype, the neighbor joining algorithm is -O(n^3), where n is the number of exact subclonotypes in the clonotype. Thus -for sample types having highly complex clonotypes (e.g. with ~1000 subclonotypes), the -algorithm will be very slow. We have only observed this in a lymphoma sample, and of -course for such cases, the tree would be so large that it would be difficult to do anything with it. -

      - -

      Finally, the tree is visualized using plain text, as shown in the example below. The -added field dref shows the distance of each exact subclonotype from the -donor reference, away from the recombination region.

      - -enclone BCR=123085 TREE COMPLETE CDR3=CARDQNFDESSGYDAFDIW LVARSP=dref - -#include pages/auto/tree_example.html - -
      - -

      Method 2. -This method is invoked using the argument NEWICK, and is exactly like -method 1, except that it outputs the resulting tree in -Newick format.

      - -

      For example, running enclone BCR=123085 NEWICK COMPLETE CDR3=CARDSWYSSGRNTPNWFDPW -will generate the following Newick tree for the largest clonotype:

      - -

      ((((3:0.00,20:4.00)I5:1.00,11:0.00)I7:0.69,(5:0.82,((13:0.00,14:3.00)I4:4.89,(((((((2:0.00,19:0.00)I1:0.95,4:0.05)I2:0.50,18:0.50)I3:5.96,(7:0.00,12:0.00)I8:0.54)I12:0.32,((6:0.83,9:0.17)I6:0.94,10:0.06)I9:0.80) -I15:0.13,((16:0.95,17:0.05)I10:0.03,21:2.98)I11:0.93)I17:0.04,(1:0.00,(8:1.00,15:0.00)I20:0.00)I19:0.02)I18:0.10)I16:0.11)I14:0.18)I13:0.00)0;

      - -

      This tree can be copied and pasted or otherwise exported to be viewed in tools such as iTOL.

      - -
      - -

      Method 3. -This method is invoked using the argument CLUSTAL_DNA=filename or -CLUSTAL_AA=filename, where -filename can be stdout, and otherwise must have the extension -".tar". It does not generate a tree, but instead -generates a CLUSTALW alignment for each clonotype (using either bases or -amino acids), with one sequence for each exact subclonotype. -This sequence is the concatenation of the per-chain sequences, with the appropriate number -of gap (-) characters shown if a chain is missing. As above, we recommend using the -COMPLETE option to avoid this happening.

      - -

      If filename is stdout, then the alignments are printed out -after each clonotype picture. Otherwise, a tar file is generated, which if untarred yields -one file per clonotype. To avoid confusion, it would be best for filename to have -the suffix .tar. We also recommend using MIN_CELLS=... or some other -argument to restrict the number of files that would be generated upon untarring.

      - -

      This method can be used to provide input to another program that will generate a tree.

      - -
      - -

      Method 4. -This method is invoked using the argument PHYLIP_DNA=filename or -PHYLIP_AA=filename, and is just like method 3, except for the output format.

      - - - diff --git a/pages/windows.html.src b/pages/windows.html.src deleted file mode 100644 index be1de45b7..000000000 --- a/pages/windows.html.src +++ /dev/null @@ -1,33 +0,0 @@ -enclone on Windows computers - - - -
      -enclone banner - -

      Using #enclone on Windows computers

      - -We are attempting to build #enclone for Windows. This may take some time. -In the meantime, we can suggest two workarounds: - -

      -1. Use Windows Subsystem for Linux (WSL) on your Windows computer. You can get Ubuntu Linux for -free here, and we're told that current PCs come with -this preinstalled. - -

      -2. Set up your Windows computer for Linux dual boot. This is similar but generally much less -satisfactory, since the computer has to be rebooted between uses of Windows and Linux. - -

      -Importantly, we have not tested either solution ourselves, but are relaying what has been -suggested to us by others. - -

      -In either case, while you are using Linux on your Windows computer, you will have access to -both the files on the Linux side and the files on the Windows side. The files on the -Windows side may be found in the directory /mnt/c, and you can explore what is -there by typing ls /mnt/c and then enlarging the path. - - - diff --git a/release_instructions b/release_instructions deleted file mode 100644 index 0f9c6afc8..000000000 --- a/release_instructions +++ /dev/null @@ -1,13 +0,0 @@ - -1. Merge in changes you want to master, and then git switch master and git pull origin master to - catch your local version up-to-speed. -2. Do another cargo b and cargo t to make sure things behave as expected. -3. Type "bump_version" to bump the z version in each enclone crate Cargo.toml, then commit and - push the change. -4. Edit README.md to reflect the upcoming version change (do this via GitHub.com so that it's an - officially signed commit). -5. git tag vX.Y.Z , for instance git tag v0.4.43 -6. git push origin --tags (this triggers the release build and upload process) -7. When the release is done, change the title of the release to beta release vX.Y.Z, download the - binaries, and calculate the SHA-256 checksums for the binaries and include those in the release - notes. diff --git a/speed b/speed deleted file mode 100755 index a689a6dca..000000000 --- a/speed +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/csh -e - -cd enclone_main; cargo test --test enclone_test --features cpu -- --nocapture diff --git a/speed_test b/speed_test deleted file mode 100755 index 7ddc209f6..000000000 --- a/speed_test +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/csh - -# Test speed of current enclone versus the latest release. Note that the latest release -# would have been compiled in release mode. 
- -curl -s -L https://github.com/10XGenomics/enclone/releases/latest/download/enclone_linux \ - --output enclone_main/test/outputs/enclone -chmod 775 enclone_main/test/outputs/enclone - -echo -echo "expect: 7700" - -foreach i (1 2 3 4 5) - - set current = `enclone BI=10 NCROSS NGEX NOPRINT PRINT_CPU` - echo "current: $current" - - set latest = `enclone_main/test/outputs/enclone BI=10 NCROSS NGEX NOPRINT PRINT_CPU` - echo "latest: $latest" - -end - -echo diff --git a/spt b/spt deleted file mode 100755 index 9597ad484..000000000 --- a/spt +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/csh - -# Test speed of current enclone versus the latest release. Note that the latest release -# would have been compiled in release mode. - - -echo -echo "expect: 7700" - -foreach j (1 2 3 4 5) - touch enclone_main/src/main_enclone.rs - cargo b - cp target/debug/enclone target/debug/enclone{$j} -end - -foreach i (1 2 3 4 5) - echo - foreach j (1 2 3 4 5) - set current = `target/debug/enclone{$j} BI=10 NCROSS NGEX NOPRINT PRINT_CPU` - echo "$j = $current" - end -end - -echo diff --git a/third_party/acknowledgements b/third_party/acknowledgements deleted file mode 100644 index 4c8643346..000000000 --- a/third_party/acknowledgements +++ /dev/null @@ -1,5 +0,0 @@ -enclone includes software developed by the OpenSSL Project -for use in the OpenSSL Toolkit (http://www.openssl.org/). - -Plain text tree visualization in enclone is based on -https://gitlab.com/Noughmad/ptree, by Miha Čančula.