diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..efb1e94
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,12 @@
+# top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+[*.py]
+charset = utf-8
+indent_style = space
+indent_size = 4
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..f675c0d
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,34 @@
+name: CI for DGEB
+
+on:
+  push:
+    branches: ["**"]
+  pull_request:
+    branches: ["**"]
+
+# Lint-only workflow: read access is sufficient, so no write scopes or OIDC token are requested.
+permissions:
+  contents: read
+  pull-requests: read
+
+# Cancel superseded runs for the same pull request (or the same ref for plain pushes).
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - uses: yezz123/setup-uv@v4
+        with:
+          uv-venv: ".geb_venv"
+      - run: uv pip install ruff
+      # `--check` fails the step on unformatted files; plain `ruff format .` rewrites them and exits 0.
+      - run: ruff format --check .
+      - run: ruff check .
+      # TODO: pytest
+      # TODO: pyright
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..2e34f9d
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,50 @@
+# This workflow will
+# - Find the latest version tag based on the commit history
+# - Create a git tag for the new version
+# - Update the version number in pyproject.toml based on the commit history
+# - Upload the package to PyPI
+# - Create a release on GitHub
+
+# This workflow requires the following to be set up:
+# - a GitHub personal access token with the `repo` scope, stored as the repository secret `GH_TOKEN`
+# - trusted publishing configured on PyPI, as described here: https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/
+
+name: Release
+on:
+ push:
+ branches:
+ - main
+
+jobs:
+ release:
+ runs-on: ubuntu-latest
+ concurrency: release
+ permissions:
+ id-token: write # IMPORTANT: this permission is mandatory for trusted publishing using PyPI
+ contents: write
+
+ steps:
+ # Check out with the GH_TOKEN secret; fetch-depth: 0 requests the full history instead of a shallow clone.
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ token: ${{ secrets.GH_TOKEN }}
+
+ - name: Python Semantic Release
+ id: release
+ uses: python-semantic-release/python-semantic-release@v9.8.3
+ with:
+ github_token: ${{ secrets.GH_TOKEN }}
+
+ # The two publish steps below only run when the release step reports `released == 'true'`.
+ - name: Publish package distributions to PyPI
+ uses: pypa/gh-action-pypi-publish@v1.9.0
+ if: steps.release.outputs.released == 'true'
+ # This action supports PyPI's trusted publishing implementation, which allows authentication to PyPI without a manually
+ # configured API token or username/password combination. To perform trusted publishing with this action, your project's
+ # publisher must already be configured on PyPI.
+
+ # NOTE(review): this step uses the built-in GITHUB_TOKEN while the steps above use the GH_TOKEN secret — confirm this is intentional.
+ - name: Publish package distributions to GitHub Releases
+ uses: python-semantic-release/upload-to-gh-release@v9.8.3
+ if: steps.release.outputs.released == 'true'
+ with:
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ tag: ${{ steps.release.outputs.tag }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bb077e1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.venv/
+__pycache__/
+.vscode/
+build/
+dist/
+*egg-info/
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..f73d605
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,197 @@
+# CHANGELOG
+
+## v0.0.10 (2024-07-09)
+
+### Fix
+
+* fix: remove noop task ([`7d5b393`](https://github.com/TattaBio/DGEB/commit/7d5b3933f48e51fb4c71945f01af2cc5a7dba3ed))
+
+## v0.0.9 (2024-07-09)
+
+### Fix
+
+* fix: update cli script name ([`633e14d`](https://github.com/TattaBio/DGEB/commit/633e14db7e1eed0d9606ef1097e369e4f5e245f5))
+
+### Unknown
+
+* 0.0.9
+
+Automatically generated by python-semantic-release [skip ci] ([`a8c1a96`](https://github.com/TattaBio/DGEB/commit/a8c1a96d18af589795bc9532fee8ad9764cd52ed))
+
+* Merge pull request #9 from TattaBio/andre
+
+Update ModAC main metric ([`3c67e65`](https://github.com/TattaBio/DGEB/commit/3c67e6559d0e49d90ffe2858eb9e287abd1b6e6c))
+
+* ruff format ([`78461ac`](https://github.com/TattaBio/DGEB/commit/78461ac901b8617821ca15e543c0dd8e2dbf6e95))
+
+* update top_k=50 for modac ([`2c3dcd5`](https://github.com/TattaBio/DGEB/commit/2c3dcd5856b6679a80999b3c4b3512876ac0b58d))
+
+* remove revision ([`2d587da`](https://github.com/TattaBio/DGEB/commit/2d587daa79f32c49201b419892b7f95f3dc5eedb))
+
+* Merge pull request #8 from TattaBio/cli
+
+Cli & cleanup ([`9698c8f`](https://github.com/TattaBio/DGEB/commit/9698c8f5ab0bab6c3c0a76d59dc29cfd964ebf15))
+
+* Exclude leaderboard files in anticipation of merging leaderboard PR ([`58bdcba`](https://github.com/TattaBio/DGEB/commit/58bdcba11af605bdef11cfecc087c9efb0e97b72))
+
+* Update README ([`d323905`](https://github.com/TattaBio/DGEB/commit/d3239059e29fb149f9c348b951bc4988d8b9f8dc))
+
+* cleanup ([`1f0fe16`](https://github.com/TattaBio/DGEB/commit/1f0fe16de6910200d88c918b08cbf26067313469))
+
+* Add cli to pyproject.toml ([`5404218`](https://github.com/TattaBio/DGEB/commit/54042181ef54c11db74ebb53c403b21a8114c02b))
+
+* Remove Dataset 'description' which does not exist on model. ([`46b0040`](https://github.com/TattaBio/DGEB/commit/46b0040a302384fa00791bbfdd6fae24645d6a6d))
+
+* Merge pull request #7 from TattaBio/add_dna_tasks
+
+Add dna tasks ([`cfc5799`](https://github.com/TattaBio/DGEB/commit/cfc57995f9b1e584bb60e998f9cf68bea5ec39fa))
+
+* ruff ([`f9fa125`](https://github.com/TattaBio/DGEB/commit/f9fa12502df9837b5381da17b17198f3667c4911))
+
+* adding rpob datasets and updating ec revision ([`8f9cc3f`](https://github.com/TattaBio/DGEB/commit/8f9cc3f819beb70f51a5cc59f16c65bffceedbad))
+
+* Update README.md ([`d5d7c24`](https://github.com/TattaBio/DGEB/commit/d5d7c24215d347fc17d6016ac2a3eddfb3cf2a12))
+
+* Merge pull request #4 from TattaBio/andre
+
+Add dataset revisions ([`95b6f11`](https://github.com/TattaBio/DGEB/commit/95b6f11ffee3dccc45ab119ac4f602066750f7ef))
+
+* add dataset revision numbers ([`7e069a2`](https://github.com/TattaBio/DGEB/commit/7e069a237de5391e7c6b7f09c108292ac10c25af))
+
+* Merge pull request #3 from TattaBio/andre
+
+Update readme and task imports ([`ade30a8`](https://github.com/TattaBio/DGEB/commit/ade30a856deffe35ddf57d16705d030b6d0192c8))
+
+* rename dgeb ([`6b1c2ee`](https://github.com/TattaBio/DGEB/commit/6b1c2ee76798d89e487386116efe23c90d2d039c))
+
+* add intro ([`a2280dd`](https://github.com/TattaBio/DGEB/commit/a2280dd732984d58caed45b9a429038c0d81851a))
+
+* update readme and tasks ([`00e0a79`](https://github.com/TattaBio/DGEB/commit/00e0a791f070ca37e5b92770b3363ef066e2789f))
+
+* Merge pull request #2 from TattaBio/andre
+
+rename dgeb imports ([`1894ba9`](https://github.com/TattaBio/DGEB/commit/1894ba9a92a8f369053ddb9d351ae48fd8e2d674))
+
+* rename dgeb imports ([`5f1f8b8`](https://github.com/TattaBio/DGEB/commit/5f1f8b850f271cd6785291e3feb2c2d4bf979f9c))
+
+## v0.0.8 (2024-07-01)
+
+### Fix
+
+* fix: don't run ci on release of new version ([`fa97104`](https://github.com/TattaBio/DGEB/commit/fa971049429975d06c8aca086e86b19d92383969))
+
+### Unknown
+
+* 0.0.8
+
+Automatically generated by python-semantic-release [skip ci] ([`8dc15d3`](https://github.com/TattaBio/DGEB/commit/8dc15d34c6317087253950893974d16b9f75a17c))
+
+## v0.0.7 (2024-07-01)
+
+### Fix
+
+* fix: try again ([`e7d0ecd`](https://github.com/TattaBio/DGEB/commit/e7d0ecdcb63e909f9ab727f11fb3fd57414d2fa5))
+
+* fix: edit readme to see if job still works with restricted permissions ([`93cd728`](https://github.com/TattaBio/DGEB/commit/93cd728c8a632b9bed611c55dace2e2ffb103410))
+
+### Unknown
+
+* 0.0.7
+
+Automatically generated by python-semantic-release ([`9808d4f`](https://github.com/TattaBio/DGEB/commit/9808d4f328a577c066affd34d408ad26eb6098d0))
+
+* Merge pull request #1 from TattaBio/edit-readme
+
+fix: edit readme to see if job still works with restricted permissions ([`c45599c`](https://github.com/TattaBio/DGEB/commit/c45599cf9628155603245f906c09cf6483cffce8))
+
+## v0.0.6 (2024-07-01)
+
+### Fix
+
+* fix: nevermind that broke it ([`ec33a1c`](https://github.com/TattaBio/DGEB/commit/ec33a1c6539ac1fb2710869a2d436483a02236e0))
+
+* fix: see if I can remove this line ([`246d4e9`](https://github.com/TattaBio/DGEB/commit/246d4e9841a83d18217506d46f211f1341c63526))
+
+### Unknown
+
+* 0.0.6
+
+Automatically generated by python-semantic-release ([`1b28df5`](https://github.com/TattaBio/DGEB/commit/1b28df559c95db0aea95111a5f27d01645d23786))
+
+## v0.0.5 (2024-07-01)
+
+### Fix
+
+* fix: try fixing release to handle protected branch ([`5cedad3`](https://github.com/TattaBio/DGEB/commit/5cedad3e9f34d249eda9257e3c21fc8443d000cf))
+
+* fix: another change... ([`c5ad3f0`](https://github.com/TattaBio/DGEB/commit/c5ad3f098d36e25afdf4fa9aae20967eb968568e))
+
+* fix: update pip install command in readme ([`ff90791`](https://github.com/TattaBio/DGEB/commit/ff90791398f9a9b907c308400f88811a8f8633dc))
+
+### Unknown
+
+* 0.0.5
+
+Automatically generated by python-semantic-release ([`ec24ca3`](https://github.com/TattaBio/DGEB/commit/ec24ca343b49bee85c72907554772976f02eab1a))
+
+## v0.0.4 (2024-07-01)
+
+### Fix
+
+* fix: move gh token to env ([`95e292c`](https://github.com/TattaBio/DGEB/commit/95e292c46f7908659d46bc093ef4903609f1edc5))
+
+### Unknown
+
+* 0.0.4
+
+Automatically generated by python-semantic-release ([`03f3004`](https://github.com/TattaBio/DGEB/commit/03f300476b0aeca2796b780139fce0be037ae636))
+
+## v0.0.3 (2024-07-01)
+
+### Fix
+
+* fix: remove persist credentials ([`2ae683e`](https://github.com/TattaBio/DGEB/commit/2ae683ed7a68b0559b81b1b7f5716636beef1415))
+
+* fix: try to fix release CI ([`1dfc938`](https://github.com/TattaBio/DGEB/commit/1dfc9383b2dab8bba444b09c6b85500dadee7203))
+
+### Unknown
+
+* 0.0.3
+
+Automatically generated by python-semantic-release ([`7cbfc8d`](https://github.com/TattaBio/DGEB/commit/7cbfc8d0acef975d046ff485001ed289800d143f))
+
+## v0.0.2 (2024-07-01)
+
+### Fix
+
+* fix: new repository name ([`8fc1145`](https://github.com/TattaBio/DGEB/commit/8fc1145985eab8aa97562f697edab45a30b189ba))
+
+* fix: addl geb references ([`86a5af8`](https://github.com/TattaBio/DGEB/commit/86a5af8c24244ac8f2670801468e1a25b8e3e9df))
+
+### Unknown
+
+* 0.0.2
+
+Automatically generated by python-semantic-release ([`1c7b19b`](https://github.com/TattaBio/DGEB/commit/1c7b19b50597e9dabe07fbf7cb7d3c589438917a))
+
+## v0.0.1 (2024-07-01)
+
+### Fix
+
+* fix: rename geb to dgeb ([`be712f8`](https://github.com/TattaBio/DGEB/commit/be712f8d19678801b9148ac8397f13afe826871b))
+
+### Unknown
+
+* 0.0.1
+
+Automatically generated by python-semantic-release ([`1503e03`](https://github.com/TattaBio/DGEB/commit/1503e030bb1277e1a2dcad7b99c9ed3472243f5d))
+
+## v0.0.0 (2024-07-01)
+
+### Unknown
+
+* 0.0.0
+
+Automatically generated by python-semantic-release ([`4b791ee`](https://github.com/TattaBio/DGEB/commit/4b791ee07085647427afec31a1adf61977e6bd4c))
+
+* Initial commit ([`36fe62c`](https://github.com/TattaBio/DGEB/commit/36fe62c234331de97f2827a49bf62d5c35b92a1f))
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..58c8634
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,27 @@
+# Docker file for leaderboard
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# install curl (kept from the original image); skip recommended packages and
+# drop the apt lists in the same layer to keep the image small
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# install uv from PyPI so the binary lands on PATH. The remote install script is
+# unpinned and its install directory has changed between uv releases, which
+# breaks a hard-coded /root/.cargo/bin/uv path.
+RUN pip install --no-cache-dir uv
+
+# install deps
+COPY leaderboard/requirements.txt ./
+RUN uv pip install --system --no-cache -r requirements.txt
+
+# copy src
+COPY dgeb dgeb
+COPY leaderboard/ leaderboard/
+
+# Run gradio when the container launches
+EXPOSE 7860
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+ENV GRADIO_TEMP_DIR="/app"
+WORKDIR /app/leaderboard
+CMD ["python", "app.py"]
+
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f49a4e1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..535d54c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,181 @@
+---
+title: DGEB
+app_file: leaderboard/app.py
+sdk: docker
+sdk_version: 4.36.1
+---
+
+# Diverse Genomic Embedding Benchmark
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Installation |
+ Usage |
+ Leaderboard |
+ Citing
+
+
+
+
+
+
+
+DGEB is a benchmark for evaluating biological sequence models on functional and evolutionary information.
+
+DGEB is designed to evaluate model embeddings using:
+
+- Diverse sequences across the tree of life.
+- Diverse tasks that capture different aspects of biological function.
+- Both amino acid and nucleotide sequences.
+
+The current version of DGEB consists of 18 datasets covering all three domains of life (Bacteria, Archaea and Eukarya). DGEB evaluates embeddings using six different embedding tasks: Classification, BiGene mining, Evolutionary Distance Similarity (EDS), Pair Classification, Clustering, and Retrieval.
+
+We welcome contributions of new tasks and datasets.
+
+## Installation
+
+Install DGEB using pip.
+
+```bash
+pip install dgeb
+```
+
+## Usage
+
+- Launch evaluation using the python script (see [cli.py](https://github.com/tattabio/dgeb/blob/main/dgeb/cli.py)):
+
+```bash
+dgeb --model facebook/esm2_t6_8M_UR50D
+```
+
+- To see all supported models and tasks:
+
+```bash
+dgeb --help
+```
+
+- Using the python API:
+
+```py
+import dgeb
+
+model = dgeb.get_model("facebook/esm2_t6_8M_UR50D")
+tasks = dgeb.get_tasks_by_modality(dgeb.Modality.PROTEIN)
+evaluation = dgeb.DGEB(tasks=tasks)
+evaluation.run(model, output_folder="results")
+```
+
+### Using a custom model
+
+Custom models should be wrapped with the `dgeb.models.BioSeqTransformer` abstract class, and specify the modality, number of layers, and embedding dimension. See [models.py](https://github.com/tattabio/dgeb/blob/main/dgeb/models.py) for additional examples on custom model loading and inference.
+
+```python
+import dgeb
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks.tasks import Modality
+
+class MyModel(BioSeqTransformer):
+
+ @property
+ def modality(self) -> Modality:
+ return Modality.PROTEIN
+
+ @property
+ def num_layers(self) -> int:
+ return self.config.num_hidden_layers
+
+ @property
+ def embed_dim(self) -> int:
+ return self.config.hidden_size
+
+
+model = MyModel(model_name='path_to/huggingface_model')
+tasks = dgeb.get_tasks_by_modality(model.modality)
+evaluation = dgeb.DGEB(tasks=tasks)
+evaluation.run(model)
+```
+
+### Evaluating on a custom dataset
+
+**We strongly encourage users to contribute their custom datasets to DGEB. Please open a PR adding your dataset so that the community can benefit!**
+
+To evaluate on a custom dataset, first upload your dataset to the [Huggingface Hub](https://huggingface.co/docs/hub/en/datasets-adding). Then define a `Task` subclass with `TaskMetadata` that points to your huggingface dataset. For example, a classification task on a custom dataset can be defined as follows:
+
+```python
+import dgeb
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
+from dgeb.tasks.classification_tasks import run_classification_task
+
+class MyCustomTask(Task):
+ metadata = TaskMetadata(
+ id="my_custom_classification",
+ display_name="...",
+ description="...",
+ type="classification",
+ modality=dgeb.Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="path_to/huggingface_dataset",
+ revision="...",
+ )
+ ],
+ primary_metric_id="f1",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_classification_task(model, self.metadata)
+
+model = dgeb.get_model("facebook/esm2_t6_8M_UR50D")
+evaluation = dgeb.DGEB(tasks=[MyCustomTask])
+evaluation.run(model)
+```
+
+## Leaderboard
+
+To add your submission to the DGEB leaderboard, proceed through the following instructions.
+
+1. Fork the DGEB repository by following GitHub's instructions for the [forking workflow](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork).
+
+2. Add your submission .json file to the `leaderboard/submissions/<HF_MODEL_NAME>/` directory.
+
+```bash
+mv /path/to/<SUBMISSION_FILE>.json /path/to/DGEB/leaderboard/submissions/<HF_MODEL_NAME>/
+```
+
+3. Update your fork with the new submission:
+
+```bash
+git add leaderboard/submissions/<HF_MODEL_NAME>/<SUBMISSION_FILE>.json
+git commit -m "Add submission for <HF_MODEL_NAME>"
+git push
+```
+
+4. Open a pull request to the main branch of the repository via the GitHub interface.
+
+5. Once the PR is reviewed and merged, your submission will be added to the leaderboard!
+
+
+## Acknowledgements
+
+DGEB follows the design of the text embedding benchmark [MTEB](https://github.com/embeddings-benchmark/mteb) developed by Huggingface 🤗. The evaluation code is adapted from the MTEB codebase.
+
+## Citing
+
+DGEB was introduced in "[Diverse Genomic Embedding Benchmark for Functional Evaluation Across the Tree of Life]()", feel free to cite:
+
+TODO
diff --git a/dgeb/__init__.py b/dgeb/__init__.py
new file mode 100644
index 0000000..38d6f4a
--- /dev/null
+++ b/dgeb/__init__.py
@@ -0,0 +1,28 @@
+from dgeb.dgeb import (
+ DGEB,
+ get_all_model_names,
+ get_all_task_names,
+ get_all_tasks,
+ get_model,
+ get_output_folder,
+ get_tasks_by_modality,
+ get_tasks_by_name,
+)
+from dgeb.modality import Modality
+from dgeb.tasks.tasks import TaskResult
+
+# importing without setting `__all__` produces a Ruff error:
+# "imported but unused; consider removing, adding to __all__, or using a redundant alias RuffF401"
+# See https://docs.astral.sh/ruff/rules/unused-import/#why-is-this-bad
+__all__ = [
+ "DGEB",
+ "get_all_tasks",
+ "get_all_task_names",
+ "get_tasks_by_name",
+ "get_tasks_by_modality",
+ "get_all_model_names",
+ "get_model",
+ "get_output_folder",
+ "TaskResult",
+ "Modality",
+]
diff --git a/dgeb/cli.py b/dgeb/cli.py
new file mode 100644
index 0000000..0be74fb
--- /dev/null
+++ b/dgeb/cli.py
@@ -0,0 +1,136 @@
+"""
+Main command to run diverse genomic embedding benchmarks (DGEB) on a model.
+Example command to run DGEB:
+dgeb -m facebook/esm2_t6_8M_UR50D
+"""
+
+import argparse
+import logging
+import os
+
+import dgeb
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+ALL_TASK_NAMES = dgeb.get_all_task_names()
+ALL_MODEL_NAMES = dgeb.get_all_model_names()
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "-m",
+ "--model",
+ type=str,
+ default=None,
+ help=f"Model to evaluate. Choose from {ALL_MODEL_NAMES}",
+ )
+ parser.add_argument(
+ "-t",
+ "--tasks",
+ type=lambda s: [item for item in s.split(",")],
+ default=None,
+ help=f"Comma separated tasks to evaluate on. Choose from {ALL_TASK_NAMES} or do not specify to evaluate on all tasks",
+ )
+ parser.add_argument(
+ "-l",
+ "--layers",
+ type=str,
+ default=None,
+ help="Layer to evaluate. Comma separated list of integers or 'mid' and 'last'. Default is 'mid,last'",
+ )
+ parser.add_argument(
+ "--devices",
+ type=str,
+ default="0",
+ help="Comma separated list of GPU device ids to use. Default is 0 (if GPUs are detected).",
+ )
+ parser.add_argument(
+ "--output_folder",
+ type=str,
+ default=None,
+ help="Output directory for results. Will default to results/model_name if not set.",
+ )
+ parser.add_argument(
+ "-v", "--verbosity", type=int, default=2, help="Verbosity level"
+ )
+ parser.add_argument(
+ "-b", "--batch_size", type=int, default=64, help="Batch size for evaluation"
+ )
+ parser.add_argument(
+ "--max_seq_len",
+ type=int,
+ default=1024,
+ help="Maximum sequence length for model, default is 1024.",
+ )
+ parser.add_argument(
+ "--pool_type",
+ type=str,
+ default="mean",
+ help="Pooling type for model, choose from mean, max, cls, last. Default is mean.",
+ )
+
+ args = parser.parse_args()
+
+ # set logging based on verbosity level
+ if args.verbosity == 0:
+ logging.getLogger("geb").setLevel(logging.CRITICAL)
+ elif args.verbosity == 1:
+ logging.getLogger("geb").setLevel(logging.WARNING)
+ elif args.verbosity == 2:
+ logging.getLogger("geb").setLevel(logging.INFO)
+ elif args.verbosity == 3:
+ logging.getLogger("geb").setLevel(logging.DEBUG)
+
+ if args.model is None:
+ raise ValueError("Please specify a model using the -m or --model argument")
+
+ # make sure that devices are comma separated list of integers
+ try:
+ devices = [int(device) for device in args.devices.split(",")]
+ except ValueError:
+ raise ValueError("Devices must be comma separated list of integers")
+
+ layers = args.layers
+ if layers:
+ if layers not in ["mid", "last"]:
+ # Layers should be list of integers.
+ try:
+ layers = [int(layer) for layer in layers.split(",")]
+ except ValueError:
+ raise ValueError("Layers must be a list of integers.")
+
+ model_name = args.model.split("/")[-1]
+ output_folder = args.output_folder
+ if output_folder is None:
+ output_folder = os.path.join("results", model_name)
+ # create output folder if it does not exist
+ if not os.path.exists(output_folder):
+ os.makedirs(output_folder)
+ logger.info(f"Results will be saved to {output_folder}")
+
+ # Load the model by name.
+ model = dgeb.get_model(
+ model_name=args.model,
+ layers=layers,
+ devices=devices,
+ max_seq_length=args.max_seq_len,
+ batch_size=args.batch_size,
+ pool_type=args.pool_type,
+ )
+
+ all_tasks_for_modality = dgeb.get_tasks_by_modality(model.modality)
+
+ if args.tasks:
+ task_list = dgeb.get_tasks_by_name(args.tasks)
+ if not all([task.metadata.modality == model.modality for task in task_list]):
+ raise ValueError(f"Tasks must be one of {all_tasks_for_modality}")
+ else:
+ task_list = all_tasks_for_modality
+ evaluation = dgeb.DGEB(tasks=task_list)
+ _ = evaluation.run(model)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/dgeb/dgeb.py b/dgeb/dgeb.py
new file mode 100644
index 0000000..6dfd940
--- /dev/null
+++ b/dgeb/dgeb.py
@@ -0,0 +1,129 @@
+import logging
+import os
+import traceback
+from itertools import chain
+from typing import Any, List
+
+from rich.console import Console
+
+from .eval_utils import set_all_seeds
+from .modality import Modality
+from .models import BioSeqTransformer
+from .tasks.tasks import Task
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class DGEB:
+    """Evaluation pipeline that runs a model through a list of DGEB task classes."""
+
+    def __init__(self, tasks: List[type[Task]], seed: int = 42):
+        """Store the task classes to run and seed the random number generators.
+
+        Args:
+            tasks: Task classes (not instances); each one is instantiated in `run`.
+            seed: Seed forwarded to `set_all_seeds`.
+        """
+        self.tasks = tasks
+        set_all_seeds(seed)
+
+    def print_selected_tasks(self):
+        """Print the display name and type of every selected task to the console."""
+        console = Console()
+        console.rule("[bold]Selected Tasks\n", style="grey15")
+        for task in self.tasks:
+            prefix = " - "
+            name = f"{task.metadata.display_name}"
+            category = f", [italic grey39]{task.metadata.type}[/]"
+            console.print(f"{prefix}{name}{category}")
+        console.print("\n")
+
+    def run(
+        self,
+        model,  # type encoder
+        output_folder: str = "results",
+    ):
+        """Run the evaluation pipeline on the selected tasks.
+
+        A task that raises is logged (message and traceback) and skipped, so one
+        failing task does not abort the remaining ones.
+
+        Args:
+            model: Model to be used for evaluation. Its `hf_name` attribute is read
+                to choose the results sub-folder.
+            output_folder: Root folder for the results. Default to 'results'. Each
+                result is written to
+                `{output_folder}/{basename of model.hf_name}/{task.metadata.id}.json`.
+
+        Returns:
+            A list with the result object of every task that completed, in the
+            order the tasks were run.
+        """
+        # Run selected tasks
+        self.print_selected_tasks()
+        results = []
+
+        for task in self.tasks:
+            logger.info(
+                f"\n\n********************** Evaluating {task.metadata.display_name} **********************"
+            )
+
+            try:
+                result = task().run(model)
+            except Exception as e:
+                # Best effort: report the failure and move on to the next task.
+                logger.error(e)
+                logger.error(traceback.format_exc())
+                logger.error(f"Error running task {task}")
+                continue
+
+            results.append(result)
+
+            save_path = get_output_folder(model.hf_name, task, output_folder)
+            # Explicit encoding keeps the JSON bytes identical across platforms.
+            with open(save_path, "w", encoding="utf-8") as f_out:
+                f_out.write(result.model_dump_json(indent=2))
+        return results
+
+
+def get_model(model_name: str, **kwargs: Any) -> BioSeqTransformer:
+    """Instantiate the model class that registers `model_name`.
+
+    Args:
+        model_name: Name looked up in the `MODEL_NAMES` of each direct
+            `BioSeqTransformer` subclass.
+        **kwargs: Forwarded unchanged to the matching class constructor.
+
+    Returns:
+        An instance (not the class) of the first subclass whose `MODEL_NAMES`
+        contains `model_name`.
+
+    Raises:
+        ValueError: If no subclass lists `model_name`.
+    """
+    for cls in BioSeqTransformer.__subclasses__():
+        if model_name in cls.MODEL_NAMES:
+            return cls(model_name, **kwargs)
+    # The full list of names is only needed for the error message.
+    raise ValueError(f"Model {model_name} not found in {get_all_model_names()}.")
+
+
+def get_all_model_names() -> List[str]:
+    """Return the `MODEL_NAMES` of all direct `BioSeqTransformer` subclasses as one flat list."""
+    names_per_class = (
+        model_cls.MODEL_NAMES for model_cls in BioSeqTransformer.__subclasses__()
+    )
+    return list(chain.from_iterable(names_per_class))
+
+
+def get_all_task_names() -> List[str]:
+    """Return the `metadata.id` of every task class returned by `get_all_tasks`."""
+    task_ids = []
+    for task_cls in get_all_tasks():
+        task_ids.append(task_cls.metadata.id)
+    return task_ids
+
+
+def get_tasks_by_name(tasks: List[str]) -> List[type[Task]]:
+    """Resolve each task id in `tasks` to its task class, keeping the input order.
+
+    An id that matches no task makes `_get_task` raise `ValueError`.
+    """
+    return list(map(_get_task, tasks))
+
+
+def get_tasks_by_modality(modality: Modality) -> List[type[Task]]:
+    """Return every known task class whose `metadata.modality` equals `modality`."""
+    matching = []
+    for task_cls in get_all_tasks():
+        if task_cls.metadata.modality == modality:
+            matching.append(task_cls)
+    return matching
+
+
+def get_all_tasks() -> List[type[Task]]:
+    """Return the direct subclasses of `Task` (indirect subclasses are not included)."""
+    known_tasks = Task.__subclasses__()
+    return known_tasks
+
+
+def _get_task(task_name: str) -> type[Task]:
+    """Return the task class whose `metadata.id` equals `task_name`.
+
+    Raises:
+        ValueError: If no known task has that id; the message lists all known ids.
+    """
+    logger.info(f"Getting task {task_name}")
+    found = next(
+        (
+            candidate
+            for candidate in get_all_tasks()
+            if candidate.metadata.id == task_name
+        ),
+        None,
+    )
+    if found is not None:
+        return found
+
+    raise ValueError(
+        f"Task {task_name} not found, available tasks are: {[task.metadata.id for task in get_all_tasks()]}"
+    )
+
+
+def get_output_folder(
+    model_hf_name: str, task: type[Task], output_folder: str, create: bool = True
+):
+    """Build the JSON results path for `task` inside a per-model sub-folder.
+
+    Args:
+        model_hf_name: Model name; only its basename (the part after the last
+            path separator) is used as the sub-folder name.
+        task: Task class whose `metadata.id` becomes the file name.
+        output_folder: Root folder under which the per-model folder lives.
+        create: If True, create the per-model folder when it is missing.
+
+    Returns:
+        `{output_folder}/{basename(model_hf_name)}/{task.metadata.id}.json`.
+    """
+    model_folder = os.path.join(output_folder, os.path.basename(model_hf_name))
+    if create:
+        # exist_ok avoids the check-then-create race when runs start concurrently.
+        os.makedirs(model_folder, exist_ok=True)
+    return os.path.join(model_folder, f"{task.metadata.id}.json")
diff --git a/dgeb/eval_utils.py b/dgeb/eval_utils.py
new file mode 100644
index 0000000..7b5f630
--- /dev/null
+++ b/dgeb/eval_utils.py
@@ -0,0 +1,394 @@
+"""Utility functions for evaluation."""
+
+from typing import Any, Dict, List, Tuple
+import json
+import torch
+import random
+import numpy as np
+from sklearn.metrics import auc
+
+
+class ForwardHook:
+    """Forward hook that keeps the latest output of the module it is attached to."""
+
+    def __init__(self, module: torch.nn.Module):
+        # Nothing has been captured until the module runs a forward pass.
+        self.output = None
+        self.hook = module.register_forward_hook(self.hook_fn)
+
+    def hook_fn(self, module, input, output):
+        """Hook callback: remember `output`, replacing any earlier value."""
+        self.output = output
+
+    def close(self):
+        """Remove the hook from the module."""
+        self.hook.remove()
+
+
+def pool(
+    last_hidden_states: torch.Tensor, attention_mask: torch.Tensor, pool_type: str
+) -> torch.Tensor:
+    """Pool embeddings across the sequence length dimension.
+
+    Args:
+        last_hidden_states: Hidden states of shape [batch, seq_len, D].
+        attention_mask: Mask of shape [batch, seq_len]; zero marks positions that
+            must not contribute to the pooled embedding.
+        pool_type: One of "mean", "max", "cls" (first position) or "last"
+            (position `attention_mask.sum(1) - 1` of each row).
+
+    Returns:
+        Pooled embeddings of shape [batch, D].
+
+    Raises:
+        ValueError: If `pool_type` is not one of the supported values.
+    """
+    assert (
+        last_hidden_states.ndim == 3
+    ), f"Expected hidden_states to have shape [batch, seq_len, D], got shape: {last_hidden_states.shape}"
+    assert (
+        attention_mask.ndim == 2
+    ), f"Expected attention_mask to have shape [batch, seq_len], got shape: {attention_mask.shape}"
+    masked_out = ~attention_mask[..., None].bool()
+    last_hidden = last_hidden_states.masked_fill(masked_out, 0.0)
+    if pool_type == "mean":
+        return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
+    if pool_type == "max":
+        # Masked positions must be -inf, not 0: a zero would win the max whenever
+        # every unmasked value in a dimension is negative.
+        return last_hidden_states.masked_fill(masked_out, float("-inf")).max(dim=1)[0]
+    if pool_type == "cls":
+        return last_hidden[:, 0]
+    if pool_type == "last":
+        return last_hidden[
+            torch.arange(last_hidden.size(0)), attention_mask.sum(1) - 1
+        ]
+    raise ValueError(f"pool_type {pool_type} not supported")
+
+
+def set_all_seeds(seed):
+    """Seed the Python, NumPy and PyTorch RNGs and enable deterministic cuDNN.
+
+    Args:
+        seed: Integer seed applied to every generator.
+    """
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    # Seed every CUDA device rather than only the current one, so multi-GPU runs
+    # are seeded the same way as `Evaluator.__init__` in evaluators.py does.
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+
+
+def write_results_to_json(results: Dict[str, Any], results_path: str):
+    """Serialize `results` as JSON with 4-space indentation into `results_path`.
+
+    The file is opened in write mode, so an existing file is overwritten.
+    """
+    with open(results_path, mode="w") as out_file:
+        json.dump(obj=results, fp=out_file, indent=4)
+
+
+def merge_split_elem_embeds(ids, embeds, preserve_order: bool = False):
+    """Merge embeddings with the same id by mean-pooling and optionally preserve order in which they appear.
+
+    Args:
+        ids: Array (or list) of string ids, [batch].
+        embeds: Array of embeddings, [batch, ...].
+        preserve_order: If True, the unique ids and their embeddings are returned
+            in order of first appearance in `ids`; otherwise in the sorted order
+            produced by `np.unique`.
+
+    Returns:
+        ids: Unique ids, [unique_batch].
+        embeds: Array of embeddings, [unique_batch, ...].
+    """
+    # first_seen[i] is the index of the first occurrence of unique_ids[i] in ids.
+    unique_ids, first_seen, indices = np.unique(
+        ids, return_index=True, return_inverse=True
+    )
+    shape_no_batch = embeds.shape[1:]
+    sums = np.zeros([unique_ids.size, *shape_no_batch], dtype=embeds.dtype)
+    counts = np.bincount(indices, minlength=unique_ids.size)
+    # Unbuffered add so rows that share an id all accumulate into the same slot.
+    np.add.at(sums, indices, embeds)
+    # Add trailing dimensions to counts.
+    counts = counts[(...,) + (None,) * len(shape_no_batch)]
+    mean_pooled = sums / counts
+    # Preserve the order of the input ids.
+    if preserve_order:
+        re_order = np.argsort(first_seen)
+        unique_ids = unique_ids[re_order]
+        mean_pooled = mean_pooled[re_order]
+    return unique_ids, mean_pooled
+
+
+def paired_dataset(labels, embeds):
+    """Creates a paired dataset for consecutive operonic gene pairs.
+
+    Element i of `embeds` is paired with element i + 1, and the pair carries
+    `labels[i]`; the final label has no successor and is dropped.
+
+    Returns:
+        A tuple `(embeds[:-1], embeds[1:], labels[:-1])`.
+    """
+    return embeds[:-1], embeds[1:], labels[:-1]
+
+
+def cos_sim(a, b):
+    """Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j.
+
+    Non-tensor inputs are converted with `torch.tensor`, and 1-D inputs are
+    treated as a single row.
+
+    Return:
+        Matrix with res[i][j] = cos_sim(a[i], b[j])
+    """  # noqa: D402
+
+    def _as_rows(value):
+        tensor = value if isinstance(value, torch.Tensor) else torch.tensor(value)
+        return tensor.unsqueeze(0) if len(tensor.shape) == 1 else tensor
+
+    lhs_unit = torch.nn.functional.normalize(_as_rows(a), p=2, dim=1)
+    rhs_unit = torch.nn.functional.normalize(_as_rows(b), p=2, dim=1)
+    return torch.mm(lhs_unit, rhs_unit.transpose(0, 1))
+
+
+def dot_score(a: torch.Tensor, b: torch.Tensor):
+    """Computes the dot-product dot_prod(a[i], b[j]) for all i and j.
+
+    Non-tensor inputs are converted with `torch.tensor`, and 1-D inputs are
+    treated as a single row.
+
+    :return: Matrix with res[i][j] = dot_prod(a[i], b[j])
+    """
+    operands = []
+    for value in (a, b):
+        tensor = value if isinstance(value, torch.Tensor) else torch.tensor(value)
+        if len(tensor.shape) == 1:
+            tensor = tensor.unsqueeze(0)
+        operands.append(tensor)
+    lhs, rhs = operands
+    return torch.mm(lhs, rhs.transpose(0, 1))
+
+
+# From https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/custom_metrics.py#L4
+def mrr(
+    qrels: dict[str, dict[str, int]],
+    results: dict[str, dict[str, float]],
+    k_values: List[int],
+    output_type: str = "mean",
+) -> Tuple[Dict[str, float]]:
+    """Compute the reciprocal rank of the first relevant hit at each cutoff in `k_values`.
+
+    Args:
+        qrels: query id -> {doc id -> relevance}; a doc is relevant when its value is > 0.
+        results: query id -> {doc id -> score}; higher scores rank first.
+        k_values: Rank cutoffs; one "MRR@k" entry is produced per cutoff.
+        output_type: "mean" averages over `len(qrels)` and rounds to 5 decimals;
+            any other value leaves the per-query lists untouched.
+
+    Returns:
+        A dict keyed "MRR@k" (a plain dict, despite the annotation) holding a float
+        for "mean" or a list of per-query reciprocal ranks otherwise.
+    """
+    MRR = {f"MRR@{k}": [] for k in k_values}
+    k_max = max(k_values)
+
+    # Keep only the k_max best-scoring documents of every query.
+    top_hits = {
+        query_id: sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)[
+            0:k_max
+        ]
+        for query_id, doc_scores in results.items()
+    }
+
+    for query_id, ranked in top_hits.items():
+        relevant_ids = {
+            doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0
+        }
+        for k in k_values:
+            # Reciprocal rank of the first relevant document, or 0 if none is in the top k.
+            reciprocal_rank = next(
+                (
+                    1.0 / (position + 1)
+                    for position, hit in enumerate(ranked[0:k])
+                    if hit[0] in relevant_ids
+                ),
+                0,
+            )
+            MRR[f"MRR@{k}"].append(reciprocal_rank)
+
+    if output_type == "mean":
+        for k in k_values:
+            key = f"MRR@{k}"
+            MRR[key] = round(sum(MRR[key]) / len(qrels), 5)
+
+    return MRR
+
+
+# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
+def recall_cap(
+    qrels: dict[str, dict[str, int]],
+    results: dict[str, dict[str, float]],
+    k_values: List[int],
+    output_type: str = "mean",
+) -> Tuple[Dict[str, float]]:
+    """Compute capped recall: relevant docs in the top k divided by min(#relevant, k).
+
+    Args:
+        qrels: query id -> {doc id -> relevance}; a doc is relevant when its value is > 0.
+        results: query id -> {doc id -> score}; higher scores rank first.
+        k_values: Rank cutoffs; one "R_cap@k" entry is produced per cutoff.
+        output_type: "mean" averages over `len(qrels)` and rounds to 5 decimals;
+            any other value leaves the per-query lists untouched.
+
+    Returns:
+        A dict keyed "R_cap@k" (a plain dict, despite the annotation).
+
+    Raises:
+        ZeroDivisionError: If a query in `results` has no relevant document in `qrels`.
+    """
+    capped_recall = {f"R_cap@{k}": [] for k in k_values}
+    k_max = max(k_values)
+
+    for query_id, doc_scores in results.items():
+        ranked = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)
+        ranked = ranked[0:k_max]
+        judgements = qrels[query_id]
+        num_relevant = sum(1 for doc_id in judgements if judgements[doc_id] > 0)
+        for k in k_values:
+            num_hits = sum(
+                1 for row in ranked[0:k] if qrels[query_id].get(row[0], 0) > 0
+            )
+            denominator = min(num_relevant, k)
+            capped_recall[f"R_cap@{k}"].append(num_hits / denominator)
+
+    if output_type == "mean":
+        for k in k_values:
+            key = f"R_cap@{k}"
+            capped_recall[key] = round(sum(capped_recall[key]) / len(qrels), 5)
+
+    return capped_recall
+
+
+# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
+def hole(
+    qrels: dict[str, dict[str, int]],
+    results: dict[str, dict[str, float]],
+    k_values: List[int],
+    output_type: str = "mean",
+) -> Tuple[Dict[str, float]]:
+    """Compute Hole@k: the fraction of the top-k results that have no judgement at all.
+
+    A retrieved document is a "hole" when its id appears in no query's `qrels` entry.
+
+    Args:
+        qrels: query id -> {doc id -> relevance}.
+        results: query id -> {doc id -> score}; higher scores rank first.
+        k_values: Rank cutoffs; one "Hole@k" entry is produced per cutoff.
+        output_type: "mean" averages over `len(qrels)` and rounds to 5 decimals;
+            any other value leaves the per-query lists untouched.
+
+    Returns:
+        A dict keyed "Hole@k" (a plain dict, despite the annotation) holding a
+        float for "mean" or a list of per-query fractions otherwise.
+    """
+    Hole = {f"Hole@{k}": [] for k in k_values}
+
+    # Every document id that carries a judgement for at least one query.
+    annotated_corpus = {doc_id for docs in qrels.values() for doc_id in docs}
+
+    k_max = max(k_values)
+
+    for scores in results.values():
+        top_hits = sorted(scores.items(), key=lambda item: item[1], reverse=True)[
+            0:k_max
+        ]
+        for k in k_values:
+            hole_docs = [
+                row[0] for row in top_hits[0:k] if row[0] not in annotated_corpus
+            ]
+            Hole[f"Hole@{k}"].append(len(hole_docs) / k)
+
+    if output_type == "mean":
+        for k in k_values:
+            key = f"Hole@{k}"
+            # Sum the per-query values first; dividing the list itself is a TypeError.
+            Hole[key] = round(sum(Hole[key]) / len(qrels), 5)
+
+    return Hole
+
+
+# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
+def top_k_accuracy(
+    qrels: dict[str, dict[str, int]],
+    results: dict[str, dict[str, float]],
+    k_values: List[int],
+    output_type: str = "mean",
+) -> Tuple[Dict[str, float]]:
+    """Compute Accuracy@k: whether at least one relevant document is in the top k.
+
+    Args:
+        qrels: query id -> {doc id -> relevance}; a doc is relevant when its value is > 0.
+        results: query id -> {doc id -> score}; higher scores rank first.
+        k_values: Rank cutoffs; one "Accuracy@k" entry is produced per cutoff.
+        output_type: "mean" averages over `len(qrels)` and rounds to 5 decimals;
+            any other value leaves the per-query lists untouched.
+
+    Returns:
+        A dict keyed "Accuracy@k" (a plain dict, despite the annotation) holding a
+        float for "mean", or otherwise a list with one 1.0/0.0 entry per query in
+        `results` order.
+    """
+    top_k_acc = {f"Accuracy@{k}": [] for k in k_values}
+    k_max = max(k_values)
+
+    # Ids of the k_max best-scoring documents of every query.
+    top_hits = {
+        query_id: [
+            item[0]
+            for item in sorted(
+                doc_scores.items(), key=lambda item: item[1], reverse=True
+            )[0:k_max]
+        ]
+        for query_id, doc_scores in results.items()
+    }
+
+    for query_id, ranked_ids in top_hits.items():
+        query_relevant_docs = {
+            doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0
+        }
+        for k in k_values:
+            hit = any(doc_id in query_relevant_docs for doc_id in ranked_ids[0:k])
+            # Record misses as 0.0 too, so per-query lists stay aligned with queries.
+            top_k_acc[f"Accuracy@{k}"].append(1.0 if hit else 0.0)
+
+    if output_type == "mean":
+        for k in k_values:
+            key = f"Accuracy@{k}"
+            # Sum the per-query values first; dividing the list itself is a TypeError.
+            top_k_acc[key] = round(sum(top_k_acc[key]) / len(qrels), 5)
+
+    return top_k_acc
+
+
+# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
+def confidence_scores(sim_scores: List[float]) -> Dict[str, float]:
+    """Computes confidence scores for a single instance = (query, positives, negatives)
+
+    Args:
+        sim_scores: Query-documents similarity scores with length `num_pos+num_neg`
+
+    Returns:
+        conf_scores:
+            - `max`: Maximum similarity score
+            - `std`: Standard deviation of similarity scores
+            - `diff1`: Difference between highest and second highest similarity scores
+              (0.0 when there is only one score)
+    """
+    descending = sorted(sim_scores, reverse=True)
+
+    highest = descending[0]
+    spread = np.std(sim_scores)
+    top_gap = descending[0] - descending[1] if len(sim_scores) > 1 else 0.0
+
+    return {"max": highest, "std": spread, "diff1": top_gap}
+
+
+# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
+def nAUC(
+    conf_scores: np.ndarray,
+    metrics: np.ndarray,
+    abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1],
+) -> float:
+    """Normalized area under the abstention curve, as presented in https://arxiv.org/abs/2402.12997
+
+    Steps:
+    1/ Raw abstention curve: the mean metric left after abstaining on the instances
+       with the lowest confidence scores, for each abstention rate
+    2/ Oracle abstention curve: the same curve when the metric itself is used as the
+       confidence (the best theoretical abstention)
+    3/ Flat abstention curve: constant at the no-abstention value (ineffective abstention)
+    4/ Area under each of the three curves
+    5/ The raw AUC rescaled between the flat and the oracle AUCs
+
+    Args:
+        conf_scores: Instance confidence scores used for abstention thresholding, with shape `(num_test_instances,)`
+        metrics: Metric evaluations at instance-level (e.g.: average precision, NDCG...), with shape `(num_test_instances,)`
+        abstention_rates: Target rates for the computation of the abstention curve
+
+    Returns:
+        abst_nauc: Normalized area under the abstention curve (upper-bounded by 1);
+            NaN when the oracle and flat areas are equal.
+    """
+
+    def _curve_for(ranking_scores: np.ndarray) -> np.ndarray:
+        """Mean of `metrics` over the instances kept at each abstention rate."""
+        ascending = np.argsort(ranking_scores)
+        curve = np.zeros(len(abstention_rates))
+        for position, rate in enumerate(abstention_rates):
+            # Always keep at least one instance so the mean is defined.
+            num_dropped = min(round(rate * len(ascending)), len(ranking_scores) - 1)
+            curve[position] = metrics[ascending[num_dropped:]].mean()
+        return curve
+
+    raw_curve = _curve_for(conf_scores)
+    oracle_curve = _curve_for(metrics)
+    raw_area = auc(abstention_rates, raw_curve)
+    oracle_area = auc(abstention_rates, oracle_curve)
+    flat_area = oracle_curve[0] * (abstention_rates[-1] - abstention_rates[0])
+
+    if oracle_area == flat_area:
+        return np.nan
+    return (raw_area - flat_area) / (oracle_area - flat_area)
diff --git a/dgeb/evaluators.py b/dgeb/evaluators.py
new file mode 100644
index 0000000..5098970
--- /dev/null
+++ b/dgeb/evaluators.py
@@ -0,0 +1,839 @@
+"""
+Evaluator objects for different evaluation types.
+"""
+
+import logging
+import random
+from abc import ABC, abstractmethod
+import heapq
+from collections import defaultdict
+import pytrec_eval
+import numpy as np
+import sklearn.cluster
+import torch
+from scipy.stats import pearsonr
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import (
+ accuracy_score,
+ average_precision_score,
+ classification_report,
+ f1_score,
+ precision_score,
+ recall_score,
+ label_ranking_average_precision_score,
+)
+from sklearn.metrics.cluster import v_measure_score
+from sklearn.metrics.pairwise import (
+ paired_cosine_distances,
+ paired_euclidean_distances,
+ paired_manhattan_distances,
+)
+from sklearn.multioutput import MultiOutputRegressor
+from sklearn.preprocessing import MultiLabelBinarizer
+from typing import Dict, List, Tuple
+
+from .eval_utils import (
+ cos_sim,
+ dot_score,
+ mrr,
+ recall_cap,
+ hole,
+ confidence_scores,
+ nAUC,
+ top_k_accuracy,
+)
+
+
+class Evaluator(ABC):
+    """Abstract base for all evaluators.
+
+    Subclasses implement `__call__`; constructing any evaluator seeds the Python,
+    NumPy and PyTorch (CPU and all CUDA devices) random number generators.
+    """
+
+    def __init__(self, seed=42, **kwargs):
+        self.seed = seed
+        seeders = (
+            random.seed,
+            np.random.seed,
+            torch.manual_seed,
+            torch.cuda.manual_seed_all,
+        )
+        for apply_seed in seeders:
+            apply_seed(self.seed)
+
+    @abstractmethod
+    def __call__(self, model):
+        """Evaluate and return scores; must be overridden.
+
+        Parameters
+        ----------
+        model:
+            the model to evaluate
+        """
+        ...
+
+
+logger = logging.getLogger(__name__)
+
+
+class logRegClassificationEvaluator(Evaluator):
+    """Fits a logistic regression on train embeddings and scores it on test embeddings."""
+
+    def __init__(
+        self,
+        embeds_train,
+        y_train,
+        embeds_test,
+        y_test,
+        max_iter=1000,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.embeds_train, self.y_train = embeds_train, y_train
+        self.embeds_test, self.y_test = embeds_test, y_test
+        self.max_iter = max_iter
+
+    def __call__(self):
+        """Return accuracy and macro F1, plus average precision when `y_train` has two classes."""
+        logger.info(f"Encoding {len(self.embeds_train)} training embeds...")
+        X_train = np.asarray(self.embeds_train)
+
+        logger.info(f"Encoding {len(self.embeds_test)} test embeds...")
+        X_test = np.asarray(self.embeds_test)
+
+        # sklearn's own progress output is only wanted at DEBUG level.
+        sklearn_verbosity = 1 if logger.isEnabledFor(logging.DEBUG) else 0
+        classifier = LogisticRegression(
+            random_state=self.seed,
+            n_jobs=-1,
+            max_iter=self.max_iter,
+            verbose=sklearn_verbosity,
+        )
+        logger.info("Fitting logistic regression classifier...")
+        classifier.fit(X_train, self.y_train)
+
+        logger.info("Evaluating...")
+        predicted = classifier.predict(X_test)
+        scores = {
+            "accuracy": accuracy_score(self.y_test, predicted),
+            "f1": f1_score(self.y_test, predicted, average="macro"),
+        }
+
+        # if binary classification
+        is_binary = len(np.unique(self.y_train)) == 2
+        if is_binary:
+            scores["ap"] = average_precision_score(self.y_test, predicted)
+
+        return scores
+
+
+class ClusteringEvaluator(Evaluator):
+    """Clusters embeddings with Mini-Batch K-Means and scores the clusters against labels."""
+
+    def __init__(
+        self,
+        embeds,
+        labels,
+        clustering_batch_size=500,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.embeds, self.labels = embeds, labels
+        self.clustering_batch_size = clustering_batch_size
+
+    def __call__(self):
+        """Return `{"v_measure": ...}` for a clustering with one cluster per distinct label."""
+        logger.info(f"Encoding {len(self.embeds)} embeds...")
+        corpus_embeddings = np.asarray(self.embeds)
+
+        logger.info("Fitting Mini-Batch K-Means model...")
+        num_clusters = len(set(self.labels))
+        kmeans = sklearn.cluster.MiniBatchKMeans(
+            n_clusters=num_clusters,
+            batch_size=self.clustering_batch_size,
+            n_init="auto",
+        )
+        kmeans.fit(corpus_embeddings)
+
+        logger.info("Evaluating...")
+        return {"v_measure": v_measure_score(self.labels, kmeans.labels_)}
+
+
+class PairClassificationEvaluator(Evaluator):
+    """Evaluate a model based on the similarity of the embeddings by calculating the accuracy of identifying similar and
+    dissimilar embeds.
+    The measures are cosine similarity and dot product as well as Manhattan and Euclidean distance.
+    For each measure the best-threshold accuracy, the best-threshold F1 (with its precision and recall)
+    and the average precision are returned; `__call__` also adds `top_ap`, the best average precision.
+    The labels need to be 0 for dissimilar pairs and 1 for similar pairs.
+    :param embeds1: The first column of embeds
+    :param embeds2: The second column of embeds
+    :param labels: labels[i] is the label for the pair (embeds1[i], embeds2[i]). Must be 0 or 1
+    """
+
+    def __init__(self, embeds1, embeds2, labels, **kwargs):
+        super().__init__(**kwargs)
+        self.embeds1 = embeds1
+        self.embeds2 = embeds2
+        self.labels = labels
+
+        assert len(self.embeds1) == len(self.embeds2)
+        assert len(self.embeds1) == len(self.labels)
+        for label in labels:
+            assert label == 0 or label == 1
+
+    def __call__(self):
+        """Return all per-measure metrics plus `top_ap`."""
+        scores = self.compute_metrics()
+        # Compute the max of Average Precision (AP) over all distance metrics.
+        top_ap_score = max(score for k, score in scores.items() if k.endswith("_ap"))
+        scores["top_ap"] = top_ap_score
+        return scores
+
+    def compute_metrics(self):
+        """Score every similarity/distance measure.
+
+        Returns:
+            `dict`: Metrics keyed `{measure}_{metric}` with measure in
+            `cos_sim`, `manhattan`, `euclidean`, `dot`.
+        """
+        embeddings1 = np.array(self.embeds1)
+        embeddings2 = np.array(self.embeds2)
+
+        logger.info("Computing similarity distances...")
+        cosine_scores = 1 - paired_cosine_distances(embeddings1, embeddings2)
+        manhattan_distances = paired_manhattan_distances(embeddings1, embeddings2)
+        euclidean_distances = paired_euclidean_distances(embeddings1, embeddings2)
+        # Row-wise dot product, kept as an array like the other measures.
+        dot_scores = np.array(
+            [np.dot(row1, row2) for row1, row2 in zip(embeddings1, embeddings2)]
+        )
+
+        logger.info("Computing metrics...")
+        labels = np.asarray(self.labels)
+        output_scores = {}
+        # The last element says whether a higher value means "more similar".
+        for short_name, name, scores, reverse in [
+            ["cos_sim", "Cosine-Similarity", cosine_scores, True],
+            ["manhattan", "Manhattan-Distance", manhattan_distances, False],
+            ["euclidean", "Euclidean-Distance", euclidean_distances, False],
+            ["dot", "Dot-Product", dot_scores, True],
+        ]:
+            metrics = self._compute_metrics(scores, labels, reverse)
+            metrics = {short_name + "_" + k: v for k, v in metrics.items()}
+            output_scores.update(metrics)
+
+        return output_scores
+
+    @staticmethod
+    def _compute_metrics(scores, labels, high_score_more_similar):
+        """Compute the metrics for the given scores and labels.
+
+        Args:
+            scores (`np.ndarray` of shape (n_pairs, )): The similarity/dissimilarity scores for the pairs.
+            labels (`np.ndarray` of shape (n_pairs, )): The labels for the pairs.
+            high_score_more_similar (`bool`): If true, then the higher the score, the more similar the pairs are.
+
+        Returns:
+            `dict`: The metrics for the given scores and labels.
+        """
+        acc, acc_threshold = PairClassificationEvaluator.find_best_acc_and_threshold(
+            scores, labels, high_score_more_similar
+        )
+        f1, precision, recall, f1_threshold = (
+            PairClassificationEvaluator.find_best_f1_and_threshold(
+                scores, labels, high_score_more_similar
+            )
+        )
+        ap = PairClassificationEvaluator.ap_score(
+            scores, labels, high_score_more_similar
+        )
+
+        return {
+            "accuracy": acc,
+            "accuracy_threshold": acc_threshold,
+            "f1": f1,
+            "f1_threshold": f1_threshold,
+            "precision": precision,
+            "recall": recall,
+            "ap": ap,
+        }
+
+    @staticmethod
+    def find_best_acc_and_threshold(scores, labels, high_score_more_similar: bool):
+        """Find the decision threshold that maximises accuracy.
+
+        Pairs are sorted from most to least similar; each gap between neighbouring
+        pairs is tried as a cut, with everything before it predicted similar.
+
+        Returns:
+            `(max_acc, best_threshold)`; `(0, -1)` when no cut has accuracy above 0.
+        """
+        assert len(scores) == len(labels)
+        rows = list(zip(scores, labels))
+
+        rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)
+
+        max_acc = 0
+        best_threshold = -1
+
+        positive_so_far = 0
+        remaining_negatives = sum(np.array(labels) == 0)
+
+        for i in range(len(rows) - 1):
+            score, label = rows[i]
+            if label == 1:
+                positive_so_far += 1
+            else:
+                remaining_negatives -= 1
+
+            # Correct = positives above the cut + negatives below the cut.
+            acc = (positive_so_far + remaining_negatives) / len(labels)
+            if acc > max_acc:
+                max_acc = acc
+                best_threshold = (rows[i][0] + rows[i + 1][0]) / 2
+
+        return max_acc, best_threshold
+
+    @staticmethod
+    def find_best_f1_and_threshold(scores, labels, high_score_more_similar: bool):
+        """Find the decision threshold that maximises F1 for the positive class.
+
+        Returns:
+            `(best_f1, best_precision, best_recall, threshold)`; all zeros when no
+            cut yields a positive F1.
+        """
+        assert len(scores) == len(labels)
+
+        scores = np.asarray(scores)
+        labels = np.asarray(labels)
+
+        rows = list(zip(scores, labels))
+
+        rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)
+
+        best_f1 = best_precision = best_recall = 0
+        threshold = 0
+        nextract = 0
+        ncorrect = 0
+        total_num_duplicates = sum(labels)
+
+        for i in range(len(rows) - 1):
+            score, label = rows[i]
+            nextract += 1
+
+            if label == 1:
+                ncorrect += 1
+
+            # Precision/recall are only meaningful once a positive has been extracted.
+            if ncorrect > 0:
+                precision = ncorrect / nextract
+                recall = ncorrect / total_num_duplicates
+                f1 = 2 * precision * recall / (precision + recall)
+                if f1 > best_f1:
+                    best_f1 = f1
+                    best_precision = precision
+                    best_recall = recall
+                    threshold = (rows[i][0] + rows[i + 1][0]) / 2
+
+        return best_f1, best_precision, best_recall, threshold
+
+    @staticmethod
+    def ap_score(scores, labels, high_score_more_similar: bool):
+        """Average precision of `scores` against `labels`.
+
+        Distances are negated so that a higher value always means "more similar".
+        """
+        # np.asarray so that negation also works for plain lists (list * -1 == []).
+        return average_precision_score(
+            labels, np.asarray(scores) * (1 if high_score_more_similar else -1)
+        )
+
+
+class MultiClassMultiOutputLogRegClassificationEvaluator(Evaluator):
+    """Multi-label evaluation with one logistic regression per label.
+
+    Each entry of `y_train` / `y_test` is a string of labels separated by ", ".
+    """
+
+    def __init__(
+        self,
+        embeds_train,
+        y_train,
+        embeds_test,
+        y_test,
+        max_iter=1000,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.embeds_train, self.y_train = embeds_train, y_train
+        self.embeds_test, self.y_test = embeds_test, y_test
+        self.max_iter = max_iter
+
+    def __call__(self):
+        """Return macro precision/recall/F1 and accuracy on the test split."""
+        train_labels = [class_label.split(", ") for class_label in self.y_train]
+        test_labels = [class_label.split(", ") for class_label in self.y_test]
+
+        # Fit the binarizer on all classes in y_train and y_test.
+        mlb = MultiLabelBinarizer()
+        mlb.fit(train_labels + test_labels)
+        y_train = mlb.transform(train_labels)
+        y_test = mlb.transform(test_labels)
+
+        base_estimator = LogisticRegression(
+            random_state=self.seed, solver="lbfgs", max_iter=self.max_iter
+        )
+        clf = MultiOutputRegressor(base_estimator).fit(self.embeds_train, y_train)
+        y_pred = clf.predict(self.embeds_test)
+
+        results_dict = classification_report(y_test, y_pred, output_dict=True)
+        assert isinstance(
+            results_dict, dict
+        ), "Should always be true since `output_dict=True` is passed to sklearn.metric.classification_report"
+        macro_avg = results_dict["macro avg"]
+        return {
+            "precision": macro_avg["precision"],
+            "recall": macro_avg["recall"],
+            "f1": macro_avg["f1-score"],
+            "accuracy": accuracy_score(y_test, y_pred),
+        }
+
+
+class MultiClassMultiOutputKNNClassificationEvaluator(Evaluator):
+    """Multi-label evaluation with a cosine-distance k-nearest-neighbours classifier.
+
+    Each entry of `y_train` / `y_test` is a string of labels separated by ", ".
+    """
+
+    def __init__(
+        self,
+        embeds_train,
+        y_train,
+        embeds_test,
+        y_test,
+        n_neighbors=5,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.embeds_train = embeds_train
+        self.y_train = y_train
+        self.embeds_test = embeds_test
+        self.y_test = y_test
+        self.n_neighbors = n_neighbors
+
+    def __call__(self):
+        """Return macro F1/precision/recall, accuracy and LRAP on the test split."""
+        # Imported here because the module only imports `sklearn.cluster`; relying
+        # on `sklearn.neighbors` being loaded as a side effect is fragile.
+        from sklearn.neighbors import KNeighborsClassifier
+
+        scores = {}
+
+        mlb = MultiLabelBinarizer()
+        # Fit the binarizer on all classes in y_train and y_test.
+        class_labels = list(self.y_train) + list(self.y_test)
+        labels = [class_label.split(", ") for class_label in class_labels]
+        mlb.fit(labels)
+        train_labels = [class_label.split(", ") for class_label in self.y_train]
+        test_labels = [class_label.split(", ") for class_label in self.y_test]
+
+        y_train = mlb.transform(train_labels)
+        y_test = mlb.transform(test_labels)
+        clf = KNeighborsClassifier(n_neighbors=self.n_neighbors, metric="cosine")
+        logger.info("Fitting KNN classifier...")
+        clf.fit(self.embeds_train, y_train)
+        logger.info("Evaluating...")
+        y_pred = clf.predict(self.embeds_test)
+        accuracy = accuracy_score(y_test, y_pred)
+        f1 = f1_score(y_test, y_pred, average="macro")
+        precision = precision_score(y_test, y_pred, average="macro")
+        recall = recall_score(y_test, y_pred, average="macro")
+        lrap = label_ranking_average_precision_score(y_test, y_pred)
+        scores["f1"] = f1
+        scores["accuracy"] = accuracy
+        scores["precision"] = precision
+        scores["recall"] = recall
+        scores["lrap"] = lrap
+
+        return scores
+
+
+class BiGeneMiningEvaluator(Evaluator):
+ """
+ BiGene Mining Evaluator, analogous to Bitext Mining Evaluator https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/BitextMiningEvaluator.py.
+
+ Row i of `embeds1` is expected to match row i of `embeds2`; each row of `embeds1`
+ is used as a query against all rows of `embeds2`.
+
+ If top_k > 1, then recall@k is also computed.
+ """
+
+ def __init__(self, embeds1, embeds2, top_k=1, **kwargs):
+ super().__init__(**kwargs)
+ self.n = len(embeds1)
+ self.embeds1 = np.array(embeds1)
+ self.embeds2 = np.array(embeds2)
+ # Gold pairs are (i, i): query i should retrieve corpus entry i.
+ self.gold = list(zip(range(self.n), range(self.n)))
+ self.top_k = top_k
+
+ def __call__(self):
+ """Return the metrics computed by `compute_metrics`."""
+ scores = self.compute_metrics()
+ return scores
+
+ def compute_metrics(self):
+ """Retrieve nearest neighbours and score the top-1 prediction of every query.
+
+ Returns a dict with weighted `precision`, `recall` and `f1`, plus `accuracy`;
+ when `top_k > 1` it also contains `recall_at_{top_k}`.
+ """
+ logger.info(f"Finding nearest neighbors... with top_k={self.top_k}")
+ nearest_neighbors = self._similarity_search(
+ self.embeds1, self.embeds2, top_k=self.top_k
+ )
+
+ # Compute errors
+ logger.info("Computing metrics...")
+ labels = []
+ predictions = []
+
+ # Get predictions and labels for top_k=1.
+ for i, x in enumerate(nearest_neighbors):
+ # x is sorted by decreasing score, so x[0] is the best match.
+ j = x[0]["corpus_id"]
+ predictions.append(j)
+ labels.append(self.gold[i][1])
+
+ scores = {
+ "precision": precision_score(
+ labels, predictions, zero_division=0, average="weighted"
+ ),
+ "recall": recall_score(
+ labels, predictions, zero_division=0, average="weighted"
+ ),
+ "f1": f1_score(labels, predictions, zero_division=0, average="weighted"),
+ "accuracy": accuracy_score(labels, predictions),
+ }
+
+ if self.top_k > 1:
+ # Compute recall@k.
+ top_k_preds = []
+ for i, x in enumerate(nearest_neighbors):
+ top_k_preds.append([pred["corpus_id"] for pred in x])
+ # A query counts as recalled when its gold corpus id is among its top-k ids.
+ top_k_recall = [
+ self.gold[i][1] in top_k_pred
+ for i, top_k_pred in enumerate(top_k_preds)
+ ]
+ scores[f"recall_at_{self.top_k}"] = sum(top_k_recall) / len(top_k_recall)
+ return scores
+
+ def _similarity_search(
+ self,
+ query_embeddings,
+ corpus_embeddings,
+ query_chunk_size=100,
+ corpus_chunk_size=500000,
+ top_k=1,
+ score_function=cos_sim,
+ ):
+ """This function performs a cosine similarity search between a list of query embeddings and a list of corpus embeddings.
+ It can be used for Information Retrieval / Semantic Search for corpora up to about 1 Million entries.
+ :param query_embeddings: A 2 dimensional numpy array with the query embeddings (converted with `torch.from_numpy`).
+ :param corpus_embeddings: A 2 dimensional numpy array with the corpus embeddings (converted with `torch.from_numpy`).
+ :param query_chunk_size: Process 100 queries simultaneously. Increasing that value increases the speed, but requires more memory.
+ :param corpus_chunk_size: Scans the corpus 500k entries at a time. Increasing that value increases the speed, but requires more memory.
+ :param top_k: Retrieve top k matching entries.
+ :param score_function: Function for computing scores. By default, cosine similarity.
+ :return: Returns a list with one entry for each query. Each entry is a list of dictionaries with the keys 'corpus_id' and 'score', sorted by decreasing cosine similarity scores.
+ """
+ query_embeddings = torch.from_numpy(query_embeddings)
+ corpus_embeddings = torch.from_numpy(corpus_embeddings)
+ if len(query_embeddings.shape) == 1:
+ query_embeddings = query_embeddings.unsqueeze(0)
+ if len(corpus_embeddings.shape) == 1:
+ corpus_embeddings = corpus_embeddings.unsqueeze(0)
+
+ # Check that corpus and queries are on the same device
+ if corpus_embeddings.device != query_embeddings.device:
+ query_embeddings = query_embeddings.to(corpus_embeddings.device)
+
+ queries_result_list = [[] for _ in range(len(query_embeddings))]
+
+ for query_start_idx in range(0, len(query_embeddings), query_chunk_size):
+ # Iterate over chunks of the corpus
+ for corpus_start_idx in range(0, len(corpus_embeddings), corpus_chunk_size):
+ # Compute cosine similarities
+ cos_scores = score_function(
+ query_embeddings[
+ query_start_idx : query_start_idx + query_chunk_size
+ ],
+ corpus_embeddings[
+ corpus_start_idx : corpus_start_idx + corpus_chunk_size
+ ],
+ )
+
+ # Get top-k scores
+ # (unsorted here; every query's candidates are sorted once after all chunks)
+ cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
+ cos_scores,
+ min(top_k, len(cos_scores[0])),
+ dim=1,
+ largest=True,
+ sorted=False,
+ )
+ cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
+ cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()
+
+ for query_itr in range(len(cos_scores)):
+ for sub_corpus_id, score in zip(
+ cos_scores_top_k_idx[query_itr],
+ cos_scores_top_k_values[query_itr],
+ ):
+ # Translate chunk-local indices back to global corpus/query ids.
+ corpus_id = corpus_start_idx + sub_corpus_id
+ query_id = query_start_idx + query_itr
+ queries_result_list[query_id].append(
+ {"corpus_id": corpus_id, "score": score}
+ )
+
+ # Sort and strip to top_k results
+ for idx in range(len(queries_result_list)):
+ queries_result_list[idx] = sorted(
+ queries_result_list[idx], key=lambda x: x["score"], reverse=True
+ )
+ queries_result_list[idx] = queries_result_list[idx][0:top_k]
+
+ return queries_result_list
+
+
+class EDSEvaluator(Evaluator):
+ """
+ Evolutionary Distance Similarity Evaluator, analogous to Semantic Textual Similarity Evaluator.
+ Adapted from https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/STSEvaluator.py
+ """
+
+ def __init__(self, embeds1, embeds2, gold_scores, **kwargs):
+ super().__init__(**kwargs)
+ self.embeds1 = embeds1
+ self.embeds2 = embeds2
+ self.gold_scores = gold_scores
+
+ def __call__(self):
+ embeddings1 = np.array(self.embeds1)
+ embeddings2 = np.array(self.embeds2)
+ logger.info("Evaluating...")
+ cosine_scores = paired_cosine_distances(embeddings1, embeddings2)
+ manhattan_distances = paired_manhattan_distances(embeddings1, embeddings2)
+ euclidean_distances = paired_euclidean_distances(embeddings1, embeddings2)
+
+ cosine_pearson, _ = pearsonr(self.gold_scores, cosine_scores)
+ manhattan_pearson, _ = pearsonr(self.gold_scores, manhattan_distances)
+ euclidean_pearson, _ = pearsonr(self.gold_scores, euclidean_distances)
+
+ top_corr = max(
+ cosine_pearson,
+ manhattan_pearson,
+ euclidean_pearson,
+ )
+ return {
+ "cos_sim": cosine_pearson,
+ "manhattan": manhattan_pearson,
+ "euclidean": euclidean_pearson,
+ "top_corr": top_corr,
+ }
+
+
+class RetrievalEvaluator(Evaluator):
+ """Adapted from
+ https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/RetrievalEvaluator.py
+ """
+
+ def __init__(
+ self,
+ corpus_embeds,
+ query_embeds,
+ corpus_ids,
+ query_ids,
+ qrels: Dict[str, Dict[str, int]],
+ k_values: List[int] = [5, 10, 50],
+ score_function: str = "cos_sim",
+ corpus_chunk_size: int = 50000,
+ **kwargs,
+ ):
+ super().__init__(**kwargs)
+ self.corpus_embeds = corpus_embeds
+ self.query_embeds = query_embeds
+ self.corpus_ids = corpus_ids
+ self.query_ids = query_ids
+ self.qrels = qrels
+ self.k_values = k_values
+ self.top_k = max(k_values) if "top_k" not in kwargs else kwargs["top_k"]
+ self.score_function = score_function
+ self.score_functions = {
+ "cos_sim": cos_sim,
+ "dot": dot_score,
+ }
+ self.corpus_chunk_size = corpus_chunk_size
+
+ def __call__(self):
+ results = self.search(
+ self.corpus_embeds,
+ self.query_embeds,
+ self.corpus_ids,
+ self.query_ids,
+ self.top_k,
+ self.score_function,
+ )
+ ndcg, _map, recall, precision, naucs = self.evaluate(
+ self.qrels, results, self.k_values
+ )
+ mrr, naucs_mrr = self.evaluate_custom(self.qrels, results, self.k_values, "mrr")
+ scores = {
+ **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()},
+ **{f"map_at_{k.split('@')[1]}": v for (k, v) in _map.items()},
+ **{f"recall_at_{k.split('@')[1]}": v for (k, v) in recall.items()},
+ **{f"precision_at_{k.split('@')[1]}": v for (k, v) in precision.items()},
+ **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in mrr.items()},
+ **{
+ k.replace("@", "_at_").replace("_P", "_precision").lower(): v
+ for k, v in naucs.items()
+ },
+ **{
+ k.replace("@", "_at_").replace("_P", "_precision").lower(): v
+ for k, v in naucs_mrr.items()
+ },
+ }
+ return scores
+
+ def search(
+ self,
+ corpus_embeds,
+ query_embeds,
+ corpus_ids,
+ query_ids,
+ top_k: int,
+ score_function: str,
+ return_sorted: bool = False,
+ **kwargs,
+ ) -> dict[str, dict[str, float]]:
+ # Create embeddings for all queries using model.encode()
+ # Runs semantic search against the corpus embeddings
+ # Returns a ranked list with the corpus ids
+ if score_function not in self.score_functions:
+ raise ValueError(
+ f"score function: {score_function} must be either (cos_sim) for cosine similarity or (dot) for dot product"
+ )
+ # make query embeds and corpus embeds torch tensors
+ query_embeds = torch.from_numpy(query_embeds)
+ corpus_embeds = torch.from_numpy(corpus_embeds)
+ itr = range(0, len(corpus_embeds), self.corpus_chunk_size)
+ results = defaultdict(dict)
+ # Keep only the top-k docs for each query
+ result_heaps = defaultdict(list)
+ for batch_num, corpus_start_idx in enumerate(itr):
+ logger.info("Searching Batch {}/{}...".format(batch_num + 1, len(itr)))
+ corpus_end_idx = min(
+ corpus_start_idx + self.corpus_chunk_size, len(corpus_ids)
+ )
+ sub_corpus_embeds = corpus_embeds[corpus_start_idx:corpus_end_idx]
+ # Compute similarites using either cosine-similarity or dot product
+ cos_scores = self.score_functions[score_function](
+ query_embeds, sub_corpus_embeds
+ )
+ cos_scores[torch.isnan(cos_scores)] = -1
+
+ # Get top-k values
+ cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
+ cos_scores,
+ min(
+ top_k + 1,
+ len(cos_scores[1]) if len(cos_scores) > 1 else len(cos_scores[-1]),
+ ),
+ dim=1,
+ largest=True,
+ sorted=return_sorted,
+ )
+ cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
+ cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()
+
+ for query_itr in range(len(query_embeds)):
+ query_id = query_ids[query_itr]
+ for sub_corpus_id, score in zip(
+ cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr]
+ ):
+ corpus_id = corpus_ids[corpus_start_idx + sub_corpus_id]
+ if corpus_id != query_id:
+ if len(result_heaps[query_id]) < top_k:
+ # Push item on the heap
+ heapq.heappush(result_heaps[query_id], (score, corpus_id))
+ else:
+ # If item is larger than the smallest in the heap, push it on the heap then pop the smallest element
+ heapq.heappushpop(
+ result_heaps[query_id], (score, corpus_id)
+ )
+
+ for qid in result_heaps:
+ for score, corpus_id in result_heaps[qid]:
+ results[qid][corpus_id] = score
+
+ return results
+
+ @staticmethod
+ def evaluate(
+ qrels: dict[str, dict[str, int]],
+ results: dict[str, dict[str, float]],
+ k_values: List[int],
+ ignore_identical_ids: bool = True,
+ ) -> Tuple[Dict[str, float], dict[str, float], dict[str, float], dict[str, float]]:
+ if ignore_identical_ids:
+ logger.info(
+ "For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this."
+ )
+ popped = []
+ for qid, rels in results.items():
+ for pid in list(rels):
+ if qid == pid:
+ results[qid].pop(pid)
+ popped.append(pid)
+
+ all_ndcgs, all_aps, all_recalls, all_precisions = {}, {}, {}, {}
+
+ for k in k_values:
+ all_ndcgs[f"NDCG@{k}"] = []
+ all_aps[f"MAP@{k}"] = []
+ all_recalls[f"Recall@{k}"] = []
+ all_precisions[f"P@{k}"] = []
+
+ map_string = "map_cut." + ",".join([str(k) for k in k_values])
+ ndcg_string = "ndcg_cut." + ",".join([str(k) for k in k_values])
+ recall_string = "recall." + ",".join([str(k) for k in k_values])
+ precision_string = "P." + ",".join([str(k) for k in k_values])
+ evaluator = pytrec_eval.RelevanceEvaluator(
+ qrels, {map_string, ndcg_string, recall_string, precision_string}
+ )
+ scores = evaluator.evaluate(results)
+
+ for query_id in scores.keys():
+ for k in k_values:
+ all_ndcgs[f"NDCG@{k}"].append(scores[query_id]["ndcg_cut_" + str(k)])
+ all_aps[f"MAP@{k}"].append(scores[query_id]["map_cut_" + str(k)])
+ all_recalls[f"Recall@{k}"].append(scores[query_id]["recall_" + str(k)])
+ all_precisions[f"P@{k}"].append(scores[query_id]["P_" + str(k)])
+ ndcg, _map, recall, precision = (
+ all_ndcgs.copy(),
+ all_aps.copy(),
+ all_recalls.copy(),
+ all_precisions.copy(),
+ )
+
+ for k in k_values:
+ ndcg[f"NDCG@{k}"] = round(sum(ndcg[f"NDCG@{k}"]) / len(scores), 5)
+ _map[f"MAP@{k}"] = round(sum(_map[f"MAP@{k}"]) / len(scores), 5)
+ recall[f"Recall@{k}"] = round(sum(recall[f"Recall@{k}"]) / len(scores), 5)
+ precision[f"P@{k}"] = round(sum(precision[f"P@{k}"]) / len(scores), 5)
+ naucs = RetrievalEvaluator.evaluate_abstention(
+ results, {**all_ndcgs, **all_aps, **all_recalls, **all_precisions}
+ )
+ return ndcg, _map, recall, precision, naucs
+
+ @staticmethod
+ def evaluate_abstention(
+ results: dict[str, dict[str, float]],
+ metric_scores: dict[str, list[float]],
+ ) -> Dict[str, float]:
+ """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997"""
+ all_sim_scores = [list(results[qid].values()) for qid in list(results.keys())]
+ all_conf_scores = [
+ confidence_scores(sim_scores) for sim_scores in all_sim_scores
+ ]
+ conf_fcts = list(all_conf_scores[0].keys())
+ all_conf_scores = {
+ fct: np.array([x[fct] for x in all_conf_scores]) for fct in conf_fcts
+ }
+ metric_scores = {k: np.array(v) for k, v in metric_scores.items()}
+ naucs = {}
+
+ for metric_name, scores in metric_scores.items():
+ for fct, conf_scores in all_conf_scores.items():
+ naucs[f"nAUC_{metric_name}_{fct}"] = nAUC(conf_scores, scores)
+
+ return naucs
+
+ @staticmethod
+ def evaluate_custom(
+ qrels: dict[str, dict[str, int]],
+ results: dict[str, dict[str, float]],
+ k_values: List[int],
+ metric: str,
+ output_type: str = "all",
+ ) -> Tuple[Dict[str, float]]:
+ if metric.lower() in ["mrr", "mrr@k", "mrr_cut"]:
+ metric_scores = mrr(qrels, results, k_values, output_type)
+
+ elif metric.lower() in ["recall_cap", "r_cap", "r_cap@k"]:
+ metric_scores = recall_cap(qrels, results, k_values, output_type)
+
+ elif metric.lower() in ["hole", "hole@k"]:
+ metric_scores = hole(qrels, results, k_values, output_type)
+
+ elif metric.lower() in [
+ "acc",
+ "top_k_acc",
+ "accuracy",
+ "accuracy@k",
+ "top_k_accuracy",
+ ]:
+ metric_scores = top_k_accuracy(qrels, results, k_values, output_type)
+
+ naucs = RetrievalEvaluator.evaluate_abstention(results, metric_scores)
+ metric_scores_avg = {k: sum(v) / len(v) for k, v in metric_scores.items()}
+
+ return metric_scores_avg, naucs
diff --git a/dgeb/modality.py b/dgeb/modality.py
new file mode 100644
index 0000000..88c23c9
--- /dev/null
+++ b/dgeb/modality.py
@@ -0,0 +1,8 @@
+from enum import Enum
+
+
+class Modality(Enum):
+    """Data modality, either DNA or protein sequence.
+
+    Models report their modality through a ``modality`` property and tasks
+    declare one in their ``TaskMetadata``.
+    """
+
+    # Protein sequences.
+    PROTEIN = "protein"
+    # DNA sequences.
+    DNA = "dna"
diff --git a/dgeb/models.py b/dgeb/models.py
new file mode 100644
index 0000000..bce7b50
--- /dev/null
+++ b/dgeb/models.py
@@ -0,0 +1,481 @@
+import logging
+import re
+from abc import ABC, abstractmethod
+from functools import partial
+from types import SimpleNamespace
+from typing import Dict, List, Literal, Optional
+
+import numpy as np
+import torch
+import tqdm as tqdm
+from datasets import Dataset
+from torch import Tensor
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+from transformers import (
+ AutoConfig,
+ AutoModel,
+ AutoModelForCausalLM,
+ AutoModelForMaskedLM,
+ AutoTokenizer,
+ BatchEncoding,
+ DefaultDataCollator,
+ T5EncoderModel,
+ T5Tokenizer,
+)
+from transformers.modeling_outputs import BaseModelOutput
+
+from .modality import Modality
+from .eval_utils import ForwardHook, pool
+
+logger = logging.getLogger(__name__)
+
+
+class BioSeqTransformer(ABC):
+ """
+ Abstract class to wrap models which map biological sequences (DNA/Prot) to embeddings.
+ Modelled after SentenceTransformer (https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/SentenceTransformer.py)
+
+ Args:
+ model_name: Name or path to the pretrained model.
+ layers: List of model layers to probe. Can be integers or "mid" or "last".
+ devices: List of device ids for inference. If cuda is not available, will use cpu.
+ num_processes: Number of processes to use for data loading.
+ max_seq_length: Maximum sequence length of the input sequences.
+ l2_norm: If true, embeddings are L2-normalized before they are returned.
+ batch_size: Batch size for encoding.
+ pool_type: Pooling strategy to use. One of "mean", "max", "cls", "last".
+ """
+
+ def __init__(
+ self,
+ model_name: str,
+ layers: Optional[List[int] | Literal["mid"] | Literal["last"]] = None,
+ devices: List[int] = [0],
+ num_processes: int = 16,
+ max_seq_length: int = 1024,
+ l2_norm: bool = False,
+ batch_size: int = 128,
+ pool_type: str = "mean",
+ ):
+ super().__init__()
+
+ self.id = self.__class__.__name__
+ self.hf_name = model_name
+ self.encoder = self._load_model(model_name)
+ if not hasattr(self.encoder, "config"):
+ raise ValueError(
+ 'The model from `self._load_model()` must have a "config" attribute.'
+ )
+ self.config = self.encoder.config
+ self.tokenizer = self._get_tokenizer(model_name)
+ self.num_param = sum(p.numel() for p in self.encoder.parameters())
+ self.data_collator = DefaultDataCollator()
+ self.gpu_count = len(devices)
+ self.l2_norm = l2_norm
+
+ self.device = torch.device(
+ f"cuda:{devices[0]}" if torch.cuda.is_available() else "cpu"
+ )
+ self.num_processes = num_processes
+ self.max_seq_length = max_seq_length
+ self.batch_size = batch_size
+ self.pool_type = pool_type
+
+ if self.gpu_count > 1:
+ self.encoder = torch.nn.DataParallel(self.encoder, device_ids=devices)
+ self.encoder.to(self.device)
+ self.encoder.eval()
+
+ mid_layer = self.num_layers // 2
+ last_layer = self.num_layers - 1
+ mid_layer_label = f"mid ({mid_layer})"
+ last_layer_label = f"last ({self.num_layers - 1})"
+
+ if layers is None:
+ logger.debug(f"Using default layers: {mid_layer_label}, {last_layer_label}")
+ self.layers = [mid_layer, last_layer]
+ self.layer_labels = [mid_layer_label, last_layer_label]
+ elif layers == "mid":
+ self.layers = [mid_layer]
+ self.layer_labels = [mid_layer_label]
+ elif layers == "last":
+ self.layers = [last_layer]
+ self.layer_labels = [last_layer_label]
+ else:
+ self.layers = layers
+ self.layer_labels = [str(layer) for layer in layers]
+
+ def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
+ """Returns the output embedding for the given batch with shape [batch, num_layers, D]."""
+ outputs = self.encoder(**batch_dict, output_hidden_states=True)
+ embeds = [outputs.hidden_states[layer] for layer in self.layers]
+ embeds = [
+ pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
+ for layer_embeds in embeds
+ ]
+ # Stack with shape [B, num_layers, D].
+ embeds = torch.stack(embeds, dim=1)
+ return embeds
+
+ def _load_model(self, model_name):
+ return AutoModel.from_pretrained(model_name, trust_remote_code=True)
+
+ def _get_tokenizer(self, model_name):
+ return AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+ def _tokenize_func(
+ self, tokenizer, examples: Dict[str, List], max_seq_length: int
+ ) -> BatchEncoding:
+ batch_dict = tokenizer(
+ examples["input_seqs"],
+ max_length=max_seq_length,
+ padding=True,
+ truncation=True,
+ )
+ return batch_dict
+
+ @property
+ def metadata(self) -> Dict:
+ return {
+ "hf_name": self.hf_name,
+ "num_layers": self.num_layers,
+ "num_params": self.num_param,
+ "embed_dim": self.embed_dim,
+ }
+
+ @property
+ @abstractmethod
+ def num_layers(self) -> int:
+ pass
+
+ @property
+ @abstractmethod
+ def embed_dim(self) -> int:
+ pass
+
+ @property
+ @abstractmethod
+ def modality(self) -> Modality:
+ pass
+
+ @torch.no_grad()
+ def encode(self, sequences, **kwargs) -> np.ndarray:
+ """Returns a list of embeddings for the given sequences.
+ Args:
+ sequences (`List[str]`): List of sequences to encode
+ Returns:
+ `np.ndarray`: Embeddings for the given sequences of shape [num_sequences, num_layers, embedding_dim].
+ """
+ dataset = Dataset.from_dict({"input_seqs": sequences})
+ dataset.set_transform(
+ partial(
+ self._tokenize_func, self.tokenizer, max_seq_length=self.max_seq_length
+ )
+ )
+ data_loader = DataLoader(
+ dataset,
+ batch_size=self.batch_size * self.gpu_count,
+ shuffle=False,
+ drop_last=False,
+ num_workers=self.num_processes,
+ collate_fn=self.data_collator,
+ pin_memory=True,
+ )
+
+ if max(self.layers) >= self.num_layers:
+ raise ValueError(
+ f"Layer {max(self.layers)} is not available in the model. Choose a layer between 0 and {self.num_layers - 1}"
+ )
+
+ encoded_embeds = []
+ for batch_dict in tqdm.tqdm(
+ data_loader, desc="encoding", mininterval=10, disable=len(sequences) < 128
+ ):
+ batch_dict = {k: v.to(self.device) for k, v in batch_dict.items()}
+
+ embeds = self._encode_single_batch(batch_dict)
+
+ if self.l2_norm:
+ embeds = F.normalize(embeds, p=2, dim=-1)
+ encoded_embeds.append(embeds.cpu().numpy())
+
+ return np.concatenate(encoded_embeds, axis=0)
+
+
+class ESM(BioSeqTransformer):
+ """ESM model from https://huggingface.co/docs/transformers/en/model_doc/esm"""
+
+ MODEL_NAMES = [
+ "facebook/esm2_t6_8M_UR50D",
+ "facebook/esm2_t12_35M_UR50D",
+ "facebook/esm2_t30_150M_UR50D",
+ "facebook/esm2_t33_650M_UR50D",
+ "facebook/esm2_t36_3B_UR50D",
+ "facebook/esm2_t48_15B_UR50D",
+ ]
+
+ @property
+ def modality(self) -> Modality:
+ return Modality.PROTEIN
+
+ @property
+ def num_layers(self) -> int:
+ return self.config.num_hidden_layers
+
+ @property
+ def embed_dim(self) -> int:
+ return self.config.hidden_size
+
+
+class ESM3(BioSeqTransformer):
+ """ESM3 model from https://github.com/evolutionaryscale/esm"""
+
+ MODEL_NAMES = ["esm3_sm_open_v1"]
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ # Register forward hooks to store embeddings per layer.
+ self.hooks = [
+ ForwardHook(self.encoder.transformer.blocks[layer]) for layer in self.layers
+ ]
+
+ @property
+ def modality(self) -> Modality:
+ return Modality.PROTEIN
+
+ @property
+ def num_layers(self) -> int:
+ return self.config.num_hidden_layers
+
+ @property
+ def embed_dim(self) -> int:
+ return self.config.hidden_size
+
+ def _load_model(self, model_name):
+ try:
+ from esm.models.esm3 import ESM3 as ModelESM3
+ except ImportError:
+ raise ImportError(
+ "ESM3 is not installed. Please install it with `pip install esm`."
+ )
+ model = ModelESM3.from_pretrained("esm3_sm_open_v1")
+ model.config = SimpleNamespace(
+ num_hidden_layers=len(model.transformer.blocks),
+ hidden_size=model.transformer.blocks[0].ffn[-1].out_features,
+ )
+ return model
+
+ def _get_tokenizer(self, model_name):
+ try:
+ from esm.tokenization.sequence_tokenizer import EsmSequenceTokenizer
+ except ImportError:
+ raise ImportError(
+ "ESM3 is not installed. Please install it with `pip install esm`."
+ )
+ return EsmSequenceTokenizer()
+
+ def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
+ _ = self.encoder.forward(sequence_tokens=batch_dict["input_ids"])
+ embeds = [hook.output for hook in self.hooks]
+ embeds = [
+ pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
+ for layer_embeds in embeds
+ ]
+ # Stack with shape [B, num_layers, D].
+ embeds = torch.stack(embeds, dim=1)
+ embeds = embeds.to(torch.float32)
+ return embeds
+
+
+class ProtT5(BioSeqTransformer):
+ """ProtT5 model from https://github.com/agemagician/ProtTrans"""
+
+ MODEL_NAMES = [
+ "Rostlab/prot_t5_xl_uniref50",
+ "Rostlab/prot_t5_xl_bfd",
+ "Rostlab/prot_t5_xxl_uniref50",
+ "Rostlab/prot_t5_xxl_bfd",
+ ]
+
+ @property
+ def modality(self) -> Modality:
+ return Modality.PROTEIN
+
+ @property
+ def num_layers(self) -> int:
+ return self.config.num_layers
+
+ @property
+ def embed_dim(self) -> int:
+ return self.config.d_model
+
+ def _load_model(self, model_name):
+ return T5EncoderModel.from_pretrained(model_name)
+
+ def _get_tokenizer(self, model_name):
+ return T5Tokenizer.from_pretrained(model_name, do_lower_case=False)
+
+ def _tokenize_func(
+ self, tokenizer, examples: Dict[str, List], max_seq_length: int
+ ) -> BatchEncoding:
+ example_sequences = examples["input_seqs"]
+ # Add space between amino acids to make sure they are tokenized correctly.
+ example_sequences = [" ".join(sequence) for sequence in example_sequences]
+ example_sequences = [
+ re.sub(r"[UZOB]", "X", sequence) for sequence in example_sequences
+ ]
+ batch_dict = tokenizer(
+ example_sequences,
+ max_length=max_seq_length,
+ padding=True,
+ truncation=True,
+ add_special_tokens=True,
+ )
+
+ return batch_dict
+
+
+class ProGen(BioSeqTransformer):
+ """ProGen models from https://github.com/salesforce/progen."""
+
+ MODEL_NAMES = [
+ "hugohrban/progen2-small",
+ "hugohrban/progen2-medium",
+ "hugohrban/progen2-base",
+ "hugohrban/progen2-large",
+ "hugohrban/progen2-xlarge",
+ ]
+
+ @property
+ def modality(self) -> Modality:
+ return Modality.PROTEIN
+
+ @property
+ def num_layers(self) -> int:
+ return self.config.n_layer
+
+ @property
+ def embed_dim(self) -> int:
+ return self.config.embed_dim
+
+ def _load_model(self, model_name):
+ return AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+
+ def _get_tokenizer(self, model_name_or_path):
+ tokenizer = AutoTokenizer.from_pretrained(
+ model_name_or_path, trust_remote_code=True
+ )
+ tokenizer.pad_token = "<|pad|>"
+ return tokenizer
+
+ def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
+ """Returns the output embedding for the given batch with shape [batch, num_layers, D]."""
+ outputs: BaseModelOutput = self.encoder(
+ input_ids=batch_dict["input_ids"],
+ output_hidden_states=True,
+ use_cache=False,
+ )
+ embeds = [outputs.hidden_states[layer] for layer in self.layers]
+ embeds = [
+ pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
+ for layer_embeds in embeds
+ ]
+ # Stack with shape [B, num_layers, D].
+ embeds = torch.stack(embeds, dim=1)
+ return embeds
+
+
+class EvoModel(BioSeqTransformer):
+ """https://github.com/evo-design/evo."""
+
+ MODEL_NAMES = [
+ "togethercomputer/evo-1-8k-base",
+ "togethercomputer/evo-1-131k-base",
+ ]
+
+ @property
+ def modality(self) -> Modality:
+ return Modality.DNA
+
+ @property
+ def num_layers(self) -> int:
+ return self.config.num_layers
+
+ @property
+ def embed_dim(self) -> int:
+ return self.config.hidden_size
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ # Register forward hooks to store embeddings per layer.
+ self.hooks = []
+ for layer in self.layers:
+ # For the last layer, get the output of `backbone.norm`, which directly precedes `backbone.unembed`.
+ # This is equivalent to the approach in https://github.com/evo-design/evo/issues/32.
+ if layer == self.num_layers - 1 or layer == -1:
+ self.hooks.append(ForwardHook(self.encoder.backbone.norm))
+ else:
+ self.hooks.append(ForwardHook(self.encoder.backbone.blocks[layer]))
+
+ def _load_model(self, model_name):
+ config = AutoConfig.from_pretrained(
+ model_name, trust_remote_code=True, revision="1.1_fix"
+ )
+ model = AutoModelForCausalLM.from_pretrained(
+ model_name, config=config, trust_remote_code=True, revision="1.1_fix"
+ )
+ return model
+
+ def _get_tokenizer(self, model_name):
+ tokenizer = AutoTokenizer.from_pretrained(
+ model_name, revision="1.1_fix", trust_remote_code=True
+ )
+ # Evo tokenizer is missing pad_token by default.
+ tokenizer.add_special_tokens({"pad_token": "N"})
+ return tokenizer
+
+ def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
+ _ = self.encoder(batch_dict["input_ids"], use_cache=False)
+ embeds = [hook.output for hook in self.hooks]
+ # The hook output for Evo middle layers is a tuple (embedding, inference_params=None).
+ embeds = [x[0] if isinstance(x, tuple) else x for x in embeds]
+ embeds = [
+ pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
+ for layer_embeds in embeds
+ ]
+ # Stack with shape [B, num_layers, D].
+ embeds = torch.stack(embeds, dim=1)
+ embeds = embeds.to(torch.float32)
+ return embeds
+
+
+class NTModel(BioSeqTransformer):
+ """Nucleotide Transformer https://github.com/instadeepai/nucleotide-transformer"""
+
+ MODEL_NAMES = [
+ "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
+ "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
+ "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
+ "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
+ ]
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.max_seq_length = self.tokenizer.model_max_length
+
+ @property
+ def modality(self) -> Modality:
+ return Modality.DNA
+
+ @property
+ def num_layers(self) -> int:
+ return self.config.num_hidden_layers
+
+ @property
+ def embed_dim(self) -> int:
+ return self.config.hidden_size
+
+ def _load_model(self, model_name):
+ return AutoModelForMaskedLM.from_pretrained(model_name, trust_remote_code=True)
diff --git a/dgeb/tasks/__init__.py b/dgeb/tasks/__init__.py
new file mode 100644
index 0000000..8cec126
--- /dev/null
+++ b/dgeb/tasks/__init__.py
@@ -0,0 +1,16 @@
+# ruff: noqa: F403
+
+from .tasks import Dataset, Task, TaskMetadata, TaskResult
+from .eds_tasks import *
+from .pair_classification_tasks import *
+from .retrieval_tasks import *
+from .classification_tasks import *
+from .clustering_tasks import *
+from .bigene_mining_tasks import *
+
+__all__ = [
+ "Dataset",
+ "Task",
+ "TaskMetadata",
+ "TaskResult",
+]
diff --git a/dgeb/tasks/bigene_mining_tasks.py b/dgeb/tasks/bigene_mining_tasks.py
new file mode 100644
index 0000000..45d7679
--- /dev/null
+++ b/dgeb/tasks/bigene_mining_tasks.py
@@ -0,0 +1,77 @@
+"""
+Bigene mining tasks are analogous to bitext matching tasks, but for genes.
+Cosine similarity is used to mine genes of related functions from different organisms.
+"""
+
+import logging
+from collections import defaultdict
+
+from dgeb.evaluators import BiGeneMiningEvaluator
+from dgeb.modality import Modality
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
+
+logger = logging.getLogger(__name__)
+
+
+def run_bigene_mining_tasks(
+ model: BioSeqTransformer, metadata: TaskMetadata, top_k: int = 1
+) -> TaskResult:
+ """Evaluate bigene mining task. Utilizes the BiGeneMiningEvaluator."""
+ if len(metadata.datasets) != 1:
+ raise ValueError("BiGeneMining tasks require 1 dataset.")
+ ds = metadata.datasets[0].load()["train"]
+ layer_results = defaultdict(dict)
+ embeds1 = model.encode(ds["Seq1"])
+ embeds2 = model.encode(ds["Seq2"])
+ for i, layer in enumerate(model.layers):
+ evaluator = BiGeneMiningEvaluator(embeds1[:, i], embeds2[:, i], top_k=top_k)
+ layer_results["layers"][layer] = evaluator()
+ logger.info(
+ f"Layer: {layer}, {metadata.display_name} matching results: {layer_results['layers'][layer]}"
+ )
+ return TaskResult.from_dict(metadata, layer_results, model.metadata)
+
+
+class BacArchBiGeneMining(Task):
+    # BiGene mining task on the BacArch dataset (bacterial vs. archaeal
+    # proteins). Runs `run_bigene_mining_tasks` with its default `top_k`;
+    # the declared primary metric id is "f1".
+    metadata = TaskMetadata(
+        id="bacarch_bigene",
+        display_name="BacArch BiGene",
+        description="Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+        type="bigene_mining",
+        modality=Modality.PROTEIN,
+        datasets=[
+            # Dataset pinned to a fixed revision.
+            Dataset(
+                path="tattabio/bac_arch_bigene",
+                revision="d5a65e44bae43a9ba9f2fdc03056dff9c12f6631",
+            )
+        ],
+        primary_metric_id="f1",
+    )
+
+    def run(self, model: BioSeqTransformer) -> TaskResult:
+        # Embeds both sequence columns with `model` and scores every probed layer.
+        return run_bigene_mining_tasks(model, self.metadata)
+
+
+class ModACParalogyBiGeneMining(Task):
+    # BiGene mining task on the ModAC paralogy dataset (ModA vs. ModC proteins).
+    # ModAC Paralogy matching with top_k=1 is too strict (most models have accuracy < 0.1%)
+    # Instead use recall@50 as the main metric.
+    # TOP_K feeds both the primary metric id below and the `top_k` passed in `run`.
+    TOP_K = 50
+
+    metadata = TaskMetadata(
+        id="modac_paralogy_bigene",
+        display_name="ModAC Paralogy BiGene",
+        description="Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+        type="bigene_mining",
+        modality=Modality.PROTEIN,
+        datasets=[
+            # Dataset pinned to a fixed revision.
+            Dataset(
+                path="tattabio/modac_paralogy_bigene",
+                revision="241ca6397856e3360da04422d54933035b1fab87",
+            )
+        ],
+        primary_metric_id=f"recall_at_{TOP_K}",
+    )
+
+    def run(self, model: BioSeqTransformer) -> TaskResult:
+        # Same pipeline as the other bigene task, but with the larger top_k.
+        return run_bigene_mining_tasks(model, self.metadata, top_k=self.TOP_K)
diff --git a/dgeb/tasks/classification_tasks.py b/dgeb/tasks/classification_tasks.py
new file mode 100644
index 0000000..4da268b
--- /dev/null
+++ b/dgeb/tasks/classification_tasks.py
@@ -0,0 +1,213 @@
+"""
+Classification tasks take in biological sequence and functional labels.
+Multi-class and/or multi-label classification tasks are supported.
+"""
+
+import logging
+from collections import defaultdict
+
+import datasets
+import numpy as np
+
+from dgeb.eval_utils import merge_split_elem_embeds
+from dgeb.evaluators import (
+ MultiClassMultiOutputKNNClassificationEvaluator,
+ logRegClassificationEvaluator,
+)
+from dgeb.modality import Modality
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
+
+logger = logging.getLogger(__name__)
+
+
+def split_sequences(
+ ds: datasets.DatasetDict, max_seq_length: int
+) -> datasets.DatasetDict:
+ """Split sequences into chunks of max_seq_length using datasets.Dataset.map()."""
+
+ def _split_sequence(examples, max_seq_length):
+ assert (
+ len(examples["Sequence"]) == 1
+ ), "split map function should use batch size of 1."
+ example = {k: v[0] for k, v in examples.items()}
+ seq = example["Sequence"]
+ # Split by chunks of max_seq_length.
+ seq_split = [
+ seq[i : i + max_seq_length] for i in range(0, len(seq), max_seq_length)
+ ]
+ # Repeat other fields by the number of splits.
+ example = {
+ k: [v] * len(seq_split) for k, v in example.items() if k != "Sequence"
+ }
+ example["Sequence"] = seq_split
+ return example
+
+ ds = ds.map(
+ _split_sequence,
+ batched=True,
+ batch_size=1,
+ fn_kwargs={"max_seq_length": max_seq_length},
+ keep_in_memory=True,
+ load_from_cache_file=False,
+ )
+ return ds
+
+
+def run_classification_task(
+ model: BioSeqTransformer, metadata: TaskMetadata
+) -> TaskResult:
+ """Evaluate on classification tasks using logistic regression classifier."""
+ ds = metadata.datasets[0].load()
+ layer_results = defaultdict(dict)
+ train_embeds = model.encode(ds["train"]["Sequence"])
+ test_embeds = model.encode(ds["test"]["Sequence"])
+ for i, layer in enumerate(model.layers):
+ layer_results["layers"][layer] = logRegClassificationEvaluator(
+ train_embeds[:, i],
+ ds["train"]["Label"],
+ test_embeds[:, i],
+ ds["test"]["Label"],
+ )()
+ logger.info(
+ f"Layer: {layer}, {metadata.display_name} results: {layer_results['layers'][layer]}"
+ )
+ return TaskResult.from_dict(metadata, layer_results, model.metadata)
+
+
+class EnzymeCommissionClassification(Task):
+    # Enzyme Commission number classification on protein sequences, evaluated
+    # with `run_classification_task`; the declared primary metric id is "f1".
+    metadata = TaskMetadata(
+        id="ec_classification",
+        display_name="EC Classification",
+        description="Evaluate on Enzyme Commission number classification task.",
+        type="classification",
+        modality=Modality.PROTEIN,
+        datasets=[
+            # Dataset pinned to a fixed revision.
+            Dataset(
+                path="tattabio/ec_classification",
+                revision="ead5570168e6969a5149f6861e8a33d6b5d22498",
+            )
+        ],
+        primary_metric_id="f1",
+    )
+
+    def run(self, model: BioSeqTransformer) -> TaskResult:
+        # Delegates to the shared classification pipeline.
+        return run_classification_task(model, self.metadata)
+
+
+class EnzymeCommissionDNAClassification(Task):
+    # DNA counterpart of the EC classification task, evaluated with
+    # `run_classification_task`; the declared primary metric id is "f1".
+    # NOTE(review): `display_name` is identical to the protein task's - confirm
+    # that consumers tell the two apart by `id`/`modality`.
+    metadata = TaskMetadata(
+        id="ec_dna_classification",
+        display_name="EC Classification",
+        description="Evaluate on Enzyme Commission number classification task using DNA sequences.",
+        type="classification",
+        modality=Modality.DNA,
+        datasets=[
+            # Dataset pinned to a fixed revision.
+            Dataset(
+                path="tattabio/ec_classification_dna",
+                revision="cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd",
+            )
+        ],
+        primary_metric_id="f1",
+    )
+
+    def run(self, model: BioSeqTransformer) -> TaskResult:
+        # Delegates to the shared classification pipeline.
+        return run_classification_task(model, self.metadata)
+
+
+class ConvergentEnzymesClassification(Task):
+    # Convergent enzymes classification on protein sequences, evaluated with
+    # `run_classification_task`; the declared primary metric id is "f1".
+    metadata = TaskMetadata(
+        id="convergent_enzymes_classification",
+        display_name="Convergent Enzymes Classification",
+        description="Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+        type="classification",
+        modality=Modality.PROTEIN,
+        datasets=[
+            # Dataset pinned to a fixed revision.
+            Dataset(
+                path="tattabio/convergent_enzymes",
+                revision="37f75609f54de2bc0911ccb72faf1c2f5a4285aa",
+            )
+        ],
+        primary_metric_id="f1",
+    )
+
+    def run(self, model: BioSeqTransformer) -> TaskResult:
+        # Delegates to the shared classification pipeline.
+        return run_classification_task(model, self.metadata)
+
+
+def run_mibig_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
+ """
+ Evaluate on MIBIG classification tasks. Multiclass, multi-label KNN classification is used for evaluation.
+ """
+ ds = metadata.datasets[0].load()
+ if metadata.modality == Modality.DNA:
+ # MIBiG DNA sequences can be very long. Instead of truncating to max_seq_length,
+ # split into multiple sequences and mean pool the resulting embeddings.
+ ds = split_sequences(ds, model.max_seq_length)
+
+ layer_results = defaultdict(dict)
+ train_embeds = model.encode(ds["train"]["Sequence"])
+ test_embeds = model.encode(ds["test"]["Sequence"])
+
+ train_ids = ds["train"]["Entry"]
+ test_ids = ds["test"]["Entry"]
+ train_labels = ds["train"]["class"]
+ test_labels = ds["test"]["class"]
+ train_id_to_label = {id: label for id, label in zip(train_ids, train_labels)}
+ test_id_to_label = {id: label for id, label in zip(test_ids, test_labels)}
+ # Mean pool embeds with the same ID.
+ train_ids, train_embeds = merge_split_elem_embeds(train_ids, train_embeds)
+ test_ids, test_embeds = merge_split_elem_embeds(test_ids, test_embeds)
+ # Gather the labels after merging by unique ID.
+ train_labels = np.array([train_id_to_label[id] for id in train_ids])
+ test_labels = np.array([test_id_to_label[id] for id in test_ids])
+
+ for i, layer in enumerate(model.layers):
+ evaluator = MultiClassMultiOutputKNNClassificationEvaluator(
+ train_embeds[:, i], train_labels, test_embeds[:, i], test_labels
+ )
+ layer_results["layers"][layer] = evaluator()
+ logger.info(
+ f"Layer: {layer}, MIBiG classification results: {layer_results['layers'][layer]}"
+ )
+ return TaskResult.from_dict(metadata, layer_results, model.metadata)
+
+
+class MIBiGProteinClassification(Task):
+    # MIBiG biosynthetic gene cluster classification on protein sequences,
+    # evaluated with `run_mibig_task`; the declared primary metric id is "f1".
+    metadata = TaskMetadata(
+        id="MIBIG_protein_classification",
+        display_name="MIBiG Classification",
+        description="Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+        type="classification",
+        modality=Modality.PROTEIN,
+        datasets=[
+            # Dataset pinned to a fixed revision.
+            Dataset(
+                path="tattabio/mibig_classification_prot",
+                revision="915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f",
+            )
+        ],
+        primary_metric_id="f1",
+    )
+
+    def run(self, model: BioSeqTransformer) -> TaskResult:
+        # Delegates to the shared MIBiG pipeline.
+        return run_mibig_task(model, self.metadata)
+
+
+class MIBiGDNAClassification(Task):
+    # MIBiG biosynthetic gene cluster classification on DNA sequences,
+    # evaluated with `run_mibig_task`; because the modality is DNA, that
+    # function splits long sequences into chunks before embedding.
+    # The declared primary metric id is "f1".
+    metadata = TaskMetadata(
+        id="MIBIG_dna_classification",
+        display_name="MIBiG Classification",
+        description="Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
+        type="classification",
+        modality=Modality.DNA,
+        datasets=[
+            # Dataset pinned to a fixed revision.
+            Dataset(
+                path="tattabio/mibig_classification_dna",
+                revision="b5ca7a76d469e4e66c46f1b655903972571e6b61",
+            )
+        ],
+        primary_metric_id="f1",
+    )
+
+    def run(self, model: BioSeqTransformer) -> TaskResult:
+        # Delegates to the shared MIBiG pipeline.
+        return run_mibig_task(model, self.metadata)
diff --git a/dgeb/tasks/clustering_tasks.py b/dgeb/tasks/clustering_tasks.py
new file mode 100644
index 0000000..ba441f8
--- /dev/null
+++ b/dgeb/tasks/clustering_tasks.py
@@ -0,0 +1,70 @@
+"""
+Biological sequences are clustered and performance is determined by how well clustering matches assigned labels.
+"""
+
+import logging
+from collections import defaultdict
+
+from dgeb.evaluators import ClusteringEvaluator
+from dgeb.modality import Modality
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
+
+logger = logging.getLogger(__name__)
+
+
+def run_clustering_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
+ """Evaluate clustering task. Utilizes the ClusteringEvaluator."""
+ if len(metadata.datasets) != 1:
+ raise ValueError("Clustering tasks require 1 dataset.")
+ ds = metadata.datasets[0].load()["train"]
+ embeds = model.encode(ds["Sequence"])
+ layer_results = defaultdict(dict)
+ for i, layer in enumerate(model.layers):
+ labels = ds["Label"]
+ evaluator = ClusteringEvaluator(embeds[:, i], labels)
+ layer_results["layers"][layer] = evaluator()
+ logger.info(
+ f"Layer: {layer}, {metadata.display_name} results: {layer_results['layers'][layer]}"
+ )
+ return TaskResult.from_dict(metadata, layer_results, model.metadata)
+
+
+class RNAclustering(Task):
+ metadata = TaskMetadata(
+ id="ecoli_rna_clustering",
+ display_name="E.coli RNA Clustering",
+ description="Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
+ type="clustering",
+ modality=Modality.DNA,
+ datasets=[
+ Dataset(
+ path="tattabio/e_coli_rnas",
+ revision="4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6",
+ )
+ ],
+ primary_metric_id="v_measure",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_clustering_task(model, self.metadata)
+
+
+class MopBClustering(Task):
+ metadata = TaskMetadata(
+ id="mopb_clustering",
+ display_name="MopB Clustering",
+ description="Evaluate on MopB clustering task.",
+ type="clustering",
+ modality=Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="tattabio/mopb_clustering",
+ revision="eed4bfff9c5bd2dc2500c50757bfcb90425d999a",
+ )
+ ],
+ primary_metric_id="v_measure",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_clustering_task(model, self.metadata)
diff --git a/dgeb/tasks/eds_tasks.py b/dgeb/tasks/eds_tasks.py
new file mode 100644
index 0000000..c7512d2
--- /dev/null
+++ b/dgeb/tasks/eds_tasks.py
@@ -0,0 +1,246 @@
+"""
+Evolutionary Distance Similarity (EDS) tasks compare embedding distances to continuous evolutionary distances.
+The label distances are typically derived from phylogenetic trees.
+"""
+
+import logging
+from collections import defaultdict
+
+import numpy as np
+import pandas as pd
+
+from dgeb.evaluators import EDSEvaluator
+from dgeb.modality import Modality
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
+
+logger = logging.getLogger(__name__)
+
+
+def run_eds_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
+    """Evaluate phylogeny distance correlation task. Utilizes the Evolutionary Distance Similarity (EDS) evaluator.
+
+    Requires two datasets: [0] sequences ("Entry", "Sequence" columns) and [1] pairwise
+    distances ("ID1", "ID2", "distance" columns). Raises ValueError on any other count and
+    KeyError when a pair references an ID missing from the sequence dataset.
+    """
+    if len(metadata.datasets) != 2:
+        raise ValueError("Phylogeny tasks require 2 datasets: sequences and distances.")
+
+    ds = metadata.datasets[0].load()["train"]
+    distance_df = metadata.datasets[1].load()["train"].to_pandas()
+    assert isinstance(
+        distance_df, pd.DataFrame
+    ), f"Expected DataFrame, got {type(distance_df)}"
+
+    # Map every pair to sequence row positions once: the mapping is layer-independent, so
+    # each layer only needs two array lookups instead of a per-row DataFrame loop.
+    id_index_dict = {k: i for i, k in enumerate(ds["Entry"])}
+    idx1 = [id_index_dict[seq_id] for seq_id in distance_df["ID1"]]
+    idx2 = [id_index_dict[seq_id] for seq_id in distance_df["ID2"]]
+    # NOTE(review): assumes `encode` output is indexable as [sequence, layer] — confirm.
+    test_embeds = np.asarray(model.encode(ds["Sequence"]))
+    layer_results = defaultdict(dict)
+    for i, layer in enumerate(model.layers):
+        embeds1 = test_embeds[idx1, i]
+        embeds2 = test_embeds[idx2, i]
+        dists = np.array(distance_df["distance"].tolist())
+        evaluator = EDSEvaluator(embeds1, embeds2, dists)
+        layer_results["layers"][layer] = evaluator()
+        logger.info(
+            f"Layer: {layer}, {metadata.display_name} distance correlation results: {layer_results['layers'][layer]}"
+        )
+
+    return TaskResult.from_dict(metadata, layer_results, model.metadata)
+
+
+class RpobBacPhylogeny(Task):
+ metadata = TaskMetadata(
+ id="rpob_bac_phylogeny",
+ display_name="RpoB Bacterial Phylogeny",
+ description="Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ type="eds",
+ modality=Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="tattabio/rpob_bac_phylogeny_sequences",
+ revision="b833ef8d8d873ea5387540562873f41d073d3e03",
+ ),
+ Dataset(
+ path="tattabio/rpob_bac_phylogeny_distances",
+ revision="0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7",
+ ),
+ ],
+ primary_metric_id="top_corr",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_eds_task(model, self.metadata)
+
+
+class RpobArchPhylogeny(Task):
+ metadata = TaskMetadata(
+ id="rpob_arch_phylogeny",
+ display_name="RpoB Archaeal Phylogeny",
+ description="Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ type="eds",
+ modality=Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="tattabio/rpob_arch_phylogeny_sequences",
+ revision="10de75b9f5ad12340d629fd1ad015ef4319d6ee4",
+ ),
+ Dataset(
+ path="tattabio/rpob_arch_phylogeny_distances",
+ revision="2a585f0e135fe74b8ae6d31e7801c6031b0dcc18",
+ ),
+ ],
+ primary_metric_id="top_corr",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_eds_task(model, self.metadata)
+
+
+class RpobBacDNAPhylogeny(Task):
+ metadata = TaskMetadata(
+ id="rpob_bac_dna_phylogeny",
+ display_name="RpoB Bacterial Phylogeny",
+ description="Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
+ type="eds",
+ modality=Modality.DNA,
+ datasets=[
+ Dataset(
+ path="tattabio/rpob_bac_dna_phylogeny_sequences",
+ revision="8e137d3fb8886d8739ce08d1918745444c7d30d6",
+ ),
+ Dataset(
+ path="tattabio/rpob_bac_dna_phylogeny_distances",
+ revision="67339e271b2a1602208153d53d70d35ba6fa8876",
+ ),
+ ],
+ primary_metric_id="top_corr",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_eds_task(model, self.metadata)
+
+
+class RpobArchDNAPhylogeny(Task):
+ metadata = TaskMetadata(
+ id="rpob_arch_dna_phylogeny",
+ display_name="RpoB Archaeal Phylogeny",
+ description="Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
+ type="eds",
+ modality=Modality.DNA,
+ datasets=[
+ Dataset(
+ path="tattabio/rpob_arch_dna_phylogeny_sequences",
+ revision="4453552a0e1021fee8697c71a559f4d3f6da2408",
+ ),
+ Dataset(
+ path="tattabio/rpob_arch_dna_phylogeny_distances",
+ revision="51df97684a927ec2203568e80175ef26a62db039",
+ ),
+ ],
+ primary_metric_id="top_corr",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_eds_task(model, self.metadata)
+
+
+class FeFePhylogeny(Task):
+ metadata = TaskMetadata(
+ id="fefe_phylogeny",
+ display_name="FeFeHydrogenase Phylogeny",
+ description="Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ type="eds",
+ modality=Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="tattabio/fefe_phylogeny_sequences",
+ revision="bce06d79d9ce58413e7bcbed6943905d1afb8b26",
+ ),
+ Dataset(
+ path="tattabio/fefe_phylogeny_distances",
+ revision="d6357cee9b4071a8dcdeef54083006f0d5e94fd2",
+ ),
+ ],
+ primary_metric_id="top_corr",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_eds_task(model, self.metadata)
+
+
+class Bac16SPhylogeny(Task):
+ metadata = TaskMetadata(
+ id="bac_16S_phylogeny",
+ display_name="16S Bacterial Phylogeny",
+ description="Evaluate on 16S Bacterial phylogeny distance correlation task.",
+ type="eds",
+ modality=Modality.DNA,
+ datasets=[
+ Dataset(
+ path="tattabio/bac_16S_sequences",
+ revision="efde1456b86748909cbcfecb07d783756d570aa3",
+ ),
+ Dataset(
+ path="tattabio/bac_16S_distances",
+ revision="5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3",
+ ),
+ ],
+ primary_metric_id="top_corr",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_eds_task(model, self.metadata)
+
+
+class Arch16SPhylogeny(Task):
+ metadata = TaskMetadata(
+ id="arch_16S_phylogeny",
+ display_name="16S Archaeal Phylogeny",
+ description="Evaluate on 16S Archaeal phylogeny distance correlation task.",
+ type="eds",
+ modality=Modality.DNA,
+ datasets=[
+ Dataset(
+ path="tattabio/arch_16S_sequences",
+ revision="e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0",
+ ),
+ Dataset(
+ path="tattabio/arch_16S_distances",
+ revision="b0356b632a954be70cefd57e3a02e7e1ccd34408",
+ ),
+ ],
+ primary_metric_id="top_corr",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_eds_task(model, self.metadata)
+
+
+class Euk18SPhylogeny(Task):
+ metadata = TaskMetadata(
+ id="euk_18S_phylogeny",
+ display_name="18S Eukaryotic Phylogeny",
+ description="Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
+ type="eds",
+ modality=Modality.DNA,
+ datasets=[
+ Dataset(
+ path="tattabio/euk_18S_sequences",
+ revision="5174cb3b2c5c46b61307fd1c2c08f5c432655196",
+ ),
+ Dataset(
+ path="tattabio/euk_18S_distances",
+ revision="c4cea4fbb1185d08e0e01fd28ffb8b06a25025da",
+ ),
+ ],
+ primary_metric_id="top_corr",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_eds_task(model, self.metadata)
diff --git a/dgeb/tasks/pair_classification_tasks.py b/dgeb/tasks/pair_classification_tasks.py
new file mode 100644
index 0000000..6b34b57
--- /dev/null
+++ b/dgeb/tasks/pair_classification_tasks.py
@@ -0,0 +1,96 @@
+"""
+Pair classification tasks evaluating distances between functionally relevant gene pairs.
+For instance, distance thresholds distinguish between co-transcribed and non-co-transcribed gene pairs.
+"""
+
+import logging
+from collections import defaultdict
+
+from dgeb.evaluators import PairClassificationEvaluator
+from dgeb.modality import Modality
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
+
+from ..eval_utils import paired_dataset
+
+logger = logging.getLogger(__name__)
+
+
+def run_pair_classification_task(
+ model: BioSeqTransformer, metadata: TaskMetadata
+) -> TaskResult:
+ """Evaluate pair classification task. Utilizes the PairClassificationEvaluator."""
+ if len(metadata.datasets) != 1:
+ raise ValueError("Pair classification tasks require 1 dataset.")
+ ds = metadata.datasets[0].load()["train"]
+ embeds = model.encode(ds["Sequence"])
+ layer_results = defaultdict(dict)
+ for i, layer in enumerate(model.layers):
+ labels = ds["Label"]
+ embeds1, embeds2, labels = paired_dataset(labels, embeds[:, i])
+ evaluator = PairClassificationEvaluator(embeds1, embeds2, labels)
+ layer_results["layers"][layer] = evaluator()
+ logger.info(
+ f"Layer: {layer}, {metadata.display_name} classification results: {layer_results['layers'][layer]}"
+ )
+ return TaskResult.from_dict(metadata, layer_results, model.metadata)
+
+
+class EcoliOperon(Task):
+ metadata = TaskMetadata(
+ id="ecoli_operonic_pair",
+ display_name="E.coli Operonic Pair",
+ description="Evaluate on E.coli K-12 operonic pair classification task.",
+ type="pair_classification",
+ modality=Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="tattabio/ecoli_operonic_pair",
+ revision="a62c01143a842696fc8200b91c1acb825e8cb891",
+ )
+ ],
+ primary_metric_id="top_ap",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_pair_classification_task(model, self.metadata)
+
+
+class CyanoOperonPair(Task):
+ metadata = TaskMetadata(
+ id="cyano_operonic_pair",
+ display_name="Cyano Operonic Pair",
+ description="Evaluate on Cyano operonic pair classification task.",
+ type="pair_classification",
+ modality=Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="tattabio/cyano_operonic_pair",
+ revision="eeb4cb71ec2a4ff688af9de7c0662123577d32ec",
+ )
+ ],
+ primary_metric_id="top_ap",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_pair_classification_task(model, self.metadata)
+
+
+class VibrioOperonPair(Task):
+ metadata = TaskMetadata(
+ id="vibrio_operonic_pair",
+ display_name="Vibrio Operonic Pair",
+ description="Evaluate on Vibrio operonic pair classification task.",
+ type="pair_classification",
+ modality=Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="tattabio/vibrio_operonic_pair",
+ revision="24781b12b45bf81a079a6164ef0d2124948c1878",
+ )
+ ],
+ primary_metric_id="top_ap",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_pair_classification_task(model, self.metadata)
diff --git a/dgeb/tasks/retrieval_tasks.py b/dgeb/tasks/retrieval_tasks.py
new file mode 100644
index 0000000..5bfcc22
--- /dev/null
+++ b/dgeb/tasks/retrieval_tasks.py
@@ -0,0 +1,96 @@
+"""
+Retrieval tasks find functionally relevant genes in a corpus of genes based on a query gene.
+Typically corpus is derived from a different phylogenetic group than the query genes.
+"""
+
+import logging
+from collections import defaultdict
+
+from dgeb.evaluators import RetrievalEvaluator
+from dgeb.modality import Modality
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
+
+logger = logging.getLogger(__name__)
+
+
+def run_retrieval_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
+    """Evaluate retrieval task with the RetrievalEvaluator; needs 2 datasets (corpus/query, qrels)."""
+    if len(metadata.datasets) != 2:
+        raise ValueError("Retrieval tasks require 2 datasets: corpus/query and qrels.")
+    # Corpus ("train") and queries ("test") are splits of the same dataset: load it once.
+    sequence_ds = metadata.datasets[0].load()
+    corpus_ds, query_ds = sequence_ds["train"], sequence_ds["test"]
+    qrels = metadata.datasets[1].load()
+    corpus_embeds = model.encode(corpus_ds["Sequence"])
+    query_embeds = model.encode(query_ds["Sequence"])
+    qrels_dict = defaultdict(dict)
+
+    def qrels_dict_init(row):
+        qrels_dict[str(row["query_id"])][str(row["corpus_id"])] = int(row["fuzz_ratio"])
+
+    # Populate `qrels_dict` from the dataset.
+    # See https://github.com/cvangysel/pytrec_eval for qrels format.
+    qrels.map(qrels_dict_init)
+    layer_results = defaultdict(dict)
+    for i, layer in enumerate(model.layers):
+        evaluator = RetrievalEvaluator(
+            corpus_embeds[:, i],
+            query_embeds[:, i],
+            corpus_ds["Entry"],
+            query_ds["Entry"],
+            qrels_dict,
+        )
+        layer_results["layers"][layer] = evaluator()
+        logger.info(
+            f"Layer: {layer}, Retrieval results: {layer_results['layers'][layer]}"
+        )
+    return TaskResult.from_dict(metadata, layer_results, model.metadata)
+
+
+class ArchRetrieval(Task):
+ metadata = TaskMetadata(
+ id="arch_retrieval",
+ display_name="Arch Retrieval",
+ description="Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ type="retrieval",
+ modality=Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="tattabio/arch_retrieval",
+ revision="a19124322604a21b26b1b3c13a1bd0b8a63c9f7b",
+ ),
+ Dataset(
+ path="tattabio/arch_retrieval_qrels",
+ revision="3f142f2f9a0995d56c6e77188c7251761450afcf",
+ ),
+ ],
+ primary_metric_id="map_at_5",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_retrieval_task(model, self.metadata)
+
+
+class EukRetrieval(Task):
+ metadata = TaskMetadata(
+ id="euk_retrieval",
+ display_name="Euk Retrieval",
+ description="Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ type="retrieval",
+ modality=Modality.PROTEIN,
+ datasets=[
+ Dataset(
+ path="tattabio/euk_retrieval",
+ revision="c93dc56665cedd19fbeaea9ace146f2474c895f0",
+ ),
+ Dataset(
+ path="tattabio/euk_retrieval_qrels",
+ revision="a5aa01e9b9738074aba57fc07434e352c4c71e4b",
+ ),
+ ],
+ primary_metric_id="map_at_5",
+ )
+
+ def run(self, model: BioSeqTransformer) -> TaskResult:
+ return run_retrieval_task(model, self.metadata)
diff --git a/dgeb/tasks/tasks.py b/dgeb/tasks/tasks.py
new file mode 100644
index 0000000..e385f9f
--- /dev/null
+++ b/dgeb/tasks/tasks.py
@@ -0,0 +1,135 @@
+"""Task abstract class for evaluation and results."""
+
+import logging
+from typing import List, Literal, Optional, Any
+from importlib.metadata import version
+from enum import Enum
+import datasets
+from pydantic import BaseModel, model_validator
+from abc import ABC, abstractmethod
+
+
+# HACK: if Modality is not defined, then import it from modality.py
+try:
+ from ..modality import Modality
+except Exception:
+ # if not, super hack to get the leaderboard working.
+ # SHOULD MATCH the code exactly in modality.py
+ # can we read the file and run that code?
+ from enum import Enum
+
+ class Modality(Enum):
+ """Data modality, either DNA or protein sequence."""
+
+ PROTEIN = "protein"
+ DNA = "dna"
+
+
+logging.basicConfig(level=logging.INFO)
+
+TaskType = Literal[
+ "classification",
+ "pair_classification",
+ "clustering",
+ "eds",
+ "bigene_mining",
+ "retrieval",
+]
+
+
+class TaskMetric(BaseModel):
+ id: str
+ display_name: str
+ description: Optional[str] = None
+ value: float = 0.0
+
+
+class LayerResult(BaseModel):
+ layer_number: int
+ layer_display_name: str
+ metrics: List[TaskMetric]
+
+
+class GEBModel(BaseModel):
+ hf_name: str
+ num_layers: int
+ num_params: int
+ embed_dim: int
+
+
+class Dataset(BaseModel):
+ path: str
+ revision: str
+
+ def load(self) -> datasets.DatasetDict:
+ ds = datasets.load_dataset(self.path, revision=self.revision)
+ if not isinstance(ds, datasets.DatasetDict):
+ raise ValueError(
+ f"Dataset {self.path} is not a datasets.DatasetDict object."
+ )
+ return ds
+
+
+class TaskMetadata(BaseModel):
+ id: str
+ display_name: str
+ description: str
+ modality: Modality
+ type: TaskType
+ # List of datasets used by the task.
+ # Each dataset is a dict of all arguments to pass to `datasets.load_dataset()`.
+ datasets: List[Dataset]
+ primary_metric_id: str
+
+
+# tasks.py
+class TaskResult(BaseModel):
+    """Result of one task for one model: task and model metadata plus per-layer metrics."""
+
+    dgeb_version: str
+    task: "TaskMetadata"
+    # TODO: Convert model to ModelMetadata
+    model: GEBModel
+    results: List[LayerResult]
+
+    @model_validator(mode="after")
+    def check_valid_primary_metric(self):
+        for result in self.results:
+            if all(m.id != self.task.primary_metric_id for m in result.metrics):
+                raise ValueError(
+                    f"Primary metric {self.task.primary_metric_id} not found in results.metrics"
+                )
+        return self
+
+    @staticmethod
+    def from_dict(
+        task_metadata: "TaskMetadata",
+        layer_results: dict,  # {"layers": {layer: {metric_id: value}}}
+        model_metadata: GEBModel,
+    ) -> "TaskResult":
+        return TaskResult(
+            dgeb_version=version("dgeb"),
+            task=task_metadata,
+            model=model_metadata,
+            results=list(
+                LayerResult(
+                    layer_number=int(layer),
+                    layer_display_name=str(layer),
+                    metrics=[
+                        TaskMetric(id=metric, display_name=metric, value=value)
+                        for metric, value in metrics.items()
+                    ],
+                )
+                for layer, metrics in layer_results["layers"].items()
+            ),
+        )
+
+
+# move to model.py?
+class Task(ABC):
+ metadata: TaskMetadata
+
+ # using Any instead of "BioSeqTransformer" to avoid installing all deps in leaderboard
+ @abstractmethod
+ def run(self, model: Any, layers: Optional[List[int]] = None) -> TaskResult:
+ pass
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..6429ba3
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,8 @@
+version: "3"
+services:
+ dgeb-leaderboard:
+ build:
+ context: ./
+ dockerfile: Dockerfile
+ ports:
+ - "7680:7860"
diff --git a/docs/images/tatta_logo.png b/docs/images/tatta_logo.png
new file mode 100644
index 0000000..76220bd
Binary files /dev/null and b/docs/images/tatta_logo.png differ
diff --git a/leaderboard/.gitignore b/leaderboard/.gitignore
new file mode 100644
index 0000000..912a462
--- /dev/null
+++ b/leaderboard/.gitignore
@@ -0,0 +1,2 @@
+/.projectile
+**/__pycache__/
diff --git a/leaderboard/DGEB_Figure.png b/leaderboard/DGEB_Figure.png
new file mode 100644
index 0000000..8fdeeaa
Binary files /dev/null and b/leaderboard/DGEB_Figure.png differ
diff --git a/leaderboard/README.md b/leaderboard/README.md
new file mode 100644
index 0000000..964b4a7
--- /dev/null
+++ b/leaderboard/README.md
@@ -0,0 +1,2 @@
+# to set up hf repo to receive origin pushes
+git remote set-url --add origin git@hf.co:spaces/tattabio/DGEB
diff --git a/leaderboard/__init__.py b/leaderboard/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/leaderboard/app.py b/leaderboard/app.py
new file mode 100644
index 0000000..d6463c2
--- /dev/null
+++ b/leaderboard/app.py
@@ -0,0 +1,260 @@
+import math
+import json
+from pathlib import Path
+import gradio as gr
+from typing import List
+import pandas as pd
+import importlib.util
+from pydantic import ValidationError, parse_obj_as
+
+SIG_FIGS = 4
+
+# HACK: very hacky way to import from parent directory, while avoiding needing all the deps of the parent package
+modality_path = "../dgeb/modality.py"
+spec = importlib.util.spec_from_file_location("modality", modality_path)
+modality = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(modality)
+Modality = modality.Modality
+
+
+tasks_path = "../dgeb/tasks/tasks.py"
+
+# Load the module
+spec = importlib.util.spec_from_file_location("tasks", tasks_path)
+tasks = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(tasks)
+TaskResult = tasks.TaskResult
+GEBModel = tasks.GEBModel
+
+
+# Assuming the class definitions provided above are complete and imported here
+
+
+def format_num_params(param: int) -> str:
+    """Format a parameter count: "33M" style from one million up, "12,345" style below."""
+    # There is no billions unit: 3_000_000_000 is rendered as "3000M", which keeps every
+    # abbreviated value in millions.
+    million = 1_000_000
+    if param >= million:
+        # Truncate to whole millions; integer floor division avoids float rounding on
+        # very large counts.
+        num_millions = param // million
+        return f"{num_millions}M"
+    else:
+        return f"{param:,}"
+
+
+def load_json_files_from_directory(directory_path: Path) -> List[dict]:
+ """
+ Recursively load all JSON files within the specified directory path.
+
+ :param directory_path: Path to the directory to search for JSON files.
+ :return: List of dictionaries loaded from JSON files.
+ """
+ json_files_content = []
+ for json_file in directory_path.rglob("*.json"): # Recursively find all JSON files
+ try:
+ with open(json_file, "r", encoding="utf-8") as file:
+ json_content = json.load(file)
+ json_files_content.append(json_content)
+ except Exception as e:
+ print(f"Error loading {json_file}: {e}")
+ return json_files_content
+
+
+def load_results() -> List[TaskResult]:
+ """
+ Recursively load JSON files in ./submissions/** and return a list of TaskResult objects.
+ """
+ submissions_path = Path("./submissions")
+ json_contents = load_json_files_from_directory(submissions_path)
+
+ task_results_objects = []
+ for content in json_contents:
+ try:
+ task_result = parse_obj_as(
+ TaskResult, content
+ ) # Using Pydantic's parse_obj_as for creating TaskResult objects
+ task_results_objects.append(task_result)
+ except ValidationError as e:
+ print(f"Error parsing TaskResult object: {e}")
+ raise e
+
+ return task_results_objects
+
+
+def task_results_to_dgeb_score(
+ model: GEBModel, model_results: List[TaskResult]
+) -> dict:
+ best_scores_per_task = []
+ modalities_seen = set()
+ for task_result in model_results:
+ modalities_seen.add(task_result.task.modality)
+ assert (
+ task_result.model.hf_name == model.hf_name
+ ), f"Model names do not match, {task_result.model.hf_name} != {model.hf_name}"
+ primary_metric_id = task_result.task.primary_metric_id
+ scores = []
+ # Get the primary score for each layer.
+ for result in task_result.results:
+ for metric in result.metrics:
+ if metric.id == primary_metric_id:
+ scores.append(metric.value)
+ best_score = max(scores)
+ best_scores_per_task.append(best_score)
+
+ assert (
+ len(modalities_seen) == 1
+ ), f"Multiple modalities found for model {model.hf_name}"
+ # Calculate the average of the best scores for each task.
+ assert len(best_scores_per_task) > 0, f"No tasks found for model {model.hf_name}"
+ dgeb_score = sum(best_scores_per_task) / len(best_scores_per_task)
+ return {
+ "Task Name": "DGEB Score",
+ "Task Category": "DGEB",
+ "Model": model.hf_name,
+ "Modality": list(modalities_seen)[0],
+ "Num. Parameters (millions)": format_num_params(model.num_params),
+ "Emb. Dimension": model.embed_dim,
+ "Score": dgeb_score,
+ }
+
+
+def task_results_to_df(model_results: List[TaskResult]) -> pd.DataFrame:
+ # Initialize an empty list to hold all rows of data
+ data_rows = []
+ all_models = {}
+ for res in model_results:
+ task = res.task
+ model = res.model
+ all_models[model.hf_name] = model
+ print(f"Processing {task.display_name} for {model.hf_name}")
+ for layer in res.results:
+ total_layers = model.num_layers - 1
+ mid_layer = math.ceil(total_layers / 2)
+ if mid_layer == layer.layer_number:
+ layer.layer_display_name = "mid"
+ elif total_layers == layer.layer_number:
+ layer.layer_display_name = "last"
+
+ if layer.layer_display_name not in ["mid", "last"]:
+ # calculate if the layer is mid or last
+ print(
+ f"Layer {layer.layer_number} is not mid or last out of {total_layers}. Skipping"
+ )
+ continue
+ else:
+ # For each Metric in the Layer
+ # pivoting the data so that each metric is a row
+ metric_ids = []
+ primary_metric_label = f"{task.primary_metric_id} (primary metric)"
+ for metric in layer.metrics:
+ if task.primary_metric_id == metric.id:
+ metric_ids.append(primary_metric_label)
+ else:
+ metric_ids.append(metric.id)
+
+ metric_values = [metric.value for metric in layer.metrics]
+ zipped = zip(metric_ids, metric_values)
+ # sort primary metric id first
+ sorted_zip = sorted(
+ zipped,
+ key=lambda x: x[0] != primary_metric_label,
+ )
+ data_rows.append(
+ {
+ "Task Name": task.display_name,
+ "Task Category": task.type,
+ "Model": model.hf_name,
+ "Num. Parameters (millions)": format_num_params(
+ model.num_params
+ ),
+ "Emb. Dimension": model.embed_dim,
+ "Modality": task.modality,
+ "Layer": layer.layer_display_name,
+ **dict(sorted_zip),
+ }
+ )
+ for model_name, model in all_models.items():
+ results_for_model = [
+ res for res in model_results if res.model.hf_name == model_name
+ ]
+ assert len(results_for_model) > 0, f"No results found for model {model_name}"
+ dgeb_score_record = task_results_to_dgeb_score(model, results_for_model)
+ print(f'model {model.hf_name} dgeb score: {dgeb_score_record["Score"]}')
+ data_rows.append(dgeb_score_record)
+ print("Finished processing all results")
+ df = pd.DataFrame(data_rows)
+ return df
+
+
+df = task_results_to_df(load_results())
+image_path = "./DGEB_Figure.png"
+with gr.Blocks() as demo:
+ gr.Label("Diverse Genomic Embedding Benchmark", show_label=False, scale=2)
+ gr.HTML(
+ f""
+ )
+ gr.HTML(
+ """
+
+"""
+ )
+
+ unique_categories = df["Task Category"].unique()
+ # sort "DGEB" to the start
+ unique_categories = sorted(unique_categories, key=lambda x: x != "DGEB")
+ for category in unique_categories:
+ with gr.Tab(label=category):
+ unique_tasks_in_category = df[df["Task Category"] == category][
+ "Task Name"
+ ].unique()
+ # sort "Overall" to the start
+ unique_tasks_in_category = sorted(
+ unique_tasks_in_category, key=lambda x: x != "Overall"
+ )
+ for task in unique_tasks_in_category:
+ with gr.Tab(label=task):
+ columns_to_hide = ["Task Name", "Task Category"]
+ # get rows where Task Name == task and Task Category == category
+ filtered_df = (
+ df[
+ (df["Task Name"] == task)
+ & (df["Task Category"] == category)
+ ].drop(columns=columns_to_hide)
+ ).dropna(axis=1, how="all") # drop all NaN columns for Overall tab
+ # round all values to 4 decimal places
+ rounded_df = filtered_df.round(SIG_FIGS)
+
+ # calculate ranking column
+ # if in Overview tab, rank by average metric value
+ if task == "Overall":
+ # rank by average col
+ rounded_df["Rank"] = filtered_df["Average"].rank(
+ ascending=False
+ )
+ else:
+ avoid_cols = [
+ "Model",
+ "Emb. Dimension",
+ "Num. Parameters (millions)",
+ "Modality",
+ "Layer",
+ ]
+ rounded_df["Rank"] = (
+ rounded_df.drop(columns=avoid_cols, errors="ignore")
+ .sum(axis=1)
+ .rank(ascending=False)
+ )
+ # make Rank first column
+ cols = list(rounded_df.columns)
+ cols.insert(0, cols.pop(cols.index("Rank")))
+ rounded_df = rounded_df[cols]
+ # sort by rank
+ rounded_df = rounded_df.sort_values("Rank")
+ data_frame = gr.DataFrame(rounded_df)
+
+
+demo.launch(allowed_paths=["."])
diff --git a/leaderboard/requirements.txt b/leaderboard/requirements.txt
new file mode 100644
index 0000000..d557bda
--- /dev/null
+++ b/leaderboard/requirements.txt
@@ -0,0 +1,82 @@
+aiofiles==23.2.1
+aiohttp==3.9.5
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.4.0
+attrs==23.2.0
+certifi==2024.6.2
+charset-normalizer==3.3.2
+click==8.1.7
+contourpy==1.2.1
+cycler==0.12.1
+datasets==2.14.4
+dill==0.3.7
+dnspython==2.6.1
+email-validator==2.1.2
+fastapi==0.111.0
+fastapi-cli==0.0.4
+ffmpy==0.3.2
+filelock==3.15.1
+fonttools==4.53.0
+frozenlist==1.4.1
+fsspec==2024.6.0
+gradio==4.37.2
+gradio-client==1.0.2
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.4
+idna==3.7
+importlib-resources==6.4.0
+jinja2==3.1.4
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+markupsafe==2.1.5
+matplotlib==3.9.0
+mdurl==0.1.2
+multidict==6.0.5
+multiprocess==0.70.15
+numpy==2.0.0
+orjson==3.10.5
+packaging==24.1
+pandas==2.2.2
+pillow==10.3.0
+pyarrow==16.1.0
+pydantic==2.7.4
+pydantic-core==2.18.4
+pydub==0.25.1
+pygments==2.18.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytz==2024.1
+pyyaml==6.0.1
+referencing==0.35.1
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.18.1
+ruff==0.4.9
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+starlette==0.37.2
+tomlkit==0.12.0
+toolz==0.12.1
+tqdm==4.66.4
+typer==0.12.3
+typing-extensions==4.12.2
+tzdata==2024.1
+ujson==5.10.0
+urllib3==2.2.2
+uvicorn==0.30.1
+uvloop==0.19.0
+watchfiles==0.22.0
+websockets==11.0.3
+xxhash==3.4.1
+yarl==1.9.4
diff --git a/leaderboard/submissions/.DS_Store b/leaderboard/submissions/.DS_Store
new file mode 100644
index 0000000..9832d4d
Binary files /dev/null and b/leaderboard/submissions/.DS_Store differ
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json b/leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json
new file mode 100644
index 0000000..854f854
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6537260383267297
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6689342403628118
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7853286513915045
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6020175670931918
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.798563869992442
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.645844633541225
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.655328798185941
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7407876819384401
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.5970376985838431
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7849584278155715
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json
new file mode 100644
index 0000000..07555ea
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.84127
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.82701
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.79635
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.27329
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.37939
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.64453
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.2839
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.40033
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.70443
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.7621
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.69407
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.42452
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8853108550291645
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8879126611520968
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8892435700922602
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6178391415234327
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.27510768020625387
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.2751226626247053
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6158935362175889
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.29490376307826244
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.3173510395378902
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6282820888186709
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.217967587602592
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.3392167130961565
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.02706102865662817
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.33465305568189146
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.29252115202920864
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.1461797349288265
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.3984979781227535
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.15678893453735943
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5443958382387585
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.3379769732428374
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.23212587702223994
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.008899383756080657
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.3376357180005265
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2949278653804833
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.11957594632298725
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.4084900248156052
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.16409679466126934
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5478175261971683
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.3566768602643857
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.24770750166012404
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5588205820812548
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.053528426968584814
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.5895997876864452
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.5109397710788774
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": -0.0014360394688449447
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.5972188824684267
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.30493219390483955
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.35096314542920914
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.4163370977258702
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6041064087877195
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.2995447501683336
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.1176892239839227
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6055526314461911
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.3015594122136539
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.11951448723943421
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6050403183375579
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.3012299482545067
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.12091114334431136
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.82819
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.81615
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.78982
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.27067
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.37321
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.63596
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.27906
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.39106
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.69746
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.7487
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.68506
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.42266
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8752382984777344
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.878253189168681
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8795454419523189
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6238124910465183
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.3878031710482511
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.22961445620397436
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6136556294192528
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.4027695454909326
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.23933162739820324
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6039490411056802
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.379240829313294
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.23134380586116654
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": -0.018274861348075953
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.3153330580523699
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.31839102956934573
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.10106646301687382
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4143687386138405
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.18923312509326384
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5144031685310609
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.45693618989546114
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.1513413062960939
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.031265621786664255
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.32028522957198785
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.32056979656535384
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.07820354892522365
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.42551786412535775
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2040509113490322
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5060801621108716
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.5071691349011768
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.11952783139053508
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5923656191314365
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.1954332256400316
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.5508269378169939
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.5458701611463479
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.12975949111453675
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.5537528325655148
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.3549845967268747
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.26254902560124815
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3919186481758992
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6284613562335846
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.3609822238622607
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.13691647729285375
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6282780633119702
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.36649482857679033
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.1301211341279461
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6290574535816186
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.367920824556504
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.13036774230606793
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json
new file mode 100644
index 0000000..66fb190
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.6215094339622641
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.7056603773584905
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6469182389937107
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.7056603773584905
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.6138364779874214
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.7018867924528301
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6413836477987421
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.7018867924528301
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json
new file mode 100644
index 0000000..622c6c9
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.2475
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.20116666666666666
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.2425
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.19904761904761906
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json
new file mode 100644
index 0000000..1d71bcd
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7203065134099617
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.990619957447052
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.44058665070338227
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.815308690071106
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.28253358925143957
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.32424099100055437
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 40.061012268066406
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.43963963963963965
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 380.5898742675781
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.28218966846569005
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9945652173913043
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3051200502841412
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 2.2720906734466553
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4404548174745661
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 25.41253662109375
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.28242517267843437
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3117112729287826
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7206896551724138
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1764.11328125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44177215189873426
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 1021.9218139648438
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.28795379537953797
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9483695652173914
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.35181607664099845
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.35181607664099845
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7206896551724138
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9833309650421143
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4454067429631921
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.8805520534515381
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.2883460152182619
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9782608695652174
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3325946475342702
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.721455938697318
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 230.74539184570312
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4439615026389321
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 690.979248046875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.28772635814889336
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9714673913043478
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.33577510329678106
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7210727969348659
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 13.784924507141113
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.44413697682462816
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 39.12321472167969
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.29791183294663576
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8722826086956522
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.33823458280589236
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7191570881226054
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 10542.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4403230631169608
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 4913.24560546875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2823168392788646
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.28278909833025945
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.33823458280589236
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json
new file mode 100644
index 0000000..b3bad3d
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6015625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.55390625
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.5546875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5096354166666667
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json
new file mode 100644
index 0000000..2ed5eb3
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6309689383402874
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9664175510406494
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5831148400629261
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.876137375831604
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.41972823351786614
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.954779622209502
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5226436718954207
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6237830319888734
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 151.0961456298828
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5765230312035661
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 417.6656494140625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4051044083526682
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9994275901545506
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5038561800803791
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.624246638850255
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 9.827131271362305
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5778148457047539
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 23.485851287841797
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.4077212806026365
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9914138523182598
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5109707609256201
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6200741770978211
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1509.6474609375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.576427863981512
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 827.195556640625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.40501043841336115
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9994275901545506
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.498147478687894
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5226436718954207
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6305053314789059
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9585829377174377
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5934650455927052
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9002124071121216
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.44412851862382713
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8941041785918717
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.545021841060869
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6342141863699583
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 444.21954345703125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.6035735322992343
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 612.2872314453125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.45935445307830247
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8797939324556382
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5574639922170803
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6339823829392675
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 29.62457275390625
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5996841689696012
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 38.6270751953125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.45766797228080747
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.86949055523755
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5553872058517757
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 14395.623046875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.577018736527939
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 5674.908203125
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4061624649859944
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9959931310818546
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3862357442891778
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5574639922170803
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json
new file mode 100644
index 0000000..69e10b0
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.80067
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.79455
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.77429
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.30914
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.41095
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.60087
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.31905
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.43473
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.66233
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.7119
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.63408
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.3663
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8471596998928188
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8513856989741232
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8527043294326252
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.7166495695870103
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5383304196281262
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.38408074718110424
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.71056350273151
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.5386325626626473
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.3678412023083028
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6787542765531929
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.4678010355684318
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.3023078330221261
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.17506411594869709
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.344228905317099
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.26025197550499063
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.28364735198157687
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4946084063548821
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.13024980686869012
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.6456837506614725
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.5024354435806796
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.18849105999507082
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.15537143366366737
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.3338972930408563
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.27534514133854515
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.24230061291494534
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.4763992415794819
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.17167004025145782
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.6062660448007379
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.45445564371902375
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.09621042247019258
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5420327575630611
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.37248428210075407
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.6517795575595553
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.46182346579179107
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.2556997419766225
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.6371093546193429
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.22395520722060117
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.27077611986871364
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.4324048296185153
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7966902615822546
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.5623896062382641
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.27875113624180275
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7982850278647994
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.5623589312727257
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.27578274493030464
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7977600079745486
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.5625363754999084
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.2708948491113527
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.79574
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.7872
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.76804
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.30344
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.40308
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.59158
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.31068
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.41808
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.64688
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.70611
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.63055
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.36862
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8521436227224009
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8555504516919309
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8571980685347454
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.687147173549288
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.534917528750057
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.039388068191112346
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6821413074357394
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.541004104911246
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.06613569078084217
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6546658854714889
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.5141528362539365
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.045010206374762184
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.1717014705213338
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.298486867259319
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3158992753503486
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.29394629114728443
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4807193931287969
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.200767704240122
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.6266013107050147
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.5400967080146492
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.06821295960747309
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.15728927641821855
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.3020952193182204
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3196038571595756
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.273851179897414
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.4822263524474807
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.1998852576547706
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.610064992339158
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.5237697244132881
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.047861477876695854
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5642831983945668
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.41268016275342806
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.3902377594145758
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4757631079174044
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.32238368240767273
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.4280345103983777
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.19318747544949869
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.2262940005534252
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.2898939009819229
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7559907957579797
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.5232164154691852
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.016325972601983724
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7604182097391701
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.5188685708290457
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.008720431706015956
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7617325890747185
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.5213157058041827
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.015621035073521741
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json
new file mode 100644
index 0000000..b7fc4fa
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.46213607103563425
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.5621218764061721
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.5442663405841599
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.5621218764061721
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1524486344353939
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.5194125891005561
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.48868066660269227
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.5194125891005561
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json
new file mode 100644
index 0000000..e8d7077
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.03485254691689008
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.05361930294906166
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json b/leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json
new file mode 100644
index 0000000..bfae754
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7366377426487285
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7842647128962572
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..fb1d53f
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2624971928673971
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.31502824152693154
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.3088945849814121
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.31502824152693154
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.34668475738519444
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.372455403853565
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.369729316093801
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.372455403853565
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..d9bc6ac
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.12971577033648743
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.18177734472255433
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.16423413011355156
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.18177734472255433
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.10194557773024183
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.18622026845391912
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.15405389239655473
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.18622026845391912
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json
new file mode 100644
index 0000000..c8afaa6
--- /dev/null
+++ b/leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 33992881,
+ "embed_dim": 480
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6781966575981345
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.970278263092041
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.518608169440242
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.8757017254829407
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.35501242750621376
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9618406285072951
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4581544787406372
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6731441896618733
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 137.3688507080078
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5146164978292329
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 391.87298583984375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3467238689547582
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9977553310886644
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4383109013756369
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.672755538282161
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 8.506048202514648
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5152786099460755
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 21.124141693115234
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.35145075602778914
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9652076318742986
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.4438681594614018
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6599300427516518
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1570.195556640625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5147654892877822
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 898.4225463867188
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3468591494342567
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9977553310886644
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4179931403914694
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.4581544787406372
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6746987951807228
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9681814312934875
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5363604114934374
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9120055437088013
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3921161825726141
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8484848484848485
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.46704651746605186
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6746987951807228
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 360.30352783203125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5305821665438467
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 576.9113159179688
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3949533735600658
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8080808080808081
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.468990806236423
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6758647493198601
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 22.342727661132812
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5301837270341208
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 39.38741683959961
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.37459434399629116
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9068462401795735
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.46775797789146023
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6541002720559658
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 9448.685546875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5145827317354895
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 4854.8955078125
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3464230171073095
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3679854825040224
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.468990806236423
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json b/leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json
new file mode 100644
index 0000000..1898101
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.721568117708931
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.7165532879818595
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.820388189148414
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6689951528396479
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.8363567649281944
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6298307655443518
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6099773242630385
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7648458169950588
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.5789820341918578
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.752078609221467
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json
new file mode 100644
index 0000000..0e91873
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.91537
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.90635
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.87424
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.30526
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.42635
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.72433
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.31067
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.4378
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.75859
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.82689
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.76159
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.46726
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9422321809645754
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9439900344829917
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9446453591992101
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6549640359156222
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.11037035667235007
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.41554431142868614
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6536082943031309
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.140251553474609
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.4541965457157918
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6159871931946869
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.006651176818080506
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.39627086499203873
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": -0.047556791244411895
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.16420917659496206
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.28627326792803204
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.06426190649373154
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.23746446970773183
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.15565045001627686
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5237897180891637
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.1865080232459892
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.2688572949738638
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.054074967730710764
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.1711511016438979
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2896050332877169
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.05005034152582497
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.24918235642253458
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.16768640965952947
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5114754425984644
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.2173420630028766
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.2526274232326276
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5525639421444303
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.01857146637175079
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.7765476306675947
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.48362026531371466
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": -0.0051297270434755475
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.7004665714420365
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.24671476154878727
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.37006645670815747
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.36951553698605216
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.64312359548717
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.04622765419712948
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.22259410250972433
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6385468425832173
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.058640802937365115
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.21579087208897282
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6402042049799889
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.052782783025246006
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.21896215733129423
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.83285
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.81413
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.76701
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.25404
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.35083
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.58387
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.266
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.37545
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.66303
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.75621
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.6866
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.41047
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8947289799402471
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.895975855130784
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8970771214115124
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6033756709037629
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.48175424620769186
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.1614695329433979
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.5820557360820439
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.48937482522317327
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.18205509390904553
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.49384788238425553
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.354953353704701
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.10767304568721194
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.03598090314920231
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.11662947626949612
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.28974453988735166
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.13482748795676255
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.22360013731689057
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.19043309088480928
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.42287317105206507
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.32712992457779794
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.02056986996465222
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.021824220192766298
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.11009705855814085
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.28505819859304804
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.10661440304261144
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.2092712287791401
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.19742570630860265
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.38620604109572715
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.2924386961038862
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.025319280347884648
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5425386973889819
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.4063280755847313
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.43965420847555414
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4721960038905336
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.35700671463443756
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.44652985217538876
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.2526299155090765
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.021434326602753354
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3009002533330021
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6726463178530804
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.49687521406966506
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.05561071266486503
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6731608376359998
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.49491217127896847
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.05832429376042118
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6735463200113443
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.495779540068593
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.06154966156964915
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json
new file mode 100644
index 0000000..0a06474
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7591194968553459
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.8188679245283019
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7779874213836478
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.8188679245283019
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.656010781671159
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.7320754716981132
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6774213836477987
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.7320754716981132
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json
new file mode 100644
index 0000000..987fb41
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.2975
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.24646428571428572
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.2475
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.20091666666666666
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t30_150M_UR50D/cyano_operonic_pair.json
new file mode 100644
index 0000000..75d1682
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7218390804597701
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9874778389930725
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4406451612903225
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9247815608978271
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.288917089678511
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9279891304347826
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.32971769257135214
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 340.19317626953125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4400597907324364
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 1209.041259765625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.28210042161747795
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.30836065241808736
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 5.940918922424316
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4398773006134969
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 47.593170166015625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.2840729001584786
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9741847826086957
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.31645982324175975
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7210727969348659
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 12984.419921875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44671433435478375
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 8378.552734375
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2980463425715584
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8913043478260869
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.34218016632367393
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.34218016632367393
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7237547892720306
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9779865145683289
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.44695441710367084
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9349428415298462
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3178427997705106
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.7527173913043478
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.36912560854307275
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7241379310344828
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 674.400634765625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.44820441988950277
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 1384.0185546875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.300462962962963
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8817934782608695
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.36707205079753535
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.724904214559387
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 39.354820251464844
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4482758620689655
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 84.60987854003906
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.307035175879397
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8301630434782609
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.37335963472985745
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.717624521072797
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 67287.0625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44063721070033063
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 29718.42578125
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.28290235430335775
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9959239130434783
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.2624766540914564
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.37335963472985745
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t30_150M_UR50D/ec_classification.json
new file mode 100644
index 0000000..013f529
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6640625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6111979166666666
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.578125
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5226562499999999
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t30_150M_UR50D/ecoli_operonic_pair.json
new file mode 100644
index 0000000..dcba464
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6312007417709782
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.964427649974823
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5821596244131455
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9228619337081909
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.42521762068055924
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9227246708643388
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.524131558836932
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6191469633750579
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 472.7284851074219
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5769426104621634
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 942.1561279296875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4096153846153846
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9753863766456783
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.49992125667466114
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.624246638850255
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 28.615245819091797
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5795512930296284
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 47.59563064575195
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.41348973607038125
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9685174585002863
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5150815389359208
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6193787668057488
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 11865.05078125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5762376237623762
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 6186.25341796875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4048226292603756
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9994275901545506
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.5074172652530077
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.524131558836932
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6483541956420955
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9662680625915527
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.6057630736392743
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9369711875915527
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.48298162014976176
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8122495706926159
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5752810967507045
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6488178025034771
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 912.6686401367188
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.6068776865963268
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 1335.36962890625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4606941560367843
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8889524899828277
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5731535780314824
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6467315716272601
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 48.278228759765625
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6060103626943005
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 73.76244354248047
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.47498375568551005
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8368631940469377
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5733375822821691
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 89725.625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5765676567656766
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 26633.4609375
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4050544864363552
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3632996357288699
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5752810967507045
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t30_150M_UR50D/euk_retrieval.json
new file mode 100644
index 0000000..e7d0dc3
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.92259
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.9167
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.88687
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.35223
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.47142
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.70071
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.35765
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.48426
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.74119
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.81865
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.73633
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.42412
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9517684887459807
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9522278364722093
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.953643102583901
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.7150037451899073
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.2906625946483748
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.7209294602302925
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6907192680160392
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.3058553886156155
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.6912724175806588
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6773879240615319
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.2230353104353846
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.5037777804193893
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.07512993110545117
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.10451684557609604
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.20258838285578284
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.15504770699858275
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.27125172402690906
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.08216003549646085
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5793806460927751
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.39855546980064466
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.3212267685833858
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.06378940095453296
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.0935410532779427
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.20565896341550313
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.12463958126591933
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.2460932574460236
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.09141269581756521
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5316388055512066
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.39363828399474204
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.27072875132052776
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.3814594443420771
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.27374483565752716
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.8999025518235271
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.2990471971388469
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.2179409172747032
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.69964609275835
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.14129861784392306
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.26407541710839577
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.31442605468864243
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.9209205235715922
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.26647535110302417
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.8207529427307493
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.9201601439905501
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.26067869360312956
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.8298259416241756
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.9202405827206523
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.265282549872998
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.8251010908893353
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.84626
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.82923
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.78558
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.30997
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.40782
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.59403
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.31985
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.43295
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.66666
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.74598
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.65852
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.36688
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9123258306538048
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.914544735364671
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9150978563852772
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6917517819391329
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5237021136032984
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.12166243275163607
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.7037426830988499
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.484773980307184
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.12250966461302211
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6749011438082552
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.4004427197799687
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.09048181785892781
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.1748175382079989
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.011739002651428564
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3400304609570671
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.29156932199723384
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.16970879434540548
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.2357874576308254
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.6261176648013426
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.26553990452224274
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.00008479750409092293
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.16601631967349068
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": -0.0028582806705203046
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3381167573210711
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.25720692642755344
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.13614183480704828
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2557093468354816
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5690350990599277
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.18579980394403742
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.0024995904740863243
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5027516634592902
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.5209659666134092
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.5193484166288937
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.42341736061780094
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.4441506486245349
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.5127529647369428
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.18125443712215236
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.047925843664078704
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3532612529819783
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7446679342833693
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6132809211091352
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.09338614253655397
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.742408783880129
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.620360028959556
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.07601111866100194
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7411037680464723
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6185536637957788
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.07840809223865396
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t30_150M_UR50D/fefe_phylogeny.json
new file mode 100644
index 0000000..f0ef494
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.42638593677257985
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.613514079803676
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.6057539011664933
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.613514079803676
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2903783693037115
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.5353019718492421
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.5569899700006391
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.5569899700006391
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t30_150M_UR50D/modac_paralogy_bigene.json
new file mode 100644
index 0000000..93a430d
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.040214477211796246
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.504309723543487e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 9.002569333287721e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.14544235924932977
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/mopb_clustering.json b/leaderboard/submissions/esm2_t30_150M_UR50D/mopb_clustering.json
new file mode 100644
index 0000000..5a97c06
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.843073317600245
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7993491625653556
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..b792041
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.17685077749657868
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.18397145017500027
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.1845894895803573
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.1845894895803573
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.3483214537780648
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3731512151106379
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.34706946425354485
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3731512151106379
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..f8490f6
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.11953486541105843
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.15790804024970093
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.1545090554656792
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.15790804024970093
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1810910478431202
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2420094772926146
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2240652992499544
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2420094772926146
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t30_150M_UR50D/vibrio_operonic_pair.json
new file mode 100644
index 0000000..e005f54
--- /dev/null
+++ b/leaderboard/submissions/esm2_t30_150M_UR50D/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
+ "revision": "...",
+ "num_layers": 30,
+ "num_params": 148795481,
+ "embed_dim": 640
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 15,
+ "layer_display_name": "15",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6743101438010105
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9717499613761902
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5162846803377563
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9134178161621094
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3529896907216495
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9607182940516273
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.44141636821591623
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6669257675864749
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 500.4721984863281
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5148800924588268
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 1224.591064453125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.34669260700389104
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.41668517460563326
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6692576758647493
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 25.1038875579834
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5144927536231884
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 46.55123519897461
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.35192069392812886
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9562289562289562
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.4358738619047138
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6614846482705014
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 11981.70703125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5147313691507799
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 6498.75244140625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.34655775962660446
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4304420964721535
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.44141636821591623
+ }
+ ]
+ },
+ {
+ "layer_number": 29,
+ "layer_display_name": "29",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.68558103381267
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9706978797912598
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.535526776338817
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9334700107574463
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3891149542217701
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8585858585858586
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.49246488546090866
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6809172172561213
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 795.2196044921875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5332845647403073
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 1224.1300048828125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3955507325013565
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8181818181818182
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4833411491930948
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6801399144966964
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 43.079410552978516
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5346938775510204
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 77.91600036621094
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.383601756954612
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8821548821548821
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.48219648792984926
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6541002720559658
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 60487.8359375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5148456057007126
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 30271.841796875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3500201857085184
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9730639730639731
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.34301425719850337
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.49246488546090866
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/MIBIG_protein_classification.json b/leaderboard/submissions/esm2_t33_650M_UR50D/MIBIG_protein_classification.json
new file mode 100644
index 0000000..fd6cc07
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6646213726039093
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.7142857142857143
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7797818341533279
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6226719674475112
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.8295540438397593
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6625178133673452
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6598639455782312
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.8568965517241379
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6022767137392347
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7842025699168563
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t33_650M_UR50D/arch_retrieval.json
new file mode 100644
index 0000000..ae38f34
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.93123
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.92445
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.8848
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.30898
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.43383
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.737
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.31538
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.44791
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.76764
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.84481
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.78015
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.46964
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9498435054773084
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9516662263141135
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9519930735110318
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6308519525257628
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.089031581354036
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.4412273663070538
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6032994212940577
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.09724157956704019
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.4738679731426537
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.5712702884058568
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": -0.1658603066862231
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.3611726321158017
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": -0.017602606962065153
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.09514819051312666
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.2848295232149835
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.08673113556503126
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.15294567339061488
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.16894744699362754
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5120519885236438
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": -0.0038334646025400855
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.2199525642638632
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.02455812168337926
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.09896005096881812
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2875714656687537
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.06805556416914144
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.15852083147934737
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.1784723750352869
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.500056208041346
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.002586904787276158
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.20499742952429414
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5039507321590944
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.05258016937163956
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.7647229953284452
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4021315165507268
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.016992811311858026
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.6768345994695547
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.17895723642667355
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.33113192778327805
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3516930058377777
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6679881478327583
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.1530559182468113
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.2950913220027977
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6645424475589732
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.16781717474258154
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.2930131697897344
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6647896092012971
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.16547097365694646
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.2935701117386994
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.80913
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.79635
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.75732
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.24603
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.34022
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.56511
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.25919
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.37057
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.65688
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.73419
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.672
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.40938
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.874619433774363
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8781182041745417
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8796387772814498
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6393103094705105
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.39489258054605936
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.14026462748913124
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6190457124081322
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.4030000242340694
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.1730798319035512
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.5320810476895583
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.3325889831793021
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.08862322432493497
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.14094623550319177
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.1285128991955811
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.19503770328831505
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.23096756151958173
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.221478673456825
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.11283510381365157
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.4540497453609997
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.29125135118566164
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.036933801012767234
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.11450448159576665
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.12487127801737757
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.18688120243913126
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.17741445748537313
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.19925238150416746
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.11233216281151082
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.3791957310622827
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.2551711179587873
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.00018595264688904266
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5314308675519602
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.34301160006074155
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.33926190682858753
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4448067496058197
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.2968723779500677
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.3528818604969278
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.1936211550389429
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.0215074969245683
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.20326952474019822
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7158514971972625
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.39909861586332773
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.11127787012043149
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7189756388185046
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.39804773603313176
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.11462677765400364
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7198766577873941
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.40101385908354403
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.1168522554652628
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t33_650M_UR50D/bacarch_bigene.json
new file mode 100644
index 0000000..61d4bde
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.790251572327044
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.8490566037735849
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.808427672955975
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.8490566037735849
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.5652650494159928
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6566037735849056
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5896406109613657
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6566037735849056
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t33_650M_UR50D/convergent_enzymes_classification.json
new file mode 100644
index 0000000..8d899de
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.3125
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.25675
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.2675
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.22246428571428567
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t33_650M_UR50D/cyano_operonic_pair.json
new file mode 100644
index 0000000..5b10e58
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7206896551724138
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9891349673271179
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.44163658243080617
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.8806867003440857
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.2836166924265842
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9972826086956522
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.34297195857616974
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7203065134099617
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 1108.904541015625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4404548174745661
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3620.740966796875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.28242517267843437
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3146763612933017
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7206896551724138
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 38.761192321777344
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4416441644164416
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 127.99288940429688
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.28340392760877936
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3240934026221954
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7199233716475095
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 59584.36328125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44705020153902525
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 42462.1484375
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.30607124937280483
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8288043478260869
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3553266247045366
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3553266247045366
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7256704980842912
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9802291393280029
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.45144356955380577
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9365932941436768
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.31175556706369756
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8179347826086957
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3711095484875728
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7268199233716475
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 2405.0166015625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4552
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3873.87744140625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.322562358276644
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.7730978260869565
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.37188521395957413
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7283524904214559
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 89.34696960449219
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4592169657422513
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 158.45071411132812
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3280885780885781
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.7649456521739131
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.37329497692478114
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.717624521072797
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 414854.375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4394618834080718
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 120746.546875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.28171713300114987
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.998641304347826
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.2524373913991999
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.37329497692478114
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t33_650M_UR50D/ec_classification.json
new file mode 100644
index 0000000..849e5a8
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6953125
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6372395833333333
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.640625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5825520833333333
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t33_650M_UR50D/ecoli_operonic_pair.json
new file mode 100644
index 0000000..2afeb54
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.634445989800649
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9598076343536377
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5842179759377212
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9043072462081909
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4227912932138284
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9450486548368632
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5276812260133239
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6212331942512749
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 1719.5322265625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5772681954137587
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3341.0849609375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4066963240458909
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9942759015455066
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.49092987587616777
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6256374594343996
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 66.70790100097656
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5782290279627164
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 122.32955932617188
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.40765078620042244
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9942759015455066
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.511753481157234
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6302735280482151
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 54134.9296875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5839282445542742
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 41969.6640625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4292249730893434
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9129937034917001
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.5301473268684156
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5301473268684156
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6430227167362077
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9668768048286438
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.6095796676441838
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9326354265213013
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.46288598574821854
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8923869490555237
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5636125661225051
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6460361613351877
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 2996.8798828125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.602950342821525
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3910.81005859375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.47325505544683627
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8305666857469949
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.568026958169892
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.645804357904497
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 107.93898010253906
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6100134279685403
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 177.9056396484375
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.45874206578188115
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9101316542644533
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5727211593488946
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 522833.3125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5762376237623762
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 105705.90625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4048226292603756
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9994275901545506
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3449597323547321
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5727211593488946
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t33_650M_UR50D/euk_retrieval.json
new file mode 100644
index 0000000..62e64a9
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.93328
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.92927
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.89032
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.35914
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.48142
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.71036
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.36397
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.48703
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.73804
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.83537
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.75402
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.42482
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9437299035369775
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9440514469453377
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.945011211244356
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.7620928279266368
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.33055988644552864
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.49891672668881054
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.7238394046424372
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.2897858408379934
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.4991960417643786
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6394920275348465
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": -0.033060154820210755
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.3423790207125621
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.15329766118784013
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": -0.04748558443889553
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.19738372855185915
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.21780702746711833
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.08698230377058089
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.07610409072111464
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.566496850232904
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.07469827272103352
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.29548110389437116
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.15194985870758682
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": -0.049271771318523803
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2108956515825209
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.21617272285778213
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.0839962096295339
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.09028425010830723
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5469629807206992
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.05743544528584219
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.27372175464202625
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.30507744236501566
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.39710388130115676
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.8234215276991089
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.17621225835821736
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.32037529086829686
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.6212516491077492
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.029957995813415312
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.17440396313077902
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.2483983668583826
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.8982147001536217
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.3436237156115937
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.4261298572775073
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.8976297271659986
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.3435359664305329
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.42532802107589146
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.8958429750776126
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.34758003000370125
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.4212983155790701
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.86745
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.85726
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.81762
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.32161
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.42823
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.63052
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.32639
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.43888
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.68495
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.77749
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.69678
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.38842
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9054126473740622
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9076634512325832
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.908978059494486
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.7748586911316915
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5142764011620478
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.16183638440404874
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.7792667164072754
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.5091522839696889
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.15010464498820264
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6885174711474481
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.4634219612093561
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.0958526447757281
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.28942359787845134
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.05518275112905171
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3024360599910821
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.37812716933920204
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.20300375200619958
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.23169359168341253
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.618121347997912
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.32151209873733966
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.014585296410912104
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.2890637788771335
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.047744544718804925
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.29922142051515754
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.3727257601790394
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.18909483665427484
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.23075313240938425
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5831797713164615
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.3065281773765325
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.017022054288872285
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.46921647871568267
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.5005062576058754
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.5495785640432075
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.3017994065661075
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.408118171847032
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.4865073732708222
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.03870849804412649
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.0005533993463936359
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.34313948852939863
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.8384104464388212
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.5062896003344488
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.02489705741590697
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.8397121680667853
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.4954436348725267
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.029156290363593018
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.8408067544253827
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.507869466281193
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.025479819846184824
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t33_650M_UR50D/fefe_phylogeny.json
new file mode 100644
index 0000000..c780f2c
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.5321010160894765
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.6485251532984211
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.640158436352614
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.6485251532984211
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.5866984424931825
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.7174938899857625
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.6962913981388911
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.7174938899857625
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t33_650M_UR50D/modac_paralogy_bigene.json
new file mode 100644
index 0000000..a124df5
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.04423592493297587
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.0003572022938507674
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0020107238605898124
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.0004896388988695724
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0020107238605898124
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.1742627345844504
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/mopb_clustering.json b/leaderboard/submissions/esm2_t33_650M_UR50D/mopb_clustering.json
new file mode 100644
index 0000000..4d57c76
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.8403349456017069
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.871856033794394
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..185a15b
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1267191344357247
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.13544894658566908
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.14103248399983875
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.14103248399983875
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.23178083925138884
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.31760837159962363
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.3066594331560383
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.31760837159962363
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..c2434f6
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.07801983788225005
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.15172067185368715
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.16114607476373227
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.16114607476373227
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.20519819494619662
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.28437203263094524
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2855776096898296
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2855776096898296
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t33_650M_UR50D/vibrio_operonic_pair.json
new file mode 100644
index 0000000..8400658
--- /dev/null
+++ b/leaderboard/submissions/esm2_t33_650M_UR50D/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t33_650M_UR50D",
+ "revision": "...",
+ "num_layers": 33,
+ "num_params": 652353941,
+ "embed_dim": 1280
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6743101438010105
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9669862985610962
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5211475894705708
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.8917249441146851
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3538152610441767
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9887766554433222
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4326581796370962
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6649825106879129
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 1687.185791015625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5159010600706714
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3160.9052734375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.34970059880239523
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9831649831649831
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3953543828676884
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6696463272444617
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 61.02823257446289
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5176045741799579
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 109.26025390625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3536184210526316
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9652076318742986
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.41999108125278384
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6595413913719393
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 58606.640625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5259608900876601
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 44785.0859375
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3759036144578313
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8754208754208754
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.44069080050077336
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.44069080050077336
+ }
+ ]
+ },
+ {
+ "layer_number": 32,
+ "layer_display_name": "32",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6820831713952584
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9717143177986145
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.536664503569111
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9286491274833679
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.37745321770880874
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9281705948372615
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4828328003170023
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6797512631169841
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 2772.750732421875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5311884438608011
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 4146.494140625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.37540603248259863
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9079685746352413
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4753549489730454
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6789739603575593
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 100.84526062011719
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5390972663699936
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 190.31085205078125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3760532150776053
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9517396184062851
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.479168611593493
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.653322969296541
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 364605.8125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5140051978053711
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 113893.3515625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.34603421461897355
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9988776655443322
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3154670845055397
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.4828328003170023
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/MIBIG_protein_classification.json b/leaderboard/submissions/esm2_t36_3B_UR50D/MIBIG_protein_classification.json
new file mode 100644
index 0000000..91eaa70
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7132237125944091
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.746031746031746
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.836830500285544
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6633593919422381
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.8495842781557078
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6695716322239411
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6689342403628118
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7821148498790867
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.623537709467486
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.799697656840515
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t36_3B_UR50D/arch_retrieval.json
new file mode 100644
index 0000000..d6041dc
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.9278
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.92019
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.88622
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.31302
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.43564
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.734
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.31902
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.4476
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.76254
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.83841
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.77217
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.47315
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.948968558827714
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9506117513159769
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9511580419936511
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.5695825862687866
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.221986509500315
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.4203950131177228
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.564322416808483
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.19108656309781616
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.42395611376427206
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.5055593534589494
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": -0.09468644055518095
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.2864463127750372
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": -0.03538843910397287
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.09311883793714422
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.30555605847252953
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.0783240568174268
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.17795919882735975
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.18509762934741394
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.4771715396594046
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.068423803745228
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.17379854324787608
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.03848314855918574
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.09553829284420427
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.30587990770622553
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.07003591555908308
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.18213605943910144
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.19768968297795225
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.47791953190170117
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.06888807128628972
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.14996648800723503
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.502204692599048
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.1575682995183711
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.806722029506651
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4134966659666948
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.08972449319873597
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.6905241985128383
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.14715073209729895
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.3324778553017647
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3229141728806895
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6064510581732133
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.35520955023129336
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.2776339495301657
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6013823108442311
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.3564245142833086
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.2553085460733028
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6029004356105379
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.35308050283451
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.257866385843169
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.87956
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.86893
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.82298
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.28161
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.38967
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.6507
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.29346
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.41081
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.71119
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.79385
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.7303
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.43768
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9240219092331764
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9247296912789864
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9256488541632965
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6207749722953523
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.515941363395918
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.16217044846365308
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.5968096620157092
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.5154687268366953
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.1920119276825337
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.529142718670939
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.2280823824107517
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.10670338306671276
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.16178078739669308
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.020984503452804337
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.23075737918650982
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.25410720612296833
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.12652298327617323
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.1499452335235867
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5089639558817692
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.13626981480705233
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.01710354293008994
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.13773178438608552
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.013499315164379576
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.22477658297065084
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.21976238711840093
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.1146387796750991
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.15154255626633595
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.47534977446281584
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.07116715411722786
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.008607115109327266
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.424843376611528
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.46963649338900365
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.47110941056522865
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.3148320527956752
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.39164203516684803
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.4331079065864579
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.07686361736738528
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.0710043916712577
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.22846701483468224
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7122038946666341
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.5616947469059596
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.07473438287064475
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7119380442662104
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.5584194442105753
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.07250782682707735
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7118323491211572
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.5561670251588102
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.07251247155922119
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t36_3B_UR50D/bacarch_bigene.json
new file mode 100644
index 0000000..b59b067
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7773584905660378
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.8415094339622642
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7973584905660378
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.8415094339622642
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.6844384546271338
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.7547169811320755
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7023450134770889
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.7547169811320755
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t36_3B_UR50D/convergent_enzymes_classification.json
new file mode 100644
index 0000000..afbd6b4
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.3175
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.26516666666666666
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.2875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.23929761904761904
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t36_3B_UR50D/cyano_operonic_pair.json
new file mode 100644
index 0000000..a65fadf
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7229885057471265
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9862627983093262
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4427099594130503
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9512487649917603
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.287393595460073
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9633152173913043
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3521021786502902
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7222222222222222
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 4447.77392578125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4404548174745661
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 11370.837890625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.28242517267843437
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3253018180693411
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7222222222222222
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 111.42530822753906
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.44223826714801445
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 316.37481689453125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.2840030911901082
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.998641304347826
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3371280175314328
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7191570881226054
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 655867.3125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44874551971326165
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 434046.21875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3047711781888997
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8505434782608695
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3402695550794156
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3521021786502902
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.721455938697318
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9799139499664307
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4436528497409326
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9414458274841309
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.29124149659863946
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9307065217391305
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.34858593267642246
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.721455938697318
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 8714.66015625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4455159112825458
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 15921.369140625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.29178947368421054
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9415760869565217
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.34892147616715924
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7222222222222222
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 217.1142578125
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4531132783195799
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 380.83392333984375
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.31295336787564765
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8206521739130435
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3577233751789095
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.717624521072797
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 2331774.5
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4394618834080718
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 1028537.625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.28171713300114987
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.998641304347826
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.25200076084231776
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3577233751789095
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t36_3B_UR50D/ec_classification.json
new file mode 100644
index 0000000..e3775f9
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.734375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6796875
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6263020833333334
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t36_3B_UR50D/ecoli_operonic_pair.json
new file mode 100644
index 0000000..c7fae4b
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6321279554937413
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9784801006317139
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5851825340014316
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.955406904220581
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.42567039833376724
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9358900973096738
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5401199692237064
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6210013908205841
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 5729.439453125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5777106411547499
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 10155.3583984375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4086915222037521
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9851173440183171
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5020331902964992
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6265646731571627
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 148.85182189941406
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5890001757160429
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 254.08401489257812
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.4249492900608519
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9593589009730967
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5261132930952928
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6235512285581827
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 551950.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5956977385548815
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 433432.1875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4387865655471289
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9273039496279336
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.5241523190145476
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5401199692237064
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6420955030134446
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9660873413085938
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5942733657482442
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9371581077575684
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.43352601156069365
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9444762449914138
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5507975673875168
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6432545201668984
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 11284.8125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.594818279956819
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 16347.359375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4337444240356862
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9461934745277619
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5513298747216403
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6446453407510431
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 286.11083984375
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6041088284286508
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 419.63922119140625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.44638949671772427
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9341728677733258
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.560712040100291
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 2749512.5
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5762376237623762
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 935082.25
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4048226292603756
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9994275901545506
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3470113984193546
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.560712040100291
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t36_3B_UR50D/euk_retrieval.json
new file mode 100644
index 0000000..e6aaccd
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.92746
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.92435
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.8887
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.35702
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.48097
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.71102
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.36361
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.49573
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.73945
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.8283
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.74662
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.42386
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9445337620578779
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9454269381922114
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9461824139335013
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.7182511735125956
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5222382381680614
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.28061527472757375
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6822551267755836
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.47487826635542546
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.2879774120935547
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.5831687400559494
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.19720517604494417
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.12322021001506091
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.15995605492393108
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": -0.009192337725249395
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.2726694815884632
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.23974115018115752
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.17361801624364362
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.18401877459714397
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5824063620629947
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.2841159591470895
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.033462605905602995
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.1552580393454503
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": -0.018330467820246478
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2916738259676148
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.21463442654398712
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.15438579748881917
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.18787323197368067
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5745278139796267
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.2757383419710411
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.05586820546617385
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.3488804246892938
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.4962700661015737
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.6999956331879145
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.2078054685387809
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.4017745706742885
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.5648444286940194
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": -0.020389884132193954
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.21348688617893796
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.2569747609893314
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.8526479528304391
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.7145003856437514
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.2523037494515739
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.8502362990306102
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.7119663865365718
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.27066105682545166
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.8500349732046102
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.7081399222244705
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.2783135947517424
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.91085
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.90346
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.85899
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.3399
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.4558
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.67548
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.34867
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.47383
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.71868
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.82251
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.73633
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.40997
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.930010718113612
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9314844587352624
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9323214148863197
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.7640857998353766
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.7354333242704569
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.30519240283573684
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.7355219364749873
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.6683650025567451
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.3707995393372363
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6610500400872863
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.3111396784423232
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.1876061714382793
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.30631322003416805
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": -0.024074646428667697
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.30856020616160795
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.38789519784115756
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.12667431447242408
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.22285611272661204
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.674757049307216
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.21754000071916246
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.027057421884101825
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.29285629152302867
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": -0.04105614851900655
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.30446018048841167
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.35827571757915555
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.0869475077364285
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.21288114526580437
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.6580911825358527
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.1427550599573899
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.021156553400256822
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.3608478586760989
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.6317427657580859
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.7043182301188912
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.16171321927396293
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.5007867347225371
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.6246067899980982
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": -0.07045167205515267
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.05554080115033524
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3542460994718293
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.8324314570384903
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.8215594631368153
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.20461209640061234
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.8314402422775857
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.8177212818589602
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.21316411254806447
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.831948038836457
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.8169691016937358
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.20728005964482124
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t36_3B_UR50D/fefe_phylogeny.json
new file mode 100644
index 0000000..bf457f5
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.6558782216716175
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.7365256290104596
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.7136968365232351
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.7365256290104596
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.7465702832852074
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.7917831210213578
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.7649782553422211
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.7917831210213578
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t36_3B_UR50D/modac_paralogy_bigene.json
new file mode 100644
index 0000000..5fc7e25
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.036193029490616625
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.513409339146605e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 9.02074410313958e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.1306970509383378
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/mopb_clustering.json b/leaderboard/submissions/esm2_t36_3B_UR50D/mopb_clustering.json
new file mode 100644
index 0000000..4745131
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7761858715009393
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.9024367024356471
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..a45c688
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.106058717692602
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.1528085277532408
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.1702712021061018
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.1702712021061018
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.22892873976632383
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2920991527135924
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.293053056643355
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.293053056643355
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..5791419
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.06958717767819357
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.1566146955803603
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.19202284530665048
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.19202284530665048
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.19060675364434318
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.25577724644638433
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2587461756509838
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2587461756509838
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t36_3B_UR50D/vibrio_operonic_pair.json
new file mode 100644
index 0000000..9168761
--- /dev/null
+++ b/leaderboard/submissions/esm2_t36_3B_UR50D/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t36_3B_UR50D",
+ "revision": "...",
+ "num_layers": 36,
+ "num_params": 2841627041,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 18,
+ "layer_display_name": "18",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6708122813835989
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.981367290019989
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5193538555318501
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9532276391983032
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.35648535564853556
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9562289562289562
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.43327834671246207
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6622619510299261
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 4904.591796875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5153980244044161
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 10609.9453125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3477067816542532
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9955106621773289
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.40016477443788834
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6653711620676254
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 131.35702514648438
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5229448662925057
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 228.46902465820312
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.37043966323666977
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8888888888888888
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.4280072686520109
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6603186941313641
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 597536.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5275381552753815
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 464319.0625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.37447008949599625
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8922558922558923
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.44016861120869744
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.44016861120869744
+ }
+ ]
+ },
+ {
+ "layer_number": 35,
+ "layer_display_name": "35",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6712009327633113
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9714021682739258
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5251497005988024
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9320735335350037
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.35810534912209063
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9842873176206509
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.44585848288179364
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6692576758647493
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 10474.798828125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5252225519287834
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 18217.796875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.35699878983461075
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9932659932659933
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.43460961961390276
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6708122813835989
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 269.1448974609375
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5331240188383045
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 407.750244140625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3700959023539669
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9528619528619529
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.444004705862405
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.653322969296541
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 2218353.5
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5145827317354895
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 976602.5625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3464230171073095
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3232698755870762
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.44585848288179364
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/MIBIG_protein_classification.json b/leaderboard/submissions/esm2_t6_8M_UR50D/MIBIG_protein_classification.json
new file mode 100644
index 0000000..5d11160
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6564698290049759
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6213151927437641
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.8560673597146576
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.598094204265186
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7698412698412702
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6539081896462444
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6825396825396826
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7848864063703949
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6078800724477005
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.8102796674225253
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t6_8M_UR50D/arch_retrieval.json
new file mode 100644
index 0000000..010e209
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.46922
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.43575
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.39114
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.10438
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.13772
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.21355
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.1218
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.17417
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.3415
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.42595
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.36573
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.20961
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.6061032863849769
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.6136309439126343
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.618612010194391
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.1998075116367266
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.501606836885053
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.1378118530981871
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.18302753229030955
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.529252934890408
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.13526580782340344
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.1600349751039775
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.5759387637864083
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.14570847362804085
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.06563284509194764
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.4956919284414832
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3095151767985699
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.11335639333019934
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.5577957210632679
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.254596062635998
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.20045555920826685
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.6132212804732518
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.16302757181104657
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.03401084834217468
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.4617531031384631
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.28397660576444894
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.06720993976029453
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.5073816131438185
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.23261778468205002
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.16667921011510828
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.5259393842070421
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.13381540436587672
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.2105023896681829
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.4512153840299376
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": 0.04767765797059535
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.2000938079661469
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.4461163108781014
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": 0.01450421362018999
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.12765115099732313
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.3146014632375712
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.044088192546631495
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.21854370496963085
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.4305406870224175
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.2065829490711869
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.21766766497784718
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.43116699819575055
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.20801462901159104
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.2176313827069169
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.43135087322870347
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.2078825282336836
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.63152
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.60452
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.56721
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.17864
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.24217
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.39543
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.19424
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.27639
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.51307
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.56901
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.50265
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.29486
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7392801251956179
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7450229999525767
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7481511664471421
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.39989468043684767
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.4936426613891317
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.09522925993445099
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.3810925649678609
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.5282583606322245
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.08763512132492676
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.3621673695795759
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.6080254445975454
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.09742405454353203
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.07782146154396122
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.48693859418142155
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3599139708725904
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.15672935830334864
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.5656574495870028
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.2853689795806975
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.35302238221077475
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.645119394317702
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.12237037613331439
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.052013553698076434
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.45739246576262554
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3493212024717546
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.11192308593513252
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.5489391222279509
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2769457461783965
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.3133757496247297
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.6618732618345899
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.11423599863005311
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.4062309467686318
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.3828260336036461
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.0870886550678612
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.3881405881948008
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.35340692971248944
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.1401576823890037
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.3163198713977749
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.10096336675596154
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.20998793758526332
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.4276357773831127
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.42675005803237664
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.1732983192011071
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.42738519405434844
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.4349603370469823
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.17457505671954143
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.4269447696243609
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.43456693807574653
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.17238808668733543
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t6_8M_UR50D/bacarch_bigene.json
new file mode 100644
index 0000000..53bd271
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.24651143056803437
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.30943396226415093
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.26085981104849026
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.30943396226415093
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.43651991614255764
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.5132075471698113
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.4573989218328841
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.5132075471698113
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t6_8M_UR50D/convergent_enzymes_classification.json
new file mode 100644
index 0000000..15c068a
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.06491666666666666
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.15
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.11641666666666665
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t6_8M_UR50D/cyano_operonic_pair.json
new file mode 100644
index 0000000..1585d8d
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7218390804597701
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9959506988525391
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.442998760842627
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9245835542678833
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.28691813804173355
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9714673913043478
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3475030371533737
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.721455938697318
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 19.038516998291016
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4394618834080718
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 111.03251647949219
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.28171713300114987
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.998641304347826
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3346837880060032
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7206896551724138
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 1.384537696838379
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4400597907324364
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 8.970820426940918
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.28210042161747795
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3353446157868629
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7241379310344828
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 269.9737854003906
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4413710162357186
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 153.47955322265625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2833976833976834
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9972826086956522
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3529107024982295
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3529107024982295
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7203065134099617
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9935691356658936
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.44019138755980863
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.6014477014541626
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.2822085889570552
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.33649957902185235
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7210727969348659
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 52.736236572265625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4401197604790419
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 375.45855712890625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.28225806451612906
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.998641304347826
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3307875908554976
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7210727969348659
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 4.403592109680176
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4403230631169608
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 28.599159240722656
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.2823168392788646
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.33161356625614147
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7199233716475095
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1383.460693359375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44529750479846447
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 968.0859985351562
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.29121338912133893
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9456521739130435
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3259103133557722
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.33649957902185235
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t6_8M_UR50D/ec_classification.json
new file mode 100644
index 0000000..ed88033
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.3984375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.33880208333333334
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4921875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.43671875
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t6_8M_UR50D/ecoli_operonic_pair.json
new file mode 100644
index 0000000..999fdcd
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6353732035234122
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9866563081741333
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5892511013215859
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9222266674041748
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4256619144602851
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9570692615912993
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5334395272423883
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6332869726471951
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 28.67717933654785
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5785809018567639
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 108.07534790039062
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4072345390898483
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9988551803091014
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5209630222281199
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6316643486323598
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 2.2130112648010254
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5811421872383186
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 7.783103942871094
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.4107481060606061
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9931310818546079
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5214913767122241
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6105702364394993
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 263.1507568359375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5766182298546895
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 145.48297119140625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.40519842190763516
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9994275901545506
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.45836132847658473
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5334395272423883
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.625173852573018
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9659532308578491
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5768532276704639
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.690697431564331
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4053364269141531
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5151490072703857
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.626332869726472
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 107.05496215820312
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5768911055694097
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 344.6910705566406
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4065135895032802
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9931310818546079
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5190509198117845
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6270282800185443
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 7.555862903594971
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5776892430278885
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 25.03308868408203
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.40682721533785365
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9959931310818546
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.521895461247817
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1922.33544921875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.577347143334447
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 901.3367309570312
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.40764331210191085
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9891242129364625
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3848147438817163
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.521895461247817
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t6_8M_UR50D/euk_retrieval.json
new file mode 100644
index 0000000..1f7668d
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.49408
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.47293
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.46848
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.12033
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.17266
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.25802
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.13544
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.21307
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.414
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.45209
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.40289
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.25177
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.6258842443729904
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.6334430664012657
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.63890993904596
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.3197308988578981
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.39435369504510975
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.11150171386250221
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.29199381948996095
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.4225854193571499
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.09968997256246669
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.23946169242646484
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.4884005419729609
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.15426795632949822
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.18414959332523892
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.5120588644087861
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.2941293942685382
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.1871139452652369
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.6089646840398025
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.2564925569791867
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.2706367050944652
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.5951870280953666
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.18224740482812346
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.1661806048683229
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.4546332994565831
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2642973757882288
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.15499500800330515
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.5128221338936043
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2151781820356106
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.26130667769549404
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.46903523907914035
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.18462384496314488
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.292996802846984
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.3791795767825314
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": 0.04216356786779477
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.24939481046185955
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.377667896018253
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.0009061997167830926
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.11321554826255
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.07535552700441209
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.09619972438311733
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.35543307829742765
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.31072161870298926
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.1905412426846346
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.358928797571008
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.30496859707141505
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.18287037565676256
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.36422026130776614
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.3034707810121825
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.18390019146509284
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.63931
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.62964
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.61528
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.21465
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.29645
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.435
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.22756
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.33071
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.53849
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.57878
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.51801
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.29942
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7176848874598072
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7224187209717757
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7253074741993061
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.392629018512806
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.6258691925738074
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.05004738255742322
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.395700048668899
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.626564211565225
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.05691623676909853
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.3789799295065068
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.6450219381389026
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.07483337993245745
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.10430912918132056
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.5179584195524325
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3593900689021102
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.22103111933461858
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.6496944258232327
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.2622775364232766
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.3908889482223821
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.7223406412444101
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.14242751621845157
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.11222028982139254
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.4733253400027294
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3295287493268477
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.22279476415603025
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.5802492026654497
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.24853557230212295
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.4335923460209573
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.5885101603016908
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.09840850865407258
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.37166379252531523
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.5201669057711779
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.18172576887258612
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.3776413732817113
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.4464394014516979
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.21994682279900005
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.1885532531573671
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.006154947469005304
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.2438525109491442
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.40584332584647376
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6023237957095166
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.09886691841743467
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.4110434425857959
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.6016275572651675
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.10021734255007336
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.40988630845569324
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6016744312872032
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.09594082080189775
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t6_8M_UR50D/fefe_phylogeny.json
new file mode 100644
index 0000000..f92aa0e
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.16751888592484196
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3387417973183231
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.28409648418772426
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3387417973183231
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.09209866020666911
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.393364401792143
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.35353088826306645
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.393364401792143
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t6_8M_UR50D/modac_paralogy_bigene.json
new file mode 100644
index 0000000..547345d
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.5012846666438603e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.996527340446588e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.048927613941018765
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.039544235924932974
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/mopb_clustering.json b/leaderboard/submissions/esm2_t6_8M_UR50D/mopb_clustering.json
new file mode 100644
index 0000000..6e61599
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.5331854224162181
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.6537410442469105
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..2ccdcb0
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.27249253585753297
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.32110156722667066
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2765400534230894
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.32110156722667066
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.3522084172242064
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.37491879803690276
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.37357997215191524
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.37491879803690276
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..d0eeedc
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1718947821310708
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.19149793117776248
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.19007575246850486
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.19149793117776248
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.08790159385645117
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.15824982794036926
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.14789213236823395
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.15824982794036926
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t6_8M_UR50D/vibrio_operonic_pair.json
new file mode 100644
index 0000000..89f729c
--- /dev/null
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "facebook/esm2_t6_8M_UR50D",
+ "revision": "...",
+ "num_layers": 6,
+ "num_params": 7840121,
+ "embed_dim": 320
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 3,
+ "layer_display_name": "3",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.672755538282161
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9907011985778809
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.519350811485643
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9296838045120239
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.359706009511457
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9337822671156004
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4530183167188049
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6700349786241742
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 26.794944763183594
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.514153668399769
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 108.27510070800781
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3461688059120965
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9988776655443322
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4362485957941368
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6684803731053245
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 1.9865591526031494
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5159492102818211
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 6.243534088134766
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3562874251497006
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9349046015712682
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.4373990660087158
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6673144189661874
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 249.0767059326172
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.514153668399769
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 140.143310546875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3461688059120965
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9988776655443322
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4217483948524102
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.4530183167188049
+ }
+ ]
+ },
+ {
+ "layer_number": 5,
+ "layer_display_name": "5",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6739214924212981
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9804219007492065
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5148973098061903
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.7031583786010742
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.34684333593141076
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9988776655443322
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4509860248736615
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6731441896618733
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 87.79132080078125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5145827317354895
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 424.6683349609375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3464230171073095
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.44705936021219694
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6723668869024485
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 6.4593658447265625
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5145995952587453
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 28.367399215698242
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.34657320872274144
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9988776655443322
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.44864006673188783
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.655266226195103
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1411.238037109375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5150289017341041
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 733.3570556640625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3468275593616193
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.37998347107914326
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.4509860248736615
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/MIBIG_protein_classification.json b/leaderboard/submissions/esm3_sm_open_v1/MIBIG_protein_classification.json
new file mode 100644
index 0000000..b664b6d
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.636355104266605
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6167800453514739
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7069604380909595
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.597171733895904
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7634164777021923
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6312487280675517
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6485260770975056
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.8287305765851944
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.593295341094183
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7853363567649292
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/arch_retrieval.json b/leaderboard/submissions/esm3_sm_open_v1/arch_retrieval.json
new file mode 100644
index 0000000..eb8f3f7
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.70515
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.67508
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.61454
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.19626
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.26995
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.4346
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.20967
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.29976
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.54307
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.64225
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.5694
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.31906
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7978588703940812
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8018806441341647
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8042019381614351
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.3579650015240981
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5538662118374643
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.07121392537711281
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.36004282446502917
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.5500899121550181
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.06407608269846478
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.3770368533261319
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.534374599363524
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.07015060905281298
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.18539848898802547
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.31854810998743216
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.28004800188179146
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.25192779729106995
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.38402228376270797
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.20900765662652615
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.3954699798402059
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.4920278710857863
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.06884183044564153
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.16650302541068765
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.2850510299682119
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2636967276986314
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.2249286475281941
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.33887450650989764
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.20401547306921805
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.4023781046085471
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.43234270658640944
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.060365206823630356
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.308376572382042
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.43665840937338507
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.10907818514059998
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.2846959530839789
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.3847779977100886
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.15212765230925407
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.18337257938419196
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.18530369222525087
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.1644079959042882
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.36754804900818905
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.5693513814779096
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.2062429461049503
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.3691004694413564
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.5691709967620638
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.20330937371937058
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.37177469619915476
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.5703399454020713
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.20367868350764826
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.90233
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.88489
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.82906
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.28889
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.39901
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.66224
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.29863
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.41847
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.71763
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.82057
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.7449
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.43684
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.939586000853606
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9404775454071223
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9412010959480642
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.606910907737967
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5199070917615352
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.17599253890391636
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.5841557989626389
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.5054057074197627
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.19935613525227047
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.542974079382819
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.25968971303126415
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.14191979932817245
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.016840485704212264
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.08991283753636957
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3257883729634555
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.10638156884546024
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.2262347259166251
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.23593506757296429
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.49736494683455656
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.27319531923466184
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.054060401364761604
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.0007354671211860954
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.07694495575013391
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.32263639676831435
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.07571120622707732
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.20449939474022025
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2426929738765447
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.4472592915344256
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.2222518817744839
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.04044050438475481
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.43825590057266733
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.4364103471025071
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.5504677601558484
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.38558205107498994
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.34273043994349706
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.5338137215032046
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.24549655270413592
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.16714271032533543
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3428330189684675
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6909221927925662
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.549469540118431
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.05172988460116744
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6902007340806171
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.5456877076101203
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.055902637059805256
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6885036167829154
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.5443681909422375
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.053296800424556284
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/bacarch_bigene.json b/leaderboard/submissions/esm3_sm_open_v1/bacarch_bigene.json
new file mode 100644
index 0000000..8dd50b0
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.42965408805031446
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.5433962264150943
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.4629769392033543
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.5433962264150943
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7738993710691824
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.8377358490566038
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7939622641509434
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.8377358490566038
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/convergent_enzymes_classification.json b/leaderboard/submissions/esm3_sm_open_v1/convergent_enzymes_classification.json
new file mode 100644
index 0000000..e447b43
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1275
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.09587499999999999
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.275
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.22541666666666665
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/cyano_operonic_pair.json b/leaderboard/submissions/esm3_sm_open_v1/cyano_operonic_pair.json
new file mode 100644
index 0000000..a318059
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7222222222222222
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9997950792312622
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4427665357897916
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9974009990692139
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.2846601941747573
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9959239130434783
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.337253378468868
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7218390804597701
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 327.0663757324219
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.44196563159481467
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 1296.3701171875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.2839984502130957
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9959239130434783
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.33280379538654115
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7195402298850575
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 9.786035537719727
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4412296564195298
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 57.51576614379883
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.28350116189000774
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9945652173913043
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.32391365711700537
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 322538.40625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4400597907324364
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 265308.6875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.28210042161747795
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3207731705793404
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.337253378468868
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7203065134099617
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9866379499435425
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4491869918699187
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9242078065872192
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.29918772563176893
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9008152173913043
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3525781644570656
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7233716475095785
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 43574.78125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4649681528662421
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 77644.453125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.32882882882882886
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.7934782608695652
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3899191915936865
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7241379310344828
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 1619.7060546875
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4535363146209959
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 5233.001953125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.29582126603227143
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9714673913043478
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3789538339954788
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.717624521072797
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 165274224.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4403230631169608
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 17215182.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2823168392788646
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.2649290773065526
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3899191915936865
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/ec_classification.json b/leaderboard/submissions/esm3_sm_open_v1/ec_classification.json
new file mode 100644
index 0000000..ac4750f
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4296875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.37109375
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.640625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.58125
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/ecoli_operonic_pair.json b/leaderboard/submissions/esm3_sm_open_v1/ecoli_operonic_pair.json
new file mode 100644
index 0000000..9e1ecdb
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.636532220676866
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9995946884155273
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5850875467243754
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9987356662750244
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4457434052757794
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8511734401831712
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5360226078826122
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6330551692165044
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 486.49554443359375
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5842115728773115
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 851.4708251953125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.44759293113954907
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.840870062965083
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5306776700721911
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.624246638850255
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 16.513151168823242
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.58118490694895
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 48.195823669433594
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.41411192214111925
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9742415569547797
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5148107768472899
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6080203987019008
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 306449.375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5765676567656766
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 262232.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4050544864363552
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.46592646226646905
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5360226078826122
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6214649976819657
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.976504921913147
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5863819359355854
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.8973178267478943
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4223398890569844
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9587864911276474
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5218317714095929
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6488178025034771
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 58031.8671875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.6091816367265469
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 80041.53125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4676677903769537
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8734974241556954
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5773856033732948
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6467315716272601
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 2202.94287109375
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6004628655585945
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 3375.35400390625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.47471723220226214
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8168288494562106
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5731341808177214
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.596430227167362
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 115011088.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5762376237623762
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 15893788.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4048226292603756
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9994275901545506
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.38760123548875813
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5773856033732948
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/euk_retrieval.json b/leaderboard/submissions/esm3_sm_open_v1/euk_retrieval.json
new file mode 100644
index 0000000..b0ab425
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.6837
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.6755
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.66084
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.23875
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.32019
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.4781
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.25147
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.35629
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.58814
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.60707
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.54534
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.31698
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7798499464094321
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7851452049201246
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7886590905635646
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.41928357810017647
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5290892468467935
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.04043953758876136
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.40804876886618785
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.5383625651482825
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.01759772593814632
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.43399119456019236
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.5212951246080316
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.01371218471516856
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.27286987374586946
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.22003168666870496
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.16926800678329118
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.3276407501683195
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.3666450599737096
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.11949072588665864
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.498662227018851
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.4597011315598956
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.03187150745443391
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.27171559469523077
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.19392467894582097
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.1631735596593573
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.30501148418451934
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.3112246092198893
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.10000246435945939
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5135191785402357
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.34400775125068594
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.0119120902289244
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.30744744151056824
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.49705438543467206
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.10084482308129951
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.24792520457888326
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.4662889869089896
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.14491216098622942
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.06326065957986557
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.14770445635312474
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.1263680546684453
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.44299200927250004
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.5506272059606037
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.08607775056210379
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.4491987153534213
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.5532024059864912
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.08158272636409196
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.44650104486099906
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.551026552196252
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.08532180157223326
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.91548
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.90603
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.86287
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.34535
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.46156
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.67786
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.35018
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.4715
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.70168
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.81994
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.73376
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.41428
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9427652733118971
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9427652733118971
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9429544164932854
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6646467247052238
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.6778764436485658
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.21513862915950627
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6698283650372349
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.6390956129452405
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.23500186221206765
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6924793871399406
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.35247236253685754
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.17064311750132002
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.13111830562024587
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.030763792353242985
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.40412645078710363
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.2602272376646378
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.18361191689681303
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.27432252545738767
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.7542001393377396
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.2787250331382991
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.104465533984337
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.12969399328272832
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.020904262568601558
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.4093101686701434
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.25240086482655144
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.1652691811031539
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2866072970878988
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.7612695752290041
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.23942734181223047
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.08592614001207499
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.44130649056515153
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.5342490731780283
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.8075663738228478
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.3247314376392803
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.42214838262701343
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.7330035174411789
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.08490805730470258
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.0845205834811262
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3952240507561035
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7270043548372578
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.7352921140815318
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.02920714692373676
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7270043548372578
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.7352921140815318
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.02920714692373676
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7265324603536517
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.7344144354146935
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.029875033589520282
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/fefe_phylogeny.json b/leaderboard/submissions/esm3_sm_open_v1/fefe_phylogeny.json
new file mode 100644
index 0000000..79ca261
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.3019747000293895
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.4717597979027869
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.38463047891710866
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.4717597979027869
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.4630693133096334
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.73784072309402
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.514040623446403
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.73784072309402
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/modac_paralogy_bigene.json b/leaderboard/submissions/esm3_sm_open_v1/modac_paralogy_bigene.json
new file mode 100644
index 0000000..23030a2
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.04155495978552279
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.498263670223294e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.990493452223619e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.19906166219839141
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/mopb_clustering.json b/leaderboard/submissions/esm3_sm_open_v1/mopb_clustering.json
new file mode 100644
index 0000000..1149ed7
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.603447572970274
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7454097575629404
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/rpob_arch_phylogeny.json b/leaderboard/submissions/esm3_sm_open_v1/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..ae52589
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.21439261220583106
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.24110859242777566
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.08984114827917193
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.24110859242777566
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.11813832747662069
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.19863039504299163
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.16233060651692185
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.19863039504299163
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/rpob_bac_phylogeny.json b/leaderboard/submissions/esm3_sm_open_v1/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..31aaffc
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.09049552550031388
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.14923045808816454
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.09609269155388361
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.14923045808816454
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.034250691494333164
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2102813650664429
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.17546616789647132
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2102813650664429
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/esm3_sm_open_v1/vibrio_operonic_pair.json b/leaderboard/submissions/esm3_sm_open_v1/vibrio_operonic_pair.json
new file mode 100644
index 0000000..0b2a68a
--- /dev/null
+++ b/leaderboard/submissions/esm3_sm_open_v1/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "esm3_sm_open_v1",
+ "revision": "...",
+ "num_layers": 48,
+ "num_params": 1401735748,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 24,
+ "layer_display_name": "24",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6715895841430237
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9996908903121948
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5164867230814123
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.997383713722229
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.34897476340694006
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9932659932659933
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4521561741786758
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6708122813835989
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 398.7920837402344
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5162224797219003
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 1435.895263671875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3479109722764545
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4485744553427403
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6657598134473377
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 13.880131721496582
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5161478033168461
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 55.61311340332031
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3483896307934014
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9955106621773289
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.43312932328281045
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6568208317139526
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 314096.6875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5152660657167781
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 270321.46875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.347723704866562
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9943883277216611
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.38171085731998233
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.4521561741786758
+ }
+ ]
+ },
+ {
+ "layer_number": 47,
+ "layer_display_name": "47",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6692576758647493
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9806265234947205
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5221445221445222
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9249705076217651
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3712121212121212
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8799102132435466
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.44374575153725326
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6824718227749709
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 54751.6875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5474349964862966
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 77884.234375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.39846547314578007
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8742985409652076
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.49709099146687025
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6789739603575593
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 1890.283447265625
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5386138613861385
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 3902.364501953125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3814866760168303
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9158249158249159
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.49224489125057536
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6544889234356782
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 123162152.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.514302224790523
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 16955928.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3463035019455253
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9988776655443322
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.32925404339553804
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.49709099146687025
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-131k-base/MIBIG_dna_classification.json b/leaderboard/submissions/evo-1-131k-base/MIBIG_dna_classification.json
new file mode 100644
index 0000000..ac0fcec
--- /dev/null
+++ b/leaderboard/submissions/evo-1-131k-base/MIBIG_dna_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_dna_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_dna",
+ "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-131k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.446229597310732
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4399092970521542
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.6831437731761506
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.39652342066563384
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6090325018896443
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.4296723570558472
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4036281179138322
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.5642457222211535
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.3853936778136288
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.5925925925925916
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-131k-base/arch_16S_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/arch_16S_phylogeny.json
new file mode 100644
index 0000000..35738a9
--- /dev/null
+++ b/leaderboard/submissions/evo-1-131k-base/arch_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "arch_16S_phylogeny",
+ "display_name": "16S Archaeal Phylogeny",
+ "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/arch_16S_sequences",
+ "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0"
+ },
+ {
+ "path": "tattabio/arch_16S_distances",
+ "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-131k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": -0.025266077406048616
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.004690157120927916
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.004253147697855626
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.004690157120927916
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": -0.015580669639663534
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.0188972060041519
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.018436446052964364
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.0188972060041519
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-131k-base/bac_16S_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/bac_16S_phylogeny.json
new file mode 100644
index 0000000..b2d3cdb
--- /dev/null
+++ b/leaderboard/submissions/evo-1-131k-base/bac_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "bac_16S_phylogeny",
+ "display_name": "16S Bacterial Phylogeny",
+ "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/bac_16S_sequences",
+ "revision": "efde1456b86748909cbcfecb07d783756d570aa3"
+ },
+ {
+ "path": "tattabio/bac_16S_distances",
+ "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-131k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.005078797435769416
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.06827101916712418
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.07324156211751019
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.07324156211751019
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.00640567975008095
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.046732462334814075
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.036484161781421304
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.046732462334814075
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-131k-base/ec_dna_classification.json b/leaderboard/submissions/evo-1-131k-base/ec_dna_classification.json
new file mode 100644
index 0000000..d23ea5f
--- /dev/null
+++ b/leaderboard/submissions/evo-1-131k-base/ec_dna_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_dna_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification_dna",
+ "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-131k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.015625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.015625
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.03125
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.014843749999999998
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-131k-base/ecoli_rna_clustering.json b/leaderboard/submissions/evo-1-131k-base/ecoli_rna_clustering.json
new file mode 100644
index 0000000..cfdc747
--- /dev/null
+++ b/leaderboard/submissions/evo-1-131k-base/ecoli_rna_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "ecoli_rna_clustering",
+ "display_name": "E.coli RNA Clustering",
+ "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
+ "modality": "dna",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/e_coli_rnas",
+ "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-131k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.6805148989283036
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.6265730145810015
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-131k-base/euk_18S_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/euk_18S_phylogeny.json
new file mode 100644
index 0000000..41eca90
--- /dev/null
+++ b/leaderboard/submissions/evo-1-131k-base/euk_18S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "euk_18S_phylogeny",
+ "display_name": "18S Eukaryotic Phylogeny",
+ "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/euk_18S_sequences",
+ "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196"
+ },
+ {
+ "path": "tattabio/euk_18S_distances",
+ "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-131k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.057284272544390114
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.15078767895903925
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.16109699901851568
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.16109699901851568
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.028039837868505658
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.10313971802764796
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.07777177959099602
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.10313971802764796
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-131k-base/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/rpob_arch_dna_phylogeny.json
new file mode 100644
index 0000000..a1af986
--- /dev/null
+++ b/leaderboard/submissions/evo-1-131k-base/rpob_arch_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_dna_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_sequences",
+ "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408"
+ },
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_distances",
+ "revision": "51df97684a927ec2203568e80175ef26a62db039"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-131k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.111612661283259
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.1515232273176025
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.13606007928035405
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.1515232273176025
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.10133747509368889
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.14126576817939926
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.1394041959490421
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.14126576817939926
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-131k-base/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/rpob_bac_dna_phylogeny.json
new file mode 100644
index 0000000..7d9e0c0
--- /dev/null
+++ b/leaderboard/submissions/evo-1-131k-base/rpob_bac_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_dna_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_sequences",
+ "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6"
+ },
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_distances",
+ "revision": "67339e271b2a1602208153d53d70d35ba6fa8876"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-131k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.04024491891770464
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.06106293578002046
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.05378417692908014
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.06106293578002046
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.03322125351100114
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.07033959001713873
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.06595278201357854
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.07033959001713873
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-8k-base/MIBIG_dna_classification.json b/leaderboard/submissions/evo-1-8k-base/MIBIG_dna_classification.json
new file mode 100644
index 0000000..44e83e4
--- /dev/null
+++ b/leaderboard/submissions/evo-1-8k-base/MIBIG_dna_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_dna_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_dna",
+ "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-8k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.42643961044280726
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.3854875283446712
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.5505888358897618
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.383668058757702
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.5793650793650779
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.40603817804559855
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.3877551020408163
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.5201002548153315
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.3692051425781597
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.5804988662131507
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-8k-base/arch_16S_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/arch_16S_phylogeny.json
new file mode 100644
index 0000000..44f1c37
--- /dev/null
+++ b/leaderboard/submissions/evo-1-8k-base/arch_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "arch_16S_phylogeny",
+ "display_name": "16S Archaeal Phylogeny",
+ "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/arch_16S_sequences",
+ "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0"
+ },
+ {
+ "path": "tattabio/arch_16S_distances",
+ "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-8k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": -0.010027057225064055
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": -0.02215556380490859
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": -0.019732162616989075
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": -0.010027057225064055
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": -0.005205558296298042
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": -0.019367509701809612
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": -0.015643699787755726
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": -0.005205558296298042
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-8k-base/bac_16S_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/bac_16S_phylogeny.json
new file mode 100644
index 0000000..21e5cf2
--- /dev/null
+++ b/leaderboard/submissions/evo-1-8k-base/bac_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "bac_16S_phylogeny",
+ "display_name": "16S Bacterial Phylogeny",
+ "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/bac_16S_sequences",
+ "revision": "efde1456b86748909cbcfecb07d783756d570aa3"
+ },
+ {
+ "path": "tattabio/bac_16S_distances",
+ "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-8k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.04234553644215741
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.06008165397377432
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.0375514113116624
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.06008165397377432
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.05401996118245526
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.0731645579846154
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.06211925714372264
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.0731645579846154
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-8k-base/ec_dna_classification.json b/leaderboard/submissions/evo-1-8k-base/ec_dna_classification.json
new file mode 100644
index 0000000..0dcf5b6
--- /dev/null
+++ b/leaderboard/submissions/evo-1-8k-base/ec_dna_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_dna_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification_dna",
+ "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-8k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.015625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.01171875
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0234375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.0077008928571428575
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-8k-base/ecoli_rna_clustering.json b/leaderboard/submissions/evo-1-8k-base/ecoli_rna_clustering.json
new file mode 100644
index 0000000..89fe125
--- /dev/null
+++ b/leaderboard/submissions/evo-1-8k-base/ecoli_rna_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "ecoli_rna_clustering",
+ "display_name": "E.coli RNA Clustering",
+ "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
+ "modality": "dna",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/e_coli_rnas",
+ "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-8k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.6510094548633155
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.6604911155601946
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-8k-base/euk_18S_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/euk_18S_phylogeny.json
new file mode 100644
index 0000000..d61618b
--- /dev/null
+++ b/leaderboard/submissions/evo-1-8k-base/euk_18S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "euk_18S_phylogeny",
+ "display_name": "18S Eukaryotic Phylogeny",
+ "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/euk_18S_sequences",
+ "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196"
+ },
+ {
+ "path": "tattabio/euk_18S_distances",
+ "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-8k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1711778687705844
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.22309569321619807
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.22182726039127965
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.22309569321619807
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.18718252462867385
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.20078257678873968
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.1894448685039182
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.20078257678873968
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-8k-base/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/rpob_arch_dna_phylogeny.json
new file mode 100644
index 0000000..e605272
--- /dev/null
+++ b/leaderboard/submissions/evo-1-8k-base/rpob_arch_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_dna_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_sequences",
+ "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408"
+ },
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_distances",
+ "revision": "51df97684a927ec2203568e80175ef26a62db039"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-8k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.12141412399945198
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.1224894376801759
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.08436643461024505
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.1224894376801759
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.12074727064487567
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.1436975275108405
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.14577087550878226
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.14577087550878226
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/evo-1-8k-base/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/rpob_bac_dna_phylogeny.json
new file mode 100644
index 0000000..00c8844
--- /dev/null
+++ b/leaderboard/submissions/evo-1-8k-base/rpob_bac_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_dna_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_sequences",
+ "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6"
+ },
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_distances",
+ "revision": "67339e271b2a1602208153d53d70d35ba6fa8876"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "togethercomputer/evo-1-8k-base",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6452781056,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.050201705681568114
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.07465904170918622
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.05223608317175334
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.07465904170918622
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.04649870099899381
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.08974965386451692
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.08311750592573247
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.08974965386451692
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/MIBIG_dna_classification.json
new file mode 100644
index 0000000..12f8570
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/MIBIG_dna_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_dna_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_dna",
+ "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2547801226,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.4987765050770981
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.5124716553287982
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.6793203115492271
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.45292258688111664
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6768707482993195
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.44306708410477275
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.46258503401360546
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.6572912628877802
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.4111353087388993
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6402116402116397
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/arch_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/arch_16S_phylogeny.json
new file mode 100644
index 0000000..47b2f9b
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/arch_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "arch_16S_phylogeny",
+ "display_name": "16S Archaeal Phylogeny",
+ "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/arch_16S_sequences",
+ "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0"
+ },
+ {
+ "path": "tattabio/arch_16S_distances",
+ "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2547801226,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": -0.10654835165163948
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": -0.0449208645031223
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": -0.039164294581157263
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": -0.039164294581157263
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": -0.05342124724518901
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.013383992412723606
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.008694532964576274
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.013383992412723606
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/bac_16S_phylogeny.json
new file mode 100644
index 0000000..a5c0d2b
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/bac_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "bac_16S_phylogeny",
+ "display_name": "16S Bacterial Phylogeny",
+ "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/bac_16S_sequences",
+ "revision": "efde1456b86748909cbcfecb07d783756d570aa3"
+ },
+ {
+ "path": "tattabio/bac_16S_distances",
+ "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2547801226,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.3034936215778572
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.30766902257663337
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.311140752313069
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.311140752313069
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.23391355792374574
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.23593114226208475
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.22752660313351064
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.23593114226208475
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ec_dna_classification.json
new file mode 100644
index 0000000..6bd3af5
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ec_dna_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_dna_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification_dna",
+ "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2547801226,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1640625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.13072916666666667
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.125
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.09505208333333333
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ecoli_rna_clustering.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ecoli_rna_clustering.json
new file mode 100644
index 0000000..c132928
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ecoli_rna_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "ecoli_rna_clustering",
+ "display_name": "E.coli RNA Clustering",
+ "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
+ "modality": "dna",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/e_coli_rnas",
+ "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2547801226,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.1895135298164807
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.12248243519915075
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/euk_18S_phylogeny.json
new file mode 100644
index 0000000..f6e0bd2
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/euk_18S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "euk_18S_phylogeny",
+ "display_name": "18S Eukaryotic Phylogeny",
+ "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/euk_18S_sequences",
+ "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196"
+ },
+ {
+ "path": "tattabio/euk_18S_distances",
+ "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2547801226,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.29292521556483414
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2974787640408906
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.30303420658256763
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.30303420658256763
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2643943410861178
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2695059233247244
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2683265266152356
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2695059233247244
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_arch_dna_phylogeny.json
new file mode 100644
index 0000000..5461291
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_arch_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_dna_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_sequences",
+ "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408"
+ },
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_distances",
+ "revision": "51df97684a927ec2203568e80175ef26a62db039"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2547801226,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1442196721280385
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.18351276436060954
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.16445811077836048
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.18351276436060954
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.12286910318887798
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.15401143244304646
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.15002628314647487
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.15401143244304646
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_bac_dna_phylogeny.json
new file mode 100644
index 0000000..783073c
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_bac_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_dna_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_sequences",
+ "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6"
+ },
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_distances",
+ "revision": "67339e271b2a1602208153d53d70d35ba6fa8876"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2547801226,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.11375832395356672
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.13416702069118552
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.13788912961135927
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.13788912961135927
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.0996182572764333
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.11927361003896635
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.12190344621512093
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.12190344621512093
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/MIBIG_dna_classification.json
new file mode 100644
index 0000000..e5fe551
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/MIBIG_dna_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_dna_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_dna",
+ "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "revision": "...",
+ "num_layers": 22,
+ "num_params": 97889132,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5026229318789414
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.47619047619047616
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.6915625965314858
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.4393957527314288
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6402116402116393
+ }
+ ]
+ },
+ {
+ "layer_number": 21,
+ "layer_display_name": "21",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.4666972620012171
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4512471655328798
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7469863250998614
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.42086846252461213
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6228269085411932
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/arch_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/arch_16S_phylogeny.json
new file mode 100644
index 0000000..34de4ff
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/arch_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "arch_16S_phylogeny",
+ "display_name": "16S Archaeal Phylogeny",
+ "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/arch_16S_sequences",
+ "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0"
+ },
+ {
+ "path": "tattabio/arch_16S_distances",
+ "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "revision": "...",
+ "num_layers": 22,
+ "num_params": 97889132,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": -0.01873048882073702
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.028071992556529406
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.0278344032639243
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.028071992556529406
+ }
+ ]
+ },
+ {
+ "layer_number": 21,
+ "layer_display_name": "21",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": -0.035107630996407665
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.016885345022489895
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.012808674577804298
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.016885345022489895
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/bac_16S_phylogeny.json
new file mode 100644
index 0000000..eca6e8e
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/bac_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "bac_16S_phylogeny",
+ "display_name": "16S Bacterial Phylogeny",
+ "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/bac_16S_sequences",
+ "revision": "efde1456b86748909cbcfecb07d783756d570aa3"
+ },
+ {
+ "path": "tattabio/bac_16S_distances",
+ "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "revision": "...",
+ "num_layers": 22,
+ "num_params": 97889132,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.31696264570144556
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3398798297254198
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.3384167443991009
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3398798297254198
+ }
+ ]
+ },
+ {
+ "layer_number": 21,
+ "layer_display_name": "21",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.17067324886079815
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.22902435877832863
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2227097102546006
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.22902435877832863
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ec_dna_classification.json
new file mode 100644
index 0000000..d40bb85
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ec_dna_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_dna_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification_dna",
+ "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "revision": "...",
+ "num_layers": 22,
+ "num_params": 97889132,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.109375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.08556547619047619
+ }
+ ]
+ },
+ {
+ "layer_number": 21,
+ "layer_display_name": "21",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1171875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.08645833333333333
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ecoli_rna_clustering.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ecoli_rna_clustering.json
new file mode 100644
index 0000000..b7ab6f8
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ecoli_rna_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "ecoli_rna_clustering",
+ "display_name": "E.coli RNA Clustering",
+ "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
+ "modality": "dna",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/e_coli_rnas",
+ "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "revision": "...",
+ "num_layers": 22,
+ "num_params": 97889132,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.11902139186014724
+ }
+ ]
+ },
+ {
+ "layer_number": 21,
+ "layer_display_name": "21",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.14292083591071578
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/euk_18S_phylogeny.json
new file mode 100644
index 0000000..3e97274
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/euk_18S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "euk_18S_phylogeny",
+ "display_name": "18S Eukaryotic Phylogeny",
+ "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/euk_18S_sequences",
+ "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196"
+ },
+ {
+ "path": "tattabio/euk_18S_distances",
+ "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "revision": "...",
+ "num_layers": 22,
+ "num_params": 97889132,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.31522237768693256
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3357015318391086
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.33507578766511564
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3357015318391086
+ }
+ ]
+ },
+ {
+ "layer_number": 21,
+ "layer_display_name": "21",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.29046858175431006
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.31008830299644424
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.30828628825037435
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.31008830299644424
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_arch_dna_phylogeny.json
new file mode 100644
index 0000000..f78e60d
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_arch_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_dna_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_sequences",
+ "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408"
+ },
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_distances",
+ "revision": "51df97684a927ec2203568e80175ef26a62db039"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "revision": "...",
+ "num_layers": 22,
+ "num_params": 97889132,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1234737764549469
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.18655251846904775
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.17777969131912802
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.18655251846904775
+ }
+ ]
+ },
+ {
+ "layer_number": 21,
+ "layer_display_name": "21",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.13078468145534033
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.15872053264382796
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.15263070680987556
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.15872053264382796
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_bac_dna_phylogeny.json
new file mode 100644
index 0000000..ecb74c5
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_bac_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_dna_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_sequences",
+ "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6"
+ },
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_distances",
+ "revision": "67339e271b2a1602208153d53d70d35ba6fa8876"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
+ "revision": "...",
+ "num_layers": 22,
+ "num_params": 97889132,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.07821452951223222
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.10661690192801286
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.10642148119114682
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.10661690192801286
+ }
+ ]
+ },
+ {
+ "layer_number": 21,
+ "layer_display_name": "21",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.21595314850334169
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.1831784122734034
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.19246083157603466
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.21595314850334169
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/MIBIG_dna_classification.json
new file mode 100644
index 0000000..8398208
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/MIBIG_dna_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_dna_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_dna",
+ "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 235120780,
+ "embed_dim": 768
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5061420121003121
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.47619047619047616
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7551144630181957
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.44657932055853555
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6462585034013596
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.4543771338680633
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4399092970521542
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.5940398459735476
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.42087398922226954
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6237717309145869
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/arch_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/arch_16S_phylogeny.json
new file mode 100644
index 0000000..eb2ba07
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/arch_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "arch_16S_phylogeny",
+ "display_name": "16S Archaeal Phylogeny",
+ "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/arch_16S_sequences",
+ "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0"
+ },
+ {
+ "path": "tattabio/arch_16S_distances",
+ "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 235120780,
+ "embed_dim": 768
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.11921584406827451
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.15679219545816353
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.15045441173932622
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.15679219545816353
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.06475995486443784
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.09602276206583968
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.09540879980313609
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.09602276206583968
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/bac_16S_phylogeny.json
new file mode 100644
index 0000000..1599088
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/bac_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "bac_16S_phylogeny",
+ "display_name": "16S Bacterial Phylogeny",
+ "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/bac_16S_sequences",
+ "revision": "efde1456b86748909cbcfecb07d783756d570aa3"
+ },
+ {
+ "path": "tattabio/bac_16S_distances",
+ "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 235120780,
+ "embed_dim": 768
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.24014165203750024
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.28990471689345415
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2909216542477827
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2909216542477827
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.0408624668440742
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.09721819145306612
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.09461311593137267
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.09721819145306612
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ec_dna_classification.json
new file mode 100644
index 0000000..3b7238b
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ec_dna_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_dna_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification_dna",
+ "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 235120780,
+ "embed_dim": 768
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1171875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.10989583333333333
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.125
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.09427083333333333
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ecoli_rna_clustering.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ecoli_rna_clustering.json
new file mode 100644
index 0000000..8f02f68
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ecoli_rna_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "ecoli_rna_clustering",
+ "display_name": "E.coli RNA Clustering",
+ "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
+ "modality": "dna",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/e_coli_rnas",
+ "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 235120780,
+ "embed_dim": 768
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.15577517592023576
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.226785366801238
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/euk_18S_phylogeny.json
new file mode 100644
index 0000000..e264576
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/euk_18S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "euk_18S_phylogeny",
+ "display_name": "18S Eukaryotic Phylogeny",
+ "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/euk_18S_sequences",
+ "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196"
+ },
+ {
+ "path": "tattabio/euk_18S_distances",
+ "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 235120780,
+ "embed_dim": 768
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.28890294917726145
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.33379301514333115
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.33105693660190544
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.33379301514333115
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1815521898033015
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.25499618145404046
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2528923474396306
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.25499618145404046
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_arch_dna_phylogeny.json
new file mode 100644
index 0000000..170ee8d
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_arch_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_dna_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_sequences",
+ "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408"
+ },
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_distances",
+ "revision": "51df97684a927ec2203568e80175ef26a62db039"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 235120780,
+ "embed_dim": 768
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.15073903119250268
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.21364664625871488
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.19287471846027535
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.21364664625871488
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1647252908025232
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.19747761810776127
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.1965799552266182
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.19747761810776127
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_bac_dna_phylogeny.json
new file mode 100644
index 0000000..adef74e
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_bac_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_dna_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_sequences",
+ "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6"
+ },
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_distances",
+ "revision": "67339e271b2a1602208153d53d70d35ba6fa8876"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 235120780,
+ "embed_dim": 768
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.07392689866136973
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.08180489841927971
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.08993373294524076
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.08993373294524076
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1196418634913048
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.11994664883382657
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.12250307785318251
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.12250307785318251
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/MIBIG_dna_classification.json
new file mode 100644
index 0000000..1907ee3
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/MIBIG_dna_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_dna_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_dna",
+ "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
+ "revision": "...",
+ "num_layers": 29,
+ "num_params": 498345436,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 14,
+ "layer_display_name": "14",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.4995376571471013
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.5056689342403629
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7326619778346121
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.45041650239675696
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6662887377173087
+ }
+ ]
+ },
+ {
+ "layer_number": 28,
+ "layer_display_name": "28",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.4539500344324366
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4580498866213152
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7216247043960596
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.414670336687932
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6300075585789865
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/arch_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/arch_16S_phylogeny.json
new file mode 100644
index 0000000..d7d0504
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/arch_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "arch_16S_phylogeny",
+ "display_name": "16S Archaeal Phylogeny",
+ "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/arch_16S_sequences",
+ "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0"
+ },
+ {
+ "path": "tattabio/arch_16S_distances",
+ "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
+ "revision": "...",
+ "num_layers": 29,
+ "num_params": 498345436,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 14,
+ "layer_display_name": "14",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.07471725236964263
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.11028500148116578
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.10974020375275832
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.11028500148116578
+ }
+ ]
+ },
+ {
+ "layer_number": 28,
+ "layer_display_name": "28",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.023925193720304064
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.08930443289494266
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.08119092099810726
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.08930443289494266
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/bac_16S_phylogeny.json
new file mode 100644
index 0000000..761e325
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/bac_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "bac_16S_phylogeny",
+ "display_name": "16S Bacterial Phylogeny",
+ "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/bac_16S_sequences",
+ "revision": "efde1456b86748909cbcfecb07d783756d570aa3"
+ },
+ {
+ "path": "tattabio/bac_16S_distances",
+ "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
+ "revision": "...",
+ "num_layers": 29,
+ "num_params": 498345436,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 14,
+ "layer_display_name": "14",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.28755176441767716
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2901315012697327
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.29875580881828134
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.29875580881828134
+ }
+ ]
+ },
+ {
+ "layer_number": 28,
+ "layer_display_name": "28",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2625695435176704
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2961089715220018
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.29903750149292324
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.29903750149292324
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ec_dna_classification.json
new file mode 100644
index 0000000..c2519cb
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ec_dna_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_dna_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification_dna",
+ "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
+ "revision": "...",
+ "num_layers": 29,
+ "num_params": 498345436,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 14,
+ "layer_display_name": "14",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1328125
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.09492187499999999
+ }
+ ]
+ },
+ {
+ "layer_number": 28,
+ "layer_display_name": "28",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.109375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.08854166666666666
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ecoli_rna_clustering.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ecoli_rna_clustering.json
new file mode 100644
index 0000000..8236f27
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ecoli_rna_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "ecoli_rna_clustering",
+ "display_name": "E.coli RNA Clustering",
+ "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
+ "modality": "dna",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/e_coli_rnas",
+ "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
+ "revision": "...",
+ "num_layers": 29,
+ "num_params": 498345436,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 14,
+ "layer_display_name": "14",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.17872090857056205
+ }
+ ]
+ },
+ {
+ "layer_number": 28,
+ "layer_display_name": "28",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.23101767784787614
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/euk_18S_phylogeny.json
new file mode 100644
index 0000000..126e967
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/euk_18S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "euk_18S_phylogeny",
+ "display_name": "18S Eukaryotic Phylogeny",
+ "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/euk_18S_sequences",
+ "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196"
+ },
+ {
+ "path": "tattabio/euk_18S_distances",
+ "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
+ "revision": "...",
+ "num_layers": 29,
+ "num_params": 498345436,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 14,
+ "layer_display_name": "14",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.29983174865850976
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.30210969201843535
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.30618345571160144
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.30618345571160144
+ }
+ ]
+ },
+ {
+ "layer_number": 28,
+ "layer_display_name": "28",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.3017647880265657
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3056607554377782
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.3053489155102467
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3056607554377782
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_arch_dna_phylogeny.json
new file mode 100644
index 0000000..235d9d4
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_arch_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_dna_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_sequences",
+ "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408"
+ },
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_distances",
+ "revision": "51df97684a927ec2203568e80175ef26a62db039"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
+ "revision": "...",
+ "num_layers": 29,
+ "num_params": 498345436,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 14,
+ "layer_display_name": "14",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.22986226120160247
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.27851673593693504
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2606086674119917
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.27851673593693504
+ }
+ ]
+ },
+ {
+ "layer_number": 28,
+ "layer_display_name": "28",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.17815454079439064
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.19889620650052858
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2018124663624657
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2018124663624657
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_bac_dna_phylogeny.json
new file mode 100644
index 0000000..b4adbc6
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_bac_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_dna_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_sequences",
+ "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6"
+ },
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_distances",
+ "revision": "67339e271b2a1602208153d53d70d35ba6fa8876"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
+ "revision": "...",
+ "num_layers": 29,
+ "num_params": 498345436,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 14,
+ "layer_display_name": "14",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.05135094895715095
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.07685936603629384
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.08785540391133782
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.08785540391133782
+ }
+ ]
+ },
+ {
+ "layer_number": 28,
+ "layer_display_name": "28",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.10564977401255361
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.10576727586891548
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.10692380434412962
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.10692380434412962
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/MIBIG_dna_classification.json
new file mode 100644
index 0000000..05e05e7
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/MIBIG_dna_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_dna_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_dna",
+ "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 55904972,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.41732088501675296
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4126984126984127
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.5828964813759089
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.3790039539706372
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6005291005290989
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.447108453393839
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4399092970521542
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.6417782738095238
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.4014322754351143
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.6228269085411934
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/arch_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/arch_16S_phylogeny.json
new file mode 100644
index 0000000..df9efd7
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/arch_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "arch_16S_phylogeny",
+ "display_name": "16S Archaeal Phylogeny",
+ "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/arch_16S_sequences",
+ "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0"
+ },
+ {
+ "path": "tattabio/arch_16S_distances",
+ "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 55904972,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.08614532458123705
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.13483619665236213
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.1312200179763858
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.13483619665236213
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.08593831411686806
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.10878061550161633
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.11155052559650092
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.11155052559650092
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/bac_16S_phylogeny.json
new file mode 100644
index 0000000..169f0ce
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/bac_16S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "bac_16S_phylogeny",
+ "display_name": "16S Bacterial Phylogeny",
+ "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/bac_16S_sequences",
+ "revision": "efde1456b86748909cbcfecb07d783756d570aa3"
+ },
+ {
+ "path": "tattabio/bac_16S_distances",
+ "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 55904972,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2595103060358304
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3341848597559302
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.3320052456892504
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3341848597559302
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.3430883689079242
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3684961309813556
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.36354876502165473
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3684961309813556
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ec_dna_classification.json
new file mode 100644
index 0000000..ea12cd2
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ec_dna_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_dna_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.",
+ "modality": "dna",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification_dna",
+ "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 55904972,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0859375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.06004464285714285
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0859375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.0703125
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ecoli_rna_clustering.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ecoli_rna_clustering.json
new file mode 100644
index 0000000..490b07a
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ecoli_rna_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "ecoli_rna_clustering",
+ "display_name": "E.coli RNA Clustering",
+ "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
+ "modality": "dna",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/e_coli_rnas",
+ "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 55904972,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.039533351232296544
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.1998891844978737
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/euk_18S_phylogeny.json
new file mode 100644
index 0000000..1d681f3
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/euk_18S_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "euk_18S_phylogeny",
+ "display_name": "18S Eukaryotic Phylogeny",
+ "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/euk_18S_sequences",
+ "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196"
+ },
+ {
+ "path": "tattabio/euk_18S_distances",
+ "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 55904972,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2795506243365705
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3377836786404048
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.33795640524026876
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.33795640524026876
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.30224942362141677
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.31698287710587075
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.31744544631481525
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.31744544631481525
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_arch_dna_phylogeny.json
new file mode 100644
index 0000000..d3cb22f
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_arch_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_dna_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_sequences",
+ "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408"
+ },
+ {
+ "path": "tattabio/rpob_arch_dna_phylogeny_distances",
+ "revision": "51df97684a927ec2203568e80175ef26a62db039"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 55904972,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.052418023657636856
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.09741263413297728
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.09567551095833512
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.09741263413297728
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.09765252616635897
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.13005622857446536
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.14955059376403426
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.14955059376403426
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_bac_dna_phylogeny.json
new file mode 100644
index 0000000..7608fd5
--- /dev/null
+++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_bac_dna_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_dna_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
+ "modality": "dna",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_sequences",
+ "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6"
+ },
+ {
+ "path": "tattabio/rpob_bac_dna_phylogeny_distances",
+ "revision": "67339e271b2a1602208153d53d70d35ba6fa8876"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 55904972,
+ "embed_dim": 512
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.07434903594716476
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.08977856846092792
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.09222560782823379
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.09222560782823379
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.08630281284059531
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.09701233167900071
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.09964321196343817
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.09964321196343817
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/MIBIG_protein_classification.json b/leaderboard/submissions/progen2-large/MIBIG_protein_classification.json
new file mode 100644
index 0000000..1d824e1
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6819849034962754
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6757369614512472
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7747178376990543
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6342294744240076
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.8065003779289504
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6150399394552477
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6303854875283447
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7417051698581646
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.5777687904471503
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7694633408919135
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/arch_retrieval.json b/leaderboard/submissions/progen2-large/arch_retrieval.json
new file mode 100644
index 0000000..0aad6f8
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.82558
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.80792
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.76707
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.26978
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.37262
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.61886
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.27755
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.38883
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.66873
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.74349
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.67286
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.40231
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8799046806089053
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8824834529059877
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.883891445011226
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.4520286686644837
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5956080481101712
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.021981762025439694
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.4350798975828656
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.6324559228952568
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.051022858934365305
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.4027550261519307
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.6298449675763194
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.03440308913177297
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.04937645677938109
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.324916856701562
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.33579245842687483
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.11864382097961818
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4753148026458953
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.25080627867928795
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.3800933497303014
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.6976421952392109
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.01576884330549023
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.04078669924959719
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.3242844365632198
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.34140253756691535
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.10048280111893285
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.47618974809301795
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2642681901494963
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.36191474504680493
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.732114878689383
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.041525472784842636
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.37136307757548065
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.42880655129257333
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.3702312562845074
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.3279636463690485
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.3630373065503225
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.3969564917795274
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.19445771639778647
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.08695792887210627
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.30808900316001675
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.5264230291869033
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.5639758102110056
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.1524029506353449
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.5253478695141168
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.5618581571585436
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.15173243498945044
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.5300952350050928
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.559635250154546
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.14988171842990547
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.62071
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.5774
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.49429
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.16138
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.21164
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.3225
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.17546
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.23808
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.40527
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.55988
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.47823
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.25592
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7401550718452126
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7445013515436046
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7473056005746137
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.3302418889629452
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.6600101206611111
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.17322466327530292
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.31576812159270473
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.6797481256387631
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.13867873384705962
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.2593741165269576
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.6435030914254599
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.15965132348455088
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.07708299288709394
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.30967007942991703
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3416877145548701
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.13890220773201395
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.43271488328659263
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.2869867740526922
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.23590185458461196
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.6505388159079657
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.16461355770456945
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.043742127235177904
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.2687502523778896
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3105615838617834
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.09680209916324005
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.37971832751393986
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.26241632489651434
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.17303959367017055
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.5921607885640662
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.16471059233285062
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.313156828536656
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.6189535949883347
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": 0.021027755158267582
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.30202175826168143
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.6132772075682028
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.04760584624936907
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.21884003064346627
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.45683930057316186
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.11585915046402609
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.35175510709952584
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6208145223835255
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.3507078878526956
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.350942421563067
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.6235307592418955
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.3541831744561419
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.35306527795828485
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6233511446573011
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.3538673437378702
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/bacarch_bigene.json b/leaderboard/submissions/progen2-large/bacarch_bigene.json
new file mode 100644
index 0000000..a05cf8e
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.5524985706117781
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6339622641509434
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5746138521035977
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6339622641509434
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.17530030765879823
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.22641509433962265
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.18399742288681928
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.22641509433962265
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/convergent_enzymes_classification.json b/leaderboard/submissions/progen2-large/convergent_enzymes_classification.json
new file mode 100644
index 0000000..1c99b66
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.15337499999999998
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.14
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.10284722222222221
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/cyano_operonic_pair.json b/leaderboard/submissions/progen2-large/cyano_operonic_pair.json
new file mode 100644
index 0000000..361c89a
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7180076628352491
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9833443760871887
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.44289609209330505
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.33621838688850403
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.284990253411306
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9932065217391305
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3213092537718531
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7183908045977011
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 191.33279418945312
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.44019138755980863
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 1111.232421875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.2822085889570552
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3118566111895106
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 6.656366348266602
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4434389140271493
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 28.89664077758789
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.2849941837921675
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.998641304347826
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3269222612599279
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 681.6181640625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.442998760842627
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 113.07124328613281
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.28691813804173355
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9714673913043478
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.30230236744782024
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3269222612599279
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.717624521072797
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9928083419799805
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.45870897944464484
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.7693469524383545
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3122238586156112
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8641304347826086
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.34213880348537534
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7206896551724138
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 470.23736572265625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4549596915527515
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 964.5697631835938
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.30656589513462446
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8817934782608695
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3669944276201573
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7203065134099617
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 11.08890151977539
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.451639344262295
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 26.889080047607422
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.32335680751173707
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.748641304347826
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3595510558455044
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7183908045977011
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 4561.4189453125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44360666038328617
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 762.20458984375
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2885165508786269
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9592391304347826
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.2751779532687585
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3669944276201573
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/ec_classification.json b/leaderboard/submissions/progen2-large/ec_classification.json
new file mode 100644
index 0000000..d4b200a
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.546875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.49739583333333326
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.5
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.4315104166666666
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/ecoli_operonic_pair.json b/leaderboard/submissions/progen2-large/ecoli_operonic_pair.json
new file mode 100644
index 0000000..e5e3b31
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6367640241075568
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.5684491991996765
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5887910672106507
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.45036613941192627
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4711340206185567
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.7847738981110475
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5233168375581643
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6418636995827538
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 547.8858642578125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5766871165644171
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 969.0298461914062
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.40592903828197946
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9954207212364052
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5321691186505778
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.652990264255911
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 13.988625526428223
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5945746447684075
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 21.867555618286133
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.4387254901960784
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9221522610188895
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5701562997645315
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 6305.4169921875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5834810350939383
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 113.59080505371094
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.42259306803594354
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9421866056096165
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4206948747327784
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5701562997645315
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6495132127955494
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.852018415927887
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.6023952095808384
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.7513113021850586
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.46245786086423535
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8637664567830566
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5385774043629026
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6659712563745943
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 635.199462890625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.6145662847790507
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 891.3582763671875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.47819165870741803
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8597595878649112
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5936555209565155
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6636532220676866
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 16.329631805419922
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6072446072446073
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 26.53228759765625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.4711083043890117
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8540354894104178
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5832964505139788
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 10073.8828125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5778894472361809
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 666.7764892578125
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.40847738574473125
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9874069834001145
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.37683258403007813
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5936555209565155
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/euk_retrieval.json b/leaderboard/submissions/progen2-large/euk_retrieval.json
new file mode 100644
index 0000000..51817a7
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.79265
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.78706
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.77934
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.31297
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.42594
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.6285
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.32182
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.44326
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.67623
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.70997
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.63441
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.3681
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8430332261521974
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8474850711988975
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8485140543441441
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.623248146700816
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.6755617794201033
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.2062832776723883
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6150971905749236
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.6851298227412265
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.2164177072767131
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.5606563694900604
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.6720929882395703
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.18233563570111652
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.12726038600829018
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.27883908864306833
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.21034053179876666
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.2388442883998982
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4984729416529119
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.11734247621635328
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5658825420942568
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.7844748384936805
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.10488808502349604
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.1148710587472859
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.26683520950781653
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.21895153236754078
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.21398850457265484
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.47731027192523895
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.12934753855868159
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5329014815194145
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.7796995216647047
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.036946938653229996
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5286196851573478
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.5983839423970794
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.4236911084804896
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4545782642182253
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.5007516848828493
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.4262645964521107
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.1500642024947319
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.1600499789472809
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.30891321831672286
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6890316490341247
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6388576263708866
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.15515652272104835
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6897689099007713
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.6331945267231739
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.17683210867574006
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6912368384982113
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6326576797440999
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.17574979114357966
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.60293
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.56499
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.51375
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.18533
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.23869
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.33028
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.19775
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.26432
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.4146
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.53762
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.45241
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.24424
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7315648445873527
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7361098351452049
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7390325351956528
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.5258291178052644
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.6990369551084505
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.19847712933798844
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.49349547399540455
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.6664206673817538
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.1600227050947377
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.42061526717556513
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.6083533212467029
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.1686331650483035
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.14859441615302324
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.23988353743529217
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.30180731282787276
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.23839489619661014
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.40953413539940225
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.2460114370422075
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.3761510941109189
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.602955555500399
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.15749579060843896
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.11444039748419432
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.19218511723799558
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2825603490529135
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.18835645126114328
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.3410474906629072
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2429814898980251
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.32800009778288497
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.5144024756263704
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.17371372463750084
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5304991625331207
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.7215198097667779
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": 0.05714336353148511
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4853052816819128
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.6759994282048366
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.03704981651554237
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.31062406812604343
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.4025880047512197
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.1340192827457276
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.5665289123893353
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6278003990906513
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.37278396283325976
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.5701246528415677
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.6321080355936443
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.369945061055372
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.5726947773737544
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6348440871378689
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.37227517916401826
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/fefe_phylogeny.json b/leaderboard/submissions/progen2-large/fefe_phylogeny.json
new file mode 100644
index 0000000..a9d59fc
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.7261313624231767
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.8113485047464291
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.8080630557517142
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.8113485047464291
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.534930337261507
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.6481804042737168
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.571767383850242
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.6481804042737168
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/modac_paralogy_bigene.json b/leaderboard/submissions/progen2-large/modac_paralogy_bigene.json
new file mode 100644
index 0000000..7f3b346
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.0006706959960131102
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0013404825737265416
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.0006711500886081701
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0013404825737265416
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.17359249329758714
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.005679986057065453
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.013404825737265416
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.006772200533986637
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.013404825737265416
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.33579088471849866
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/mopb_clustering.json b/leaderboard/submissions/progen2-large/mopb_clustering.json
new file mode 100644
index 0000000..5151e73
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.8476659794848843
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.6677487389951918
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/rpob_arch_phylogeny.json b/leaderboard/submissions/progen2-large/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..b5ed743
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2920851957915461
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3483029599824127
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.37488611045945197
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.37488611045945197
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.39164690186549966
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.49705565976563815
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.5007660656360811
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.5007660656360811
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/rpob_bac_phylogeny.json b/leaderboard/submissions/progen2-large/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..529566b
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.3407778454098185
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.42394247953096953
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.440888842114917
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.440888842114917
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.23219864929778225
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3490272067401271
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.358178624850726
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.358178624850726
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-large/vibrio_operonic_pair.json b/leaderboard/submissions/progen2-large/vibrio_operonic_pair.json
new file mode 100644
index 0000000..6c4a4ba
--- /dev/null
+++ b/leaderboard/submissions/progen2-large/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-large",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 2779356160,
+ "embed_dim": 2560
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6715895841430237
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.688905656337738
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5361670395227442
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.4471510946750641
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.40145170295924065
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8069584736251403
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.47290020469130756
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6723668869024485
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 509.0989990234375
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5160537069468768
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 950.35400390625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3487179487179487
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9921436588103255
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.45396208781120356
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6836377769141081
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 13.386638641357422
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5250988743535138
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 23.639881134033203
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.36018363939899833
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9685746352413019
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.4721672326721662
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6544889234356782
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 758.7493896484375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5481784133469527
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 123.69869995117188
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.39345063538611924
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9034792368125701
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.41650121166936427
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.47290020469130756
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6785853089778469
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.8825353384017944
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5314685314685315
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.7442071437835693
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3778409090909091
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8956228956228957
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.47970762950524704
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6813058686358336
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 602.638427734375
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5384615384615384
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 980.8336181640625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3768506056527591
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9427609427609428
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4935429984156211
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6813058686358336
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 14.864974975585938
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5296145408962708
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 36.245880126953125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3673913043478261
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9483726150392817
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.48504972690345405
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6544889234356782
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 4045.42626953125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5217391304347826
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 713.2037353515625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3596938775510204
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9494949494949495
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.350425846271822
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.4935429984156211
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/MIBIG_protein_classification.json b/leaderboard/submissions/progen2-medium/MIBIG_protein_classification.json
new file mode 100644
index 0000000..90ae2d4
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6989990988890816
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.691609977324263
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7986955550826269
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.652675371834789
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.8151927437641735
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6510666703327254
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6145124716553289
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.79271911663216
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.597401875633424
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7605820105820112
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/arch_retrieval.json b/leaderboard/submissions/progen2-medium/arch_retrieval.json
new file mode 100644
index 0000000..3fbc7a5
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.8667
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.8539
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.81528
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.28143
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.39613
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.66886
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.28849
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.41121
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.71466
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.78805
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.71985
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.43151
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.90511452553706
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9065473649980684
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9081775693416649
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6612480619372302
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.46128894265628245
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.21116421624243986
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6213052175154115
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.49490230608329266
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.21967149541101536
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.5603531623948176
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.4685128740415315
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.1712953803081673
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.06797559593865152
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.2905404912455848
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3686455576980013
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.1573579335136106
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4234570447717268
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.2518670668196923
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5013213694250233
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.5420807636642778
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.05454709391738201
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.051603859613532796
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.2898156924225231
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.36769365765884426
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.12664596322820085
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.4271370583406068
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2532454874575687
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.46112764213529894
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.584450782631934
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.04169227808562108
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5372973075189744
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.2904671378642838
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.577918931914468
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4337989473686208
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.22378661665353214
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.576137485553088
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.21240666376908013
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.2489241348939673
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3798499517361313
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6888804715259021
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.42014922591038306
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.0714471449304972
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6896553273533154
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.4233185770383571
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.07422848638622621
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.693742352773071
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.41941213949022665
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.08094307327642038
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.69714
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.66471
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.59834
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.19286
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.26117
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.41853
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.20522
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.28836
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.51269
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.63841
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.5609
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.31759
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7844430217669651
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7890303098049573
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7915598665697423
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.35843421202903436
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.6600633200863916
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.03627327495753951
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.3353630084809703
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.6761105480874086
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.02350341443487571
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.27106613394122536
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.6690231790973418
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.07650491677426355
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.08906743461719288
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.31949618338174984
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3292636023000051
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.13874504172191632
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4647631401151212
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.2600910224266903
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.27470079652681245
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.6928461826997683
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.10769891596111343
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.05774041251094078
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.29379200776879594
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.32688275733718275
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.08274480687207497
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.42416625000987423
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2678170789795234
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.17288450743636116
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.6633291061716232
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.15854705547657555
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.32533961340687934
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.6020974295167729
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.1333980927582597
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.29585400791594363
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.5687767036261221
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.189945340565891
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.20906289650717386
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.28943991710214256
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.21305963873675984
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.36279298091388296
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6362558326995159
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.16901121330011185
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.36248425558706443
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.6402643633377308
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.17314376122591818
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.3631863215067844
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6401290947847794
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.17258652486506595
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/bacarch_bigene.json b/leaderboard/submissions/progen2-medium/bacarch_bigene.json
new file mode 100644
index 0000000..85d93d3
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7037735849056603
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.7811320754716982
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7283018867924528
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.7811320754716982
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.4038430123335784
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.49056603773584906
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.42439088843084405
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.49056603773584906
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/convergent_enzymes_classification.json b/leaderboard/submissions/progen2-medium/convergent_enzymes_classification.json
new file mode 100644
index 0000000..243dd84
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1925
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.14935912698412696
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1525
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.11368055555555553
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/cyano_operonic_pair.json b/leaderboard/submissions/progen2-medium/cyano_operonic_pair.json
new file mode 100644
index 0000000..5e160ba
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7180076628352491
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9889928102493286
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4414824447334201
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.4203949570655823
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.2901709401709402
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9225543478260869
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3189029210120636
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7180076628352491
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 65.704833984375
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.44071856287425143
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 639.349365234375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.282642089093702
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3036709966412725
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 5.881275177001953
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.44363856149894226
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 20.312252044677734
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.2852701127089001
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9972826086956522
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3132279316580241
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7195402298850575
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 308.689697265625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44540942928039706
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 95.29304504394531
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.28858520900321544
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9755434782608695
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.32010048431074967
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.32010048431074967
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7183908045977011
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.931535005569458
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4593088071348941
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.5361685752868652
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.31611253196930944
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8396739130434783
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.339620452953373
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.721455938697318
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 298.3204650878906
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4564081960626757
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 635.1521606445312
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3240159726183685
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.7717391304347826
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.37274235680877565
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7222222222222222
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 10.60649299621582
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4526946107784431
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 23.78628921508789
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.320520067834935
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.7703804347826086
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.36747868579778725
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7180076628352491
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1808.91015625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4434278743519366
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 121.43479919433594
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2858828155721589
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9877717391304348
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.28183146847459695
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.37274235680877565
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/ec_classification.json b/leaderboard/submissions/progen2-medium/ec_classification.json
new file mode 100644
index 0000000..5cdec0a
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.5546875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4921875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.42838541666666663
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/ecoli_operonic_pair.json b/leaderboard/submissions/progen2-medium/ecoli_operonic_pair.json
new file mode 100644
index 0000000..b75b595
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6184515530829856
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.651489794254303
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5857946554149086
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.38596251606941223
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4227353463587922
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9536348025186033
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.510237868123645
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6098748261474269
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 239.94903564453125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5768392827216356
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 607.368408203125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.40781990521327016
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9851173440183171
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.48682441190216086
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.624246638850255
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 12.428617477416992
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5890985324947589
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 19.439558029174805
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.42393764143827006
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9650829994275901
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5251579034107494
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 3148.2744140625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5811965811965812
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 93.77833557128906
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4143309773336583
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9730967372638809
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4434019305350601
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5251579034107494
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6274918868799259
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.6536474823951721
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.601917975923281
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.5033559203147888
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.46766011414077363
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8443045220377791
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5195509705220649
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6659712563745943
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 416.32061767578125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.6131327505590567
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 606.52978515625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.47540983606557374
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8631940469376074
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5921808575876653
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6627260083449235
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 13.742053985595703
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6071692535107169
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 28.72542381286621
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.44829467939972717
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9404693760732684
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5869667896425095
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 3265.408203125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5812606473594549
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 109.48553466796875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4137763764249333
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.976531196336577
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.38910400697637215
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5921808575876653
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/euk_retrieval.json b/leaderboard/submissions/progen2-medium/euk_retrieval.json
new file mode 100644
index 0000000..62d22f2
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.85039
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.84383
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.81649
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.33879
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.45506
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.67207
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.34411
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.46708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.70727
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.75949
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.67621
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.37633
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8896034297963559
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8904200479763181
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8917799846355668
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.7603781530194391
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.6376819010899927
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.4212148120292312
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.7322697904169153
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.6496456933761383
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.37980919966190413
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.7156345490529658
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.6522514666477729
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.3042825041288217
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.14758260305763898
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.26194428990169905
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.11232537482428236
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.270789065395862
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4790544168497472
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.03884116920584031
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.6616399771987015
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.7353350484425378
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.2128494417394226
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.13062461293496178
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.2479018950834343
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.1132534748053531
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.24031161646082083
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.46622985594529703
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.06071938680505056
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.6150865443749446
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.725765511063093
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.19749339031325439
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5614315220453862
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.5348156597752065
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.5720041132270836
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4384675283398972
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.3967752412060821
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.45432753775042073
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.18118357934146365
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.21480164227817497
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.2754588826031711
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.8454487169250762
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6151709989094298
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.3483912884875399
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.8442698393532679
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.6188982480035604
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.34895231147497485
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.8447441492866
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6161593785614531
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.351845322578248
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.70833
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.67903
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.64858
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.25169
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.32689
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.46739
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.25973
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.34512
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.53847
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.63408
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.54244
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.30759
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7887459807073955
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7936431378553563
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7959549608392991
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.4249013352122335
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.8037952157465401
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.1413596947741603
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.4204652802279644
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.7876332799563885
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.1462062003777289
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.3584251726169603
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.7541162752580496
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.14407914236714084
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.06823941786698261
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.38255659859692487
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3480674521155449
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.16200995043378846
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.5637260827505329
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.27925638990284274
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.316785523616919
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.7884674708035404
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.15281773067505242
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.05984367603361126
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.37075410024509964
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3528009604202977
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.13978654053219638
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.5394212306018864
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.28687335814861753
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.27953439115065165
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.7465655368746134
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.15167294388599076
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.42228922529257007
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.7149145518900438
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.11182287887883938
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.39379398750948513
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.6192434223426857
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.17068274953379578
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.2249314962247772
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.1513983474822626
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.2085676426311878
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.4873996930451618
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.790121004884451
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.35101924426054193
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.48679300042700696
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.7911671425593381
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.34602507202599053
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.49234090539442804
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.7908566907426995
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.3445651280400187
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/fefe_phylogeny.json b/leaderboard/submissions/progen2-medium/fefe_phylogeny.json
new file mode 100644
index 0000000..e70d5f0
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.6398006140261373
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.7591242650638738
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.754276136476935
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.7591242650638738
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.5698692272674944
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.7090467502518881
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.662949011431012
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.7090467502518881
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/modac_paralogy_bigene.json b/leaderboard/submissions/progen2-medium/modac_paralogy_bigene.json
new file mode 100644
index 0000000..1441ab3
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.04691689008042895
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.0018531981463306205
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.006032171581769437
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.0023899465819002715
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.006032171581769437
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.3652815013404826
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/mopb_clustering.json b/leaderboard/submissions/progen2-medium/mopb_clustering.json
new file mode 100644
index 0000000..61b8b58
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.878581661578546
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7368504655746232
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/rpob_arch_phylogeny.json b/leaderboard/submissions/progen2-medium/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..8535905
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2641157727110356
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3119710227372265
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.32296618163345303
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.32296618163345303
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.333762262235085
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.5086678691952276
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.4939985498711707
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.5086678691952276
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/rpob_bac_phylogeny.json b/leaderboard/submissions/progen2-medium/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..afb55cb
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1950082250185355
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.28043315277470116
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.30367273332819217
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.30367273332819217
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.25907183955646845
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3897653256717273
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.39653707661764
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.39653707661764
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-medium/vibrio_operonic_pair.json b/leaderboard/submissions/progen2-medium/vibrio_operonic_pair.json
new file mode 100644
index 0000000..de465c6
--- /dev/null
+++ b/leaderboard/submissions/progen2-medium/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-medium",
+ "revision": "...",
+ "num_layers": 27,
+ "num_params": 764803616,
+ "embed_dim": 1536
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 13,
+ "layer_display_name": "13",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6642052079284881
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.7931315302848816
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5223320831912717
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.44246846437454224
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3751224289911851
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8597081930415263
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4427000442773764
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6622619510299261
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 244.42291259765625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5149384885764499
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 615.18994140625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.34839476813317477
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9865319865319865
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4096548402507393
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6665371162067625
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 9.324880599975586
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5178359096313913
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 19.698150634765625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3522038010513546
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9775533108866442
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.4275378121386588
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.657209483093665
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 269.6624755859375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5334692490655792
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 103.44610595703125
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.38255360623781676
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8810325476992144
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4402580670162658
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.4427000442773764
+ }
+ ]
+ },
+ {
+ "layer_number": 26,
+ "layer_display_name": "26",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6626506024096386
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.8587204813957214
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5521669341894061
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.5507351756095886
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4297314178638351
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.7721661054994389
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.46071910274003564
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6743101438010105
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 384.3786926269531
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5400981996726677
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 677.1198120117188
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3812384473197782
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9259259259259259
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.47638983875407687
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6758647493198601
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 12.481945037841797
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5347798340778558
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 28.243602752685547
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.37360677663843067
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9405162738496072
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.4738811750419196
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6541002720559658
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1609.40478515625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5332914572864321
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 125.42507934570312
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.37025730484081987
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9528619528619529
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3646222508107507
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.47638983875407687
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/MIBIG_protein_classification.json b/leaderboard/submissions/progen2-small/MIBIG_protein_classification.json
new file mode 100644
index 0000000..4c892a3
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6388692163537786
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6394557823129252
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7418637884284616
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.5968684138731036
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.781934996220711
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6609534749591477
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6598639455782312
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.8030532546048321
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6121821659489147
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7843915343915351
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/arch_retrieval.json b/leaderboard/submissions/progen2-small/arch_retrieval.json
new file mode 100644
index 0000000..fab3ede
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.72082
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.70061
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.65352
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.21829
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.30584
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.49789
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.23064
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.33315
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.58309
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.65642
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.59091
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.33787
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7910513586569917
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7948976688413298
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7977303757166455
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.5475981836671467
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5960854077733617
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.07359409377865848
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.5348824611254859
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.593354772525071
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.05382473444665512
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.48145446953604293
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.5746101256658309
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.05768162131908042
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.0671435703618939
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.38563581595942015
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3255398177914751
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.1886920642763862
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4818911065684608
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.24142443627518678
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.4520357569415462
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.5809839586558266
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.06621717678004216
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.04521085335682694
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.3578304375654747
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3095295095368689
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.150564871318176
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.4524869709077065
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2422900535368358
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.42091958169718263
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.5521812748376208
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.0654084113222808
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5560865220026086
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.4663024105684298
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.12309166370191016
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.5235008052565618
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.3951829419893739
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.19499705168929
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.3308190035410808
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.056101087790478094
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.21982513157177477
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.5607106301178035
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.5887958978024223
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.19353716925535508
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.5615266952934416
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.5890945924016702
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.19325489061094214
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.5611777163821644
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.5888188766467686
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.19302943083610713
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.7033
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.67869
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.63008
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.21539
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.30029
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.49432
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.22644
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.3248
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.56761
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.63875
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.56773
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.32335
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7747119078104986
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7786623783102647
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7819268433047759
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": -0.1896218755037747
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.7779658670429147
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.30058319812194767
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": -0.16176589926755863
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.7855720876943553
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.2677426587955006
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": -0.1349738266153549
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.7755313282780849
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.24684189382047908
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": -0.15458803113699884
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.33049917158388503
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.40734235921704587
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": -0.13918338377349823
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.500927265616027
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.34912969967259155
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": -0.08843078320342054
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.7748433132528942
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.2336215965994099
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.15452256619448723
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.31027973260489217
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.38351765858630604
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": -0.13594798029165953
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.4664400683469538
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.33068387136634236
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": -0.09046828424499535
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.7652624873034768
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.24021392596400007
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": -0.11438252722644132
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.7005627248845133
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": 0.06477303657513034
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": -0.05753489391069873
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.6602227094416574
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.020464258201554942
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.01150401073222253
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.2912034804641511
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.12308576626927949
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": -0.2512008900587547
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.7484636775938552
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.4417496737536807
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": -0.24957809015538476
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.7478134052906925
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.4434699640064948
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": -0.24629364621121616
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.7471770037721222
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.44155968632638204
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/bacarch_bigene.json b/leaderboard/submissions/progen2-small/bacarch_bigene.json
new file mode 100644
index 0000000..d67426b
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.5480188679245283
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6264150943396226
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5690179422254895
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6264150943396226
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.5530817610062893
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6415094339622641
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5791194968553458
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6415094339622641
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/convergent_enzymes_classification.json b/leaderboard/submissions/progen2-small/convergent_enzymes_classification.json
new file mode 100644
index 0000000..0a96ef6
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.11
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.08067261904761903
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1325
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.09460714285714285
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/cyano_operonic_pair.json b/leaderboard/submissions/progen2-small/cyano_operonic_pair.json
new file mode 100644
index 0000000..d1451cb
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7183908045977011
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9855711460113525
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4403834631515877
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.3577058017253876
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.2824750192159877
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.998641304347826
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3296781486925475
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7183908045977011
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 30.541061401367188
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4404548174745661
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 203.69651794433594
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.28242517267843437
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.30196707221941077
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 1.3797529935836792
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4429044893040072
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 8.252227783203125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.2845528455284553
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.998641304347826
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3140419058392819
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 76.20918273925781
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44019138755980863
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 10.069953918457031
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2822085889570552
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.32358772339840414
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3296781486925475
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.717624521072797
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9930706024169922
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.44649324991768197
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.4854004979133606
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.29465449804432853
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9211956521739131
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3085559879574271
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7180076628352491
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 47.12645721435547
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.44656879481051553
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 257.8835144042969
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.2982216142270862
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8885869565217391
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3316723863885721
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 4.9462361335754395
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4484217377155874
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 13.16019058227539
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.2948224219084296
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.936141304347826
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3345743153670524
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 294.17828369140625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44191763982790416
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 33.673152923583984
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2855440826052423
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9769021739130435
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.2836210112641132
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3345743153670524
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/ec_classification.json b/leaderboard/submissions/progen2-small/ec_classification.json
new file mode 100644
index 0000000..9daa24e
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.4765625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.41744791666666664
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.484375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.43671875
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/ecoli_operonic_pair.json b/leaderboard/submissions/progen2-small/ecoli_operonic_pair.json
new file mode 100644
index 0000000..a3cf931
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6240148354195643
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.8872358798980713
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5769548685733179
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.33167386054992676
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.40562529056252905
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9988551803091014
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4935020874365037
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6223922114047288
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 65.3521499633789
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5784297382897149
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 184.63580322265625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4080432737535277
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9931310818546079
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.49072155439348264
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6293463143254521
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 3.0176098346710205
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5877718787463607
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 8.208200454711914
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.41935483870967744
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9822552947910704
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5146357162147513
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1122.26953125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5769103539530268
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 11.544685363769531
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4056757385438474
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.998282770463652
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.38847128264957165
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5146357162147513
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6105702364394993
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.8996621966362
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5856863789735394
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.40186625719070435
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.424562306900103
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9439038351459645
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4864021596361824
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6390820584144645
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 189.96759033203125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.6041311351146486
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 255.98159790039062
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.45155807365439093
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9124212936462507
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5488547270837909
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6478905887807139
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 8.068485260009766
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.609268614100593
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 10.204345703125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.4942979330007128
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.793932455638237
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5654598094655838
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 1022.4357299804688
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5770122557138125
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 21.223339080810547
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4059659752971335
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9971379507727532
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.39641816140828223
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5654598094655838
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/euk_retrieval.json b/leaderboard/submissions/progen2-small/euk_retrieval.json
new file mode 100644
index 0000000..d225736
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.76129
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.75262
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.74115
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.29128
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.39027
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.5702
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.3014
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.42427
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.6455
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.68232
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.6
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.34881
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.827706323687031
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8334736895830142
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8352247065129689
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6858069307511622
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.4803619867415532
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.10573311085242518
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6333355994172974
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.4904730781745588
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.10224896683374189
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.5933094336884881
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.49029547722523154
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.02071011278307947
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.19751713387011416
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.21060706901174292
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.20347426477581018
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.28431727003040264
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.3923582183860595
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.13979856210713534
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5863012284678861
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.5306429524718634
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.015444799785488983
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.1792675407300632
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.1906180594860627
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.20465475488944024
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.2116233943415803
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.36118918957126545
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.14906386527872684
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5729109265785561
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.49567505770955805
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.02404391976493193
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5786223559627802
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.4389250698294924
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.2406582141999514
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4536397414299
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.3852138942250889
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.2803024103218935
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.18036641882810736
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.06509781077680983
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.21138350693363533
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7568638646687446
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.4420526206688317
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.027265500921681368
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7529826623218884
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.4486646568823487
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.01716937343387214
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7554046556002796
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.4476182957497638
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.01852375862791937
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.70785
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.69868
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.68431
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.25855
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.35363
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.53095
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.2683
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.38229
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.60213
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.64437
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.57106
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.32418
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7742765273311898
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7811488797019344
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7825581627833461
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.3966350445773588
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.8068569080446616
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.052946890410233614
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.42473395095306976
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.8025958108917352
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.03819502888483237
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.4116391776486932
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.7697436774828385
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.050274646426970125
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.07758341515963516
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.3823333904818765
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.29568491522146834
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.16150724196642183
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.5760554960542517
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.24345035389279063
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.39011658814228733
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.8122438882456519
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.08899489357630828
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.08852883708015474
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.35485286220839607
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2869394867481459
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.1895885370766902
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.5177511601949351
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2429373023016436
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.43169016260747917
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.769377960844427
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.11233814702164165
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.3722433989717487
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.7382052017909906
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.11447089919377483
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.36854361632969396
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.6388922941430113
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.19864698587070462
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.23233618462716996
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.09790753029551322
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.2702334237717018
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.35226396208770194
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.766509273666355
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.175557607407714
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.3622958711100552
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.7642130903574725
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.1683219848050957
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.3655727075715084
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.7651993873517181
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.16846554104747688
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/fefe_phylogeny.json b/leaderboard/submissions/progen2-small/fefe_phylogeny.json
new file mode 100644
index 0000000..ae25339
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.555555588091072
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.6299158443145239
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.6231385548367301
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.6299158443145239
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.5399147814539383
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.711209240447499
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.7010135704373694
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.711209240447499
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/modac_paralogy_bigene.json b/leaderboard/submissions/progen2-small/modac_paralogy_bigene.json
new file mode 100644
index 0000000..eb0492d
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.048927613941018765
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.0006888223332782205
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0020107238605898124
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.0007064493294447139
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0020107238605898124
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.1836461126005362
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/mopb_clustering.json b/leaderboard/submissions/progen2-small/mopb_clustering.json
new file mode 100644
index 0000000..cc9f9cc
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7298417430044122
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7850829954141821
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/rpob_arch_phylogeny.json b/leaderboard/submissions/progen2-small/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..e06754a
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.3829776859261085
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.348654350352103
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.36338027377278787
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3829776859261085
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2715351801224828
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.4119718729857529
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.41890778828000724
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.41890778828000724
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/rpob_bac_phylogeny.json b/leaderboard/submissions/progen2-small/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..1b1fc23
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.32857545814663647
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3054923470480063
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.31844371690401796
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.32857545814663647
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.25929991297508803
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3537389858773928
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.37513623635674553
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.37513623635674553
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-small/vibrio_operonic_pair.json b/leaderboard/submissions/progen2-small/vibrio_operonic_pair.json
new file mode 100644
index 0000000..048b1e5
--- /dev/null
+++ b/leaderboard/submissions/progen2-small/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-small",
+ "revision": "...",
+ "num_layers": 12,
+ "num_params": 151148576,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 6,
+ "layer_display_name": "6",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6657598134473377
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9222296476364136
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.514868804664723
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.4063832759857178
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.34777471445450964
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9910213243546577
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4138001165304024
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6603186941313641
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 60.44577407836914
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5145348837209303
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 185.1787109375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.34719497842291097
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9932659932659933
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4021725774606305
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6661484648270501
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 2.6326441764831543
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5190249702734839
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 7.227571487426758
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.35301253538212696
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9797979797979798
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.4193700772478897
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6541002720559658
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 72.76122283935547
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.514153668399769
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 9.499689102172852
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3461688059120965
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9988776655443322
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.35093524337294146
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.4193700772478897
+ }
+ ]
+ },
+ {
+ "layer_number": 11,
+ "layer_display_name": "11",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6622619510299261
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9203916788101196
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5250700716287762
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.42482998967170715
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.36336206896551726
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9461279461279462
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4258480800533781
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6712009327633113
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 172.4407958984375
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.533378287255563
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 249.7503662109375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3812048192771084
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8877665544332211
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4625420841865413
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6723668869024485
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 7.234950542449951
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5309222423146474
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 10.6547212600708
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.39167556029882605
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8237934904601572
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.46956741142610603
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6541002720559658
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 234.90899658203125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5204991087344029
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 29.30080795288086
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.35393939393939394
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9831649831649831
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3684973015108067
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.46956741142610603
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/MIBIG_protein_classification.json b/leaderboard/submissions/progen2-xlarge/MIBIG_protein_classification.json
new file mode 100644
index 0000000..ba94584
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7001728644239972
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.7029478458049887
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.8280344219424678
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6498111947852081
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.824263038548754
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6139791893295873
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6099773242630385
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7783852719765555
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.5630929156905906
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7556689342403636
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/arch_retrieval.json b/leaderboard/submissions/progen2-xlarge/arch_retrieval.json
new file mode 100644
index 0000000..f2f0cd5
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.88067
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.86937
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.83252
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.29196
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.40958
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.69033
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.29856
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.42416
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.72823
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.7965
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.72953
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.4405
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9187437757860286
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9202352160098636
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9210959739756178
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.34069226456540874
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.5303111124586763
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.025647490198929427
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.30583794948281073
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.5821104164244411
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.050433839881315716
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.28253639654117757
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.5376269279974585
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.05367704597406945
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": -0.019865699890794175
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.2580006088571339
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3854553614086897
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.028339000806823566
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4198812392537043
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.28743486276729935
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.24320253871207173
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.6185612843368935
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.02553041504833834
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.028630138912707673
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.25754210867492744
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3847002985890222
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.011162568781484269
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.42159429826522843
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2881811374864961
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.22494289440951815
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.6644384705590648
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.015127323483749542
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.29836230337298764
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.35428463987994574
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.5058092977932365
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.25147134130980464
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.2954160057646196
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.5058354133511384
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.13524738619031487
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.24125503477420038
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3287093653549262
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.42306591629339924
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.4977353552615966
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.11372050884722233
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.4279211933184279
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.4953362570745432
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.10873602019499856
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.42853960149453696
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.495417525780632
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.10619000413359458
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.65505
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.61387
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.53252
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.16876
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.22345
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.34562
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.18233
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.25284
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.43763
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.59232
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.51148
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.28085
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7721724285104562
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7762465364578036
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7783536022752451
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.4928690540959825
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.6568501842207822
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.11215893137809109
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.4462954434195642
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.665381100523378
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.08003523793764296
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.35270458066820515
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.599817822507719
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.09880049258383304
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.17597203770503592
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.2575369731993772
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.29440571242786184
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.2175925083646819
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.39533379455352086
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.2343999392197569
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.3103373434513419
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.623277741299874
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.11447631894830992
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.13017232999669545
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.22632566900169906
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.28185503381155413
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.1438011427359236
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.3356014043230637
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.22340272803666755
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.2223778867146586
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.549815782175779
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.13039530768244306
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.4340127722798461
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.6239826780987181
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.031493781720994596
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.3709661062638432
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.6079135839289097
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.09685534037957318
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.24677453371782054
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.38301044535573053
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.14712928623198873
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.5540713256496355
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6265017258741243
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.2870850914389624
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.5528952321912547
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.6266600018990199
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.28659479517076514
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.5533710928565948
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6259483449701599
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.2844418766374931
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/bacarch_bigene.json b/leaderboard/submissions/progen2-xlarge/bacarch_bigene.json
new file mode 100644
index 0000000..30b0ee5
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7503144654088051
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.8113207547169812
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7704402515723269
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.8113207547169812
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.13607728099671065
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.17358490566037735
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.14255790731020063
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.17358490566037735
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/convergent_enzymes_classification.json b/leaderboard/submissions/progen2-xlarge/convergent_enzymes_classification.json
new file mode 100644
index 0000000..fc37eec
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.14842261904761905
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.1475
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.11108333333333334
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/cyano_operonic_pair.json b/leaderboard/submissions/progen2-xlarge/cyano_operonic_pair.json
new file mode 100644
index 0000000..a5e275e
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7180076628352491
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9752044677734375
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4415116976604679
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.0913393497467041
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.28329484218629714
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 1.0
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.30503917581036627
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7180076628352491
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 404.81805419921875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4403834631515877
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3619.329345703125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.2824750192159877
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.998641304347826
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.2925298541520232
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7180076628352491
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 8.454354286193848
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.44354081951829843
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 67.32762145996094
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.2880942706216985
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9633152173913043
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3108581457843357
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7187739463601532
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 4583.71484375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4513049013367282
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 263.0050964355469
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2946799667497922
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9633152173913043
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.29691610324048845
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3108581457843357
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7218390804597701
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.8307529091835022
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4664991624790619
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.49792495369911194
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3371670702179177
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.7567934782608695
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.36858629659158926
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7210727969348659
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 1641.822998046875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4584763212079616
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 4072.3740234375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.30670339761248855
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.907608695652174
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3705206660251011
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7206896551724138
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 32.319210052490234
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.45816409423233145
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 69.70606994628906
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3267670915411356
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.7663043478260869
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3704629292673498
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.717624521072797
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 15245.76953125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.45340751043115446
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 1607.8956298828125
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.30467289719626167
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8858695652173914
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3003882097832594
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3705206660251011
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/ec_classification.json b/leaderboard/submissions/progen2-xlarge/ec_classification.json
new file mode 100644
index 0000000..5e09e4e
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.609375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5486979166666666
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.46875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.40937499999999993
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/ecoli_operonic_pair.json b/leaderboard/submissions/progen2-xlarge/ecoli_operonic_pair.json
new file mode 100644
index 0000000..0b2e2ef
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.597589244320816
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.5353635549545288
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.582427374799501
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.17530038952827454
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.42287784679089024
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9353176874642244
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4701568355111498
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6087158089939732
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 1468.429443359375
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5763831544178365
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3691.033203125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4050603528319406
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9988551803091014
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.46972522412271756
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6279554937413073
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 39.83434295654297
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5918518518518519
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 59.0245361328125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.43744867232411716
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9147109330280481
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5291568188660792
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.5948076031525267
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 38678.29296875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5772385509227614
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 234.51239013671875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4114494518879415
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9668002289639381
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4105969970779999
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5291568188660792
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.642790913305517
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.6761397123336792
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.6134134134134135
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.4874127507209778
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.47167487684729065
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8769318832283916
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5599254090069472
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6515994436717664
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 2297.15966796875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.604589417788735
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3568.659423828125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.45207033465683494
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9124212936462507
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5658647904632282
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6464997681965693
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 45.08574676513672
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6067834513604174
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 79.49329376220703
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.44984802431610943
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9318832283915284
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5628362091175183
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6017617060732499
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 2657.70654296875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5945250780819401
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 1609.9322509765625
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.43777056277056275
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9261591299370349
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.46695267318075395
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5658647904632282
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/euk_retrieval.json b/leaderboard/submissions/progen2-xlarge/euk_retrieval.json
new file mode 100644
index 0000000..bc5dd0f
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.84774
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.8374
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.82416
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.34395
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.45708
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.67546
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.34724
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.4629
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.70514
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.74791
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.66367
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.38521
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8982851018220793
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8995610677282704
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.900508188422815
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.4767898888088076
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.7419553503608369
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.055414003237389486
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.43775298679589786
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.7407504551889852
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.05347140613062027
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.41293112354686706
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.7476717808315242
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.08006377164933114
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": -0.0148881359491563
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.23989310515269469
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3287363118687835
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.05281666093138101
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4712488352245418
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.281326814344777
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.33174842347070294
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.8375866923633879
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.1383451658034337
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.024413048209636076
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.2357082185796465
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3392331612284683
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.03992063669730916
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.4662588167162248
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2931262438573035
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.2837205586716561
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.8522921818796286
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.162147287293181
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.4146288409630512
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.615634517860307
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.37956355864345537
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.3407833568385157
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.4829041685657454
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.3726537217663794
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.21298158451575644
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.17142552632901348
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.28865788487324007
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6383125172254681
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6960093217220756
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.08088971499853534
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6431409368495994
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.693162244516911
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.07470607309971362
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6408878710877405
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6952625726667562
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.07873314340871365
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.58687
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.5516
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.51338
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.19386
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.24564
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.33821
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.2105
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.27395
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.43525
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.52154
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.43441
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.23408
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7136655948553056
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.717022661154494
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.7203295622151216
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.5080779306269386
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.6888089972666763
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": 0.10673258599936329
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.4632005033585787
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.6712012494044541
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": 0.08784010802216201
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.38857259040070996
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.5895619261020698
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.14080687873239334
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.16090136788609444
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.31243391620873545
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.3390512300528643
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.22579196089292114
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.4669121206457578
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.26122356825986964
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.30204264302375927
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.6332673466736974
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.16004379501441202
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.11110155948455377
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.25591724289649287
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3227114364901176
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.17196503188722667
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.39247300342855024
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.2629668690094458
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.22851294474341216
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.530880033887029
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.1726004902442935
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5070785300123103
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.6876905262817138
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.04779793396663549
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4351829938283804
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.6459334312111928
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.13694795084624573
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.29963197362191873
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.31559221570405743
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.18803068785533397
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6013539055037981
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.6207506271548514
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.2669837260163356
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6020956263248132
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.6245273952623522
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.2710930700853572
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6044496050365885
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.6292104477286984
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.27333065711002147
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/fefe_phylogeny.json b/leaderboard/submissions/progen2-xlarge/fefe_phylogeny.json
new file mode 100644
index 0000000..504e7db
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.687389103121127
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.8359818853206277
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.8191020336133575
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.8359818853206277
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.5497951203631188
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.5787807165799065
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.5626204870453207
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.5787807165799065
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/modac_paralogy_bigene.json b/leaderboard/submissions/progen2-xlarge/modac_paralogy_bigene.json
new file mode 100644
index 0000000..432d51e
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.516450720102903e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 9.02681867829321e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.05630026809651475
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.010579204388082606
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.029490616621983913
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.012979104896871197
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.029490616621983913
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.435656836461126
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/mopb_clustering.json b/leaderboard/submissions/progen2-xlarge/mopb_clustering.json
new file mode 100644
index 0000000..db44f1e
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.9081213752554907
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7589723753650268
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/rpob_arch_phylogeny.json b/leaderboard/submissions/progen2-xlarge/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..79d2705
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.20539647600239538
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2959954224929351
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.31970215732214785
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.31970215732214785
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.4102284573265796
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.4946045987721917
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.5014444676012894
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.5014444676012894
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/rpob_bac_phylogeny.json b/leaderboard/submissions/progen2-xlarge/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..caa0ab6
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.28914273667063095
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.4766469672120496
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.47147258109872947
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.4766469672120496
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2596902166209353
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3450947432996055
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.3497832144429374
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3497832144429374
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/progen2-xlarge/vibrio_operonic_pair.json b/leaderboard/submissions/progen2-xlarge/vibrio_operonic_pair.json
new file mode 100644
index 0000000..ba256f8
--- /dev/null
+++ b/leaderboard/submissions/progen2-xlarge/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "hugohrban/progen2-xlarge",
+ "revision": "...",
+ "num_layers": 32,
+ "num_params": 6443638816,
+ "embed_dim": 4096
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 16,
+ "layer_display_name": "16",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6587640886125146
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.8188399076461792
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5227848101265823
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.17655116319656372
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3640370207139709
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9270482603815937
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.42481524037914975
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6626506024096386
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 1462.2254638671875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5146886016451233
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3214.6376953125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3485873458018305
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9831649831649831
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3918753881823718
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6642052079284881
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 28.370464324951172
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5240963855421686
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 66.16458129882812
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3581720872787155
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9764309764309764
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.42221114260233406
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6537116206762534
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 5076.0380859375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5336442371752165
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 293.55999755859375
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.379441023211748
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.898989898989899
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.39506197369719853
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.42481524037914975
+ }
+ ]
+ },
+ {
+ "layer_number": 31,
+ "layer_display_name": "31",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6820831713952584
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.742120623588562
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5437981779957954
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.46992284059524536
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.39531329597554765
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8709315375982043
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.49399145765039254
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6774193548387096
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 2092.191162109375
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5341494845360825
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 3469.343505859375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3746046091278807
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9304152637485971
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.46616460843353874
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6762534006995725
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 41.604942321777344
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5326569435637286
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 75.52215576171875
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3711886875828546
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9427609427609428
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.46666836903764686
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6544889234356782
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 10279.9619140625
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5383381456528714
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 1468.63623046875
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.37690925426774485
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.941638608305275
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4308905193342436
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.49399145765039254
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/MIBIG_protein_classification.json b/leaderboard/submissions/prot_t5_xl_bfd/MIBIG_protein_classification.json
new file mode 100644
index 0000000..775b64b
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.665347347353334
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.671201814058957
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.78845191704169
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6203371228716182
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.8023431594860176
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6393665718980249
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.655328798185941
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7777498564416659
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.5932752607728978
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.782879818594105
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/arch_retrieval.json b/leaderboard/submissions/prot_t5_xl_bfd/arch_retrieval.json
new file mode 100644
index 0000000..c2ff5f1
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.91556
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.90692
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.87968
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.30562
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.42599
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.71437
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.31146
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.43927
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.75802
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.82868
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.76219
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.4731
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9424384691990327
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9442957407746143
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9450892859974356
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6758404864845804
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.07802141173529904
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.34014079454644375
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6559176060420032
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.11191286028806371
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.36216008877717815
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.588166705701525
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": -0.00013877612539943873
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.26109772787783125
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": -0.014830905831570812
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.10595506924709744
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.32085091803527405
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.10033887584469645
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.2053648224876042
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.20098155837221496
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5383618167014114
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.18560538825485104
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.14646774451947722
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.02008919630315214
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.11234151602554744
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3242694849254786
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.0837787484872365
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.22103746678628963
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.21375392931643278
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5147756801091333
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.22064295388157423
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.11436202458939185
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5561758833104028
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.09041415273912264
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.7212291337345235
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.46267035130101786
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.06806184226878606
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.6634938646543213
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.18101555042086578
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.32278953058129545
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.35250619598434496
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6997884970620865
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.04506108475253997
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.150746697861468
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.6974394278188117
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.050149955898670835
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.14664534711931623
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6968457854626777
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.049163188110125974
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.14643427219711108
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.76922
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.73124
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.65851
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.22822
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.30193
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.46351
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.24018
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.32508
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.54975
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.69415
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.60568
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.33925
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8569426660975955
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8589987737875058
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.860342096901009
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6229293301375335
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.4128916234696497
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.12491964721295902
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.5725634494942511
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.40575051204835366
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.15984334362764854
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.4671079862629836
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.37026058182180693
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.10868446365529066
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.14085023375284314
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.22636481411436557
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.20416003342491348
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.23722391725543646
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.3063646829510937
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.11281577118835237
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.46696020043403985
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.42152653625289493
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.08111993114861822
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.12046877670841287
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.21017995546377888
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.19460706495216118
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.20295180744150895
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.28352319675208193
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.10984164101180431
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.4349341161444355
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.3879826102213279
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.07896065211344214
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5328788932256486
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.32074885397774344
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.31911971640195713
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.45500288678534834
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.2681194584757835
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.34939580835864753
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.23627988535364855
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.043589016181725906
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.26692480361724236
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7415289598377633
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.43021039474005807
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.11071333809933313
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7421025217893243
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.434990212167193
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.1124635999964573
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.742616888935384
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.4371335788300678
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.11342495339203741
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/bacarch_bigene.json b/leaderboard/submissions/prot_t5_xl_bfd/bacarch_bigene.json
new file mode 100644
index 0000000..98ccb5b
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.7597484276729559
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.8301886792452831
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7823899371069182
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.8301886792452831
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.2519729152748021
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.33584905660377357
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.26899616107163277
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.33584905660377357
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/convergent_enzymes_classification.json b/leaderboard/submissions/prot_t5_xl_bfd/convergent_enzymes_classification.json
new file mode 100644
index 0000000..966f9c3
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.275
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.22734523809523807
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.2275
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.17930555555555558
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/cyano_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_bfd/cyano_operonic_pair.json
new file mode 100644
index 0000000..9550a89
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7272030651340996
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9687467813491821
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.45071380013596196
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.8978159427642822
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.30054397098821395
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.9008152173913043
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.3941340265884971
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7203065134099617
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 1735.5322265625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.44430258538142353
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 5307.4521484375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.2903629536921151
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9456521739130435
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3528795703139007
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.721455938697318
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 126.05880737304688
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.44332175560467313
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 292.90557861328125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.2887700534759358
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.9538043478260869
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3695630635425535
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7195402298850575
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 356795.21875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4499004644990046
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 190484.53125
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.2976294995610184
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9211956521739131
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.36196828415543614
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.3941340265884971
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7344827586206897
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.97544264793396
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4597894736842105
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9250391721725464
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.3331299572910311
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.7418478260869565
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.40694383571640735
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7310344827586207
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 9917.591796875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.4648117839607201
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 15072.4345703125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3325526932084309
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.7717391304347826
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.3986910724493818
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7306513409961686
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 457.9825439453125
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4638027048528242
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 840.006103515625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.327896512935883
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.7921195652173914
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.39658322011908265
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7222222222222222
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 4470940.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.44773175542406307
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 3160863.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.295316565481353
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9252717391304348
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3594057338410264
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.40694383571640735
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/ec_classification.json b/leaderboard/submissions/prot_t5_xl_bfd/ec_classification.json
new file mode 100644
index 0000000..a191718
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6328125
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5651041666666667
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.609375
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5447916666666667
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/ecoli_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_bfd/ecoli_operonic_pair.json
new file mode 100644
index 0000000..fbdca25
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6585535465924895
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9535143375396729
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.6174863387978143
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9261614680290222
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.48787778146795086
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.840870062965083
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5911089754596931
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.631432545201669
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 3524.670166015625
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5858181818181818
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 5154.9697265625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4292565947242206
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9221522610188895
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5325132413673002
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6455725544738062
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 174.34619140625
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6126418152350082
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 223.7291259765625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.47412982126058323
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8654836863194046
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5706504173017982
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6376912378303199
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 300376.21875
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5884498480243161
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 240064.3125
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.45545796737766625
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8311390955924441
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.5400873122794386
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5911089754596931
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6738525730180807
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9607139825820923
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.6289983656315666
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9430916905403137
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.5311514195583596
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.7710360618202633
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.6204750761919859
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6787204450625869
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 11232.892578125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.6275144508670519
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 14495.390625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.5263770364623739
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.7767601602747567
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.6255504867654197
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6736207695873899
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 551.5784912109375
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6314819082737958
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 733.4508056640625
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.5285493827160493
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.7842014882655982
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.6208618380913407
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6124246638850255
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 4513109.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5824329034506797
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 3365215.5
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4186920571285392
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.95649685174585
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4987551828608048
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.6255504867654197
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/euk_retrieval.json b/leaderboard/submissions/prot_t5_xl_bfd/euk_retrieval.json
new file mode 100644
index 0000000..7084ba9
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.92672
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.92306
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.8895
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.35502
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.47966
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.70554
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.35897
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.48744
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.73218
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.83087
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.75016
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.42894
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9415862808145767
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9430153626295105
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9437368653683487
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6450941038620643
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.3234724628590291
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.7353405227616494
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6784370493921454
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.356307778900032
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.7553235221093711
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6829124921854598
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.3430036735531466
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.5016210325394614
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.14516238032739714
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.05981617619794749
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.2506683978946674
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.26720700851970014
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.2748532106324955
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.13039242553014585
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.7404748343804409
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.4062731822335897
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.20921986403462078
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.15345536503202384
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.05976560645876083
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.24331468000836254
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.27273696746447923
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.2772936528689855
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.12056716886495585
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.7668012857938902
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.44460772529114895
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.18875065148055795
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.3930073745085516
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.41020902636849293
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.932622625910148
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.2635529758242909
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.3323551761779577
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.7524748595057036
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": -0.005519805496856407
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.23536165525170333
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.358047396892961
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7135224342947254
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.318638219709438
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.6795248041287989
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7103575293672233
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.3180491040497493
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.6905725180457766
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7113420373917401
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.328353633746779
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.6797371842783072
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.71234
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.70278
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.67585
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.22489
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.31161
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.47218
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.24057
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.35114
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.58748
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.64823
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.58457
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.33505
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.7939442658092176
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.7984675649466646
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8004998179261701
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.649294362751511
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.4789146724672918
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.06361407354634938
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6278496426823811
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.485871035859316
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.04265838292616143
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6160495322931576
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.4735471084683332
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.038745902550895
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.35996321488756433
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.31196420531010555
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.23017037321039516
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.4194454380907453
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.47086648384193386
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.14309964875425385
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5964554555580853
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.5208142856976985
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.014199249179051887
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.32992418491537023
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.2705437266239038
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.246661828066766
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.35831658068419964
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.39458331969775373
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.18432558994969014
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.5472469480990645
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.46257256009768155
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.03652551189856071
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5222050117811435
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.4633140941813137
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.29411592084499516
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.3939497429662996
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.3920607553465279
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.2770064989363528
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.15262499225505957
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.0399399666879417
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.25354614582075213
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7990542802042412
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.4330228140756498
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.07372075423984396
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.799568634188793
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.42435558306674237
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.07777149596651926
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.8000664803994814
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.4278349242317028
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.07345720773957888
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/fefe_phylogeny.json b/leaderboard/submissions/prot_t5_xl_bfd/fefe_phylogeny.json
new file mode 100644
index 0000000..93bc496
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.33209122731338475
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.6244747787953618
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.5884202230855742
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.6244747787953618
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.43006317577077985
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.6215222814983533
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.5928180835249242
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.6215222814983533
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/modac_paralogy_bigene.json b/leaderboard/submissions/prot_t5_xl_bfd/modac_paralogy_bigene.json
new file mode 100644
index 0000000..b143d2d
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.09316353887399464
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.00217875092414885
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.004021447721179625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.00230626824933552
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.004021447721179625
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.2734584450402145
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/mopb_clustering.json b/leaderboard/submissions/prot_t5_xl_bfd/mopb_clustering.json
new file mode 100644
index 0000000..a57e33a
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.8281800072486031
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.7658272771219637
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/rpob_arch_phylogeny.json b/leaderboard/submissions/prot_t5_xl_bfd/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..ce67216
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.18285301896376252
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.1904500888228494
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2132223506047016
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2132223506047016
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.23327313257646354
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3555894078051998
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.32671911270173437
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3555894078051998
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/rpob_bac_phylogeny.json b/leaderboard/submissions/prot_t5_xl_bfd/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..9785c1f
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.21148648442107776
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2367395745608637
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.2724794083627195
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2724794083627195
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.25893556661958106
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2733544641048402
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.25026214474431
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2733544641048402
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/vibrio_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_bfd/vibrio_operonic_pair.json
new file mode 100644
index 0000000..5a12578
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_bfd/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_bfd",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6871356393315197
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9634510278701782
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5520751761942052
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9320050477981567
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4239326518340349
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.7912457912457912
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5172201267603933
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6743101438010105
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 3110.09326171875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5299209634926608
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 4711.361328125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3986409966024915
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.7901234567901234
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4750578849012121
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6789739603575593
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 144.1129608154297
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5490048817123545
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 214.9871368408203
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.41252821670428896
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8204264870931538
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.502524788853816
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6661484648270501
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 337874.09375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5233226837060703
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 211411.4375
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.36578829834747656
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9191919191919192
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4546480066127966
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5172201267603933
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6984065293431791
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9698377251625061
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.558586246638494
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9381648898124695
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4246495327102804
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8159371492704826
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.539827365743389
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6995724834823164
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 10790.3623046875
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5534351145038168
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 14495.240234375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4193175245806825
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.813692480359147
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5413814180716444
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7030703458997279
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 521.355712890625
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5590062111801242
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 730.8151245117188
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.42729970326409494
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8080808080808081
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5389712975239336
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6638165565487757
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 4920725.5
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5192250372578241
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 3139242.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3534902597402597
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9775533108866442
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4328071265147608
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5413814180716444
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/MIBIG_protein_classification.json b/leaderboard/submissions/prot_t5_xl_uniref50/MIBIG_protein_classification.json
new file mode 100644
index 0000000..3f4681f
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/MIBIG_protein_classification.json
@@ -0,0 +1,98 @@
+{
+ "task": {
+ "id": "MIBIG_protein_classification",
+ "display_name": "MIBiG Classification",
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/mibig_classification_prot",
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6921288381092022
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6961451247165533
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.8139754057916093
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6470389372073905
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.8191609977324276
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.6619098471417835
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6394557823129252
+ },
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.8554266113510164
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.6089150997875741
+ },
+ {
+ "id": "lrap",
+ "display_name": "lrap",
+ "description": null,
+ "value": 0.7777777777777787
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/arch_retrieval.json b/leaderboard/submissions/prot_t5_xl_uniref50/arch_retrieval.json
new file mode 100644
index 0000000..5f6fe6f
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/arch_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "arch_retrieval",
+ "display_name": "Arch Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/arch_retrieval",
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
+ },
+ {
+ "path": "tattabio/arch_retrieval_qrels",
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.92437
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.91832
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.88929
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.31084
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.43426
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.73127
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.31741
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.44657
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.7625
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.83594
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.7723
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.47935
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.944664959453692
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9465603384617468
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9470471736604207
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6904257424907183
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.17626530867610984
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.3256037208699509
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6755859537896345
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.19312694738891206
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.34336751560690454
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6099399964441327
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.003032523727931495
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.24904093099919736
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": -0.01944424474842768
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.12247733739429825
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.30213657878474254
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.09661075440355467
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.23052413036526725
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.1929524400881027
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.5485655483431373
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.21502729379562965
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.143434087412841
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": -0.027816152462282782
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.12775708504995087
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.3024520158353276
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.08095008455559108
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.2419416823372581
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.20458678360282087
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.536333878981592
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.23988126483702843
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.11687231401703965
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5391898283330372
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.11534434911274209
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.7316665111394176
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4498954557047002
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.0841995836786073
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.6432552889288048
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.16726557824854918
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.354798802661032
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3199379354401648
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7364765721392718
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.24266478409531936
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.17829833398517161
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7372889177413674
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.2470438039467095
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.1591466954839909
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7376354579559358
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.24228421772128148
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.16077725815978616
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.76933
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.73698
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.66495
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.22744
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.30213
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.46438
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.23986
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.32866
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.55316
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.69313
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.61144
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.34595
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.8586498790724141
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.8614352783366866
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8629394391752504
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.5641842814317276
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.4880046637927035
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.037498225871017386
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.5222240258842824
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.45928236160268926
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.0735168315819288
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.44245593635026653
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.39907625818163395
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.04156312252212681
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.1948456953101706
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.261665582778819
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.2279556194678241
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.2794927054324812
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.32900702387812925
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.14553044394893827
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.47965579454358886
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.4173120928894022
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.022111983640584615
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.17398804372634855
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.25358909422151527
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2304294274459766
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.24544039747014393
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.3020161386947229
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.14739917022628052
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.4516091047820593
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.36055725309104575
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.01451301873367621
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.45918558853461183
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.35010629637375024
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.2492532917210604
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.37443959671596155
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.2695210685235055
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.28869106912215586
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.14690009878485263
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": 0.03721365892480409
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.22839727740438145
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.6724896908702344
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.5605484676610025
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.17667808149086445
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.674416832047262
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.5583444357986156
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.17924052942171584
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.6740382845670758
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.5567109464037232
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.17814332805230865
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/bacarch_bigene.json b/leaderboard/submissions/prot_t5_xl_uniref50/bacarch_bigene.json
new file mode 100644
index 0000000..7ede09b
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/bacarch_bigene.json
@@ -0,0 +1,86 @@
+{
+ "task": {
+ "id": "bacarch_bigene",
+ "display_name": "BacArch BiGene",
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/bac_arch_bigene",
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.779245283018868
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.8415094339622642
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.7987421383647799
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.8415094339622642
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.1946602749010892
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.27169811320754716
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.20994827269201596
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.27169811320754716
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/convergent_enzymes_classification.json b/leaderboard/submissions/prot_t5_xl_uniref50/convergent_enzymes_classification.json
new file mode 100644
index 0000000..e45a738
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/convergent_enzymes_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "convergent_enzymes_classification",
+ "display_name": "Convergent Enzymes Classification",
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/convergent_enzymes",
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.2975
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.24263690476190475
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.2075
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.162297619047619
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/cyano_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_uniref50/cyano_operonic_pair.json
new file mode 100644
index 0000000..584a51d
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/cyano_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "cyano_operonic_pair",
+ "display_name": "Cyano Operonic Pair",
+ "description": "Evaluate on Cyano operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/cyano_operonic_pair",
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7298850574712644
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9710829257965088
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4658696490551485
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9307838678359985
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.325255788906839
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8206521739130435
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.4031848326409708
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7222222222222222
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 4015.896484375
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.44346733668341703
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 8008.25048828125
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.2883986928104575
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9592391304347826
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.35232572385733507
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7245210727969349
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 176.74635314941406
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.45276447540269926
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 315.7908935546875
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3331197950032031
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.7065217391304348
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.3766570634605429
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7199233716475095
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 854446.125
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.45307917888563054
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 539589.25
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.3102409638554217
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8396739130434783
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.35933836728432894
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.4031848326409708
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.728735632183908
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.980133056640625
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.4666406554818573
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9235023260116577
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.32731253420908596
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8125
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.40918084231605883
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7283524904214559
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 14713.158203125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.46769230769230763
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 23292.853515625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.345679012345679
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.7228260869565217
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4074355169035304
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.7279693486590039
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 704.3648681640625
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.4682057513163224
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 1315.472412109375
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.3335256780150029
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.7853260869565217
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.40226336417241415
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.7195402298850575
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 15402820.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.4510385756676557
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 9822080.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.31020408163265306
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8260869565217391
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.3522933436299668
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.40918084231605883
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/ec_classification.json b/leaderboard/submissions/prot_t5_xl_uniref50/ec_classification.json
new file mode 100644
index 0000000..c9a8c4b
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/ec_classification.json
@@ -0,0 +1,62 @@
+{
+ "task": {
+ "id": "ec_classification",
+ "display_name": "EC Classification",
+ "description": "Evaluate on Enzyme Commission number classification task.",
+ "modality": "protein",
+ "type": "classification",
+ "datasets": [
+ {
+ "path": "tattabio/ec_classification",
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
+ }
+ ],
+ "primary_metric_id": "f1"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6875
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.62890625
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.6015625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.5325520833333333
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/ecoli_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_uniref50/ecoli_operonic_pair.json
new file mode 100644
index 0000000..ff1c161
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/ecoli_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "ecoli_operonic_pair",
+ "display_name": "E.coli Operonic Pair",
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/ecoli_operonic_pair",
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6541492814093649
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9587552547454834
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.6128899835796389
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9337368011474609
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.47776
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8546078992558672
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5843393095554787
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6328233657858137
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 5367.236328125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5791978246091095
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 8266.498046875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.41189267585206674
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.9753863766456783
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5170368370974361
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6481223922114048
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 246.59368896484375
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6126373626373627
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 339.4747314453125
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.46610928635413557
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8935317687464225
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.563578784188266
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6321279554937413
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 748599.9375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.594391785150079
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 589889.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.45372324389508595
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.8614768174012593
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.5455742497849516
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5843393095554787
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.672461752433936
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9680966138839722
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.6246070947462955
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9479633569717407
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.513852973771703
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.7962220950200344
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.6120691357655745
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6722299490032453
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 19384.970703125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.6298500357057844
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 22904.546875
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.539119804400978
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.7572982255294791
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.6180060687920407
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6719981455725544
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 1008.8643798828125
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.6255144032921811
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 1199.65380859375
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.5207460982108869
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.7830566685746995
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.6130483779613698
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6286509040333796
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 14326900.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5898066783831283
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 9925826.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.4255642911488714
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9605037206639955
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.5133120730662288
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.6180060687920407
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/euk_retrieval.json b/leaderboard/submissions/prot_t5_xl_uniref50/euk_retrieval.json
new file mode 100644
index 0000000..f990d80
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/euk_retrieval.json
@@ -0,0 +1,762 @@
+{
+ "task": {
+ "id": "euk_retrieval",
+ "display_name": "Euk Retrieval",
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
+ "modality": "protein",
+ "type": "retrieval",
+ "datasets": [
+ {
+ "path": "tattabio/euk_retrieval",
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
+ },
+ {
+ "path": "tattabio/euk_retrieval_qrels",
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
+ }
+ ],
+ "primary_metric_id": "map_at_5"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.93403
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.93219
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.9007
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.35922
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.48597
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.71785
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.36271
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.49199
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.7473
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.83794
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.7582
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.43505
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.9461414790996785
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.9461414790996785
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.9474721011861392
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6360571033125667
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.45784494058726904
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.41909498633906045
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6586594124801434
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.4984244373586518
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.44705403624496476
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6967911851047006
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.42357206240837786
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": -0.24784276595880533
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.14095037854383827
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.07113531637419679
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.27337398758596637
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.25837425131598796
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.3071068476320376
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.17651087109287392
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.7557208386859483
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.481523205045611
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": -0.14518612903534914
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.1471878058724989
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.07018518602956217
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.2691033520117919
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.2620639184111711
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.3079361861092893
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.16933644121075292
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.7551127432888673
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.49476345922374765
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": -0.11518415139468428
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.34608944934490565
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.4810929802383669
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.8218128321986103
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.2241056474991675
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.3806484972502808
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.6415372814462837
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": -0.012605789522662619
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.2495720768505924
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.3001699673500121
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7150455812547505
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.468623009585809
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": -0.3088280098255423
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7150455812547505
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.468623009585809
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": -0.3088280098255423
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7147040760070875
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.47258452887445196
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": -0.29486714517449736
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "ndcg_at_5",
+ "display_name": "ndcg_at_5",
+ "description": null,
+ "value": 0.6853
+ },
+ {
+ "id": "ndcg_at_10",
+ "display_name": "ndcg_at_10",
+ "description": null,
+ "value": 0.67354
+ },
+ {
+ "id": "ndcg_at_50",
+ "display_name": "ndcg_at_50",
+ "description": null,
+ "value": 0.64834
+ },
+ {
+ "id": "map_at_5",
+ "display_name": "map_at_5",
+ "description": null,
+ "value": 0.22427
+ },
+ {
+ "id": "map_at_10",
+ "display_name": "map_at_10",
+ "description": null,
+ "value": 0.30389
+ },
+ {
+ "id": "map_at_50",
+ "display_name": "map_at_50",
+ "description": null,
+ "value": 0.44304
+ },
+ {
+ "id": "recall_at_5",
+ "display_name": "recall_at_5",
+ "description": null,
+ "value": 0.24201
+ },
+ {
+ "id": "recall_at_10",
+ "display_name": "recall_at_10",
+ "description": null,
+ "value": 0.34737
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.57233
+ },
+ {
+ "id": "precision_at_5",
+ "display_name": "precision_at_5",
+ "description": null,
+ "value": 0.62186
+ },
+ {
+ "id": "precision_at_10",
+ "display_name": "precision_at_10",
+ "description": null,
+ "value": 0.55338
+ },
+ {
+ "id": "precision_at_50",
+ "display_name": "precision_at_50",
+ "description": null,
+ "value": 0.31569
+ },
+ {
+ "id": "mrr_at_5",
+ "display_name": "mrr_at_5",
+ "description": null,
+ "value": 0.794694533762058
+ },
+ {
+ "id": "mrr_at_10",
+ "display_name": "mrr_at_10",
+ "description": null,
+ "value": 0.799452610626244
+ },
+ {
+ "id": "mrr_at_50",
+ "display_name": "mrr_at_50",
+ "description": null,
+ "value": 0.8019849636131453
+ },
+ {
+ "id": "nauc_ndcg_at_5_max",
+ "display_name": "nauc_ndcg_at_5_max",
+ "description": null,
+ "value": 0.6780659145596223
+ },
+ {
+ "id": "nauc_ndcg_at_5_std",
+ "display_name": "nauc_ndcg_at_5_std",
+ "description": null,
+ "value": 0.4686194164677635
+ },
+ {
+ "id": "nauc_ndcg_at_5_diff1",
+ "display_name": "nauc_ndcg_at_5_diff1",
+ "description": null,
+ "value": -0.013171906236838202
+ },
+ {
+ "id": "nauc_ndcg_at_10_max",
+ "display_name": "nauc_ndcg_at_10_max",
+ "description": null,
+ "value": 0.6797508716872995
+ },
+ {
+ "id": "nauc_ndcg_at_10_std",
+ "display_name": "nauc_ndcg_at_10_std",
+ "description": null,
+ "value": 0.4588806794061712
+ },
+ {
+ "id": "nauc_ndcg_at_10_diff1",
+ "display_name": "nauc_ndcg_at_10_diff1",
+ "description": null,
+ "value": -0.026029553352657123
+ },
+ {
+ "id": "nauc_ndcg_at_50_max",
+ "display_name": "nauc_ndcg_at_50_max",
+ "description": null,
+ "value": 0.6532393400819172
+ },
+ {
+ "id": "nauc_ndcg_at_50_std",
+ "display_name": "nauc_ndcg_at_50_std",
+ "description": null,
+ "value": 0.45314823534779886
+ },
+ {
+ "id": "nauc_ndcg_at_50_diff1",
+ "display_name": "nauc_ndcg_at_50_diff1",
+ "description": null,
+ "value": 0.000803786369534146
+ },
+ {
+ "id": "nauc_map_at_5_max",
+ "display_name": "nauc_map_at_5_max",
+ "description": null,
+ "value": 0.4667523187539349
+ },
+ {
+ "id": "nauc_map_at_5_std",
+ "display_name": "nauc_map_at_5_std",
+ "description": null,
+ "value": 0.3541285833071939
+ },
+ {
+ "id": "nauc_map_at_5_diff1",
+ "display_name": "nauc_map_at_5_diff1",
+ "description": null,
+ "value": 0.2621735361977053
+ },
+ {
+ "id": "nauc_map_at_10_max",
+ "display_name": "nauc_map_at_10_max",
+ "description": null,
+ "value": 0.5293122208340053
+ },
+ {
+ "id": "nauc_map_at_10_std",
+ "display_name": "nauc_map_at_10_std",
+ "description": null,
+ "value": 0.46647778152678937
+ },
+ {
+ "id": "nauc_map_at_10_diff1",
+ "display_name": "nauc_map_at_10_diff1",
+ "description": null,
+ "value": 0.16392477080326415
+ },
+ {
+ "id": "nauc_map_at_50_max",
+ "display_name": "nauc_map_at_50_max",
+ "description": null,
+ "value": 0.704438550127725
+ },
+ {
+ "id": "nauc_map_at_50_std",
+ "display_name": "nauc_map_at_50_std",
+ "description": null,
+ "value": 0.44483676603571776
+ },
+ {
+ "id": "nauc_map_at_50_diff1",
+ "display_name": "nauc_map_at_50_diff1",
+ "description": null,
+ "value": 0.009175316156314641
+ },
+ {
+ "id": "nauc_recall_at_5_max",
+ "display_name": "nauc_recall_at_5_max",
+ "description": null,
+ "value": 0.41466058265722383
+ },
+ {
+ "id": "nauc_recall_at_5_std",
+ "display_name": "nauc_recall_at_5_std",
+ "description": null,
+ "value": 0.31066030507292597
+ },
+ {
+ "id": "nauc_recall_at_5_diff1",
+ "display_name": "nauc_recall_at_5_diff1",
+ "description": null,
+ "value": 0.27580011350774186
+ },
+ {
+ "id": "nauc_recall_at_10_max",
+ "display_name": "nauc_recall_at_10_max",
+ "description": null,
+ "value": 0.46165533483400295
+ },
+ {
+ "id": "nauc_recall_at_10_std",
+ "display_name": "nauc_recall_at_10_std",
+ "description": null,
+ "value": 0.37450617812296255
+ },
+ {
+ "id": "nauc_recall_at_10_diff1",
+ "display_name": "nauc_recall_at_10_diff1",
+ "description": null,
+ "value": 0.19872702396564773
+ },
+ {
+ "id": "nauc_recall_at_50_max",
+ "display_name": "nauc_recall_at_50_max",
+ "description": null,
+ "value": 0.6548039005583735
+ },
+ {
+ "id": "nauc_recall_at_50_std",
+ "display_name": "nauc_recall_at_50_std",
+ "description": null,
+ "value": 0.34672855192148566
+ },
+ {
+ "id": "nauc_recall_at_50_diff1",
+ "display_name": "nauc_recall_at_50_diff1",
+ "description": null,
+ "value": 0.0623575763027755
+ },
+ {
+ "id": "nauc_precision_at_5_max",
+ "display_name": "nauc_precision_at_5_max",
+ "description": null,
+ "value": 0.5457235360403251
+ },
+ {
+ "id": "nauc_precision_at_5_std",
+ "display_name": "nauc_precision_at_5_std",
+ "description": null,
+ "value": 0.43521558420021866
+ },
+ {
+ "id": "nauc_precision_at_5_diff1",
+ "display_name": "nauc_precision_at_5_diff1",
+ "description": null,
+ "value": -0.2043697991483657
+ },
+ {
+ "id": "nauc_precision_at_10_max",
+ "display_name": "nauc_precision_at_10_max",
+ "description": null,
+ "value": 0.4235593791032231
+ },
+ {
+ "id": "nauc_precision_at_10_std",
+ "display_name": "nauc_precision_at_10_std",
+ "description": null,
+ "value": 0.3456476885464854
+ },
+ {
+ "id": "nauc_precision_at_10_diff1",
+ "display_name": "nauc_precision_at_10_diff1",
+ "description": null,
+ "value": -0.26441406895731684
+ },
+ {
+ "id": "nauc_precision_at_50_max",
+ "display_name": "nauc_precision_at_50_max",
+ "description": null,
+ "value": 0.10170718952519693
+ },
+ {
+ "id": "nauc_precision_at_50_std",
+ "display_name": "nauc_precision_at_50_std",
+ "description": null,
+ "value": -0.0021640216870025875
+ },
+ {
+ "id": "nauc_precision_at_50_diff1",
+ "display_name": "nauc_precision_at_50_diff1",
+ "description": null,
+ "value": -0.2410207582674104
+ },
+ {
+ "id": "nauc_mrr_at_5_max",
+ "display_name": "nauc_mrr_at_5_max",
+ "description": null,
+ "value": 0.7652388812149624
+ },
+ {
+ "id": "nauc_mrr_at_5_std",
+ "display_name": "nauc_mrr_at_5_std",
+ "description": null,
+ "value": 0.4064602524914446
+ },
+ {
+ "id": "nauc_mrr_at_5_diff1",
+ "display_name": "nauc_mrr_at_5_diff1",
+ "description": null,
+ "value": 0.17155691541647752
+ },
+ {
+ "id": "nauc_mrr_at_10_max",
+ "display_name": "nauc_mrr_at_10_max",
+ "description": null,
+ "value": 0.7654107127585745
+ },
+ {
+ "id": "nauc_mrr_at_10_std",
+ "display_name": "nauc_mrr_at_10_std",
+ "description": null,
+ "value": 0.4008546845028568
+ },
+ {
+ "id": "nauc_mrr_at_10_diff1",
+ "display_name": "nauc_mrr_at_10_diff1",
+ "description": null,
+ "value": 0.17755957672456033
+ },
+ {
+ "id": "nauc_mrr_at_50_max",
+ "display_name": "nauc_mrr_at_50_max",
+ "description": null,
+ "value": 0.7669372470722907
+ },
+ {
+ "id": "nauc_mrr_at_50_std",
+ "display_name": "nauc_mrr_at_50_std",
+ "description": null,
+ "value": 0.40628434902865657
+ },
+ {
+ "id": "nauc_mrr_at_50_diff1",
+ "display_name": "nauc_mrr_at_50_diff1",
+ "description": null,
+ "value": 0.17922685800302057
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/fefe_phylogeny.json b/leaderboard/submissions/prot_t5_xl_uniref50/fefe_phylogeny.json
new file mode 100644
index 0000000..a5fcaff
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/fefe_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "fefe_phylogeny",
+ "display_name": "FeFeHydrogenase Phylogeny",
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/fefe_phylogeny_sequences",
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+ },
+ {
+ "path": "tattabio/fefe_phylogeny_distances",
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.3028987188957946
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.6208332719687013
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.5492170350160599
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.6208332719687013
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.5203074993813921
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.7065064377307892
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.6427521570554848
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.7065064377307892
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/modac_paralogy_bigene.json b/leaderboard/submissions/prot_t5_xl_uniref50/modac_paralogy_bigene.json
new file mode 100644
index 0000000..736354e
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/modac_paralogy_bigene.json
@@ -0,0 +1,97 @@
+{
+ "task": {
+ "id": "modac_paralogy_bigene",
+ "display_name": "ModAC Paralogy BiGene",
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+ "modality": "protein",
+ "type": "bigene_mining",
+ "datasets": [
+ {
+ "path": "tattabio/modac_paralogy_bigene",
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
+ }
+ ],
+ "primary_metric_id": "recall_at_50"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 4.4952467261118094e-7
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 8.984467652322665e-7
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.0006702412868632708
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.07238605898123325
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "precision",
+ "display_name": "precision",
+ "description": null,
+ "value": 0.00138083916941656
+ },
+ {
+ "id": "recall",
+ "display_name": "recall",
+ "description": null,
+ "value": 0.004021447721179625
+ },
+ {
+ "id": "f1",
+ "display_name": "f1",
+ "description": null,
+ "value": 0.0017770613376799141
+ },
+ {
+ "id": "accuracy",
+ "display_name": "accuracy",
+ "description": null,
+ "value": 0.004021447721179625
+ },
+ {
+ "id": "recall_at_50",
+ "display_name": "recall_at_50",
+ "description": null,
+ "value": 0.2754691689008043
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/mopb_clustering.json b/leaderboard/submissions/prot_t5_xl_uniref50/mopb_clustering.json
new file mode 100644
index 0000000..60b0cae
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/mopb_clustering.json
@@ -0,0 +1,50 @@
+{
+ "task": {
+ "id": "mopb_clustering",
+ "display_name": "MopB Clustering",
+ "description": "Evaluate on MopB clustering task.",
+ "modality": "protein",
+ "type": "clustering",
+ "datasets": [
+ {
+ "path": "tattabio/mopb_clustering",
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+ }
+ ],
+ "primary_metric_id": "v_measure"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.8721358813405494
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "v_measure",
+ "display_name": "v_measure",
+ "description": null,
+ "value": 0.8476433301105049
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/rpob_arch_phylogeny.json b/leaderboard/submissions/prot_t5_xl_uniref50/rpob_arch_phylogeny.json
new file mode 100644
index 0000000..40647fe
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/rpob_arch_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_arch_phylogeny",
+ "display_name": "RpoB Archaeal Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+ },
+ {
+ "path": "tattabio/rpob_arch_phylogeny_distances",
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.16530787131181637
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.17285646620196074
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.20022744097724682
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.20022744097724682
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2057601718535701
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.3389561715648331
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.3019024422360054
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.3389561715648331
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/rpob_bac_phylogeny.json b/leaderboard/submissions/prot_t5_xl_uniref50/rpob_bac_phylogeny.json
new file mode 100644
index 0000000..4a7f7af
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/rpob_bac_phylogeny.json
@@ -0,0 +1,90 @@
+{
+ "task": {
+ "id": "rpob_bac_phylogeny",
+ "display_name": "RpoB Bacterial Phylogeny",
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+ "modality": "protein",
+ "type": "eds",
+ "datasets": [
+ {
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+ },
+ {
+ "path": "tattabio/rpob_bac_phylogeny_distances",
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+ }
+ ],
+ "primary_metric_id": "top_corr"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.1299405606726582
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.17513769500057838
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.20887166155118908
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.20887166155118908
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim",
+ "display_name": "cos_sim",
+ "description": null,
+ "value": 0.2797577428704427
+ },
+ {
+ "id": "manhattan",
+ "display_name": "manhattan",
+ "description": null,
+ "value": 0.2883505010928631
+ },
+ {
+ "id": "euclidean",
+ "display_name": "euclidean",
+ "description": null,
+ "value": 0.27367273640477174
+ },
+ {
+ "id": "top_corr",
+ "display_name": "top_corr",
+ "description": null,
+ "value": 0.2883505010928631
+ }
+ ]
+ }
+ ]
+}
diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/vibrio_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_uniref50/vibrio_operonic_pair.json
new file mode 100644
index 0000000..f882de0
--- /dev/null
+++ b/leaderboard/submissions/prot_t5_xl_uniref50/vibrio_operonic_pair.json
@@ -0,0 +1,386 @@
+{
+ "task": {
+ "id": "vibrio_operonic_pair",
+ "display_name": "Vibrio Operonic Pair",
+ "description": "Evaluate on Vibrio operonic pair classification task.",
+ "modality": "protein",
+ "type": "pair_classification",
+ "datasets": [
+ {
+ "path": "tattabio/vibrio_operonic_pair",
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
+ }
+ ],
+ "primary_metric_id": "top_ap"
+ },
+ "model": {
+ "hf_name": "Rostlab/prot_t5_xl_uniref50",
+ "revision": "...",
+ "num_layers": 24,
+ "num_params": 1208141824,
+ "embed_dim": 1024
+ },
+ "dgeb_version": "0.0.0",
+ "results": [
+ {
+ "layer_number": 12,
+ "layer_display_name": "12",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.6910221531286436
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9639373421669006
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5603482390185991
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9406915307044983
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.43276283618581907
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.7946127946127947
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5243404260729363
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.6739214924212981
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 4568.9521484375
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5292119565217391
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 7354.16015625
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.3794447150511447
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.8742985409652076
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.4665911372163652
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6770307034589973
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 226.07009887695312
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.5547385620915032
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 304.6053771972656
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.4360950545921644
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.7620650953984287
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.5035992709654147
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6669257675864749
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 836162.375
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5298930144745122
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 519992.53125
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.36816790555312634
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9450056116722784
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4635132398582616
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5243404260729363
+ }
+ ]
+ },
+ {
+ "layer_number": 23,
+ "layer_display_name": "23",
+ "metrics": [
+ {
+ "id": "cos_sim_accuracy",
+ "display_name": "cos_sim_accuracy",
+ "description": null,
+ "value": 0.7026816945200155
+ },
+ {
+ "id": "cos_sim_accuracy_threshold",
+ "display_name": "cos_sim_accuracy_threshold",
+ "description": null,
+ "value": 0.9759871959686279
+ },
+ {
+ "id": "cos_sim_f1",
+ "display_name": "cos_sim_f1",
+ "description": null,
+ "value": 0.5580322828593389
+ },
+ {
+ "id": "cos_sim_f1_threshold",
+ "display_name": "cos_sim_f1_threshold",
+ "description": null,
+ "value": 0.9470945596694946
+ },
+ {
+ "id": "cos_sim_precision",
+ "display_name": "cos_sim_precision",
+ "description": null,
+ "value": 0.4243132670952659
+ },
+ {
+ "id": "cos_sim_recall",
+ "display_name": "cos_sim_recall",
+ "description": null,
+ "value": 0.8148148148148148
+ },
+ {
+ "id": "cos_sim_ap",
+ "display_name": "cos_sim_ap",
+ "description": null,
+ "value": 0.5421318920287235
+ },
+ {
+ "id": "manhattan_accuracy",
+ "display_name": "manhattan_accuracy",
+ "description": null,
+ "value": 0.7022930431403032
+ },
+ {
+ "id": "manhattan_accuracy_threshold",
+ "display_name": "manhattan_accuracy_threshold",
+ "description": null,
+ "value": 17089.51953125
+ },
+ {
+ "id": "manhattan_f1",
+ "display_name": "manhattan_f1",
+ "description": null,
+ "value": 0.5625282167042889
+ },
+ {
+ "id": "manhattan_f1_threshold",
+ "display_name": "manhattan_f1_threshold",
+ "description": null,
+ "value": 21019.880859375
+ },
+ {
+ "id": "manhattan_precision",
+ "display_name": "manhattan_precision",
+ "description": null,
+ "value": 0.4705438066465257
+ },
+ {
+ "id": "manhattan_recall",
+ "display_name": "manhattan_recall",
+ "description": null,
+ "value": 0.6992143658810326
+ },
+ {
+ "id": "manhattan_ap",
+ "display_name": "manhattan_ap",
+ "description": null,
+ "value": 0.5427024596748751
+ },
+ {
+ "id": "euclidean_accuracy",
+ "display_name": "euclidean_accuracy",
+ "description": null,
+ "value": 0.6991838321026039
+ },
+ {
+ "id": "euclidean_accuracy_threshold",
+ "display_name": "euclidean_accuracy_threshold",
+ "description": null,
+ "value": 779.5919189453125
+ },
+ {
+ "id": "euclidean_f1",
+ "display_name": "euclidean_f1",
+ "description": null,
+ "value": 0.565200158541419
+ },
+ {
+ "id": "euclidean_f1_threshold",
+ "display_name": "euclidean_f1_threshold",
+ "description": null,
+ "value": 1155.1326904296875
+ },
+ {
+ "id": "euclidean_precision",
+ "display_name": "euclidean_precision",
+ "description": null,
+ "value": 0.4368872549019608
+ },
+ {
+ "id": "euclidean_recall",
+ "display_name": "euclidean_recall",
+ "description": null,
+ "value": 0.8002244668911336
+ },
+ {
+ "id": "euclidean_ap",
+ "display_name": "euclidean_ap",
+ "description": null,
+ "value": 0.541038608227211
+ },
+ {
+ "id": "dot_accuracy",
+ "display_name": "dot_accuracy",
+ "description": null,
+ "value": 0.6634279051690634
+ },
+ {
+ "id": "dot_accuracy_threshold",
+ "display_name": "dot_accuracy_threshold",
+ "description": null,
+ "value": 14770648.0
+ },
+ {
+ "id": "dot_f1",
+ "display_name": "dot_f1",
+ "description": null,
+ "value": 0.5273250239693194
+ },
+ {
+ "id": "dot_f1_threshold",
+ "display_name": "dot_f1_threshold",
+ "description": null,
+ "value": 9928615.0
+ },
+ {
+ "id": "dot_precision",
+ "display_name": "dot_precision",
+ "description": null,
+ "value": 0.36863270777479895
+ },
+ {
+ "id": "dot_recall",
+ "display_name": "dot_recall",
+ "description": null,
+ "value": 0.9259259259259259
+ },
+ {
+ "id": "dot_ap",
+ "display_name": "dot_ap",
+ "description": null,
+ "value": 0.4423444299763757
+ },
+ {
+ "id": "top_ap",
+ "display_name": "top_ap",
+ "description": null,
+ "value": 0.5427024596748751
+ }
+ ]
+ }
+ ]
+}
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..40a0a8e
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,129 @@
+[build-system]
+requires = ["setuptools>=42", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "dgeb"
+version = "0.0.10"
+description = "Diverse Genomic Embedding Benchmark"
+readme = "README.md"
+license = { file = "LICENSE" }
+keywords = [
+ "scientific software",
+ "genomic embeddings",
+ "machine learning",
+ "benchmark",
+]
+classifiers = [
+ "Development Status :: 2 - Pre-Alpha",
+ "Environment :: Console",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Information Technology",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python",
+]
+dependencies = [
+ "datasets>=2.20.0",
+ "matplotlib>=3.9.0",
+ "numpy>=2.0.0",
+ "pandas>=2.2.2",
+ "pydantic>=2.7.4",
+ "pytrec_eval_terrier>=0.5",
+ "rich>=13.7.1",
+ "scikit_learn>=1.5.0",
+ "scipy>=1.13.1",
+ "seaborn>=0.13.2",
+ "torch>=2.3.1",
+ "tqdm>=4.66.4",
+ "transformers>=4.41.2",
+]
+
+[project.urls]
+homepage = "https://github.com/TattaBio/DGEB"
+"Huggingface Organization" = "https://huggingface.co/tattabio"
+"Source Code" = "https://github.com/TattaBio/DGEB"
+
+[project.scripts]
+dgeb = "dgeb.cli:main"
+
+[project.optional-dependencies]
+dev = ["ruff>=0.0.254", "pytest", "pytest-xdist"]
+
+[tool.setuptools.packages.find]
+exclude = ["tests", "results", "leaderboard", "Dockerfile"]
+
+[tool.setuptools.package-data]
+"*" = ["*.json"]
+
+[tool.ruff]  # NOTE(review): ruff prefers a sibling ruff.toml over this table - confirm which should win
+target-version = "py39"  # code already uses 3.9+ syntax such as list[str] annotations
+exclude = [".venv", "build/"]
+line-length = 88
+indent-width = 4
+
+[tool.semantic_release]
+version_toml = ["pyproject.toml:project.version"]  # file:key whose value is rewritten on release
+build_command = "python -m pip install build; python -m build"  # builds the dist/ artifacts
+commit_message = "{version}\n\nAutomatically generated by python-semantic-release [skip ci]"
+logging_use_named_masks = false
+major_on_zero = true  # breaking changes bump the major version even while on 0.y.z
+allow_zero_version = true
+no_git_verify = false
+tag_format = "v{version}"  # release tags are the version prefixed with "v"
+
+[tool.semantic_release.branches.main]
+match = "(main|master)"
+prerelease_token = "rc"
+prerelease = false
+
+[tool.semantic_release.changelog]
+template_dir = "templates"
+changelog_file = "CHANGELOG.md"
+exclude_commit_patterns = []
+
+[tool.semantic_release.changelog.environment]
+block_start_string = "{%"
+block_end_string = "%}"
+variable_start_string = "{{"
+variable_end_string = "}}"
+comment_start_string = "{#"
+comment_end_string = "#}"
+trim_blocks = false
+lstrip_blocks = false
+newline_sequence = "\n"
+keep_trailing_newline = false
+extensions = []
+autoescape = true
+
+[tool.semantic_release.commit_author]
+env = "GIT_COMMIT_AUTHOR"  # environment variable consulted first for the author string
+default = "semantic-release <semantic-release>"  # fallback; must be in "Name <email>" form
+
+[tool.semantic_release.commit_parser_options]
+allowed_tags = [  # commit-type prefixes the commit parser recognises
+ "build",
+ "chore",
+ "ci",
+ "docs",
+ "feat",
+ "fix",
+ "perf",
+ "style",
+ "refactor",
+ "test",
+]
+minor_tags = ["feat"]  # commit types that produce a minor version bump
+patch_tags = ["fix", "perf"]  # commit types that produce a patch version bump
+default_bump_level = 0  # presumably "no release" for all other types - confirm in the tool docs
+
+[tool.semantic_release.remote]
+name = "origin"
+type = "github"
+ignore_token_for_push = false
+insecure = false
+
+[tool.semantic_release.publish]
+dist_glob_patterns = ["dist/*"]
+upload_to_vcs_release = true
diff --git a/ruff.toml b/ruff.toml
new file mode 100644
index 0000000..5ff69db
--- /dev/null
+++ b/ruff.toml
@@ -0,0 +1,8 @@
+exclude = [
+ ".venv",
+ "build/",
+]
+# Same as Black.
+line-length = 88
+indent-width = 4
+
diff --git a/scripts/eval_all_models.py b/scripts/eval_all_models.py
new file mode 100644
index 0000000..8deba28
--- /dev/null
+++ b/scripts/eval_all_models.py
@@ -0,0 +1,86 @@
+"""Script to replicate results from the DGEB paper."""
+
+import torch
+import dgeb
+from functools import partial
+
+
+ALL_DEVICES = list(range(torch.cuda.device_count()))  # CUDA device indices; [] if none
+DEFAULT_BATCH_SIZE = 64  # bound below as get_model's default batch_size
+DEFAULT_SEQ_LEN = 1024  # bound below as get_model's default max_seq_length
+
+
+get_model = partial(  # dgeb.get_model with the script-wide defaults pre-bound
+ dgeb.get_model,
+ devices=ALL_DEVICES,  # overridden with devices=[0] for some models in main()
+ batch_size=DEFAULT_BATCH_SIZE,  # overridden per call for several models in main()
+ max_seq_length=DEFAULT_SEQ_LEN,  # a call-time max_seq_length keyword replaces this
+)
+
+
+def main():
+ ######################### Protein Models #########################
+ protein_tasks = dgeb.get_tasks_by_modality(dgeb.Modality.PROTEIN)
+ protein_evaluation = dgeb.DGEB(tasks=protein_tasks)
+
+ # ESM models.
+ protein_evaluation.run(get_model("facebook/esm2_t6_8M_UR50D"))
+ protein_evaluation.run(get_model("facebook/esm2_t12_35M_UR50D"))
+ protein_evaluation.run(get_model("facebook/esm2_t30_150M_UR50D"))
+ protein_evaluation.run(get_model("facebook/esm2_t33_650M_UR50D", batch_size=32))
+ protein_evaluation.run(get_model("facebook/esm2_t36_3B_UR50D", batch_size=1))
+
+ # ESM3 models.
+ protein_evaluation.run(get_model("esm3_sm_open_v1", batch_size=1, devices=[0]))
+
+ # ProtT5 models.
+ protein_evaluation.run(get_model("Rostlab/prot_t5_xl_uniref50", batch_size=32))
+ protein_evaluation.run(get_model("Rostlab/prot_t5_xl_bfd", batch_size=32))
+
+ # ProGen2 models.
+ protein_evaluation.run(get_model("hugohrban/progen2-small"))
+ protein_evaluation.run(get_model("hugohrban/progen2-medium", batch_size=32))
+ protein_evaluation.run(get_model("hugohrban/progen2-large", batch_size=1))
+ protein_evaluation.run(get_model("hugohrban/progen2-xlarge", batch_size=1))
+
+ ######################### DNA Models #########################
+ dna_tasks = dgeb.get_tasks_by_modality(dgeb.Modality.DNA)
+ dna_evaluation = dgeb.DGEB(tasks=dna_tasks)
+
+ # Evo models
+ dna_evaluation.run(
+ get_model(
+ "togethercomputer/evo-1-8k-base", batch_size=1, seq_len=8192, devices=[0]
+ )
+ )
+ # 131k will OOM so we use half this length.
+ evo_131k_max_seq_len = int(131072 / 2)
+ dna_evaluation.run(
+ get_model(
+ "togethercomputer/evo-1-131k-base",
+ batch_size=1,
+ seq_len=evo_131k_max_seq_len,
+ devices=[0],
+ )
+ )
+
+ # Nucleotide Transformer models.
+ dna_evaluation.run(
+ get_model("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species")
+ )
+ dna_evaluation.run(
+ get_model("InstaDeepAI/nucleotide-transformer-v2-100m-multi-species")
+ )
+ dna_evaluation.run(
+ get_model("InstaDeepAI/nucleotide-transformer-v2-250m-multi-species")
+ )
+ dna_evaluation.run(
+ get_model("InstaDeepAI/nucleotide-transformer-v2-500m-multi-species")
+ )
+ dna_evaluation.run(
+ get_model("InstaDeepAI/nucleotide-transformer-2.5b-multi-species", batch_size=1)
+ )
+
+
+if __name__ == "__main__":  # run the sweep only when executed as a script, not on import
+ main()
diff --git a/scripts/plot_benchmarks.py b/scripts/plot_benchmarks.py
new file mode 100644
index 0000000..009476e
--- /dev/null
+++ b/scripts/plot_benchmarks.py
@@ -0,0 +1,150 @@
+"""
+Given a directory of results, plot the benchmarks for each task as a bar chart and line chart.
+"""
+
+import argparse
+import os
+from typing import Optional
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+
+from dgeb import TaskResult, get_all_tasks, get_output_folder, get_tasks_by_name
+
+ALL_TASKS = [task.metadata.id for task in get_all_tasks()]
+
+
+def plot_benchmarks(
+ results_dir,
+ task_ids: Optional[list[str]] = None,
+ output="benchmarks.png",
+ model_substring=None,
+):
+ models = os.listdir(results_dir)
+ all_results = []
+ tasks = get_all_tasks() if task_ids is None else get_tasks_by_name(task_ids)
+ for model_name in models:
+ if model_substring is not None and all(
+ substr not in model_name for substr in model_substring
+ ):
+ continue
+
+ for task in tasks:
+ if task.metadata.display_name == "NoOp Task":
+ continue
+ filepath = get_output_folder(model_name, task, results_dir, create=False)
+ # if the file does not exist, skip
+ if not os.path.exists(filepath):
+ continue
+
+ with open(filepath) as f:
+ task_result = TaskResult.model_validate_json(f.read())
+ num_params = task_result.model["num_params"]
+ primary_metric_id = task_result.task.primary_metric_id
+ main_scores = [
+ metric.value
+ for layer_result in task_result.results
+ for metric in layer_result.metrics
+ if metric.id == primary_metric_id
+ ]
+ best_score = max(main_scores)
+ all_results.append(
+ {
+ "task": task.metadata.display_name,
+ "model": model_name,
+ "num_params": num_params,
+ "score": best_score,
+ }
+ )
+
+ results_df = pd.DataFrame(all_results)
+ # order the models by ascending number of parameters
+ results_df["num_params"] = results_df["num_params"].astype(int)
+ results_df = results_df.sort_values(by="num_params")
+ # number of tasks
+ n_tasks = len(set(results_df["task"]))
+
+ _, ax = plt.subplots(2, n_tasks, figsize=(5 * n_tasks, 10))
+
+ for i, task in enumerate(set(results_df["task"])):
+ if n_tasks > 1:
+ sns.barplot(
+ x="model",
+ y="score",
+ data=results_df[results_df["task"] == task],
+ ax=ax[0][i],
+ )
+ ax[0][i].set_title(task)
+ # rotate the x axis labels
+ for tick in ax[0][i].get_xticklabels():
+ tick.set_rotation(90)
+ else:
+ sns.barplot(
+ x="model",
+ y="score",
+ data=results_df[results_df["task"] == task],
+ ax=ax[0],
+ )
+ ax[0].set_title(task)
+ # rotate the x axis labels
+ for tick in ax[0].get_xticklabels():
+ tick.set_rotation(90)
+
+ # make a line graph with number of parameters on x axis for each task in the second row of figures
+ for i, task in enumerate(set(results_df["task"])):
+ if n_tasks > 1:
+ sns.lineplot(
+ x="num_params",
+ y="score",
+ data=results_df[results_df["task"] == task],
+ ax=ax[1][i],
+ )
+ ax[1][i].set_title(task)
+ ax[1][i].set_xlabel("Number of parameters")
+ else:
+ sns.lineplot(
+ x="num_params",
+ y="score",
+ data=results_df[results_df["task"] == task],
+ ax=ax[1],
+ )
+ ax[1].set_title(task)
+ ax[1].set_xlabel("Number of parameters")
+
+ plt.tight_layout()
+ plt.savefig(output)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "-d",
+ "--results_dir",
+ type=str,
+ default="results",
+ help="Directory containing the results of the benchmarking",
+ )
+ parser.add_argument(
+ "-t",
+ "--tasks",
+ type=lambda s: [item for item in s.split(",")],
+ default=None,
+ help=f"Comma separated list of tasks to plot. Choose from {ALL_TASKS} or do not specify to plot all tasks. ",
+ )
+ parser.add_argument(
+ "-o",
+ "--output",
+ type=str,
+ default="benchmarks.png",
+ help="Output file for the plot",
+ )
+ parser.add_argument(
+ "--model_substring",
+ type=lambda s: [item for item in s.split(",")],
+ default=None,
+ help="Comma separated list of model substrings. Only plot results for models containing this substring",
+ )
+ args = parser.parse_args()
+
+ plot_benchmarks(args.results_dir, args.tasks, args.output, args.model_substring)