diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..efb1e94 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +end_of_line = lf +insert_final_newline = true + +[*.py] +charset = utf-8 +indent_style = space +indent_size = 4 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f675c0d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,34 @@ +name: CI for DGEB + +on: + push: + branches: ["**"] + pull_request: + branches: ["**"] + +permissions: + id-token: write + contents: read + actions: write + pull-requests: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + - uses: yezz123/setup-uv@v4 + with: + uv-venv: ".geb_venv" + - run: uv pip install ruff + - run: ruff format --check . # fail on unformatted files instead of silently rewriting them in the runner + - run: ruff check . 
+ # TODO: pytest + # TODO: pyright diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..2e34f9d --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,50 @@ +# This workflow will +# - Find the latest version tag based on the commit history +# - Create a git tag for the new version +# - Update the version number in pyproject.toml based on the commit history +# - Upload the package to PyPI +# - Create a release on GitHub + +# This workflow requires the following secrets to be set: +# - a GitHub personal access token with the `repo` scope called `GH_TOKEN` +# - and that you set up trusted publishing using PyPI as described here: https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/ + +name: Release +on: + push: + branches: + - main + +jobs: + release: + runs-on: ubuntu-latest + concurrency: release + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing using PyPI + contents: write + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GH_TOKEN }} + + - name: Python Semantic Release + id: release + uses: python-semantic-release/python-semantic-release@v9.8.3 + with: + github_token: ${{ secrets.GH_TOKEN }} + + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@v1.9.0 + if: steps.release.outputs.released == 'true' + # This action supports PyPI's trusted publishing implementation, which allows authentication to PyPI without a manually + # configured API token or username/password combination. To perform trusted publishing with this action, your project's + # publisher must already be configured on PyPI. 
+ + - name: Publish package distributions to GitHub Releases + uses: python-semantic-release/upload-to-gh-release@v9.8.3 + if: steps.release.outputs.released == 'true' + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + tag: ${{ steps.release.outputs.tag }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bb077e1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.venv/ +__pycache__/ +.vscode/ +build/ +dist/ +*egg-info/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f73d605 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,197 @@ +# CHANGELOG + +## v0.0.10 (2024-07-09) + +### Fix + +* fix: remove noop task ([`7d5b393`](https://github.com/TattaBio/DGEB/commit/7d5b3933f48e51fb4c71945f01af2cc5a7dba3ed)) + +## v0.0.9 (2024-07-09) + +### Fix + +* fix: update cli script name ([`633e14d`](https://github.com/TattaBio/DGEB/commit/633e14db7e1eed0d9606ef1097e369e4f5e245f5)) + +### Unknown + +* 0.0.9 + +Automatically generated by python-semantic-release [skip ci] ([`a8c1a96`](https://github.com/TattaBio/DGEB/commit/a8c1a96d18af589795bc9532fee8ad9764cd52ed)) + +* Merge pull request #9 from TattaBio/andre + +Update ModAC main metric ([`3c67e65`](https://github.com/TattaBio/DGEB/commit/3c67e6559d0e49d90ffe2858eb9e287abd1b6e6c)) + +* ruff format ([`78461ac`](https://github.com/TattaBio/DGEB/commit/78461ac901b8617821ca15e543c0dd8e2dbf6e95)) + +* update top_k=50 for modac ([`2c3dcd5`](https://github.com/TattaBio/DGEB/commit/2c3dcd5856b6679a80999b3c4b3512876ac0b58d)) + +* remove revision ([`2d587da`](https://github.com/TattaBio/DGEB/commit/2d587daa79f32c49201b419892b7f95f3dc5eedb)) + +* Merge pull request #8 from TattaBio/cli + +Cli & cleanup ([`9698c8f`](https://github.com/TattaBio/DGEB/commit/9698c8f5ab0bab6c3c0a76d59dc29cfd964ebf15)) + +* Exclude leaderboard files in anticipation of merging leaderboard PR ([`58bdcba`](https://github.com/TattaBio/DGEB/commit/58bdcba11af605bdef11cfecc087c9efb0e97b72)) + +* Update README 
([`d323905`](https://github.com/TattaBio/DGEB/commit/d3239059e29fb149f9c348b951bc4988d8b9f8dc)) + +* cleanup ([`1f0fe16`](https://github.com/TattaBio/DGEB/commit/1f0fe16de6910200d88c918b08cbf26067313469)) + +* Add cli to pyproject.toml ([`5404218`](https://github.com/TattaBio/DGEB/commit/54042181ef54c11db74ebb53c403b21a8114c02b)) + +* Remove Dataset 'description' which does not exist on model. ([`46b0040`](https://github.com/TattaBio/DGEB/commit/46b0040a302384fa00791bbfdd6fae24645d6a6d)) + +* Merge pull request #7 from TattaBio/add_dna_tasks + +Add dna tasks ([`cfc5799`](https://github.com/TattaBio/DGEB/commit/cfc57995f9b1e584bb60e998f9cf68bea5ec39fa)) + +* ruff ([`f9fa125`](https://github.com/TattaBio/DGEB/commit/f9fa12502df9837b5381da17b17198f3667c4911)) + +* adding rpob datasets and updating ec revision ([`8f9cc3f`](https://github.com/TattaBio/DGEB/commit/8f9cc3f819beb70f51a5cc59f16c65bffceedbad)) + +* Update README.md ([`d5d7c24`](https://github.com/TattaBio/DGEB/commit/d5d7c24215d347fc17d6016ac2a3eddfb3cf2a12)) + +* Merge pull request #4 from TattaBio/andre + +Add dataset revisions ([`95b6f11`](https://github.com/TattaBio/DGEB/commit/95b6f11ffee3dccc45ab119ac4f602066750f7ef)) + +* add dataset revision numbers ([`7e069a2`](https://github.com/TattaBio/DGEB/commit/7e069a237de5391e7c6b7f09c108292ac10c25af)) + +* Merge pull request #3 from TattaBio/andre + +Update readme and task imports ([`ade30a8`](https://github.com/TattaBio/DGEB/commit/ade30a856deffe35ddf57d16705d030b6d0192c8)) + +* rename dgeb ([`6b1c2ee`](https://github.com/TattaBio/DGEB/commit/6b1c2ee76798d89e487386116efe23c90d2d039c)) + +* add intro ([`a2280dd`](https://github.com/TattaBio/DGEB/commit/a2280dd732984d58caed45b9a429038c0d81851a)) + +* update readme and tasks ([`00e0a79`](https://github.com/TattaBio/DGEB/commit/00e0a791f070ca37e5b92770b3363ef066e2789f)) + +* Merge pull request #2 from TattaBio/andre + +rename dgeb imports 
([`1894ba9`](https://github.com/TattaBio/DGEB/commit/1894ba9a92a8f369053ddb9d351ae48fd8e2d674)) + +* rename dgeb imports ([`5f1f8b8`](https://github.com/TattaBio/DGEB/commit/5f1f8b850f271cd6785291e3feb2c2d4bf979f9c)) + +## v0.0.8 (2024-07-01) + +### Fix + +* fix: don't run ci on release of new version ([`fa97104`](https://github.com/TattaBio/DGEB/commit/fa971049429975d06c8aca086e86b19d92383969)) + +### Unknown + +* 0.0.8 + +Automatically generated by python-semantic-release [skip ci] ([`8dc15d3`](https://github.com/TattaBio/DGEB/commit/8dc15d34c6317087253950893974d16b9f75a17c)) + +## v0.0.7 (2024-07-01) + +### Fix + +* fix: try again ([`e7d0ecd`](https://github.com/TattaBio/DGEB/commit/e7d0ecdcb63e909f9ab727f11fb3fd57414d2fa5)) + +* fix: edit readme to see if job still works with restricted permissions ([`93cd728`](https://github.com/TattaBio/DGEB/commit/93cd728c8a632b9bed611c55dace2e2ffb103410)) + +### Unknown + +* 0.0.7 + +Automatically generated by python-semantic-release ([`9808d4f`](https://github.com/TattaBio/DGEB/commit/9808d4f328a577c066affd34d408ad26eb6098d0)) + +* Merge pull request #1 from TattaBio/edit-readme + +fix: edit readme to see if job still works with restricted permissions ([`c45599c`](https://github.com/TattaBio/DGEB/commit/c45599cf9628155603245f906c09cf6483cffce8)) + +## v0.0.6 (2024-07-01) + +### Fix + +* fix: nevermind that broke it ([`ec33a1c`](https://github.com/TattaBio/DGEB/commit/ec33a1c6539ac1fb2710869a2d436483a02236e0)) + +* fix: see if I can remove this line ([`246d4e9`](https://github.com/TattaBio/DGEB/commit/246d4e9841a83d18217506d46f211f1341c63526)) + +### Unknown + +* 0.0.6 + +Automatically generated by python-semantic-release ([`1b28df5`](https://github.com/TattaBio/DGEB/commit/1b28df559c95db0aea95111a5f27d01645d23786)) + +## v0.0.5 (2024-07-01) + +### Fix + +* fix: try fixing release to handle protected branch ([`5cedad3`](https://github.com/TattaBio/DGEB/commit/5cedad3e9f34d249eda9257e3c21fc8443d000cf)) + +* fix: another 
change... ([`c5ad3f0`](https://github.com/TattaBio/DGEB/commit/c5ad3f098d36e25afdf4fa9aae20967eb968568e)) + +* fix: update pip install command in readme ([`ff90791`](https://github.com/TattaBio/DGEB/commit/ff90791398f9a9b907c308400f88811a8f8633dc)) + +### Unknown + +* 0.0.5 + +Automatically generated by python-semantic-release ([`ec24ca3`](https://github.com/TattaBio/DGEB/commit/ec24ca343b49bee85c72907554772976f02eab1a)) + +## v0.0.4 (2024-07-01) + +### Fix + +* fix: move gh token to env ([`95e292c`](https://github.com/TattaBio/DGEB/commit/95e292c46f7908659d46bc093ef4903609f1edc5)) + +### Unknown + +* 0.0.4 + +Automatically generated by python-semantic-release ([`03f3004`](https://github.com/TattaBio/DGEB/commit/03f300476b0aeca2796b780139fce0be037ae636)) + +## v0.0.3 (2024-07-01) + +### Fix + +* fix: remove persist credentials ([`2ae683e`](https://github.com/TattaBio/DGEB/commit/2ae683ed7a68b0559b81b1b7f5716636beef1415)) + +* fix: try to fix release CI ([`1dfc938`](https://github.com/TattaBio/DGEB/commit/1dfc9383b2dab8bba444b09c6b85500dadee7203)) + +### Unknown + +* 0.0.3 + +Automatically generated by python-semantic-release ([`7cbfc8d`](https://github.com/TattaBio/DGEB/commit/7cbfc8d0acef975d046ff485001ed289800d143f)) + +## v0.0.2 (2024-07-01) + +### Fix + +* fix: new repository name ([`8fc1145`](https://github.com/TattaBio/DGEB/commit/8fc1145985eab8aa97562f697edab45a30b189ba)) + +* fix: addl geb references ([`86a5af8`](https://github.com/TattaBio/DGEB/commit/86a5af8c24244ac8f2670801468e1a25b8e3e9df)) + +### Unknown + +* 0.0.2 + +Automatically generated by python-semantic-release ([`1c7b19b`](https://github.com/TattaBio/DGEB/commit/1c7b19b50597e9dabe07fbf7cb7d3c589438917a)) + +## v0.0.1 (2024-07-01) + +### Fix + +* fix: rename geb to dgeb ([`be712f8`](https://github.com/TattaBio/DGEB/commit/be712f8d19678801b9148ac8397f13afe826871b)) + +### Unknown + +* 0.0.1 + +Automatically generated by python-semantic-release 
([`1503e03`](https://github.com/TattaBio/DGEB/commit/1503e030bb1277e1a2dcad7b99c9ed3472243f5d)) + +## v0.0.0 (2024-07-01) + +### Unknown + +* 0.0.0 + +Automatically generated by python-semantic-release ([`4b791ee`](https://github.com/TattaBio/DGEB/commit/4b791ee07085647427afec31a1adf61977e6bd4c)) + +* Initial commit ([`36fe62c`](https://github.com/TattaBio/DGEB/commit/36fe62c234331de97f2827a49bf62d5c35b92a1f)) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..58c8634 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,27 @@ +# Docker file for leaderboard +FROM python:3.11-slim + +WORKDIR /app + +# install curl +RUN apt-get update && apt-get install -y curl +ADD https://astral.sh/uv/install.sh /install.sh +RUN chmod +x /install.sh +RUN /install.sh && rm /install.sh + +# install deps +COPY leaderboard/requirements.txt ./ +RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt + +# copy src +COPY dgeb dgeb +COPY leaderboard/ leaderboard/ + +# Run gradio when the container launches +EXPOSE 7860 +ENV GRADIO_SERVER_NAME="0.0.0.0" +ENV GRADIO_TEMP_DIR="/app" +WORKDIR /app/leaderboard +CMD ["python", "app.py"] + + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f49a4e1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..535d54c --- /dev/null +++ b/README.md @@ -0,0 +1,181 @@ +--- +title: DGEB +app_file : leaderboard/app.py +sdk: docker +sdk_version: 4.36.1 +--- +

Diverse Genomic Embedding Benchmark

+ +

+ + GitHub release + + + arXiv URL + + + License + + + Downloads + +

+ +

+

+ Installation | + Usage | + Leaderboard | + Citing +

+

+ +

+ +

+DGEB is a benchmark for evaluating biological sequence models on functional and evolutionary information. + +DGEB is designed to evaluate model embeddings using: + +- Diverse sequences across the tree of life. +- Diverse tasks that capture different aspects of biological function. +- Both amino acid and nucleotide sequences. + +The current version of DGEB consists of 18 datasets covering all three domains of life (Bacteria, Archaea and Eukarya). DGEB evaluates embeddings using six different embedding tasks: Classification, BiGene mining, Evolutionary Distance Similarity (EDS), Pair Classification, Clustering, and Retrieval. + +We welcome contributions of new tasks and datasets. + +## Installation + +Install DGEB using pip. + +```bash +pip install dgeb +``` + +## Usage + +- Launch evaluation using the python script (see [cli.py](https://github.com/tattabio/dgeb/blob/main/dgeb/cli.py)): + +```bash +dgeb --model facebook/esm2_t6_8M_UR50D +``` + +- To see all supported models and tasks: + +```bash +dgeb --help +``` + +- Using the python API: + +```py +import dgeb + +model = dgeb.get_model("facebook/esm2_t6_8M_UR50D") +tasks = dgeb.get_tasks_by_modality(dgeb.Modality.PROTEIN) +evaluation = dgeb.DGEB(tasks=tasks) +evaluation.run(model, output_folder="results") +``` + +### Using a custom model + +Custom models should be wrapped with the `dgeb.models.BioSeqTransformer` abstract class, and specify the modality, number of layers, and embedding dimension. See [models.py](https://github.com/tattabio/dgeb/blob/main/dgeb/models.py) for additional examples on custom model loading and inference. 
+ +```python +import dgeb +from dgeb.models import BioSeqTransformer +from dgeb.tasks.tasks import Modality + +class MyModel(BioSeqTransformer): + + @property + def modality(self) -> Modality: + return Modality.PROTEIN + + @property + def num_layers(self) -> int: + return self.config.num_hidden_layers + + @property + def embed_dim(self) -> int: + return self.config.hidden_size + + +model = MyModel(model_name='path_to/huggingface_model') +tasks = dgeb.get_tasks_by_modality(model.modality) +evaluation = dgeb.DGEB(tasks=tasks) +evaluation.run(model) +``` + +### Evaluating on a custom dataset + +**We strongly encourage users to contribute their custom datasets to DGEB. Please open a PR adding your dataset so that the community can benefit!** + +To evaluate on a custom dataset, first upload your dataset to the [Huggingface Hub](https://huggingface.co/docs/hub/en/datasets-adding). Then define a `Task` subclass with `TaskMetadata` that points to your huggingface dataset. For example, a classification task on a custom dataset can be defined as follows: + +```python +import dgeb +from dgeb.models import BioSeqTransformer +from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult +from dgeb.tasks.classification_tasks import run_classification_task + +class MyCustomTask(Task): + metadata = TaskMetadata( + id="my_custom_classification", + display_name="...", + description="...", + type="classification", + modality=Modality.PROTEIN, + datasets=[ + Dataset( + path="path_to/huggingface_dataset", + revision="...", + ) + ], + primary_metric_id="f1", + ) + + def run(self, model: BioSeqTransformer) -> TaskResult: + return run_classification_task(model, self.metadata) + +model = dgeb.get_model("facebook/esm2_t6_8M_UR50D") +evaluation = dgeb.DGEB(tasks=[MyCustomTask]) +evaluation.run(model) +``` + +## Leaderboard + +To add your submission to the DGEB leaderboard, proceed through the following instructions. + +1. 
Fork the DGEB repository by following GitHub's instructions [Forking Workflow](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). + +2. Add your submission .json file to the leaderboard/submissions/HF_MODEL_NAME/ directory. + +```bash +mv /path/to/SUBMISSION_FILE.json /path/to/DGEB/leaderboard/submissions/HF_MODEL_NAME/ +``` + +3. Update your fork with the new submission: + +```bash +git add leaderboard/submissions/HF_MODEL_NAME/SUBMISSION_FILE.json +git commit -m "Add submission for HF_MODEL_NAME" +git push +``` + +4. Open a pull request to the main branch of the repository via the GitHub interface. + +5. Once the PR is reviewed and merged, your submission will be added to the leaderboard! + + +## Acknowledgements + +DGEB follows the design of the text embedding benchmark [MTEB](https://github.com/embeddings-benchmark/mteb) developed by Huggingface 🤗. The evaluation code is adapted from the MTEB codebase. + +## Citing + +DGEB was introduced in "[Diverse Genomic Embedding Benchmark for Functional Evaluation Across the Tree of Life]()", feel free to cite: + +TODO diff --git a/dgeb/__init__.py b/dgeb/__init__.py new file mode 100644 index 0000000..38d6f4a --- /dev/null +++ b/dgeb/__init__.py @@ -0,0 +1,28 @@ +from dgeb.dgeb import ( + DGEB, + get_all_model_names, + get_all_task_names, + get_all_tasks, + get_model, + get_output_folder, + get_tasks_by_modality, + get_tasks_by_name, +) +from dgeb.modality import Modality +from dgeb.tasks.tasks import TaskResult + +# importing without setting `__all__` produces a Ruff error: +# "imported but unused; consider removing, adding to __all__, or using a redundant alias RuffF401" +# See https://docs.astral.sh/ruff/rules/unused-import/#why-is-this-bad +__all__ = [ + "DGEB", + "get_all_tasks", + "get_all_task_names", + "get_tasks_by_name", + "get_tasks_by_modality", + "get_all_model_names", + "get_model", + "get_output_folder", + "TaskResult", + "Modality", +] diff --git a/dgeb/cli.py b/dgeb/cli.py new file
mode 100644 index 0000000..0be74fb --- /dev/null +++ b/dgeb/cli.py @@ -0,0 +1,137 @@ +""" +Main command to run diverse genomic embedding benchmarks (DGEB) on a model. +example command to run DGEB: +dgeb -m facebook/esm2_t6_8M_UR50D +""" + +import argparse +import logging +import os + +import dgeb + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +ALL_TASK_NAMES = dgeb.get_all_task_names() +ALL_MODEL_NAMES = dgeb.get_all_model_names() + + +def main(): + """Parse CLI arguments, load the requested model and run the selected DGEB tasks on it.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "-m", + "--model", + type=str, + default=None, + help=f"Model to evaluate. Choose from {ALL_MODEL_NAMES}", + ) + parser.add_argument( + "-t", + "--tasks", + type=lambda s: s.split(","), + default=None, + help=f"Comma separated tasks to evaluate on. Choose from {ALL_TASK_NAMES} or do not specify to evaluate on all tasks", + ) + parser.add_argument( + "-l", + "--layers", + type=str, + default=None, + help="Layer to evaluate. Comma separated list of integers or 'mid' and 'last'. Default is 'mid,last'", + ) + parser.add_argument( + "--devices", + type=str, + default="0", + help="Comma separated list of GPU device ids to use. Default is 0 (if GPUs are detected).", + ) + parser.add_argument( + "--output_folder", + type=str, + default=None, + help="Base output directory. Results are saved to output_folder/model_name. Defaults to 'results'.", + ) + parser.add_argument( + "-v", "--verbosity", type=int, default=2, help="Verbosity level" + ) + parser.add_argument( + "-b", "--batch_size", type=int, default=64, help="Batch size for evaluation" + ) + parser.add_argument( + "--max_seq_len", + type=int, + default=1024, + help="Maximum sequence length for model, default is 1024.", + ) + parser.add_argument( + "--pool_type", + type=str, + default="mean", + help="Pooling type for model, choose from mean, max, cls, last. 
Default is mean.", + ) + + args = parser.parse_args() + + # set level on the "dgeb" package logger; module loggers created with getLogger(__name__) inherit it + if args.verbosity == 0: + logging.getLogger("dgeb").setLevel(logging.CRITICAL) + elif args.verbosity == 1: + logging.getLogger("dgeb").setLevel(logging.WARNING) + elif args.verbosity == 2: + logging.getLogger("dgeb").setLevel(logging.INFO) + elif args.verbosity == 3: + logging.getLogger("dgeb").setLevel(logging.DEBUG) + + if args.model is None: + raise ValueError("Please specify a model using the -m or --model argument") + + # make sure that devices are comma separated list of integers + try: + devices = [int(device) for device in args.devices.split(",")] + except ValueError: + raise ValueError("Devices must be comma separated list of integers") + + layers = args.layers + if layers: + if layers not in ["mid", "last"]: + # Layers should be list of integers. + try: + layers = [int(layer) for layer in layers.split(",")] + except ValueError: + raise ValueError("Layers must be a list of integers.") + + model_name = args.model.split("/")[-1] + output_folder = args.output_folder + if output_folder is None: + output_folder = "results" + # DGEB.run writes each task result under output_folder/model_name, so create and report that directory + results_dir = os.path.join(output_folder, model_name) + os.makedirs(results_dir, exist_ok=True) + logger.info(f"Results will be saved to {results_dir}") + + # Load the model by name. 
+ model = dgeb.get_model( + model_name=args.model, + layers=layers, + devices=devices, + max_seq_length=args.max_seq_len, + batch_size=args.batch_size, + pool_type=args.pool_type, + ) + + all_tasks_for_modality = dgeb.get_tasks_by_modality(model.modality) + + if args.tasks: + task_list = dgeb.get_tasks_by_name(args.tasks) + if not all(task.metadata.modality == model.modality for task in task_list): + raise ValueError(f"Tasks must be one of {all_tasks_for_modality}") + else: + task_list = all_tasks_for_modality + evaluation = dgeb.DGEB(tasks=task_list) + _ = evaluation.run(model, output_folder=output_folder) + + +if __name__ == "__main__": + main() diff --git a/dgeb/dgeb.py b/dgeb/dgeb.py new file mode 100644 index 0000000..6dfd940 --- /dev/null +++ b/dgeb/dgeb.py @@ -0,0 +1,129 @@ +import logging +import os +import traceback +from itertools import chain +from typing import Any, List + +from rich.console import Console + +from .eval_utils import set_all_seeds +from .modality import Modality +from .models import BioSeqTransformer +from .tasks.tasks import Task + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class DGEB: + """DGEB class to run the evaluation pipeline.""" + + def __init__(self, tasks: List[type[Task]], seed: int = 42): + self.tasks = tasks + set_all_seeds(seed) + + def print_selected_tasks(self): + """Print the selected tasks.""" + console = Console() + console.rule("[bold]Selected Tasks\n", style="grey15") + for task in self.tasks: + prefix = " - " + name = f"{task.metadata.display_name}" + category = f", [italic grey39]{task.metadata.type}[/]" + console.print(f"{prefix}{name}{category}") + console.print("\n") + + def run( + self, + model: BioSeqTransformer, + output_folder: str = "results", + ): + """Run the evaluation pipeline on the selected tasks. + + Args: + model: Model to be used for evaluation + output_folder: Folder where the results will be saved. Default to 'results'. 
Where it will save the results in the format: + `{output_folder}/{model_name}/{model_revision}/{task_name}.json`. + + Returns: + A list of MTEBResults objects, one for each task evaluated. + """ + # Run selected tasks + self.print_selected_tasks() + results = [] + + for task in self.tasks: + logger.info( + f"\n\n********************** Evaluating {task.metadata.display_name} **********************" + ) + + try: + result = task().run(model) + except Exception as e: + logger.error(e) + logger.error(traceback.format_exc()) + logger.error(f"Error running task {task}") + continue + + results.append(result) + + save_path = get_output_folder(model.hf_name, task, output_folder) + with open(save_path, "w") as f_out: + f_out.write(result.model_dump_json(indent=2)) + return results + + +def get_model(model_name: str, **kwargs: Any) -> type[BioSeqTransformer]: + all_names = get_all_model_names() + for cls in BioSeqTransformer.__subclasses__(): + if model_name in cls.MODEL_NAMES: + return cls(model_name, **kwargs) + raise ValueError(f"Model {model_name} not found in {all_names}.") + + +def get_all_model_names() -> List[str]: + return list( + chain.from_iterable( + cls.MODEL_NAMES for cls in BioSeqTransformer.__subclasses__() + ) + ) + + +def get_all_task_names() -> List[str]: + return [task.metadata.id for task in get_all_tasks()] + + +def get_tasks_by_name(tasks: List[str]) -> List[type[Task]]: + return [_get_task(task) for task in tasks] + + +def get_tasks_by_modality(modality: Modality) -> List[type[Task]]: + return [task for task in get_all_tasks() if task.metadata.modality == modality] + + +def get_all_tasks() -> List[type[Task]]: + return Task.__subclasses__() + + +def _get_task(task_name: str) -> type[Task]: + logger.info(f"Getting task {task_name}") + for task in get_all_tasks(): + if task.metadata.id == task_name: + return task + + raise ValueError( + f"Task {task_name} not found, available tasks are: {[task.metadata.id for task in get_all_tasks()]}" + ) + + +def 
get_output_folder( + model_hf_name: str, task: type[Task], output_folder: str, create: bool = True +): + output_folder = os.path.join(output_folder, os.path.basename(model_hf_name)) + # create output folder if it does not exist + if create and not os.path.exists(output_folder): + os.makedirs(output_folder) + return os.path.join( + output_folder, + f"{task.metadata.id}.json", + ) diff --git a/dgeb/eval_utils.py b/dgeb/eval_utils.py new file mode 100644 index 0000000..7b5f630 --- /dev/null +++ b/dgeb/eval_utils.py @@ -0,0 +1,394 @@ +"""Utility functions for evaluation.""" + +from typing import Any, Dict, List, Tuple +import json +import torch +import random +import numpy as np +from sklearn.metrics import auc + + +class ForwardHook: + """Pytorch forward hook class to store outputs of intermediate layers.""" + + def __init__(self, module: torch.nn.Module): + self.hook = module.register_forward_hook(self.hook_fn) + self.output = None + + def hook_fn(self, module, input, output): + self.output = output + + def close(self): + self.hook.remove() + + +def pool( + last_hidden_states: torch.Tensor, attention_mask: torch.Tensor, pool_type: str +) -> torch.Tensor: + """Pool embeddings across the sequence length dimension.""" + assert ( + last_hidden_states.ndim == 3 + ), f"Expected hidden_states to have shape [batch, seq_len, D], got shape: {last_hidden_states.shape}" + assert ( + attention_mask.ndim == 2 + ), f"Expected attention_mask to have shape [batch, seq_len], got shape: {attention_mask.shape}" + last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0) + if pool_type == "mean": + emb = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None] + elif pool_type == "max": + emb = last_hidden.max(dim=1)[0] + elif pool_type == "cls": + emb = last_hidden[:, 0] + elif pool_type == "last": + emb = last_hidden[torch.arange(last_hidden.size(0)), attention_mask.sum(1) - 1] + else: + raise ValueError(f"pool_type {pool_type} not supported") + return 
emb + + +def set_all_seeds(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.backends.cudnn.deterministic = True + + +def write_results_to_json(results: Dict[str, Any], results_path: str): + """Write results dict to a json file.""" + with open(results_path, "w") as f: + json.dump(results, f, indent=4) + + +def merge_split_elem_embeds(ids, embeds, preserve_order: bool = False): + """Merge embeddings with the same id by mean-pooling and optionally preserve order in which they appear. + + Args: + ids: Array of string ids, [batch]. + embeds: Array of embeddings, [batch, ...]. + + Returns: + ids: Unique ids, [unique_batch]. + embeds: Array of embeddings, [unique_batch, ...]. + """ + unique_ids, indices = np.unique(ids, return_inverse=True) + shape_no_batch = embeds.shape[1:] + sums = np.zeros([unique_ids.size, *shape_no_batch], dtype=embeds.dtype) + counts = np.bincount(indices, minlength=unique_ids.size) + np.add.at(sums, indices, embeds) + # Add trailing dimensions to counts. + counts = counts[(...,) + (None,) * len(shape_no_batch)] + mean_pooled = sums / counts + # Preserve the order of the input ids. + if preserve_order: + order = [] + for id in unique_ids: + idx = np.where(ids == id)[0][0] + order.append(idx) + re_order = np.argsort(order) + unique_ids = unique_ids[re_order] + mean_pooled = mean_pooled[re_order] + return unique_ids, mean_pooled + + +def paired_dataset(labels, embeds): + """Creates a paired dataset for consecutive operonic gene pairs.""" + embeds1 = embeds[:-1] + embeds2 = embeds[1:] + labels = labels[:-1] + return embeds1, embeds2, labels + + +def cos_sim(a, b): + """Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j. 
+ + Return: + Matrix with res[i][j] = cos_sim(a[i], b[j]) + """ # noqa: D402 + if not isinstance(a, torch.Tensor): + a = torch.tensor(a) + + if not isinstance(b, torch.Tensor): + b = torch.tensor(b) + + if len(a.shape) == 1: + a = a.unsqueeze(0) + + if len(b.shape) == 1: + b = b.unsqueeze(0) + + a_norm = torch.nn.functional.normalize(a, p=2, dim=1) + b_norm = torch.nn.functional.normalize(b, p=2, dim=1) + return torch.mm(a_norm, b_norm.transpose(0, 1)) + + +def dot_score(a: torch.Tensor, b: torch.Tensor): + """Computes the dot-product dot_prod(a[i], b[j]) for all i and j. + :return: Matrix with res[i][j] = dot_prod(a[i], b[j]) + """ + if not isinstance(a, torch.Tensor): + a = torch.tensor(a) + + if not isinstance(b, torch.Tensor): + b = torch.tensor(b) + + if len(a.shape) == 1: + a = a.unsqueeze(0) + + if len(b.shape) == 1: + b = b.unsqueeze(0) + + return torch.mm(a, b.transpose(0, 1)) + + +# From https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/custom_metrics.py#L4 +def mrr( + qrels: dict[str, dict[str, int]], + results: dict[str, dict[str, float]], + k_values: List[int], + output_type: str = "mean", +) -> Tuple[Dict[str, float]]: + MRR = {} + + for k in k_values: + MRR[f"MRR@{k}"] = [] + + k_max, top_hits = max(k_values), {} + + for query_id, doc_scores in results.items(): + top_hits[query_id] = sorted( + doc_scores.items(), key=lambda item: item[1], reverse=True + )[0:k_max] + + for query_id in top_hits: + query_relevant_docs = set( + [doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0] + ) + for k in k_values: + rr = 0 + for rank, hit in enumerate(top_hits[query_id][0:k]): + if hit[0] in query_relevant_docs: + rr = 1.0 / (rank + 1) + break + MRR[f"MRR@{k}"].append(rr) + + if output_type == "mean": + for k in k_values: + MRR[f"MRR@{k}"] = round(sum(MRR[f"MRR@{k}"]) / len(qrels), 5) + + elif output_type == "all": + pass + + return MRR + + +# From 
https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py +def recall_cap( + qrels: dict[str, dict[str, int]], + results: dict[str, dict[str, float]], + k_values: List[int], + output_type: str = "mean", +) -> Tuple[Dict[str, float]]: + capped_recall = {} + + for k in k_values: + capped_recall[f"R_cap@{k}"] = [] + + k_max = max(k_values) + + for query_id, doc_scores in results.items(): + top_hits = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)[ + 0:k_max + ] + query_relevant_docs = [ + doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0 + ] + for k in k_values: + retrieved_docs = [ + row[0] for row in top_hits[0:k] if qrels[query_id].get(row[0], 0) > 0 + ] + denominator = min(len(query_relevant_docs), k) + capped_recall[f"R_cap@{k}"].append(len(retrieved_docs) / denominator) + + if output_type == "mean": + for k in k_values: + capped_recall[f"R_cap@{k}"] = round( + sum(capped_recall[f"R_cap@{k}"]) / len(qrels), 5 + ) + + elif output_type == "all": + pass + + return capped_recall + + +# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py +def hole( + qrels: dict[str, dict[str, int]], + results: dict[str, dict[str, float]], + k_values: List[int], + output_type: str = "mean", +) -> Tuple[Dict[str, float]]: + Hole = {} + + for k in k_values: + Hole[f"Hole@{k}"] = [] + + annotated_corpus = set() + for _, docs in qrels.items(): + for doc_id, score in docs.items(): + annotated_corpus.add(doc_id) + + k_max = max(k_values) + + for _, scores in results.items(): + top_hits = sorted(scores.items(), key=lambda item: item[1], reverse=True)[ + 0:k_max + ] + for k in k_values: + hole_docs = [ + row[0] for row in top_hits[0:k] if row[0] not in annotated_corpus + ] + Hole[f"Hole@{k}"].append(len(hole_docs) / k) + + if output_type == "mean": + for k in k_values: + Hole[f"Hole@{k}"] = 
round(Hole[f"Hole@{k}"] / len(qrels), 5) + + elif output_type == "all": + pass + + return Hole + + +# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py +def top_k_accuracy( + qrels: dict[str, dict[str, int]], + results: dict[str, dict[str, float]], + k_values: List[int], + output_type: str = "mean", +) -> Tuple[Dict[str, float]]: + top_k_acc = {} + + for k in k_values: + top_k_acc[f"Accuracy@{k}"] = [] + + k_max, top_hits = max(k_values), {} + + for query_id, doc_scores in results.items(): + top_hits[query_id] = [ + item[0] + for item in sorted( + doc_scores.items(), key=lambda item: item[1], reverse=True + )[0:k_max] + ] + + for query_id in top_hits: + query_relevant_docs = set( + [doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0] + ) + for k in k_values: + for relevant_doc_id in query_relevant_docs: + if relevant_doc_id in top_hits[query_id][0:k]: + top_k_acc[f"Accuracy@{k}"].append(1.0) + break + + if output_type == "mean": + for k in k_values: + top_k_acc[f"Accuracy@{k}"] = round( + top_k_acc[f"Accuracy@{k}"] / len(qrels), 5 + ) + + elif output_type == "all": + pass + + return top_k_acc + + +# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py +def confidence_scores(sim_scores: List[float]) -> Dict[str, float]: + """Computes confidence scores for a single instance = (query, positives, negatives) + + Args: + sim_scores: Query-documents similarity scores with length `num_pos+num_neg` + + Returns: + conf_scores: + - `max`: Maximum similarity score + - `std`: Standard deviation of similarity scores + - `diff1`: Difference between highest and second highest similarity scores + """ + sim_scores_sorted = sorted(sim_scores)[::-1] + + cs_max = sim_scores_sorted[0] + cs_std = np.std(sim_scores) + if len(sim_scores) > 1: + cs_diff1 = sim_scores_sorted[0] - sim_scores_sorted[1] + elif 
len(sim_scores) == 1: + cs_diff1 = 0.0 + + conf_scores = {"max": cs_max, "std": cs_std, "diff1": cs_diff1} + + return conf_scores + + +# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py +def nAUC( + conf_scores: np.ndarray, + metrics: np.ndarray, + abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1], +) -> float: + """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997 + 1/ Computes the raw abstention curve, i.e., the average evaluation metric at different abstention rates determined by the confidence scores + 2/ Computes the oracle abstention curve, i.e., the best theoretical abstention curve (e.g.: at a 10% abstention rate, the oracle abstains on the bottom-10% instances with regard to the evaluation metric) + 3/ Computes the flat abstention curve, i.e., the one remains flat for all abstention rates (ineffective abstention) + 4/ Computes the area under the three curves + 5/ Finally scales the raw AUC between the oracle and the flat AUCs to get normalized AUC + + Args: + conf_scores: Instance confidence scores used for abstention thresholding, with shape `(num_test_instances,)` + metrics: Metric evaluations at instance-level (e.g.: average precision, NDCG...), with shape `(num_test_instances,)` + abstention_rates: Target rates for the computation of the abstention curve + + Returns: + abst_nauc: Normalized area under the abstention curve (upper-bounded by 1) + """ + + def abstention_curve( + conf_scores: np.ndarray, + metrics: np.ndarray, + abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1], + ) -> np.ndarray: + """Computes the raw abstention curve for a given set of evaluated instances and corresponding confidence scores + + Args: + conf_scores: Instance confidence scores used for abstention thresholding, with shape `(num_test_instances,)` + metrics: Metric evaluations at instance-level 
(e.g.: average precision, NDCG...), with shape `(num_test_instances,)` + abstention_rates: Target rates for the computation of the abstention curve + + Returns: + abst_curve: Abstention curve of length `len(abstention_rates)` + """ + conf_scores_argsort = np.argsort(conf_scores) + abst_curve = np.zeros(len(abstention_rates)) + + for i, rate in enumerate(abstention_rates): + num_instances_abst = min( + round(rate * len(conf_scores_argsort)), len(conf_scores) - 1 + ) + abst_curve[i] = metrics[conf_scores_argsort[num_instances_abst:]].mean() + + return abst_curve + + abst_curve = abstention_curve(conf_scores, metrics, abstention_rates) + or_curve = abstention_curve(metrics, metrics, abstention_rates) + abst_auc = auc(abstention_rates, abst_curve) + or_auc = auc(abstention_rates, or_curve) + flat_auc = or_curve[0] * (abstention_rates[-1] - abstention_rates[0]) + + if or_auc == flat_auc: + abst_nauc = np.nan + else: + abst_nauc = (abst_auc - flat_auc) / (or_auc - flat_auc) + + return abst_nauc diff --git a/dgeb/evaluators.py b/dgeb/evaluators.py new file mode 100644 index 0000000..5098970 --- /dev/null +++ b/dgeb/evaluators.py @@ -0,0 +1,839 @@ +""" +Evaluator objects for different evaluation types. 
+""" + +import logging +import random +from abc import ABC, abstractmethod +import heapq +from collections import defaultdict +import pytrec_eval +import numpy as np +import sklearn.cluster +import torch +from scipy.stats import pearsonr +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import ( + accuracy_score, + average_precision_score, + classification_report, + f1_score, + precision_score, + recall_score, + label_ranking_average_precision_score, +) +from sklearn.metrics.cluster import v_measure_score +from sklearn.metrics.pairwise import ( + paired_cosine_distances, + paired_euclidean_distances, + paired_manhattan_distances, +) +from sklearn.multioutput import MultiOutputRegressor +from sklearn.preprocessing import MultiLabelBinarizer +from typing import Dict, List, Tuple + +from .eval_utils import ( + cos_sim, + dot_score, + mrr, + recall_cap, + hole, + confidence_scores, + nAUC, + top_k_accuracy, +) + + +class Evaluator(ABC): + """Base class for all evaluators + Extend this class and implement __call__ for custom evaluators. + """ + + def __init__(self, seed=42, **kwargs): + self.seed = seed + random.seed(self.seed) + np.random.seed(self.seed) + torch.manual_seed(self.seed) + torch.cuda.manual_seed_all(self.seed) + + @abstractmethod + def __call__(self, model): + """This is called during training to evaluate the model. + It returns scores. 
+ + Parameters + ---------- + model: + the model to evaluate + """ + pass + + +logger = logging.getLogger(__name__) + + +class logRegClassificationEvaluator(Evaluator): + def __init__( + self, + embeds_train, + y_train, + embeds_test, + y_test, + max_iter=1000, + **kwargs, + ): + super().__init__(**kwargs) + self.embeds_train = embeds_train + self.y_train = y_train + self.embeds_test = embeds_test + self.y_test = y_test + + self.max_iter = max_iter + + def __call__(self): + scores = {} + clf = LogisticRegression( + random_state=self.seed, + n_jobs=-1, + max_iter=self.max_iter, + verbose=1 if logger.isEnabledFor(logging.DEBUG) else 0, + ) + logger.info(f"Encoding {len(self.embeds_train)} training embeds...") + X_train = np.asarray(self.embeds_train) + + logger.info(f"Encoding {len(self.embeds_test)} test embeds...") + X_test = np.asarray(self.embeds_test) + logger.info("Fitting logistic regression classifier...") + clf.fit(X_train, self.y_train) + logger.info("Evaluating...") + y_pred = clf.predict(X_test) + accuracy = accuracy_score(self.y_test, y_pred) + f1 = f1_score(self.y_test, y_pred, average="macro") + scores["accuracy"] = accuracy + scores["f1"] = f1 + + # if binary classification + if len(np.unique(self.y_train)) == 2: + ap = average_precision_score(self.y_test, y_pred) + scores["ap"] = ap + + return scores + + +class ClusteringEvaluator(Evaluator): + def __init__( + self, + embeds, + labels, + clustering_batch_size=500, + **kwargs, + ): + super().__init__(**kwargs) + self.embeds = embeds + self.labels = labels + self.clustering_batch_size = clustering_batch_size + + def __call__(self): + logger.info(f"Encoding {len(self.embeds)} embeds...") + corpus_embeddings = np.asarray(self.embeds) + + logger.info("Fitting Mini-Batch K-Means model...") + clustering_model = sklearn.cluster.MiniBatchKMeans( + n_clusters=len(set(self.labels)), + batch_size=self.clustering_batch_size, + n_init="auto", + ) + clustering_model.fit(corpus_embeddings) + cluster_assignment = 
clustering_model.labels_ + + logger.info("Evaluating...") + v_measure = v_measure_score(self.labels, cluster_assignment) + + return {"v_measure": v_measure} + + +class PairClassificationEvaluator(Evaluator): + """Evaluate a model based on the similarity of the embeddings by calculating the accuracy of identifying similar and + dissimilar embeds. + The metrics are the cosine similarity as well as euclidean and Manhattan distance + The returned score is the accuracy with a specified metric. + The results are written in a CSV. If a CSV already exists, then values are appended. + The labels need to be 0 for dissimilar pairs and 1 for similar pairs. + :param embeds1: The first column of embeds + :param embeds2: The second column of embeds + :param labels: labels[i] is the label for the pair (embeds1[i], embeds2[i]). Must be 0 or 1 + :param name: Name for the output + :param write_csv: Write results to a CSV file + """ + + def __init__(self, embeds1, embeds2, labels, **kwargs): + super().__init__(**kwargs) + self.embeds1 = embeds1 + self.embeds2 = embeds2 + self.labels = labels + + assert len(self.embeds1) == len(self.embeds2) + assert len(self.embeds1) == len(self.labels) + for label in labels: + assert label == 0 or label == 1 + + def __call__(self): + scores = self.compute_metrics() + # Compute the max of Average Precision (AP) over all distance metrics. 
+ top_ap_score = max(score for k, score in scores.items() if k.endswith("_ap")) + scores["top_ap"] = top_ap_score + return scores + + def compute_metrics(self): + embeddings1 = np.array(self.embeds1) + embeddings2 = np.array(self.embeds2) + + logger.info("Computing similarity distances...") + cosine_scores = 1 - paired_cosine_distances(embeddings1, embeddings2) + manhattan_distances = paired_manhattan_distances(embeddings1, embeddings2) + euclidean_distances = paired_euclidean_distances(embeddings1, embeddings2) + + embeddings1_np = np.asarray(embeddings1) + embeddings2_np = np.asarray(embeddings2) + dot_scores = [ + np.dot(embeddings1_np[i], embeddings2_np[i]) + for i in range(len(embeddings1_np)) + ] + + logger.info("Computing metrics...") + labels = np.asarray(self.labels) + output_scores = {} + for short_name, name, scores, reverse in [ + ["cos_sim", "Cosine-Similarity", cosine_scores, True], + ["manhattan", "Manhattan-Distance", manhattan_distances, False], + ["euclidean", "Euclidean-Distance", euclidean_distances, False], + ["dot", "Dot-Product", dot_scores, True], + ]: + metrics = self._compute_metrics(scores, labels, reverse) + metrics = {short_name + "_" + k: v for k, v in metrics.items()} + output_scores.update(metrics) + + return output_scores + + @staticmethod + def _compute_metrics(scores, labels, high_score_more_similar): + """Compute the metrics for the given scores and labels. + + Args: + scores (`np.ndarray` of shape (n_pairs, )): The similarity/dissimilarity scores for the pairs. + labels (`np.ndarray` of shape (n_pairs, )): The labels for the pairs. + high_score_more_similar (`bool`): If true, then the higher the score, the more similar the pairs are. + + Returns: + `dict`: The metrics for the given scores and labels. 
+ """ + acc, acc_threshold = PairClassificationEvaluator.find_best_acc_and_threshold( + scores, labels, high_score_more_similar + ) + f1, precision, recall, f1_threshold = ( + PairClassificationEvaluator.find_best_f1_and_threshold( + scores, labels, high_score_more_similar + ) + ) + ap = PairClassificationEvaluator.ap_score( + scores, labels, high_score_more_similar + ) + + return { + "accuracy": acc, + "accuracy_threshold": acc_threshold, + "f1": f1, + "f1_threshold": f1_threshold, + "precision": precision, + "recall": recall, + "ap": ap, + } + + @staticmethod + def find_best_acc_and_threshold(scores, labels, high_score_more_similar: bool): + assert len(scores) == len(labels) + rows = list(zip(scores, labels)) + + rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar) + + max_acc = 0 + best_threshold = -1 + + positive_so_far = 0 + remaining_negatives = sum(np.array(labels) == 0) + + for i in range(len(rows) - 1): + score, label = rows[i] + if label == 1: + positive_so_far += 1 + else: + remaining_negatives -= 1 + + acc = (positive_so_far + remaining_negatives) / len(labels) + if acc > max_acc: + max_acc = acc + best_threshold = (rows[i][0] + rows[i + 1][0]) / 2 + + return max_acc, best_threshold + + @staticmethod + def find_best_f1_and_threshold(scores, labels, high_score_more_similar: bool): + assert len(scores) == len(labels) + + scores = np.asarray(scores) + labels = np.asarray(labels) + + rows = list(zip(scores, labels)) + + rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar) + + best_f1 = best_precision = best_recall = 0 + threshold = 0 + nextract = 0 + ncorrect = 0 + total_num_duplicates = sum(labels) + + for i in range(len(rows) - 1): + score, label = rows[i] + nextract += 1 + + if label == 1: + ncorrect += 1 + + if ncorrect > 0: + precision = ncorrect / nextract + recall = ncorrect / total_num_duplicates + f1 = 2 * precision * recall / (precision + recall) + if f1 > best_f1: + best_f1 = f1 + best_precision = precision 
+ best_recall = recall + threshold = (rows[i][0] + rows[i + 1][0]) / 2 + + return best_f1, best_precision, best_recall, threshold + + @staticmethod + def ap_score(scores, labels, high_score_more_similar: bool): + return average_precision_score( + labels, scores * (1 if high_score_more_similar else -1) + ) + + +class MultiClassMultiOutputLogRegClassificationEvaluator(Evaluator): + def __init__( + self, + embeds_train, + y_train, + embeds_test, + y_test, + max_iter=1000, + **kwargs, + ): + super().__init__(**kwargs) + self.embeds_train = embeds_train + self.y_train = y_train + self.embeds_test = embeds_test + self.y_test = y_test + self.max_iter = max_iter + + def __call__(self): + scores = {} + mlb = MultiLabelBinarizer() + # all classes in y_train and y_test + + class_labels = list(self.y_train) + list(self.y_test) + labels = [class_label.split(", ") for class_label in class_labels] + mlb.fit(labels) + train_labels = [class_label.split(", ") for class_label in self.y_train] + test_labels = [class_label.split(", ") for class_label in self.y_test] + + y_train = mlb.transform(train_labels) + y_test = mlb.transform(test_labels) + clf = MultiOutputRegressor( + LogisticRegression( + random_state=self.seed, solver="lbfgs", max_iter=self.max_iter + ) + ).fit(self.embeds_train, y_train) + y_pred = clf.predict(self.embeds_test) + + results_dict = classification_report(y_test, y_pred, output_dict=True) + assert isinstance( + results_dict, dict + ), "Should always be true since `output_dict=True` is passed to sklearn.metric.classification_report" + scores["precision"] = results_dict["macro avg"]["precision"] + scores["recall"] = results_dict["macro avg"]["recall"] + scores["f1"] = results_dict["macro avg"]["f1-score"] + scores["accuracy"] = accuracy_score(y_test, y_pred) + + return scores + + +class MultiClassMultiOutputKNNClassificationEvaluator(Evaluator): + def __init__( + self, + embeds_train, + y_train, + embeds_test, + y_test, + n_neighbors=5, + **kwargs, + ): + 
super().__init__(**kwargs) + self.embeds_train = embeds_train + self.y_train = y_train + self.embeds_test = embeds_test + self.y_test = y_test + self.n_neighbors = n_neighbors + + def __call__(self): + scores = {} + + mlb = MultiLabelBinarizer() + class_labels = list(self.y_train) + list(self.y_test) + labels = [class_label.split(", ") for class_label in class_labels] + mlb.fit(labels) + train_labels = [class_label.split(", ") for class_label in self.y_train] + test_labels = [class_label.split(", ") for class_label in self.y_test] + + y_train = mlb.transform(train_labels) + y_test = mlb.transform(test_labels) + clf = sklearn.neighbors.KNeighborsClassifier( + n_neighbors=self.n_neighbors, metric="cosine" + ) + logger.info("Fitting KNN classifier...") + clf.fit(self.embeds_train, y_train) + logger.info("Evaluating...") + y_pred = clf.predict(self.embeds_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred, average="macro") + precision = precision_score(y_test, y_pred, average="macro") + recall = recall_score(y_test, y_pred, average="macro") + lrap = label_ranking_average_precision_score(y_test, y_pred) + scores["f1"] = f1 + scores["accuracy"] = accuracy + scores["precision"] = precision + scores["recall"] = recall + scores["lrap"] = lrap + + return scores + + +class BiGeneMiningEvaluator(Evaluator): + """ + BiGene Mining Evaluator, analogous to Bitext Mining Evaluator https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/BitextMiningEvaluator.py. + + If top_k > 1, then recall@k is also computed. + """ + + def __init__(self, embeds1, embeds2, top_k=1, **kwargs): + super().__init__(**kwargs) + self.n = len(embeds1) + self.embeds1 = np.array(embeds1) + self.embeds2 = np.array(embeds2) + self.gold = list(zip(range(self.n), range(self.n))) + self.top_k = top_k + + def __call__(self): + scores = self.compute_metrics() + return scores + + def compute_metrics(self): + logger.info(f"Finding nearest neighbors... 
with top_k={self.top_k}") + nearest_neighbors = self._similarity_search( + self.embeds1, self.embeds2, top_k=self.top_k + ) + + # Compute errors + logger.info("Computing metrics...") + labels = [] + predictions = [] + + # Get predictions and labels for top_k=1. + for i, x in enumerate(nearest_neighbors): + j = x[0]["corpus_id"] + predictions.append(j) + labels.append(self.gold[i][1]) + + scores = { + "precision": precision_score( + labels, predictions, zero_division=0, average="weighted" + ), + "recall": recall_score( + labels, predictions, zero_division=0, average="weighted" + ), + "f1": f1_score(labels, predictions, zero_division=0, average="weighted"), + "accuracy": accuracy_score(labels, predictions), + } + + if self.top_k > 1: + # Compute recall@k. + top_k_preds = [] + for i, x in enumerate(nearest_neighbors): + top_k_preds.append([pred["corpus_id"] for pred in x]) + top_k_recall = [ + self.gold[i][1] in top_k_pred + for i, top_k_pred in enumerate(top_k_preds) + ] + scores[f"recall_at_{self.top_k}"] = sum(top_k_recall) / len(top_k_recall) + return scores + + def _similarity_search( + self, + query_embeddings, + corpus_embeddings, + query_chunk_size=100, + corpus_chunk_size=500000, + top_k=1, + score_function=cos_sim, + ): + """This function performs a cosine similarity search between a list of query embeddings and a list of corpus embeddings. + It can be used for Information Retrieval / Semantic Search for corpora up to about 1 Million entries. + :param query_embeddings: A 2 dimensional tensor with the query embeddings. + :param corpus_embeddings: A 2 dimensional tensor with the corpus embeddings. + :param query_chunk_size: Process 100 queries simultaneously. Increasing that value increases the speed, but requires more memory. + :param corpus_chunk_size: Scans the corpus 50k entries at a time. Increasing that value increases the speed, but requires more memory. + :param top_k: Retrieve top k matching entries. 
+ :param score_function: Function for computing scores. By default, cosine similarity. + :return: Returns a list with one entry for each query. Each entry is a list of dictionaries with the keys 'corpus_id' and 'score', sorted by decreasing cosine similarity scores. + """ + query_embeddings = torch.from_numpy(query_embeddings) + corpus_embeddings = torch.from_numpy(corpus_embeddings) + if len(query_embeddings.shape) == 1: + query_embeddings = query_embeddings.unsqueeze(0) + if len(corpus_embeddings.shape) == 1: + corpus_embeddings = corpus_embeddings.unsqueeze(0) + + # Check that corpus and queries are on the same device + if corpus_embeddings.device != query_embeddings.device: + query_embeddings = query_embeddings.to(corpus_embeddings.device) + + queries_result_list = [[] for _ in range(len(query_embeddings))] + + for query_start_idx in range(0, len(query_embeddings), query_chunk_size): + # Iterate over chunks of the corpus + for corpus_start_idx in range(0, len(corpus_embeddings), corpus_chunk_size): + # Compute cosine similarities + cos_scores = score_function( + query_embeddings[ + query_start_idx : query_start_idx + query_chunk_size + ], + corpus_embeddings[ + corpus_start_idx : corpus_start_idx + corpus_chunk_size + ], + ) + + # Get top-k scores + cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk( + cos_scores, + min(top_k, len(cos_scores[0])), + dim=1, + largest=True, + sorted=False, + ) + cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist() + cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist() + + for query_itr in range(len(cos_scores)): + for sub_corpus_id, score in zip( + cos_scores_top_k_idx[query_itr], + cos_scores_top_k_values[query_itr], + ): + corpus_id = corpus_start_idx + sub_corpus_id + query_id = query_start_idx + query_itr + queries_result_list[query_id].append( + {"corpus_id": corpus_id, "score": score} + ) + + # Sort and strip to top_k results + for idx in range(len(queries_result_list)): + queries_result_list[idx] 
= sorted( + queries_result_list[idx], key=lambda x: x["score"], reverse=True + ) + queries_result_list[idx] = queries_result_list[idx][0:top_k] + + return queries_result_list + + +class EDSEvaluator(Evaluator): + """ + Evolutionary Distance Similarity Evaluator, analogous to Semantic Textual Similarity Evaluator. + Adapted from https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/STSEvaluator.py + """ + + def __init__(self, embeds1, embeds2, gold_scores, **kwargs): + super().__init__(**kwargs) + self.embeds1 = embeds1 + self.embeds2 = embeds2 + self.gold_scores = gold_scores + + def __call__(self): + embeddings1 = np.array(self.embeds1) + embeddings2 = np.array(self.embeds2) + logger.info("Evaluating...") + cosine_scores = paired_cosine_distances(embeddings1, embeddings2) + manhattan_distances = paired_manhattan_distances(embeddings1, embeddings2) + euclidean_distances = paired_euclidean_distances(embeddings1, embeddings2) + + cosine_pearson, _ = pearsonr(self.gold_scores, cosine_scores) + manhattan_pearson, _ = pearsonr(self.gold_scores, manhattan_distances) + euclidean_pearson, _ = pearsonr(self.gold_scores, euclidean_distances) + + top_corr = max( + cosine_pearson, + manhattan_pearson, + euclidean_pearson, + ) + return { + "cos_sim": cosine_pearson, + "manhattan": manhattan_pearson, + "euclidean": euclidean_pearson, + "top_corr": top_corr, + } + + +class RetrievalEvaluator(Evaluator): + """Adapted from + https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/RetrievalEvaluator.py + """ + + def __init__( + self, + corpus_embeds, + query_embeds, + corpus_ids, + query_ids, + qrels: Dict[str, Dict[str, int]], + k_values: List[int] = [5, 10, 50], + score_function: str = "cos_sim", + corpus_chunk_size: int = 50000, + **kwargs, + ): + super().__init__(**kwargs) + self.corpus_embeds = corpus_embeds + self.query_embeds = query_embeds + self.corpus_ids = corpus_ids + self.query_ids = query_ids + self.qrels = qrels + 
self.k_values = k_values + self.top_k = max(k_values) if "top_k" not in kwargs else kwargs["top_k"] + self.score_function = score_function + self.score_functions = { + "cos_sim": cos_sim, + "dot": dot_score, + } + self.corpus_chunk_size = corpus_chunk_size + + def __call__(self): + results = self.search( + self.corpus_embeds, + self.query_embeds, + self.corpus_ids, + self.query_ids, + self.top_k, + self.score_function, + ) + ndcg, _map, recall, precision, naucs = self.evaluate( + self.qrels, results, self.k_values + ) + mrr, naucs_mrr = self.evaluate_custom(self.qrels, results, self.k_values, "mrr") + scores = { + **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()}, + **{f"map_at_{k.split('@')[1]}": v for (k, v) in _map.items()}, + **{f"recall_at_{k.split('@')[1]}": v for (k, v) in recall.items()}, + **{f"precision_at_{k.split('@')[1]}": v for (k, v) in precision.items()}, + **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in mrr.items()}, + **{ + k.replace("@", "_at_").replace("_P", "_precision").lower(): v + for k, v in naucs.items() + }, + **{ + k.replace("@", "_at_").replace("_P", "_precision").lower(): v + for k, v in naucs_mrr.items() + }, + } + return scores + + def search( + self, + corpus_embeds, + query_embeds, + corpus_ids, + query_ids, + top_k: int, + score_function: str, + return_sorted: bool = False, + **kwargs, + ) -> dict[str, dict[str, float]]: + # Create embeddings for all queries using model.encode() + # Runs semantic search against the corpus embeddings + # Returns a ranked list with the corpus ids + if score_function not in self.score_functions: + raise ValueError( + f"score function: {score_function} must be either (cos_sim) for cosine similarity or (dot) for dot product" + ) + # make query embeds and corpus embeds torch tensors + query_embeds = torch.from_numpy(query_embeds) + corpus_embeds = torch.from_numpy(corpus_embeds) + itr = range(0, len(corpus_embeds), self.corpus_chunk_size) + results = defaultdict(dict) + # Keep only the 
top-k docs for each query + result_heaps = defaultdict(list) + for batch_num, corpus_start_idx in enumerate(itr): + logger.info("Searching Batch {}/{}...".format(batch_num + 1, len(itr))) + corpus_end_idx = min( + corpus_start_idx + self.corpus_chunk_size, len(corpus_ids) + ) + sub_corpus_embeds = corpus_embeds[corpus_start_idx:corpus_end_idx] + # Compute similarites using either cosine-similarity or dot product + cos_scores = self.score_functions[score_function]( + query_embeds, sub_corpus_embeds + ) + cos_scores[torch.isnan(cos_scores)] = -1 + + # Get top-k values + cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk( + cos_scores, + min( + top_k + 1, + len(cos_scores[1]) if len(cos_scores) > 1 else len(cos_scores[-1]), + ), + dim=1, + largest=True, + sorted=return_sorted, + ) + cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist() + cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist() + + for query_itr in range(len(query_embeds)): + query_id = query_ids[query_itr] + for sub_corpus_id, score in zip( + cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr] + ): + corpus_id = corpus_ids[corpus_start_idx + sub_corpus_id] + if corpus_id != query_id: + if len(result_heaps[query_id]) < top_k: + # Push item on the heap + heapq.heappush(result_heaps[query_id], (score, corpus_id)) + else: + # If item is larger than the smallest in the heap, push it on the heap then pop the smallest element + heapq.heappushpop( + result_heaps[query_id], (score, corpus_id) + ) + + for qid in result_heaps: + for score, corpus_id in result_heaps[qid]: + results[qid][corpus_id] = score + + return results + + @staticmethod + def evaluate( + qrels: dict[str, dict[str, int]], + results: dict[str, dict[str, float]], + k_values: List[int], + ignore_identical_ids: bool = True, + ) -> Tuple[Dict[str, float], dict[str, float], dict[str, float], dict[str, float]]: + if ignore_identical_ids: + logger.info( + "For evaluation, we ignore identical query and document ids 
(default), please explicitly set ``ignore_identical_ids=False`` to ignore this." + ) + popped = [] + for qid, rels in results.items(): + for pid in list(rels): + if qid == pid: + results[qid].pop(pid) + popped.append(pid) + + all_ndcgs, all_aps, all_recalls, all_precisions = {}, {}, {}, {} + + for k in k_values: + all_ndcgs[f"NDCG@{k}"] = [] + all_aps[f"MAP@{k}"] = [] + all_recalls[f"Recall@{k}"] = [] + all_precisions[f"P@{k}"] = [] + + map_string = "map_cut." + ",".join([str(k) for k in k_values]) + ndcg_string = "ndcg_cut." + ",".join([str(k) for k in k_values]) + recall_string = "recall." + ",".join([str(k) for k in k_values]) + precision_string = "P." + ",".join([str(k) for k in k_values]) + evaluator = pytrec_eval.RelevanceEvaluator( + qrels, {map_string, ndcg_string, recall_string, precision_string} + ) + scores = evaluator.evaluate(results) + + for query_id in scores.keys(): + for k in k_values: + all_ndcgs[f"NDCG@{k}"].append(scores[query_id]["ndcg_cut_" + str(k)]) + all_aps[f"MAP@{k}"].append(scores[query_id]["map_cut_" + str(k)]) + all_recalls[f"Recall@{k}"].append(scores[query_id]["recall_" + str(k)]) + all_precisions[f"P@{k}"].append(scores[query_id]["P_" + str(k)]) + ndcg, _map, recall, precision = ( + all_ndcgs.copy(), + all_aps.copy(), + all_recalls.copy(), + all_precisions.copy(), + ) + + for k in k_values: + ndcg[f"NDCG@{k}"] = round(sum(ndcg[f"NDCG@{k}"]) / len(scores), 5) + _map[f"MAP@{k}"] = round(sum(_map[f"MAP@{k}"]) / len(scores), 5) + recall[f"Recall@{k}"] = round(sum(recall[f"Recall@{k}"]) / len(scores), 5) + precision[f"P@{k}"] = round(sum(precision[f"P@{k}"]) / len(scores), 5) + naucs = RetrievalEvaluator.evaluate_abstention( + results, {**all_ndcgs, **all_aps, **all_recalls, **all_precisions} + ) + return ndcg, _map, recall, precision, naucs + + @staticmethod + def evaluate_abstention( + results: dict[str, dict[str, float]], + metric_scores: dict[str, list[float]], + ) -> Dict[str, float]: + """Computes normalized Area Under the Curve on 
a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997""" + all_sim_scores = [list(results[qid].values()) for qid in list(results.keys())] + all_conf_scores = [ + confidence_scores(sim_scores) for sim_scores in all_sim_scores + ] + conf_fcts = list(all_conf_scores[0].keys()) + all_conf_scores = { + fct: np.array([x[fct] for x in all_conf_scores]) for fct in conf_fcts + } + metric_scores = {k: np.array(v) for k, v in metric_scores.items()} + naucs = {} + + for metric_name, scores in metric_scores.items(): + for fct, conf_scores in all_conf_scores.items(): + naucs[f"nAUC_{metric_name}_{fct}"] = nAUC(conf_scores, scores) + + return naucs + + @staticmethod + def evaluate_custom( + qrels: dict[str, dict[str, int]], + results: dict[str, dict[str, float]], + k_values: List[int], + metric: str, + output_type: str = "all", + ) -> Tuple[Dict[str, float]]: + if metric.lower() in ["mrr", "mrr@k", "mrr_cut"]: + metric_scores = mrr(qrels, results, k_values, output_type) + + elif metric.lower() in ["recall_cap", "r_cap", "r_cap@k"]: + metric_scores = recall_cap(qrels, results, k_values, output_type) + + elif metric.lower() in ["hole", "hole@k"]: + metric_scores = hole(qrels, results, k_values, output_type) + + elif metric.lower() in [ + "acc", + "top_k_acc", + "accuracy", + "accuracy@k", + "top_k_accuracy", + ]: + metric_scores = top_k_accuracy(qrels, results, k_values, output_type) + + naucs = RetrievalEvaluator.evaluate_abstention(results, metric_scores) + metric_scores_avg = {k: sum(v) / len(v) for k, v in metric_scores.items()} + + return metric_scores_avg, naucs diff --git a/dgeb/modality.py b/dgeb/modality.py new file mode 100644 index 0000000..88c23c9 --- /dev/null +++ b/dgeb/modality.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class Modality(Enum): + """Data modality, either DNA or protein sequence.""" + + PROTEIN = "protein" + DNA = "dna" diff --git a/dgeb/models.py b/dgeb/models.py new file mode 100644 index 0000000..bce7b50 --- 
/dev/null +++ b/dgeb/models.py @@ -0,0 +1,481 @@ +import logging +import re +from abc import ABC, abstractmethod +from functools import partial +from types import SimpleNamespace +from typing import Dict, List, Literal, Optional + +import numpy as np +import torch +import tqdm as tqdm +from datasets import Dataset +from torch import Tensor +from torch.nn import functional as F +from torch.utils.data import DataLoader +from transformers import ( + AutoConfig, + AutoModel, + AutoModelForCausalLM, + AutoModelForMaskedLM, + AutoTokenizer, + BatchEncoding, + DefaultDataCollator, + T5EncoderModel, + T5Tokenizer, +) +from transformers.modeling_outputs import BaseModelOutput + +from .modality import Modality +from .eval_utils import ForwardHook, pool + +logger = logging.getLogger(__name__) + + +class BioSeqTransformer(ABC): + """ + Abstract class to wrap models which map biological sequences (DNA/Prot) to embeddings. + Modelled after SentenceTransformer (https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/SentenceTransformer.py) + + Args: + model_name: Name or path to the pretrained model. + layers: List of model layers to probe. Can be integers or "mid" or "last". + devices: List of device ids for inference. If cuda is not available, will use cpu. + num_processes: Number of processes to use for data loading. + max_seq_length: Maximum sequence length of the input sequences. + l2_norm: If true, embeddings are L2-normalized before they are returned. + batch_size: Batch size for encoding. + pool_type: Pooling strategy to use. One of "mean", "max", "cls", "last". 
+ """ + + def __init__( + self, + model_name: str, + layers: Optional[List[int] | Literal["mid"] | Literal["last"]] = None, + devices: List[int] = [0], + num_processes: int = 16, + max_seq_length: int = 1024, + l2_norm: bool = False, + batch_size: int = 128, + pool_type: str = "mean", + ): + super().__init__() + + self.id = self.__class__.__name__ + self.hf_name = model_name + self.encoder = self._load_model(model_name) + if not hasattr(self.encoder, "config"): + raise ValueError( + 'The model from `self._load_model()` must have a "config" attribute.' + ) + self.config = self.encoder.config + self.tokenizer = self._get_tokenizer(model_name) + self.num_param = sum(p.numel() for p in self.encoder.parameters()) + self.data_collator = DefaultDataCollator() + self.gpu_count = len(devices) + self.l2_norm = l2_norm + + self.device = torch.device( + f"cuda:{devices[0]}" if torch.cuda.is_available() else "cpu" + ) + self.num_processes = num_processes + self.max_seq_length = max_seq_length + self.batch_size = batch_size + self.pool_type = pool_type + + if self.gpu_count > 1: + self.encoder = torch.nn.DataParallel(self.encoder, device_ids=devices) + self.encoder.to(self.device) + self.encoder.eval() + + mid_layer = self.num_layers // 2 + last_layer = self.num_layers - 1 + mid_layer_label = f"mid ({mid_layer})" + last_layer_label = f"last ({self.num_layers - 1})" + + if layers is None: + logger.debug(f"Using default layers: {mid_layer_label}, {last_layer_label}") + self.layers = [mid_layer, last_layer] + self.layer_labels = [mid_layer_label, last_layer_label] + elif layers == "mid": + self.layers = [mid_layer] + self.layer_labels = [mid_layer_label] + elif layers == "last": + self.layers = [last_layer] + self.layer_labels = [last_layer_label] + else: + self.layers = layers + self.layer_labels = [str(layer) for layer in layers] + + def _encode_single_batch(self, batch_dict: Dict[str, Tensor]): + """Returns the output embedding for the given batch with shape [batch, num_layers, 
D].""" + outputs = self.encoder(**batch_dict, output_hidden_states=True) + embeds = [outputs.hidden_states[layer] for layer in self.layers] + embeds = [ + pool(layer_embeds, batch_dict["attention_mask"], self.pool_type) + for layer_embeds in embeds + ] + # Stack with shape [B, num_layers, D]. + embeds = torch.stack(embeds, dim=1) + return embeds + + def _load_model(self, model_name): + return AutoModel.from_pretrained(model_name, trust_remote_code=True) + + def _get_tokenizer(self, model_name): + return AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) + + def _tokenize_func( + self, tokenizer, examples: Dict[str, List], max_seq_length: int + ) -> BatchEncoding: + batch_dict = tokenizer( + examples["input_seqs"], + max_length=max_seq_length, + padding=True, + truncation=True, + ) + return batch_dict + + @property + def metadata(self) -> Dict: + return { + "hf_name": self.hf_name, + "num_layers": self.num_layers, + "num_params": self.num_param, + "embed_dim": self.embed_dim, + } + + @property + @abstractmethod + def num_layers(self) -> int: + pass + + @property + @abstractmethod + def embed_dim(self) -> int: + pass + + @property + @abstractmethod + def modality(self) -> Modality: + pass + + @torch.no_grad() + def encode(self, sequences, **kwargs) -> np.ndarray: + """Returns a list of embeddings for the given sequences. + Args: + sequences (`List[str]`): List of sequences to encode + Returns: + `np.ndarray`: Embeddings for the given sequences of shape [num_sequences, num_layers, embedding_dim]. 
+ """ + dataset = Dataset.from_dict({"input_seqs": sequences}) + dataset.set_transform( + partial( + self._tokenize_func, self.tokenizer, max_seq_length=self.max_seq_length + ) + ) + data_loader = DataLoader( + dataset, + batch_size=self.batch_size * self.gpu_count, + shuffle=False, + drop_last=False, + num_workers=self.num_processes, + collate_fn=self.data_collator, + pin_memory=True, + ) + + if max(self.layers) >= self.num_layers: + raise ValueError( + f"Layer {max(self.layers)} is not available in the model. Choose a layer between 0 and {self.num_layers - 1}" + ) + + encoded_embeds = [] + for batch_dict in tqdm.tqdm( + data_loader, desc="encoding", mininterval=10, disable=len(sequences) < 128 + ): + batch_dict = {k: v.to(self.device) for k, v in batch_dict.items()} + + embeds = self._encode_single_batch(batch_dict) + + if self.l2_norm: + embeds = F.normalize(embeds, p=2, dim=-1) + encoded_embeds.append(embeds.cpu().numpy()) + + return np.concatenate(encoded_embeds, axis=0) + + +class ESM(BioSeqTransformer): + """ESM model from https://huggingface.co/docs/transformers/en/model_doc/esm""" + + MODEL_NAMES = [ + "facebook/esm2_t6_8M_UR50D", + "facebook/esm2_t12_35M_UR50D", + "facebook/esm2_t30_150M_UR50D", + "facebook/esm2_t33_650M_UR50D", + "facebook/esm2_t36_3B_UR50D", + "facebook/esm2_t48_15B_UR50D", + ] + + @property + def modality(self) -> Modality: + return Modality.PROTEIN + + @property + def num_layers(self) -> int: + return self.config.num_hidden_layers + + @property + def embed_dim(self) -> int: + return self.config.hidden_size + + +class ESM3(BioSeqTransformer): + """ESM3 model from https://github.com/evolutionaryscale/esm""" + + MODEL_NAMES = ["esm3_sm_open_v1"] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Register forward hooks to store embeddings per layer. 
+ self.hooks = [ + ForwardHook(self.encoder.transformer.blocks[layer]) for layer in self.layers + ] + + @property + def modality(self) -> Modality: + return Modality.PROTEIN + + @property + def num_layers(self) -> int: + return self.config.num_hidden_layers + + @property + def embed_dim(self) -> int: + return self.config.hidden_size + + def _load_model(self, model_name): + try: + from esm.models.esm3 import ESM3 as ModelESM3 + except ImportError: + raise ImportError( + "ESM3 is not installed. Please install it with `pip install esm`." + ) + model = ModelESM3.from_pretrained("esm3_sm_open_v1") + model.config = SimpleNamespace( + num_hidden_layers=len(model.transformer.blocks), + hidden_size=model.transformer.blocks[0].ffn[-1].out_features, + ) + return model + + def _get_tokenizer(self, model_name): + try: + from esm.tokenization.sequence_tokenizer import EsmSequenceTokenizer + except ImportError: + raise ImportError( + "ESM3 is not installed. Please install it with `pip install esm`." + ) + return EsmSequenceTokenizer() + + def _encode_single_batch(self, batch_dict: Dict[str, Tensor]): + _ = self.encoder.forward(sequence_tokens=batch_dict["input_ids"]) + embeds = [hook.output for hook in self.hooks] + embeds = [ + pool(layer_embeds, batch_dict["attention_mask"], self.pool_type) + for layer_embeds in embeds + ] + # Stack with shape [B, num_layers, D]. 
+ embeds = torch.stack(embeds, dim=1) + embeds = embeds.to(torch.float32) + return embeds + + +class ProtT5(BioSeqTransformer): + """ProtT5 model from https://github.com/agemagician/ProtTrans""" + + MODEL_NAMES = [ + "Rostlab/prot_t5_xl_uniref50", + "Rostlab/prot_t5_xl_bfd", + "Rostlab/prot_t5_xxl_uniref50", + "Rostlab/prot_t5_xxl_bfd", + ] + + @property + def modality(self) -> Modality: + return Modality.PROTEIN + + @property + def num_layers(self) -> int: + return self.config.num_layers + + @property + def embed_dim(self) -> int: + return self.config.d_model + + def _load_model(self, model_name): + return T5EncoderModel.from_pretrained(model_name) + + def _get_tokenizer(self, model_name): + return T5Tokenizer.from_pretrained(model_name, do_lower_case=False) + + def _tokenize_func( + self, tokenizer, examples: Dict[str, List], max_seq_length: int + ) -> BatchEncoding: + example_sequences = examples["input_seqs"] + # Add space between amino acids to make sure they are tokenized correctly. 
+ example_sequences = [" ".join(sequence) for sequence in example_sequences] + example_sequences = [ + re.sub(r"[UZOB]", "X", sequence) for sequence in example_sequences + ] + batch_dict = tokenizer( + example_sequences, + max_length=max_seq_length, + padding=True, + truncation=True, + add_special_tokens=True, + ) + + return batch_dict + + +class ProGen(BioSeqTransformer): + """ProGen models from https://github.com/salesforce/progen.""" + + MODEL_NAMES = [ + "hugohrban/progen2-small", + "hugohrban/progen2-medium", + "hugohrban/progen2-base", + "hugohrban/progen2-large", + "hugohrban/progen2-xlarge", + ] + + @property + def modality(self) -> Modality: + return Modality.PROTEIN + + @property + def num_layers(self) -> int: + return self.config.n_layer + + @property + def embed_dim(self) -> int: + return self.config.embed_dim + + def _load_model(self, model_name): + return AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) + + def _get_tokenizer(self, model_name_or_path): + tokenizer = AutoTokenizer.from_pretrained( + model_name_or_path, trust_remote_code=True + ) + tokenizer.pad_token = "<|pad|>" + return tokenizer + + def _encode_single_batch(self, batch_dict: Dict[str, Tensor]): + """Returns the output embedding for the given batch with shape [batch, num_layers, D].""" + outputs: BaseModelOutput = self.encoder( + input_ids=batch_dict["input_ids"], + output_hidden_states=True, + use_cache=False, + ) + embeds = [outputs.hidden_states[layer] for layer in self.layers] + embeds = [ + pool(layer_embeds, batch_dict["attention_mask"], self.pool_type) + for layer_embeds in embeds + ] + # Stack with shape [B, num_layers, D]. 
+ embeds = torch.stack(embeds, dim=1) + return embeds + + +class EvoModel(BioSeqTransformer): + """https://github.com/evo-design/evo.""" + + MODEL_NAMES = [ + "togethercomputer/evo-1-8k-base", + "togethercomputer/evo-1-131k-base", + ] + + @property + def modality(self) -> Modality: + return Modality.DNA + + @property + def num_layers(self) -> int: + return self.config.num_layers + + @property + def embed_dim(self) -> int: + return self.config.hidden_size + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Register forward hooks to store embeddings per layer. + self.hooks = [] + for layer in self.layers: + # For the last layer, get the output of `backbone.norm`, which directly precedes `backbone.unembed`. + # This is equivalent to the approach in https://github.com/evo-design/evo/issues/32. + if layer == self.num_layers - 1 or layer == -1: + self.hooks.append(ForwardHook(self.encoder.backbone.norm)) + else: + self.hooks.append(ForwardHook(self.encoder.backbone.blocks[layer])) + + def _load_model(self, model_name): + config = AutoConfig.from_pretrained( + model_name, trust_remote_code=True, revision="1.1_fix" + ) + model = AutoModelForCausalLM.from_pretrained( + model_name, config=config, trust_remote_code=True, revision="1.1_fix" + ) + return model + + def _get_tokenizer(self, model_name): + tokenizer = AutoTokenizer.from_pretrained( + model_name, revision="1.1_fix", trust_remote_code=True + ) + # Evo tokenizer is missing pad_token by default. + tokenizer.add_special_tokens({"pad_token": "N"}) + return tokenizer + + def _encode_single_batch(self, batch_dict: Dict[str, Tensor]): + _ = self.encoder(batch_dict["input_ids"], use_cache=False) + embeds = [hook.output for hook in self.hooks] + # The hook output for Evo middle layers is a tuple (embedding, inference_params=None). 
+ embeds = [x[0] if isinstance(x, tuple) else x for x in embeds] + embeds = [ + pool(layer_embeds, batch_dict["attention_mask"], self.pool_type) + for layer_embeds in embeds + ] + # Stack with shape [B, num_layers, D]. + embeds = torch.stack(embeds, dim=1) + embeds = embeds.to(torch.float32) + return embeds + + +class NTModel(BioSeqTransformer): + """Nucleotide Transformer https://github.com/instadeepai/nucleotide-transformer""" + + MODEL_NAMES = [ + "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", + "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", + "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", + "InstaDeepAI/nucleotide-transformer-2.5b-multi-species", + ] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.max_seq_length = self.tokenizer.model_max_length + + @property + def modality(self) -> Modality: + return Modality.DNA + + @property + def num_layers(self) -> int: + return self.config.num_hidden_layers + + @property + def embed_dim(self) -> int: + return self.config.hidden_size + + def _load_model(self, model_name): + return AutoModelForMaskedLM.from_pretrained(model_name, trust_remote_code=True) diff --git a/dgeb/tasks/__init__.py b/dgeb/tasks/__init__.py new file mode 100644 index 0000000..8cec126 --- /dev/null +++ b/dgeb/tasks/__init__.py @@ -0,0 +1,16 @@ +# ruff: noqa: F403 + +from .tasks import Dataset, Task, TaskMetadata, TaskResult +from .eds_tasks import * +from .pair_classification_tasks import * +from .retrieval_tasks import * +from .classification_tasks import * +from .clustering_tasks import * +from .bigene_mining_tasks import * + +__all__ = [ + "Dataset", + "Task", + "TaskMetadata", + "TaskResult", +] diff --git a/dgeb/tasks/bigene_mining_tasks.py b/dgeb/tasks/bigene_mining_tasks.py new file mode 100644 index 0000000..45d7679 --- /dev/null +++ b/dgeb/tasks/bigene_mining_tasks.py @@ -0,0 +1,77 @@ +""" +Bigene mining tasks 
are analogous to bitext matching tasks, but for genes. +Cosine similarity is used to mine genes of related functions from different organisms. +""" + +import logging +from collections import defaultdict + +from dgeb.evaluators import BiGeneMiningEvaluator +from dgeb.modality import Modality +from dgeb.models import BioSeqTransformer +from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult + +logger = logging.getLogger(__name__) + + +def run_bigene_mining_tasks( + model: BioSeqTransformer, metadata: TaskMetadata, top_k: int = 1 +) -> TaskResult: + """Evaluate bigene mining task. Utilizes the BiGeneMiningEvaluator.""" + if len(metadata.datasets) != 1: + raise ValueError("BiGeneMining tasks require 1 dataset.") + ds = metadata.datasets[0].load()["train"] + layer_results = defaultdict(dict) + embeds1 = model.encode(ds["Seq1"]) + embeds2 = model.encode(ds["Seq2"]) + for i, layer in enumerate(model.layers): + evaluator = BiGeneMiningEvaluator(embeds1[:, i], embeds2[:, i], top_k=top_k) + layer_results["layers"][layer] = evaluator() + logger.info( + f"Layer: {layer}, {metadata.display_name} matching results: {layer_results['layers'][layer]}" + ) + return TaskResult.from_dict(metadata, layer_results, model.metadata) + + +class BacArchBiGeneMining(Task): + metadata = TaskMetadata( + id="bacarch_bigene", + display_name="BacArch BiGene", + description="Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + type="bigene_mining", + modality=Modality.PROTEIN, + datasets=[ + Dataset( + path="tattabio/bac_arch_bigene", + revision="d5a65e44bae43a9ba9f2fdc03056dff9c12f6631", + ) + ], + primary_metric_id="f1", + ) + + def run(self, model: BioSeqTransformer) -> TaskResult: + return run_bigene_mining_tasks(model, self.metadata) + + +class ModACParalogyBiGeneMining(Task): + # ModAC Paralogy matching with top_k=1 is too strict (most models have accuracy < 0.1%) + # Instead use recall@50 as the 
main metric. + TOP_K = 50 + + metadata = TaskMetadata( + id="modac_paralogy_bigene", + display_name="ModAC Paralogy BiGene", + description="Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + type="bigene_mining", + modality=Modality.PROTEIN, + datasets=[ + Dataset( + path="tattabio/modac_paralogy_bigene", + revision="241ca6397856e3360da04422d54933035b1fab87", + ) + ], + primary_metric_id=f"recall_at_{TOP_K}", + ) + + def run(self, model: BioSeqTransformer) -> TaskResult: + return run_bigene_mining_tasks(model, self.metadata, top_k=self.TOP_K) diff --git a/dgeb/tasks/classification_tasks.py b/dgeb/tasks/classification_tasks.py new file mode 100644 index 0000000..4da268b --- /dev/null +++ b/dgeb/tasks/classification_tasks.py @@ -0,0 +1,213 @@ +""" +Classification tasks take in biological sequence and functional labels. +Multi-class and/or multi-label classification tasks are supported. +""" + +import logging +from collections import defaultdict + +import datasets +import numpy as np + +from dgeb.eval_utils import merge_split_elem_embeds +from dgeb.evaluators import ( + MultiClassMultiOutputKNNClassificationEvaluator, + logRegClassificationEvaluator, +) +from dgeb.modality import Modality +from dgeb.models import BioSeqTransformer +from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult + +logger = logging.getLogger(__name__) + + +def split_sequences( + ds: datasets.DatasetDict, max_seq_length: int +) -> datasets.DatasetDict: + """Split sequences into chunks of max_seq_length using datasets.Dataset.map().""" + + def _split_sequence(examples, max_seq_length): + assert ( + len(examples["Sequence"]) == 1 + ), "split map function should use batch size of 1." + example = {k: v[0] for k, v in examples.items()} + seq = example["Sequence"] + # Split by chunks of max_seq_length. + seq_split = [ + seq[i : i + max_seq_length] for i in range(0, len(seq), max_seq_length) + ] + # Repeat other fields by the number of splits. 
+ example = { + k: [v] * len(seq_split) for k, v in example.items() if k != "Sequence" + } + example["Sequence"] = seq_split + return example + + ds = ds.map( + _split_sequence, + batched=True, + batch_size=1, + fn_kwargs={"max_seq_length": max_seq_length}, + keep_in_memory=True, + load_from_cache_file=False, + ) + return ds + + +def run_classification_task( + model: BioSeqTransformer, metadata: TaskMetadata +) -> TaskResult: + """Evaluate on classification tasks using logistic regression classifier.""" + ds = metadata.datasets[0].load() + layer_results = defaultdict(dict) + train_embeds = model.encode(ds["train"]["Sequence"]) + test_embeds = model.encode(ds["test"]["Sequence"]) + for i, layer in enumerate(model.layers): + layer_results["layers"][layer] = logRegClassificationEvaluator( + train_embeds[:, i], + ds["train"]["Label"], + test_embeds[:, i], + ds["test"]["Label"], + )() + logger.info( + f"Layer: {layer}, {metadata.display_name} results: {layer_results['layers'][layer]}" + ) + return TaskResult.from_dict(metadata, layer_results, model.metadata) + + +class EnzymeCommissionClassification(Task): + metadata = TaskMetadata( + id="ec_classification", + display_name="EC Classification", + description="Evaluate on Enzyme Commission number classification task.", + type="classification", + modality=Modality.PROTEIN, + datasets=[ + Dataset( + path="tattabio/ec_classification", + revision="ead5570168e6969a5149f6861e8a33d6b5d22498", + ) + ], + primary_metric_id="f1", + ) + + def run(self, model: BioSeqTransformer) -> TaskResult: + return run_classification_task(model, self.metadata) + + +class EnzymeCommissionDNAClassification(Task): + metadata = TaskMetadata( + id="ec_dna_classification", + display_name="EC Classification", + description="Evaluate on Enzyme Commission number classification task using DNA sequences.", + type="classification", + modality=Modality.DNA, + datasets=[ + Dataset( + path="tattabio/ec_classification_dna", + 
revision="cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd", + ) + ], + primary_metric_id="f1", + ) + + def run(self, model: BioSeqTransformer) -> TaskResult: + return run_classification_task(model, self.metadata) + + +class ConvergentEnzymesClassification(Task): + metadata = TaskMetadata( + id="convergent_enzymes_classification", + display_name="Convergent Enzymes Classification", + description="Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + type="classification", + modality=Modality.PROTEIN, + datasets=[ + Dataset( + path="tattabio/convergent_enzymes", + revision="37f75609f54de2bc0911ccb72faf1c2f5a4285aa", + ) + ], + primary_metric_id="f1", + ) + + def run(self, model: BioSeqTransformer) -> TaskResult: + return run_classification_task(model, self.metadata) + + +def run_mibig_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult: + """ + Evaluate on MIBIG classification tasks. Multiclass, multi-label KNN classification is used for evaluation. + """ + ds = metadata.datasets[0].load() + if metadata.modality == Modality.DNA: + # MIBiG DNA sequences can be very long. Instead of truncating to max_seq_length, + # split into multiple sequences and mean pool the resulting embeddings. + ds = split_sequences(ds, model.max_seq_length) + + layer_results = defaultdict(dict) + train_embeds = model.encode(ds["train"]["Sequence"]) + test_embeds = model.encode(ds["test"]["Sequence"]) + + train_ids = ds["train"]["Entry"] + test_ids = ds["test"]["Entry"] + train_labels = ds["train"]["class"] + test_labels = ds["test"]["class"] + train_id_to_label = {id: label for id, label in zip(train_ids, train_labels)} + test_id_to_label = {id: label for id, label in zip(test_ids, test_labels)} + # Mean pool embeds with the same ID. 
+ train_ids, train_embeds = merge_split_elem_embeds(train_ids, train_embeds) + test_ids, test_embeds = merge_split_elem_embeds(test_ids, test_embeds) + # Gather the labels after merging by unique ID. + train_labels = np.array([train_id_to_label[id] for id in train_ids]) + test_labels = np.array([test_id_to_label[id] for id in test_ids]) + + for i, layer in enumerate(model.layers): + evaluator = MultiClassMultiOutputKNNClassificationEvaluator( + train_embeds[:, i], train_labels, test_embeds[:, i], test_labels + ) + layer_results["layers"][layer] = evaluator() + logger.info( + f"Layer: {layer}, MIBiG classification results: {layer_results['layers'][layer]}" + ) + return TaskResult.from_dict(metadata, layer_results, model.metadata) + + +class MIBiGProteinClassification(Task): + metadata = TaskMetadata( + id="MIBIG_protein_classification", + display_name="MIBiG Classification", + description="Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + type="classification", + modality=Modality.PROTEIN, + datasets=[ + Dataset( + path="tattabio/mibig_classification_prot", + revision="915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f", + ) + ], + primary_metric_id="f1", + ) + + def run(self, model: BioSeqTransformer) -> TaskResult: + return run_mibig_task(model, self.metadata) + + +class MIBiGDNAClassification(Task): + metadata = TaskMetadata( + id="MIBIG_dna_classification", + display_name="MIBiG Classification", + description="Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.", + type="classification", + modality=Modality.DNA, + datasets=[ + Dataset( + path="tattabio/mibig_classification_dna", + revision="b5ca7a76d469e4e66c46f1b655903972571e6b61", + ) + ], + primary_metric_id="f1", + ) + + def run(self, model: BioSeqTransformer) -> TaskResult: + return run_mibig_task(model, self.metadata) diff --git a/dgeb/tasks/clustering_tasks.py b/dgeb/tasks/clustering_tasks.py new file mode 100644 index 0000000..ba441f8 --- /dev/null 
def run_clustering_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate clustering task. Utilizes the ClusteringEvaluator.

    Embeds the "Sequence" column of the single dataset's "train" split and
    scores every model layer against the "Label" column.

    Raises:
        ValueError: if the task does not declare exactly one dataset.
    """
    if len(metadata.datasets) != 1:
        raise ValueError("Clustering tasks require 1 dataset.")

    train_split = metadata.datasets[0].load()["train"]
    embeddings = model.encode(train_split["Sequence"])

    per_layer = defaultdict(dict)
    for layer_idx, layer in enumerate(model.layers):
        # Second axis of the embeddings indexes the requested layers.
        scores = ClusteringEvaluator(
            embeddings[:, layer_idx], train_split["Label"]
        )()
        per_layer["layers"][layer] = scores
        logger.info(f"Layer: {layer}, {metadata.display_name} results: {scores}")
    return TaskResult.from_dict(metadata, per_layer, model.metadata)
def run_eds_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate phylogeny distance correlation task. Utilizes the Evolutionary Distance Similarity (EDS) evaluator.

    ``metadata.datasets`` must hold two datasets, in this order:

    1. sequences: "train" split with "Entry" and "Sequence" columns;
    2. distances: "train" split with "ID1", "ID2" and "distance" columns,
       where the IDs refer to "Entry" values of the first dataset.

    For every model layer the embeddings of both pair members are gathered and
    handed to ``EDSEvaluator`` together with the label distances.

    Raises:
        ValueError: if the task does not declare exactly two datasets.
        KeyError: if a pair references an ID missing from the sequence dataset.
    """
    if len(metadata.datasets) != 2:
        raise ValueError("Phylogeny tasks require 2 datasets: sequences and distances.")

    ds = metadata.datasets[0].load()["train"]
    distance_df = metadata.datasets[1].load()["train"].to_pandas()
    # Narrows the type for static checkers.
    assert isinstance(
        distance_df, pd.DataFrame
    ), f"Expected DataFrame, got {type(distance_df)}"

    id_index_dict = {k: i for i, k in enumerate(ds["Entry"])}
    # Resolve every pair to embedding row indices once, instead of walking the
    # DataFrame row by row for each layer. An explicit integer dtype keeps the
    # index arrays usable even when there are no pairs.
    first_rows = np.array(
        [id_index_dict[entry] for entry in distance_df["ID1"]], dtype=np.intp
    )
    second_rows = np.array(
        [id_index_dict[entry] for entry in distance_df["ID2"]], dtype=np.intp
    )
    # The label distances do not depend on the layer.
    dists = np.array(distance_df["distance"].tolist())

    # NOTE(review): assumes encode() returns an array-like indexed as
    # [sequence, layer, ...]; asarray is a no-op for ndarray input - confirm.
    test_embeds = np.asarray(model.encode(ds["Sequence"]))
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        embeds1 = test_embeds[first_rows, i]
        embeds2 = test_embeds[second_rows, i]
        evaluator = EDSEvaluator(embeds1, embeds2, dists)
        layer_results["layers"][layer] = evaluator()
        # log results
        logger.info(
            f"Layer: {layer}, {metadata.display_name} distance correlation results: {layer_results['layers'][layer]}"
        )

    return TaskResult.from_dict(metadata, layer_results, model.metadata)
class RpobArchPhylogeny(Task):
    """EDS task on archaeal RpoB protein sequences.

    Declares two datasets (sequences, then pairwise distances) and delegates
    the evaluation to ``run_eds_task``.
    """

    metadata = TaskMetadata(
        id="rpob_arch_phylogeny",
        display_name="RpoB Archaeal Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/rpob_arch_phylogeny_sequences",
                revision="10de75b9f5ad12340d629fd1ad015ef4319d6ee4",
            ),
            Dataset(
                path="tattabio/rpob_arch_phylogeny_distances",
                revision="2a585f0e135fe74b8ae6d31e7801c6031b0dcc18",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Run the EDS evaluation for ``model`` using this task's metadata."""
        return run_eds_task(model, self.metadata)
class FeFePhylogeny(Task):
    """EDS task on FeFe hydrogenase protein sequences.

    Declares two datasets (sequences, then pairwise distances) and delegates
    the evaluation to ``run_eds_task``.
    """

    metadata = TaskMetadata(
        id="fefe_phylogeny",
        display_name="FeFeHydrogenase Phylogeny",
        description="Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/fefe_phylogeny_sequences",
                revision="bce06d79d9ce58413e7bcbed6943905d1afb8b26",
            ),
            Dataset(
                path="tattabio/fefe_phylogeny_distances",
                revision="d6357cee9b4071a8dcdeef54083006f0d5e94fd2",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Run the EDS evaluation for ``model`` using this task's metadata."""
        return run_eds_task(model, self.metadata)
def run_pair_classification_task(
    model: BioSeqTransformer, metadata: TaskMetadata
) -> TaskResult:
    """Evaluate pair classification task. Utilizes the PairClassificationEvaluator.

    Embeds the "Sequence" column of the single dataset's "train" split, turns
    the per-sequence labels and embeddings into pairs with ``paired_dataset``
    and scores every model layer.

    Raises:
        ValueError: if the task does not declare exactly one dataset.
    """
    if len(metadata.datasets) != 1:
        raise ValueError("Pair classification tasks require 1 dataset.")

    train_split = metadata.datasets[0].load()["train"]
    all_embeds = model.encode(train_split["Sequence"])

    per_layer = defaultdict(dict)
    for layer_idx, layer in enumerate(model.layers):
        # The label column is read afresh for every layer, so each call to
        # paired_dataset receives its own copy of the labels.
        left, right, pair_labels = paired_dataset(
            train_split["Label"], all_embeds[:, layer_idx]
        )
        scores = PairClassificationEvaluator(left, right, pair_labels)()
        per_layer["layers"][layer] = scores
        logger.info(
            f"Layer: {layer}, {metadata.display_name} classification results: {scores}"
        )
    return TaskResult.from_dict(metadata, per_layer, model.metadata)
def run_retrieval_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate retrieval task. Utilizes the Retrieval evaluator.

    ``metadata.datasets`` must hold two datasets, in this order:

    1. sequences: the "train" split is the corpus and the "test" split holds
       the queries; both provide "Entry" and "Sequence" columns;
    2. qrels: rows with "query_id", "corpus_id" and "fuzz_ratio" columns.

    Raises:
        ValueError: if the task does not declare exactly two datasets.
    """
    if len(metadata.datasets) != 2:
        # The message now matches the check: corpus and queries share one dataset.
        raise ValueError(
            "Retrieval tasks require 2 datasets: corpus/query sequences and qrels."
        )

    # Load the sequence dataset once; corpus and queries are two of its splits.
    sequence_splits = metadata.datasets[0].load()
    corpus_ds = sequence_splits["train"]
    query_ds = sequence_splits["test"]
    qrels_splits = metadata.datasets[1].load()

    corpus_embeds = model.encode(corpus_ds["Sequence"])
    query_embeds = model.encode(query_ds["Sequence"])

    # Populate `qrels` from the dataset.
    # See https://github.com/cvangysel/pytrec_eval for qrels format.
    # Rows are iterated directly rather than through `map`, which exists to
    # transform datasets and is not meant to be used only for side effects.
    qrels = defaultdict(dict)
    for split in qrels_splits.values():
        for row in split:
            qrels[str(row["query_id"])][str(row["corpus_id"])] = int(
                row["fuzz_ratio"]
            )

    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        evaluator = RetrievalEvaluator(
            corpus_embeds[:, i],
            query_embeds[:, i],
            corpus_ds["Entry"],
            query_ds["Entry"],
            qrels,
        )
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, Retrieval results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)
# HACK: if Modality is not defined, then import it from modality.py
try:
    from ..modality import Modality
except ImportError:
    # if not, super hack to get the leaderboard working.
    # SHOULD MATCH the code exactly in modality.py
    # can we read the file and run that code?
    #
    # Only ImportError is handled: that is what the relative import raises when
    # this file is loaded without a parent package. Any other error coming out
    # of modality.py is a real bug and is allowed to propagate.
    # `Enum` is already imported at the top of this module.

    class Modality(Enum):
        """Data modality, either DNA or protein sequence."""

        PROTEIN = "protein"
        DNA = "dna"
# tasks.py
class TaskResult(BaseModel):
    """Result of running one task with one model, with metrics per layer."""

    dgeb_version: str
    task: "TaskMetadata"
    # TODO: Convert model to ModelMetadata
    model: GEBModel
    results: List[LayerResult]

    @model_validator(mode="after")
    def check_valid_primary_metric(self):
        """Ensure every layer result reports the task's primary metric.

        Raises:
            ValueError: if any layer has no metric whose id equals
                ``task.primary_metric_id``.
        """
        for result in self.results:
            if all(
                metric.id != self.task.primary_metric_id for metric in result.metrics
            ):
                raise ValueError(
                    f"Primary metric {self.task.primary_metric_id} not found in results.metrics"
                )
        return self

    @staticmethod
    def from_dict(
        task_metadata: "TaskMetadata",
        layer_results: dict,
        model_metadata: GEBModel,
    ):
        """Build a ``TaskResult`` from raw per-layer metric values.

        Args:
            task_metadata: metadata of the task that was run.
            layer_results: mapping with a "layers" key whose value maps each
                layer (convertible with ``int``) to a ``{metric_id: value}``
                mapping.
            model_metadata: metadata of the evaluated model.

        Returns:
            A ``TaskResult`` stamped with the installed ``dgeb`` version.
        """
        return TaskResult(
            dgeb_version=version("dgeb"),
            task=task_metadata,
            model=model_metadata,
            results=[
                LayerResult(
                    layer_number=int(layer),
                    layer_display_name=str(layer),
                    metrics=[
                        TaskMetric(id=metric, display_name=metric, value=value)
                        for metric, value in metrics.items()
                    ],
                )
                for layer, metrics in layer_results["layers"].items()
            ],
        )
class Task(ABC):
    """Abstract base class for a benchmark task.

    Subclasses provide a class-level ``metadata`` describing the task and
    implement ``run`` to evaluate a model and return a ``TaskResult``.
    """

    # Static description of the task (id, datasets, primary metric, ...).
    metadata: TaskMetadata

    # using Any instead of "BioSeqTransformer" to avoid installing all deps in leaderboard
    @abstractmethod
    def run(self, model: Any, layers: Optional[List[int]] = None) -> TaskResult:
        """Evaluate ``model`` on this task and return its result.

        NOTE(review): the concrete tasks visible in this change define
        ``run(self, model)`` without a ``layers`` parameter - confirm whether
        ``layers`` is still part of the intended interface.
        """
        pass
def load_results() -> List[TaskResult]:
    """
    Recursively load JSON files in ./submissions/** and return a list of TaskResult objects.

    :return: One validated TaskResult per JSON file that could be loaded.
    :raises ValidationError: if a loaded JSON document is not a valid TaskResult.
    """
    submissions_path = Path("./submissions")
    json_contents = load_json_files_from_directory(submissions_path)

    task_results_objects = []
    for content in json_contents:
        try:
            # `model_validate` is the pydantic v2 API; `parse_obj_as` is
            # deprecated there and emits a deprecation warning.
            task_result = TaskResult.model_validate(content)
        except ValidationError as e:
            print(f"Error parsing TaskResult object: {e}")
            # Bare raise keeps the original traceback intact.
            raise
        task_results_objects.append(task_result)

    return task_results_objects
def task_results_to_df(model_results: List[TaskResult]) -> pd.DataFrame:
    """Flatten task results into one DataFrame row per (task, model, layer).

    Only layers labelled "mid" or "last" are kept. A layer is relabelled when
    its number equals ``ceil((num_layers - 1) / 2)`` ("mid") or
    ``num_layers - 1`` ("last"). Each kept row carries the layer's metrics as
    columns, with the primary metric first. One extra "DGEB Score" row per
    model is appended at the end.
    """
    rows = []
    models_by_name = {}
    for task_result in model_results:
        task = task_result.task
        model = task_result.model
        models_by_name[model.hf_name] = model
        print(f"Processing {task.display_name} for {model.hf_name}")
        for layer in task_result.results:
            last_layer = model.num_layers - 1
            mid_layer = math.ceil(last_layer / 2)
            # Relabel in place; a layer that is both mid and last counts as "mid".
            if layer.layer_number == mid_layer:
                layer.layer_display_name = "mid"
            elif layer.layer_number == last_layer:
                layer.layer_display_name = "last"

            if layer.layer_display_name not in ("mid", "last"):
                print(
                    f"Layer {layer.layer_number} is not mid or last out of {last_layer}. Skipping"
                )
                continue

            # Pivot the metrics into columns, tagging the primary metric.
            primary_label = f"{task.primary_metric_id} (primary metric)"
            labelled_metrics = [
                (
                    primary_label if metric.id == task.primary_metric_id else metric.id,
                    metric.value,
                )
                for metric in layer.metrics
            ]
            # Stable sort: primary metric first, the rest keep their order.
            labelled_metrics.sort(key=lambda pair: pair[0] != primary_label)
            rows.append(
                {
                    "Task Name": task.display_name,
                    "Task Category": task.type,
                    "Model": model.hf_name,
                    "Num. Parameters (millions)": format_num_params(model.num_params),
                    "Emb. Dimension": model.embed_dim,
                    "Modality": task.modality,
                    "Layer": layer.layer_display_name,
                    **dict(labelled_metrics),
                }
            )

    for model_name, model in models_by_name.items():
        matching = [res for res in model_results if res.model.hf_name == model_name]
        assert len(matching) > 0, f"No results found for model {model_name}"
        score_row = task_results_to_dgeb_score(model, matching)
        print(f'model {model.hf_name} dgeb score: {score_row["Score"]}')
        rows.append(score_row)

    print("Finished processing all results")
    return pd.DataFrame(rows)
+DGEB Leaderboard. To submit, refer to the DGEB GitHub repository Refer to the DGEB paper for details on metrics, tasks, and models. +
+""" + ) + + unique_categories = df["Task Category"].unique() + # sort "DGEB" to the start + unique_categories = sorted(unique_categories, key=lambda x: x != "DGEB") + for category in unique_categories: + with gr.Tab(label=category): + unique_tasks_in_category = df[df["Task Category"] == category][ + "Task Name" + ].unique() + # sort "Overall" to the start + unique_tasks_in_category = sorted( + unique_tasks_in_category, key=lambda x: x != "Overall" + ) + for task in unique_tasks_in_category: + with gr.Tab(label=task): + columns_to_hide = ["Task Name", "Task Category"] + # get rows where Task Name == task and Task Category == category + filtered_df = ( + df[ + (df["Task Name"] == task) + & (df["Task Category"] == category) + ].drop(columns=columns_to_hide) + ).dropna(axis=1, how="all") # drop all NaN columns for Overall tab + # round all values to 4 decimal places + rounded_df = filtered_df.round(SIG_FIGS) + + # calculate ranking column + # if in Overview tab, rank by average metric value + if task == "Overall": + # rank by average col + rounded_df["Rank"] = filtered_df["Average"].rank( + ascending=False + ) + else: + avoid_cols = [ + "Model", + "Emb. Dimension", + "Num. 
Parameters (millions)", + "Modality", + "Layer", + ] + rounded_df["Rank"] = ( + rounded_df.drop(columns=avoid_cols, errors="ignore") + .sum(axis=1) + .rank(ascending=False) + ) + # make Rank first column + cols = list(rounded_df.columns) + cols.insert(0, cols.pop(cols.index("Rank"))) + rounded_df = rounded_df[cols] + # sort by rank + rounded_df = rounded_df.sort_values("Rank") + data_frame = gr.DataFrame(rounded_df) + + +demo.launch(allowed_paths=["."]) diff --git a/leaderboard/requirements.txt b/leaderboard/requirements.txt new file mode 100644 index 0000000..d557bda --- /dev/null +++ b/leaderboard/requirements.txt @@ -0,0 +1,82 @@ +aiofiles==23.2.1 +aiohttp==3.9.5 +aiosignal==1.3.1 +altair==5.3.0 +annotated-types==0.7.0 +anyio==4.4.0 +attrs==23.2.0 +certifi==2024.6.2 +charset-normalizer==3.3.2 +click==8.1.7 +contourpy==1.2.1 +cycler==0.12.1 +datasets==2.14.4 +dill==0.3.7 +dnspython==2.6.1 +email-validator==2.1.2 +fastapi==0.111.0 +fastapi-cli==0.0.4 +ffmpy==0.3.2 +filelock==3.15.1 +fonttools==4.53.0 +frozenlist==1.4.1 +fsspec==2024.6.0 +gradio==4.37.2 +gradio-client==1.0.2 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +huggingface-hub==0.23.4 +idna==3.7 +importlib-resources==6.4.0 +jinja2==3.1.4 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +kiwisolver==1.4.5 +markdown-it-py==3.0.0 +markupsafe==2.1.5 +matplotlib==3.9.0 +mdurl==0.1.2 +multidict==6.0.5 +multiprocess==0.70.15 +numpy==2.0.0 +orjson==3.10.5 +packaging==24.1 +pandas==2.2.2 +pillow==10.3.0 +pyarrow==16.1.0 +pydantic==2.7.4 +pydantic-core==2.18.4 +pydub==0.25.1 +pygments==2.18.0 +pyparsing==3.1.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-multipart==0.0.9 +pytz==2024.1 +pyyaml==6.0.1 +referencing==0.35.1 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.18.1 +ruff==0.4.9 +semantic-version==2.10.0 +shellingham==1.5.4 +six==1.16.0 +sniffio==1.3.1 +starlette==0.37.2 +tomlkit==0.12.0 +toolz==0.12.1 +tqdm==4.66.4 +typer==0.12.3 +typing-extensions==4.12.2 +tzdata==2024.1 
+ujson==5.10.0 +urllib3==2.2.2 +uvicorn==0.30.1 +uvloop==0.19.0 +watchfiles==0.22.0 +websockets==11.0.3 +xxhash==3.4.1 +yarl==1.9.4 diff --git a/leaderboard/submissions/.DS_Store b/leaderboard/submissions/.DS_Store new file mode 100644 index 0000000..9832d4d Binary files /dev/null and b/leaderboard/submissions/.DS_Store differ diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json b/leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json new file mode 100644 index 0000000..854f854 --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6537260383267297 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6689342403628118 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7853286513915045 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6020175670931918 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.798563869992442 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + 
"description": null, + "value": 0.645844633541225 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.655328798185941 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7407876819384401 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.5970376985838431 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.7849584278155715 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json new file mode 100644 index 0000000..07555ea --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.84127 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.82701 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.79635 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.27329 + }, + { + "id": "map_at_10", + 
"display_name": "map_at_10", + "description": null, + "value": 0.37939 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.64453 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.2839 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.40033 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.70443 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.7621 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.69407 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.42452 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8853108550291645 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8879126611520968 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8892435700922602 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6178391415234327 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.27510768020625387 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.2751226626247053 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6158935362175889 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.29490376307826244 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.3173510395378902 + }, + { 
+ "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6282820888186709 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.217967587602592 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.3392167130961565 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.02706102865662817 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.33465305568189146 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.29252115202920864 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.1461797349288265 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.3984979781227535 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.15678893453735943 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5443958382387585 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.3379769732428374 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.23212587702223994 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.008899383756080657 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.3376357180005265 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 
0.2949278653804833 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.11957594632298725 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.4084900248156052 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.16409679466126934 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5478175261971683 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.3566768602643857 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.24770750166012404 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5588205820812548 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.053528426968584814 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.5895997876864452 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.5109397710788774 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": -0.0014360394688449447 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.5972188824684267 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.30493219390483955 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + 
"value": -0.35096314542920914 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.4163370977258702 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6041064087877195 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.2995447501683336 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.1176892239839227 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6055526314461911 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.3015594122136539 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.11951448723943421 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6050403183375579 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.3012299482545067 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.12091114334431136 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.82819 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.81615 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.78982 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.27067 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.37321 + }, + { + "id": 
"map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.63596 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.27906 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.39106 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.69746 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.7487 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.68506 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.42266 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8752382984777344 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.878253189168681 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8795454419523189 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6238124910465183 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.3878031710482511 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.22961445620397436 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6136556294192528 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.4027695454909326 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.23933162739820324 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": 
null, + "value": 0.6039490411056802 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.379240829313294 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.23134380586116654 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": -0.018274861348075953 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.3153330580523699 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.31839102956934573 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.10106646301687382 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4143687386138405 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.18923312509326384 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5144031685310609 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.45693618989546114 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.1513413062960939 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.031265621786664255 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.32028522957198785 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.32056979656535384 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": 
"nauc_recall_at_10_max", + "description": null, + "value": 0.07820354892522365 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.42551786412535775 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2040509113490322 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5060801621108716 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.5071691349011768 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.11952783139053508 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5923656191314365 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.1954332256400316 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.5508269378169939 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.5458701611463479 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.12975949111453675 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.5537528325655148 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.3549845967268747 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.26254902560124815 + }, + { + "id": "nauc_precision_at_50_diff1", + 
"display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3919186481758992 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6284613562335846 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.3609822238622607 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.13691647729285375 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6282780633119702 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.36649482857679033 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.1301211341279461 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6290574535816186 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.367920824556504 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.13036774230606793 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json new file mode 100644 index 0000000..66fb190 --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": 
"d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.6215094339622641 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.7056603773584905 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6469182389937107 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.7056603773584905 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.6138364779874214 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.7018867924528301 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6413836477987421 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.7018867924528301 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json new file mode 100644 index 0000000..622c6c9 --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + 
"datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.2475 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.20116666666666666 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.2425 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.19904761904761906 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json new file mode 100644 index 0000000..1d71bcd --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7203065134099617 + }, + { + "id": 
"cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.990619957447052 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.44058665070338227 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.815308690071106 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.28253358925143957 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 1.0 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.32424099100055437 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 40.061012268066406 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.43963963963963965 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 380.5898742675781 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.28218966846569005 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9945652173913043 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3051200502841412 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 2.2720906734466553 + }, + { + "id": "euclidean_f1", + 
"display_name": "euclidean_f1", + "description": null, + "value": 0.4404548174745661 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 25.41253662109375 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.28242517267843437 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 1.0 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3117112729287826 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7206896551724138 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1764.11328125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44177215189873426 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 1021.9218139648438 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.28795379537953797 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9483695652173914 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.35181607664099845 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.35181607664099845 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7206896551724138 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9833309650421143 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 
0.4454067429631921 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.8805520534515381 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.2883460152182619 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9782608695652174 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.3325946475342702 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.721455938697318 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 230.74539184570312 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4439615026389321 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 690.979248046875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.28772635814889336 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9714673913043478 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.33577510329678106 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7210727969348659 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 13.784924507141113 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.44413697682462816 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 39.12321472167969 + }, 
+ { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.29791183294663576 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8722826086956522 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.33823458280589236 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7191570881226054 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 10542.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.4403230631169608 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 4913.24560546875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.2823168392788646 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.28278909833025945 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.33823458280589236 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json new file mode 100644 index 0000000..b3bad3d --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": 
{ + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6015625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.55390625 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.5546875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5096354166666667 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json new file mode 100644 index 0000000..2ed5eb3 --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6309689383402874 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9664175510406494 + }, + { + "id": "cos_sim_f1", + 
"display_name": "cos_sim_f1", + "description": null, + "value": 0.5831148400629261 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.876137375831604 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.41972823351786614 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.954779622209502 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5226436718954207 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6237830319888734 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 151.0961456298828 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5765230312035661 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 417.6656494140625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4051044083526682 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9994275901545506 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5038561800803791 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.624246638850255 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 9.827131271362305 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5778148457047539 + }, + { + "id": "euclidean_f1_threshold", + "display_name": 
"euclidean_f1_threshold", + "description": null, + "value": 23.485851287841797 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.4077212806026365 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9914138523182598 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5109707609256201 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6200741770978211 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1509.6474609375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.576427863981512 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 827.195556640625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.40501043841336115 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9994275901545506 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.498147478687894 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5226436718954207 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6305053314789059 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9585829377174377 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5934650455927052 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 
0.9002124071121216 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.44412851862382713 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8941041785918717 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.545021841060869 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6342141863699583 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 444.21954345703125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.6035735322992343 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 612.2872314453125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.45935445307830247 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8797939324556382 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5574639922170803 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6339823829392675 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 29.62457275390625 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5996841689696012 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 38.6270751953125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.45766797228080747 + }, + { 
+ "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.86949055523755 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5553872058517757 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 14395.623046875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.577018736527939 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 5674.908203125 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4061624649859944 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9959931310818546 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3862357442891778 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5574639922170803 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json new file mode 100644 index 0000000..69e10b0 --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { 
+ "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.80067 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.79455 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.77429 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.30914 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.41095 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.60087 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.31905 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.43473 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.66233 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.7119 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.63408 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.3663 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8471596998928188 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8513856989741232 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8527043294326252 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.7166495695870103 
+ }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5383304196281262 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.38408074718110424 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.71056350273151 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.5386325626626473 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.3678412023083028 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6787542765531929 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.4678010355684318 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.3023078330221261 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.17506411594869709 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.344228905317099 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.26025197550499063 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.28364735198157687 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4946084063548821 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.13024980686869012 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 
0.6456837506614725 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.5024354435806796 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.18849105999507082 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.15537143366366737 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.3338972930408563 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.27534514133854515 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.24230061291494534 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.4763992415794819 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.17167004025145782 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.6062660448007379 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.45445564371902375 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.09621042247019258 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5420327575630611 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.37248428210075407 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.6517795575595553 + }, + { + "id": 
"nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.46182346579179107 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.2556997419766225 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.6371093546193429 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.22395520722060117 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.27077611986871364 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.4324048296185153 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7966902615822546 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.5623896062382641 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.27875113624180275 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7982850278647994 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.5623589312727257 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.27578274493030464 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7977600079745486 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.5625363754999084 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": 
"nauc_mrr_at_50_diff1", + "description": null, + "value": -0.2708948491113527 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.79574 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.7872 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.76804 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.30344 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.40308 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.59158 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.31068 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.41808 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.64688 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.70611 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.63055 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.36862 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8521436227224009 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8555504516919309 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8571980685347454 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.687147173549288 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + 
"description": null, + "value": 0.534917528750057 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.039388068191112346 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6821413074357394 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.541004104911246 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.06613569078084217 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6546658854714889 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.5141528362539365 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.045010206374762184 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.1717014705213338 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.298486867259319 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3158992753503486 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.29394629114728443 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4807193931287969 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.200767704240122 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.6266013107050147 + }, + { + "id": "nauc_map_at_50_std", + "display_name": 
"nauc_map_at_50_std", + "description": null, + "value": 0.5400967080146492 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.06821295960747309 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.15728927641821855 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.3020952193182204 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3196038571595756 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.273851179897414 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.4822263524474807 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.1998852576547706 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.610064992339158 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.5237697244132881 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.047861477876695854 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5642831983945668 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.41268016275342806 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.3902377594145758 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + 
"description": null, + "value": 0.4757631079174044 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.32238368240767273 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.4280345103983777 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.19318747544949869 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.2262940005534252 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.2898939009819229 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7559907957579797 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.5232164154691852 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.016325972601983724 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7604182097391701 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.5188685708290457 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.008720431706015956 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7617325890747185 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.5213157058041827 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.015621035073521741 + } + ] + } + ] 
+} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json new file mode 100644 index 0000000..b7fc4fa --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.46213607103563425 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.5621218764061721 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.5442663405841599 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.5621218764061721 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1524486344353939 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.5194125891005561 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.48868066660269227 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + 
"value": 0.5194125891005561 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json new file mode 100644 index 0000000..e8d7077 --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.03485254691689008 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": 
"accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.05361930294906166 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json b/leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json new file mode 100644 index 0000000..bfae754 --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7366377426487285 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7842647128962572 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json new file mode 100644 index 0000000..fb1d53f --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + 
{ + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2624971928673971 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.31502824152693154 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.3088945849814121 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.31502824152693154 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.34668475738519444 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.372455403853565 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.369729316093801 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.372455403853565 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json new file mode 100644 index 0000000..d9bc6ac --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for 
Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.12971577033648743 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.18177734472255433 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.16423413011355156 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.18177734472255433 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.10194557773024183 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.18622026845391912 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.15405389239655473 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.18622026845391912 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json new file mode 100644 index 0000000..c8afaa6 --- /dev/null +++ b/leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio 
Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t12_35M_UR50D", + "revision": "...", + "num_layers": 12, + "num_params": 33992881, + "embed_dim": 480 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6781966575981345 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.970278263092041 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.518608169440242 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.8757017254829407 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.35501242750621376 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9618406285072951 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4581544787406372 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6731441896618733 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 137.3688507080078 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5146164978292329 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 
391.87298583984375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3467238689547582 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9977553310886644 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4383109013756369 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.672755538282161 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 8.506048202514648 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5152786099460755 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 21.124141693115234 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.35145075602778914 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9652076318742986 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.4438681594614018 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6599300427516518 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1570.195556640625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5147654892877822 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 898.4225463867188 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3468591494342567 + }, + { + "id": "dot_recall", + "display_name": 
"dot_recall", + "description": null, + "value": 0.9977553310886644 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4179931403914694 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.4581544787406372 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6746987951807228 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9681814312934875 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5363604114934374 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9120055437088013 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3921161825726141 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8484848484848485 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.46704651746605186 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6746987951807228 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 360.30352783203125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5305821665438467 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 576.9113159179688 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3949533735600658 + }, + { + "id": "manhattan_recall", + "display_name": 
"manhattan_recall", + "description": null, + "value": 0.8080808080808081 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.468990806236423 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6758647493198601 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 22.342727661132812 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5301837270341208 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 39.38741683959961 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.37459434399629116 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9068462401795735 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.46775797789146023 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6541002720559658 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 9448.685546875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5145827317354895 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 4854.8955078125 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3464230171073095 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3679854825040224 + }, + { + "id": "top_ap", + "display_name": "top_ap", + 
"description": null, + "value": 0.468990806236423 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json b/leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json new file mode 100644 index 0000000..1898101 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.721568117708931 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.7165532879818595 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.820388189148414 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6689951528396479 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.8363567649281944 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6298307655443518 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6099773242630385 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 
0.7648458169950588 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.5789820341918578 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.752078609221467 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json new file mode 100644 index 0000000..0e91873 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.91537 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.90635 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.87424 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.30526 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.42635 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.72433 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + 
"description": null, + "value": 0.31067 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.4378 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.75859 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.82689 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.76159 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.46726 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9422321809645754 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9439900344829917 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9446453591992101 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6549640359156222 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.11037035667235007 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.41554431142868614 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6536082943031309 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.140251553474609 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.4541965457157918 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6159871931946869 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 
0.006651176818080506 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.39627086499203873 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": -0.047556791244411895 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.16420917659496206 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.28627326792803204 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.06426190649373154 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.23746446970773183 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.15565045001627686 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5237897180891637 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.1865080232459892 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.2688572949738638 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.054074967730710764 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.1711511016438979 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2896050332877169 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.05005034152582497 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": 
"nauc_recall_at_10_std", + "description": null, + "value": 0.24918235642253458 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.16768640965952947 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5114754425984644 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.2173420630028766 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.2526274232326276 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5525639421444303 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.01857146637175079 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.7765476306675947 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.48362026531371466 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": -0.0051297270434755475 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.7004665714420365 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.24671476154878727 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.37006645670815747 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.36951553698605216 + }, + { + "id": "nauc_mrr_at_5_max", + 
"display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.64312359548717 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.04622765419712948 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.22259410250972433 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6385468425832173 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.058640802937365115 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.21579087208897282 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6402042049799889 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.052782783025246006 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.21896215733129423 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.83285 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.81413 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.76701 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.25404 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.35083 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.58387 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.266 + }, + { + "id": "recall_at_10", 
+ "display_name": "recall_at_10", + "description": null, + "value": 0.37545 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.66303 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.75621 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.6866 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.41047 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8947289799402471 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.895975855130784 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8970771214115124 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6033756709037629 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.48175424620769186 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.1614695329433979 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.5820557360820439 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.48937482522317327 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.18205509390904553 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.49384788238425553 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.354953353704701 + }, + { + "id": "nauc_ndcg_at_50_diff1", + 
"display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.10767304568721194 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.03598090314920231 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.11662947626949612 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.28974453988735166 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.13482748795676255 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.22360013731689057 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.19043309088480928 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.42287317105206507 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.32712992457779794 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.02056986996465222 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.021824220192766298 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.11009705855814085 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.28505819859304804 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.10661440304261144 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.2092712287791401 + }, + 
{ + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.19742570630860265 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.38620604109572715 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.2924386961038862 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.025319280347884648 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5425386973889819 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.4063280755847313 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.43965420847555414 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4721960038905336 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.35700671463443756 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.44652985217538876 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.2526299155090765 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.021434326602753354 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3009002533330021 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6726463178530804 + 
}, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.49687521406966506 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.05561071266486503 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6731608376359998 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.49491217127896847 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.05832429376042118 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6735463200113443 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.495779540068593 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.06154966156964915 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json new file mode 100644 index 0000000..0a06474 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + 
"results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7591194968553459 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.8188679245283019 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7779874213836478 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.8188679245283019 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.656010781671159 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.7320754716981132 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6774213836477987 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.7320754716981132 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json new file mode 100644 index 0000000..987fb41 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + 
"num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.2975 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.24646428571428572 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.2475 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.20091666666666666 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t30_150M_UR50D/cyano_operonic_pair.json new file mode 100644 index 0000000..75d1682 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7218390804597701 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9874778389930725 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4406451612903225 + }, 
+ { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9247815608978271 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.288917089678511 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9279891304347826 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.32971769257135214 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 340.19317626953125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4400597907324364 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 1209.041259765625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.28210042161747795 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 1.0 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.30836065241808736 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 5.940918922424316 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4398773006134969 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 47.593170166015625 + }, + { + "id": "euclidean_precision", + 
"display_name": "euclidean_precision", + "description": null, + "value": 0.2840729001584786 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9741847826086957 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.31645982324175975 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7210727969348659 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 12984.419921875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44671433435478375 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 8378.552734375 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.2980463425715584 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8913043478260869 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.34218016632367393 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.34218016632367393 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7237547892720306 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9779865145683289 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.44695441710367084 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9349428415298462 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + 
"value": 0.3178427997705106 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.7527173913043478 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.36912560854307275 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7241379310344828 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 674.400634765625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.44820441988950277 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 1384.0185546875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.300462962962963 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8817934782608695 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.36707205079753535 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.724904214559387 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 39.354820251464844 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4482758620689655 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 84.60987854003906 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.307035175879397 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8301630434782609 + }, 
+ { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.37335963472985745 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.717624521072797 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 67287.0625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44063721070033063 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 29718.42578125 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.28290235430335775 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9959239130434783 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.2624766540914564 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.37335963472985745 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t30_150M_UR50D/ec_classification.json new file mode 100644 index 0000000..013f529 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + 
"metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6640625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6111979166666666 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.578125 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5226562499999999 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t30_150M_UR50D/ecoli_operonic_pair.json new file mode 100644 index 0000000..dcba464 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6312007417709782 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.964427649974823 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5821596244131455 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9228619337081909 + }, + { 
+ "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.42521762068055924 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9227246708643388 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.524131558836932 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6191469633750579 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 472.7284851074219 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5769426104621634 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 942.1561279296875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4096153846153846 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9753863766456783 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.49992125667466114 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.624246638850255 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 28.615245819091797 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5795512930296284 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 47.59563064575195 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.41348973607038125 + }, + { + "id": "euclidean_recall", 
+ "display_name": "euclidean_recall", + "description": null, + "value": 0.9685174585002863 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5150815389359208 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6193787668057488 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 11865.05078125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5762376237623762 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 6186.25341796875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4048226292603756 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9994275901545506 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.5074172652530077 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.524131558836932 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6483541956420955 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9662680625915527 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.6057630736392743 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9369711875915527 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.48298162014976176 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 
0.8122495706926159 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5752810967507045 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6488178025034771 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 912.6686401367188 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.6068776865963268 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 1335.36962890625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4606941560367843 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8889524899828277 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5731535780314824 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6467315716272601 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 48.278228759765625 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6060103626943005 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 73.76244354248047 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.47498375568551005 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8368631940469377 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5733375822821691 + }, + { + 
"id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 89725.625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5765676567656766 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 26633.4609375 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4050544864363552 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3632996357288699 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5752810967507045 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t30_150M_UR50D/euk_retrieval.json new file mode 100644 index 0000000..e7d0dc3 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": 
"ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.92259 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.9167 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.88687 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.35223 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.47142 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.70071 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.35765 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.48426 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.74119 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.81865 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.73633 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.42412 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9517684887459807 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9522278364722093 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.953643102583901 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.7150037451899073 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.2906625946483748 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 
-0.7209294602302925 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6907192680160392 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.3058553886156155 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.6912724175806588 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6773879240615319 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.2230353104353846 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.5037777804193893 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.07512993110545117 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.10451684557609604 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.20258838285578284 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.15504770699858275 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.27125172402690906 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.08216003549646085 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5793806460927751 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.39855546980064466 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": 
null, + "value": -0.3212267685833858 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.06378940095453296 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.0935410532779427 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.20565896341550313 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.12463958126591933 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.2460932574460236 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.09141269581756521 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5316388055512066 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.39363828399474204 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.27072875132052776 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.3814594443420771 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.27374483565752716 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.8999025518235271 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.2990471971388469 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 
0.2179409172747032 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.69964609275835 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.14129861784392306 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.26407541710839577 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.31442605468864243 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.9209205235715922 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.26647535110302417 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.8207529427307493 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.9201601439905501 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.26067869360312956 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.8298259416241756 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.9202405827206523 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.265282549872998 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.8251010908893353 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.84626 + }, + { + 
"id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.82923 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.78558 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.30997 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.40782 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.59403 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.31985 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.43295 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.66666 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.74598 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.65852 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.36688 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9123258306538048 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.914544735364671 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9150978563852772 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6917517819391329 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5237021136032984 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.12166243275163607 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + 
"description": null, + "value": 0.7037426830988499 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.484773980307184 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.12250966461302211 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6749011438082552 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.4004427197799687 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.09048181785892781 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.1748175382079989 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.011739002651428564 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3400304609570671 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.29156932199723384 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.16970879434540548 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.2357874576308254 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.6261176648013426 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.26553990452224274 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.00008479750409092293 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": 
"nauc_recall_at_5_max", + "description": null, + "value": 0.16601631967349068 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": -0.0028582806705203046 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3381167573210711 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.25720692642755344 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.13614183480704828 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2557093468354816 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5690350990599277 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.18579980394403742 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.0024995904740863243 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5027516634592902 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.5209659666134092 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.5193484166288937 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.42341736061780094 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.4441506486245349 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": 
"nauc_precision_at_10_diff1", + "description": null, + "value": -0.5127529647369428 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.18125443712215236 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.047925843664078704 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3532612529819783 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7446679342833693 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.6132809211091352 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.09338614253655397 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.742408783880129 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.620360028959556 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.07601111866100194 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7411037680464723 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6185536637957788 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.07840809223865396 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t30_150M_UR50D/fefe_phylogeny.json new file mode 100644 index 0000000..f0ef494 --- /dev/null +++ 
b/leaderboard/submissions/esm2_t30_150M_UR50D/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.42638593677257985 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.613514079803676 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.6057539011664933 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.613514079803676 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2903783693037115 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.5353019718492421 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.5569899700006391 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.5569899700006391 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t30_150M_UR50D/modac_paralogy_bigene.json new 
file mode 100644 index 0000000..93a430d --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.040214477211796246 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.504309723543487e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 9.002569333287721e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 
0.14544235924932977 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/mopb_clustering.json b/leaderboard/submissions/esm2_t30_150M_UR50D/mopb_clustering.json new file mode 100644 index 0000000..5a97c06 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.843073317600245 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7993491625653556 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_arch_phylogeny.json new file mode 100644 index 0000000..b792041 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": 
"2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.17685077749657868 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.18397145017500027 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.1845894895803573 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.1845894895803573 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.3483214537780648 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3731512151106379 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.34706946425354485 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3731512151106379 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_bac_phylogeny.json new file mode 100644 index 0000000..f8490f6 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": 
"b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.11953486541105843 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.15790804024970093 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.1545090554656792 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.15790804024970093 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1810910478431202 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2420094772926146 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2240652992499544 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2420094772926146 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t30_150M_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t30_150M_UR50D/vibrio_operonic_pair.json new file mode 100644 index 0000000..e005f54 --- /dev/null +++ b/leaderboard/submissions/esm2_t30_150M_UR50D/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + 
"datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t30_150M_UR50D", + "revision": "...", + "num_layers": 30, + "num_params": 148795481, + "embed_dim": 640 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 15, + "layer_display_name": "15", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6743101438010105 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9717499613761902 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5162846803377563 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9134178161621094 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3529896907216495 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9607182940516273 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.44141636821591623 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6669257675864749 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 500.4721984863281 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5148800924588268 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 1224.591064453125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.34669260700389104 + 
}, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 1.0 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.41668517460563326 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6692576758647493 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 25.1038875579834 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5144927536231884 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 46.55123519897461 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.35192069392812886 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9562289562289562 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.4358738619047138 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6614846482705014 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 11981.70703125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5147313691507799 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 6498.75244140625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.34655775962660446 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4304420964721535 + }, + { + "id": 
"top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.44141636821591623 + } + ] + }, + { + "layer_number": 29, + "layer_display_name": "29", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.68558103381267 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9706978797912598 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.535526776338817 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9334700107574463 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3891149542217701 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8585858585858586 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.49246488546090866 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6809172172561213 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 795.2196044921875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5332845647403073 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 1224.1300048828125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3955507325013565 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8181818181818182 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4833411491930948 + }, + 
{ + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6801399144966964 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 43.079410552978516 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5346938775510204 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 77.91600036621094 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.383601756954612 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8821548821548821 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.48219648792984926 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6541002720559658 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 60487.8359375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5148456057007126 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 30271.841796875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3500201857085184 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9730639730639731 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.34301425719850337 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.49246488546090866 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/MIBIG_protein_classification.json 
b/leaderboard/submissions/esm2_t33_650M_UR50D/MIBIG_protein_classification.json new file mode 100644 index 0000000..fd6cc07 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6646213726039093 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.7142857142857143 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7797818341533279 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6226719674475112 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.8295540438397593 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6625178133673452 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6598639455782312 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.8568965517241379 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6022767137392347 + }, + { + "id": "lrap", + 
"display_name": "lrap", + "description": null, + "value": 0.7842025699168563 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t33_650M_UR50D/arch_retrieval.json new file mode 100644 index 0000000..ae38f34 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.93123 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.92445 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.8848 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.30898 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.43383 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.737 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.31538 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.44791 + }, + { + 
"id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.76764 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.84481 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.78015 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.46964 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9498435054773084 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9516662263141135 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9519930735110318 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6308519525257628 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.089031581354036 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.4412273663070538 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6032994212940577 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.09724157956704019 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.4738679731426537 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.5712702884058568 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": -0.1658603066862231 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.3611726321158017 + 
}, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": -0.017602606962065153 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.09514819051312666 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.2848295232149835 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.08673113556503126 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.15294567339061488 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.16894744699362754 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5120519885236438 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": -0.0038334646025400855 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.2199525642638632 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.02455812168337926 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.09896005096881812 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2875714656687537 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.06805556416914144 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.15852083147934737 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + 
"description": null, + "value": 0.1784723750352869 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.500056208041346 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.002586904787276158 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.20499742952429414 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5039507321590944 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.05258016937163956 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.7647229953284452 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4021315165507268 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.016992811311858026 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.6768345994695547 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.17895723642667355 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.33113192778327805 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3516930058377777 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6679881478327583 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + 
"description": null, + "value": 0.1530559182468113 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.2950913220027977 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6645424475589732 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.16781717474258154 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.2930131697897344 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6647896092012971 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.16547097365694646 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.2935701117386994 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.80913 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.79635 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.75732 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.24603 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.34022 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.56511 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.25919 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.37057 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + 
"value": 0.65688 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.73419 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.672 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.40938 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.874619433774363 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8781182041745417 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8796387772814498 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6393103094705105 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.39489258054605936 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.14026462748913124 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6190457124081322 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.4030000242340694 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.1730798319035512 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.5320810476895583 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.3325889831793021 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.08862322432493497 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + 
"description": null, + "value": 0.14094623550319177 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.1285128991955811 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.19503770328831505 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.23096756151958173 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.221478673456825 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.11283510381365157 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.4540497453609997 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.29125135118566164 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.036933801012767234 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.11450448159576665 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.12487127801737757 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.18688120243913126 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.17741445748537313 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.19925238150416746 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.11233216281151082 + }, + { + "id": "nauc_recall_at_50_max", 
+ "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.3791957310622827 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.2551711179587873 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.00018595264688904266 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5314308675519602 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.34301160006074155 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.33926190682858753 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4448067496058197 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.2968723779500677 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.3528818604969278 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.1936211550389429 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.0215074969245683 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.20326952474019822 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7158514971972625 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.39909861586332773 + }, + { + "id": "nauc_mrr_at_5_diff1", + 
"display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.11127787012043149 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7189756388185046 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.39804773603313176 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.11462677765400364 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7198766577873941 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.40101385908354403 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.1168522554652628 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t33_650M_UR50D/bacarch_bigene.json new file mode 100644 index 0000000..61d4bde --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + 
"value": 0.790251572327044 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.8490566037735849 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.808427672955975 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.8490566037735849 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.5652650494159928 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6566037735849056 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5896406109613657 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6566037735849056 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t33_650M_UR50D/convergent_enzymes_classification.json new file mode 100644 index 0000000..8d899de --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + 
"id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.3125 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.25675 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.2675 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.22246428571428567 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t33_650M_UR50D/cyano_operonic_pair.json new file mode 100644 index 0000000..5b10e58 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7206896551724138 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9891349673271179 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.44163658243080617 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.8806867003440857 + }, + { + "id": "cos_sim_precision", + 
"display_name": "cos_sim_precision", + "description": null, + "value": 0.2836166924265842 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9972826086956522 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.34297195857616974 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7203065134099617 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 1108.904541015625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4404548174745661 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3620.740966796875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.28242517267843437 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 1.0 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3146763612933017 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7206896551724138 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 38.761192321777344 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4416441644164416 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 127.99288940429688 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.28340392760877936 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + 
"description": null, + "value": 1.0 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3240934026221954 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7199233716475095 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 59584.36328125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44705020153902525 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 42462.1484375 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.30607124937280483 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8288043478260869 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3553266247045366 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3553266247045366 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7256704980842912 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9802291393280029 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.45144356955380577 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9365932941436768 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.31175556706369756 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8179347826086957 + }, + { + "id": "cos_sim_ap", + 
"display_name": "cos_sim_ap", + "description": null, + "value": 0.3711095484875728 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7268199233716475 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 2405.0166015625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4552 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3873.87744140625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.322562358276644 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.7730978260869565 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.37188521395957413 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7283524904214559 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 89.34696960449219 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4592169657422513 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 158.45071411132812 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3280885780885781 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.7649456521739131 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.37329497692478114 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + 
"description": null, + "value": 0.717624521072797 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 414854.375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.4394618834080718 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 120746.546875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.28171713300114987 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.998641304347826 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.2524373913991999 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.37329497692478114 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t33_650M_UR50D/ec_classification.json new file mode 100644 index 0000000..849e5a8 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6953125 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 
0.6372395833333333 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.640625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5825520833333333 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t33_650M_UR50D/ecoli_operonic_pair.json new file mode 100644 index 0000000..2afeb54 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.634445989800649 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9598076343536377 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5842179759377212 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9043072462081909 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4227912932138284 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", 
+ "description": null, + "value": 0.9450486548368632 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5276812260133239 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6212331942512749 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 1719.5322265625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5772681954137587 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3341.0849609375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4066963240458909 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9942759015455066 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.49092987587616777 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6256374594343996 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 66.70790100097656 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5782290279627164 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 122.32955932617188 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.40765078620042244 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9942759015455066 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 
0.511753481157234 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6302735280482151 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 54134.9296875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5839282445542742 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 41969.6640625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4292249730893434 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9129937034917001 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.5301473268684156 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5301473268684156 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6430227167362077 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9668768048286438 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.6095796676441838 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9326354265213013 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.46288598574821854 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8923869490555237 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5636125661225051 + }, + { + "id": "manhattan_accuracy", + "display_name": 
"manhattan_accuracy", + "description": null, + "value": 0.6460361613351877 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 2996.8798828125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.602950342821525 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3910.81005859375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.47325505544683627 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8305666857469949 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.568026958169892 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.645804357904497 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 107.93898010253906 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6100134279685403 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 177.9056396484375 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.45874206578188115 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9101316542644533 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5727211593488946 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + 
"description": null, + "value": 522833.3125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5762376237623762 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 105705.90625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4048226292603756 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9994275901545506 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3449597323547321 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5727211593488946 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t33_650M_UR50D/euk_retrieval.json new file mode 100644 index 0000000..62e64a9 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.93328 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 
0.92927 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.89032 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.35914 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.48142 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.71036 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.36397 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.48703 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.73804 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.83537 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.75402 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.42482 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9437299035369775 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9440514469453377 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.945011211244356 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.7620928279266368 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.33055988644552864 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.49891672668881054 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.7238394046424372 + }, + { + "id": 
"nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.2897858408379934 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.4991960417643786 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6394920275348465 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": -0.033060154820210755 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.3423790207125621 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.15329766118784013 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": -0.04748558443889553 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.19738372855185915 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.21780702746711833 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.08698230377058089 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.07610409072111464 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.566496850232904 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.07469827272103352 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.29548110389437116 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 
0.15194985870758682 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": -0.049271771318523803 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2108956515825209 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.21617272285778213 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.0839962096295339 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.09028425010830723 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5469629807206992 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.05743544528584219 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.27372175464202625 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.30507744236501566 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.39710388130115676 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.8234215276991089 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.17621225835821736 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.32037529086829686 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": 
-0.6212516491077492 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.029957995813415312 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.17440396313077902 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.2483983668583826 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.8982147001536217 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.3436237156115937 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.4261298572775073 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.8976297271659986 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.3435359664305329 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.42532802107589146 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.8958429750776126 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.34758003000370125 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.4212983155790701 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.86745 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.85726 + }, + { + "id": "ndcg_at_50", + "display_name": 
"ndcg_at_50", + "description": null, + "value": 0.81762 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.32161 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.42823 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.63052 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.32639 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.43888 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.68495 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.77749 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.69678 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.38842 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9054126473740622 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9076634512325832 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.908978059494486 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.7748586911316915 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5142764011620478 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.16183638440404874 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.7792667164072754 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + 
"description": null, + "value": 0.5091522839696889 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.15010464498820264 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6885174711474481 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.4634219612093561 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.0958526447757281 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.28942359787845134 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.05518275112905171 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3024360599910821 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.37812716933920204 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.20300375200619958 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.23169359168341253 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.618121347997912 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.32151209873733966 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.014585296410912104 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.2890637788771335 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": 
"nauc_recall_at_5_std", + "description": null, + "value": 0.047744544718804925 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.29922142051515754 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.3727257601790394 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.18909483665427484 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.23075313240938425 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5831797713164615 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.3065281773765325 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.017022054288872285 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.46921647871568267 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.5005062576058754 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.5495785640432075 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.3017994065661075 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.408118171847032 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.4865073732708222 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": 
"nauc_precision_at_50_max", + "description": null, + "value": 0.03870849804412649 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.0005533993463936359 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.34313948852939863 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.8384104464388212 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.5062896003344488 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.02489705741590697 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.8397121680667853 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.4954436348725267 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.029156290363593018 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.8408067544253827 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.507869466281193 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.025479819846184824 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t33_650M_UR50D/fefe_phylogeny.json new file mode 100644 index 0000000..c780f2c --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": 
"Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.5321010160894765 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.6485251532984211 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.640158436352614 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.6485251532984211 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.5866984424931825 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.7174938899857625 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.6962913981388911 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.7174938899857625 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t33_650M_UR50D/modac_paralogy_bigene.json new file mode 100644 index 0000000..a124df5 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": 
"modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.04423592493297587 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.0003572022938507674 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0020107238605898124 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.0004896388988695724 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0020107238605898124 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.1742627345844504 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/mopb_clustering.json 
b/leaderboard/submissions/esm2_t33_650M_UR50D/mopb_clustering.json new file mode 100644 index 0000000..4d57c76 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.8403349456017069 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.871856033794394 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_arch_phylogeny.json new file mode 100644 index 0000000..185a15b --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": 
"facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1267191344357247 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.13544894658566908 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.14103248399983875 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.14103248399983875 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.23178083925138884 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.31760837159962363 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.3066594331560383 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.31760837159962363 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_bac_phylogeny.json new file mode 100644 index 0000000..c2434f6 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": 
"0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.07801983788225005 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.15172067185368715 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.16114607476373227 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.16114607476373227 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.20519819494619662 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.28437203263094524 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2855776096898296 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2855776096898296 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t33_650M_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t33_650M_UR50D/vibrio_operonic_pair.json new file mode 100644 index 0000000..8400658 --- /dev/null +++ b/leaderboard/submissions/esm2_t33_650M_UR50D/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": 
"24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t33_650M_UR50D", + "revision": "...", + "num_layers": 33, + "num_params": 652353941, + "embed_dim": 1280 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6743101438010105 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9669862985610962 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5211475894705708 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.8917249441146851 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3538152610441767 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9887766554433222 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4326581796370962 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6649825106879129 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 1687.185791015625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5159010600706714 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3160.9052734375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.34970059880239523 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + 
"description": null, + "value": 0.9831649831649831 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3953543828676884 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6696463272444617 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 61.02823257446289 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5176045741799579 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 109.26025390625 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3536184210526316 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9652076318742986 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.41999108125278384 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6595413913719393 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 58606.640625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5259608900876601 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 44785.0859375 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3759036144578313 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8754208754208754 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.44069080050077336 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": 
null, + "value": 0.44069080050077336 + } + ] + }, + { + "layer_number": 32, + "layer_display_name": "32", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6820831713952584 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9717143177986145 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.536664503569111 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9286491274833679 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.37745321770880874 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9281705948372615 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4828328003170023 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6797512631169841 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 2772.750732421875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5311884438608011 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 4146.494140625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.37540603248259863 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9079685746352413 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4753549489730454 + }, + { + "id": "euclidean_accuracy", + "display_name": 
"euclidean_accuracy", + "description": null, + "value": 0.6789739603575593 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 100.84526062011719 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5390972663699936 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 190.31085205078125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3760532150776053 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9517396184062851 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.479168611593493 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.653322969296541 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 364605.8125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5140051978053711 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 113893.3515625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.34603421461897355 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9988776655443322 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3154670845055397 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.4828328003170023 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/MIBIG_protein_classification.json b/leaderboard/submissions/esm2_t36_3B_UR50D/MIBIG_protein_classification.json new 
file mode 100644 index 0000000..91eaa70 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7132237125944091 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.746031746031746 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.836830500285544 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6633593919422381 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.8495842781557078 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6695716322239411 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6689342403628118 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7821148498790867 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.623537709467486 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.799697656840515 + } + ] + } + ] 
+} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t36_3B_UR50D/arch_retrieval.json new file mode 100644 index 0000000..d6041dc --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.9278 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.92019 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.88622 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.31302 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.43564 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.734 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.31902 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.4476 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.76254 + }, + 
{ + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.83841 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.77217 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.47315 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.948968558827714 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9506117513159769 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9511580419936511 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.5695825862687866 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.221986509500315 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.4203950131177228 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.564322416808483 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.19108656309781616 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.42395611376427206 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.5055593534589494 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": -0.09468644055518095 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.2864463127750372 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + 
"value": -0.03538843910397287 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.09311883793714422 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.30555605847252953 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.0783240568174268 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.17795919882735975 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.18509762934741394 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.4771715396594046 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.068423803745228 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.17379854324787608 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.03848314855918574 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.09553829284420427 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.30587990770622553 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.07003591555908308 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.18213605943910144 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.19768968297795225 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": 
"nauc_recall_at_50_max", + "description": null, + "value": 0.47791953190170117 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.06888807128628972 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.14996648800723503 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.502204692599048 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.1575682995183711 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.806722029506651 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4134966659666948 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.08972449319873597 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.6905241985128383 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.14715073209729895 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.3324778553017647 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3229141728806895 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6064510581732133 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.35520955023129336 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": 
"nauc_mrr_at_5_diff1", + "description": null, + "value": -0.2776339495301657 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6013823108442311 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.3564245142833086 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.2553085460733028 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6029004356105379 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.35308050283451 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.257866385843169 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.87956 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.86893 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.82298 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.28161 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.38967 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.6507 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.29346 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.41081 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.71119 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + 
"value": 0.79385 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.7303 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.43768 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9240219092331764 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9247296912789864 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9256488541632965 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6207749722953523 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.515941363395918 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.16217044846365308 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.5968096620157092 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.5154687268366953 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.1920119276825337 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.529142718670939 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.2280823824107517 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.10670338306671276 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.16178078739669308 + }, + { + "id": "nauc_map_at_5_std", + "display_name": 
"nauc_map_at_5_std", + "description": null, + "value": 0.020984503452804337 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.23075737918650982 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.25410720612296833 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.12652298327617323 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.1499452335235867 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5089639558817692 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.13626981480705233 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.01710354293008994 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.13773178438608552 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.013499315164379576 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.22477658297065084 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.21976238711840093 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.1146387796750991 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.15154255626633595 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.47534977446281584 + }, + { 
+ "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.07116715411722786 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.008607115109327266 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.424843376611528 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.46963649338900365 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.47110941056522865 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.3148320527956752 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.39164203516684803 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.4331079065864579 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.07686361736738528 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.0710043916712577 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.22846701483468224 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7122038946666341 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.5616947469059596 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.07473438287064475 + }, + { + "id": 
"nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7119380442662104 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.5584194442105753 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.07250782682707735 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7118323491211572 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.5561670251588102 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.07251247155922119 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t36_3B_UR50D/bacarch_bigene.json new file mode 100644 index 0000000..b59b067 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7773584905660378 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 
0.8415094339622642 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7973584905660378 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.8415094339622642 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.6844384546271338 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.7547169811320755 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7023450134770889 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.7547169811320755 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t36_3B_UR50D/convergent_enzymes_classification.json new file mode 100644 index 0000000..afbd6b4 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.3175 + }, + { + "id": "f1", + 
"display_name": "f1", + "description": null, + "value": 0.26516666666666666 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.2875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.23929761904761904 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t36_3B_UR50D/cyano_operonic_pair.json new file mode 100644 index 0000000..a65fadf --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7229885057471265 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9862627983093262 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4427099594130503 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9512487649917603 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.287393595460073 + }, + { + "id": 
"cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9633152173913043 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.3521021786502902 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7222222222222222 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 4447.77392578125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4404548174745661 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 11370.837890625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.28242517267843437 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 1.0 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3253018180693411 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7222222222222222 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 111.42530822753906 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.44223826714801445 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 316.37481689453125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.2840030911901082 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.998641304347826 + }, + { + "id": "euclidean_ap", + "display_name": 
"euclidean_ap", + "description": null, + "value": 0.3371280175314328 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7191570881226054 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 655867.3125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44874551971326165 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 434046.21875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3047711781888997 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8505434782608695 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3402695550794156 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3521021786502902 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.721455938697318 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9799139499664307 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4436528497409326 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9414458274841309 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.29124149659863946 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9307065217391305 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.34858593267642246 + }, + { + 
"id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.721455938697318 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 8714.66015625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4455159112825458 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 15921.369140625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.29178947368421054 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9415760869565217 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.34892147616715924 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7222222222222222 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 217.1142578125 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4531132783195799 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 380.83392333984375 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.31295336787564765 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8206521739130435 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3577233751789095 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.717624521072797 + }, + { + "id": "dot_accuracy_threshold", + 
"display_name": "dot_accuracy_threshold", + "description": null, + "value": 2331774.5 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.4394618834080718 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 1028537.625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.28171713300114987 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.998641304347826 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.25200076084231776 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3577233751789095 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t36_3B_UR50D/ec_classification.json new file mode 100644 index 0000000..e3775f9 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.734375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6796875 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": 
"accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6263020833333334 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t36_3B_UR50D/ecoli_operonic_pair.json new file mode 100644 index 0000000..c7fae4b --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6321279554937413 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9784801006317139 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5851825340014316 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.955406904220581 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.42567039833376724 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9358900973096738 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + 
"description": null, + "value": 0.5401199692237064 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6210013908205841 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 5729.439453125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5777106411547499 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 10155.3583984375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4086915222037521 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9851173440183171 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5020331902964992 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6265646731571627 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 148.85182189941406 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5890001757160429 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 254.08401489257812 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.4249492900608519 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9593589009730967 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5261132930952928 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 
0.6235512285581827 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 551950.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5956977385548815 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 433432.1875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4387865655471289 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9273039496279336 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.5241523190145476 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5401199692237064 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6420955030134446 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9660873413085938 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5942733657482442 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9371581077575684 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.43352601156069365 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9444762449914138 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5507975673875168 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6432545201668984 + }, + { + "id": "manhattan_accuracy_threshold", + 
"display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 11284.8125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.594818279956819 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 16347.359375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4337444240356862 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9461934745277619 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5513298747216403 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6446453407510431 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 286.11083984375 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6041088284286508 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 419.63922119140625 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.44638949671772427 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9341728677733258 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.560712040100291 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 2749512.5 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 
0.5762376237623762 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 935082.25 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4048226292603756 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9994275901545506 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3470113984193546 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.560712040100291 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t36_3B_UR50D/euk_retrieval.json new file mode 100644 index 0000000..e6aaccd --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.92746 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.92435 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.8887 + }, + { + "id": "map_at_5", + 
"display_name": "map_at_5", + "description": null, + "value": 0.35702 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.48097 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.71102 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.36361 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.49573 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.73945 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.8283 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.74662 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.42386 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9445337620578779 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9454269381922114 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9461824139335013 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.7182511735125956 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5222382381680614 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.28061527472757375 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6822551267755836 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.47487826635542546 + }, + { + "id": "nauc_ndcg_at_10_diff1", + 
"display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.2879774120935547 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.5831687400559494 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.19720517604494417 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.12322021001506091 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.15995605492393108 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": -0.009192337725249395 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.2726694815884632 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.23974115018115752 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.17361801624364362 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.18401877459714397 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5824063620629947 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.2841159591470895 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.033462605905602995 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.1552580393454503 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": -0.018330467820246478 + }, + { + "id": 
"nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2916738259676148 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.21463442654398712 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.15438579748881917 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.18787323197368067 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5745278139796267 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.2757383419710411 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.05586820546617385 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.3488804246892938 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.4962700661015737 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.6999956331879145 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.2078054685387809 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.4017745706742885 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.5648444286940194 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": -0.020389884132193954 + }, + { + "id": 
"nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.21348688617893796 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.2569747609893314 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.8526479528304391 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.7145003856437514 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.2523037494515739 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.8502362990306102 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.7119663865365718 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.27066105682545166 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.8500349732046102 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.7081399222244705 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.2783135947517424 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.91085 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.90346 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.85899 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.3399 + }, + { + "id": "map_at_10", + 
"display_name": "map_at_10", + "description": null, + "value": 0.4558 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.67548 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.34867 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.47383 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.71868 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.82251 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.73633 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.40997 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.930010718113612 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9314844587352624 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9323214148863197 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.7640857998353766 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.7354333242704569 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.30519240283573684 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.7355219364749873 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.6683650025567451 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.3707995393372363 + }, + { + 
"id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6610500400872863 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.3111396784423232 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.1876061714382793 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.30631322003416805 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": -0.024074646428667697 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.30856020616160795 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.38789519784115756 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.12667431447242408 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.22285611272661204 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.674757049307216 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.21754000071916246 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.027057421884101825 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.29285629152302867 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": -0.04105614851900655 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 
0.30446018048841167 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.35827571757915555 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.0869475077364285 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.21288114526580437 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.6580911825358527 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.1427550599573899 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.021156553400256822 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.3608478586760989 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.6317427657580859 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.7043182301188912 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.16171321927396293 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.5007867347225371 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.6246067899980982 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": -0.07045167205515267 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 
-0.05554080115033524 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3542460994718293 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.8324314570384903 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.8215594631368153 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.20461209640061234 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.8314402422775857 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.8177212818589602 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.21316411254806447 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.831948038836457 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.8169691016937358 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.20728005964482124 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t36_3B_UR50D/fefe_phylogeny.json new file mode 100644 index 0000000..bf457f5 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": 
"bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.6558782216716175 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.7365256290104596 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.7136968365232351 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.7365256290104596 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.7465702832852074 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.7917831210213578 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.7649782553422211 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.7917831210213578 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t36_3B_UR50D/modac_paralogy_bigene.json new file mode 100644 index 0000000..5fc7e25 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": 
"bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.036193029490616625 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.513409339146605e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 9.02074410313958e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.1306970509383378 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/mopb_clustering.json b/leaderboard/submissions/esm2_t36_3B_UR50D/mopb_clustering.json new file mode 100644 index 0000000..4745131 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": 
"MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7761858715009393 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.9024367024356471 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_arch_phylogeny.json new file mode 100644 index 0000000..a45c688 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + "id": "cos_sim", + "display_name": 
"cos_sim", + "description": null, + "value": 0.106058717692602 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.1528085277532408 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.1702712021061018 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.1702712021061018 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.22892873976632383 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2920991527135924 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.293053056643355 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.293053056643355 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_bac_phylogeny.json new file mode 100644 index 0000000..5791419 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + 
"layer_display_name": "18", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.06958717767819357 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.1566146955803603 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.19202284530665048 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.19202284530665048 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.19060675364434318 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.25577724644638433 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2587461756509838 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2587461756509838 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t36_3B_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t36_3B_UR50D/vibrio_operonic_pair.json new file mode 100644 index 0000000..9168761 --- /dev/null +++ b/leaderboard/submissions/esm2_t36_3B_UR50D/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t36_3B_UR50D", + "revision": "...", + "num_layers": 36, + "num_params": 2841627041, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 18, + "layer_display_name": "18", + "metrics": [ + { + 
"id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6708122813835989 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.981367290019989 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5193538555318501 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9532276391983032 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.35648535564853556 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9562289562289562 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.43327834671246207 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6622619510299261 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 4904.591796875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5153980244044161 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 10609.9453125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3477067816542532 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9955106621773289 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.40016477443788834 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6653711620676254 + }, + { + "id": "euclidean_accuracy_threshold", + 
"display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 131.35702514648438 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5229448662925057 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 228.46902465820312 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.37043966323666977 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8888888888888888 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.4280072686520109 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6603186941313641 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 597536.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5275381552753815 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 464319.0625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.37447008949599625 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8922558922558923 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.44016861120869744 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.44016861120869744 + } + ] + }, + { + "layer_number": 35, + "layer_display_name": "35", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6712009327633113 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": 
null, + "value": 0.9714021682739258 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5251497005988024 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9320735335350037 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.35810534912209063 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9842873176206509 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.44585848288179364 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6692576758647493 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 10474.798828125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5252225519287834 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 18217.796875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.35699878983461075 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9932659932659933 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.43460961961390276 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6708122813835989 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 269.1448974609375 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5331240188383045 + }, + { + "id": 
"euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 407.750244140625 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3700959023539669 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9528619528619529 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.444004705862405 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.653322969296541 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 2218353.5 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5145827317354895 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 976602.5625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3464230171073095 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3232698755870762 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.44585848288179364 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/MIBIG_protein_classification.json b/leaderboard/submissions/esm2_t6_8M_UR50D/MIBIG_protein_classification.json new file mode 100644 index 0000000..5d11160 --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": 
"classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6564698290049759 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6213151927437641 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.8560673597146576 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.598094204265186 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.7698412698412702 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6539081896462444 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6825396825396826 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7848864063703949 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6078800724477005 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.8102796674225253 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/arch_retrieval.json b/leaderboard/submissions/esm2_t6_8M_UR50D/arch_retrieval.json new file mode 100644 index 0000000..010e209 --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves 
bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.46922 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.43575 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.39114 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.10438 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.13772 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.21355 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.1218 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.17417 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.3415 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.42595 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.36573 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.20961 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + 
"description": null, + "value": 0.6061032863849769 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.6136309439126343 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.618612010194391 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.1998075116367266 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.501606836885053 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.1378118530981871 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.18302753229030955 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.529252934890408 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.13526580782340344 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.1600349751039775 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.5759387637864083 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.14570847362804085 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.06563284509194764 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.4956919284414832 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3095151767985699 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 
0.11335639333019934 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.5577957210632679 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.254596062635998 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.20045555920826685 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.6132212804732518 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.16302757181104657 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.03401084834217468 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.4617531031384631 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.28397660576444894 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.06720993976029453 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.5073816131438185 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.23261778468205002 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.16667921011510828 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.5259393842070421 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.13381540436587672 + }, + { + "id": "nauc_precision_at_5_max", + 
"display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.2105023896681829 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.4512153840299376 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": 0.04767765797059535 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.2000938079661469 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.4461163108781014 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": 0.01450421362018999 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.12765115099732313 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.3146014632375712 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.044088192546631495 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.21854370496963085 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.4305406870224175 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.2065829490711869 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.21766766497784718 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.43116699819575055 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": 
"nauc_mrr_at_10_diff1", + "description": null, + "value": 0.20801462901159104 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.2176313827069169 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.43135087322870347 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.2078825282336836 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.63152 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.60452 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.56721 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.17864 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.24217 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.39543 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.19424 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.27639 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.51307 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.56901 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.50265 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.29486 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7392801251956179 + }, + { + "id": "mrr_at_10", + 
"display_name": "mrr_at_10", + "description": null, + "value": 0.7450229999525767 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7481511664471421 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.39989468043684767 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.4936426613891317 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.09522925993445099 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.3810925649678609 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.5282583606322245 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.08763512132492676 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.3621673695795759 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.6080254445975454 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.09742405454353203 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.07782146154396122 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.48693859418142155 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3599139708725904 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.15672935830334864 + }, + { + "id": "nauc_map_at_10_std", + "display_name": 
"nauc_map_at_10_std", + "description": null, + "value": 0.5656574495870028 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.2853689795806975 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.35302238221077475 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.645119394317702 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.12237037613331439 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.052013553698076434 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.45739246576262554 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3493212024717546 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.11192308593513252 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.5489391222279509 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2769457461783965 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.3133757496247297 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.6618732618345899 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.11423599863005311 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 
0.4062309467686318 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.3828260336036461 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.0870886550678612 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.3881405881948008 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.35340692971248944 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.1401576823890037 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.3163198713977749 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.10096336675596154 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.20998793758526332 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.4276357773831127 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.42675005803237664 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.1732983192011071 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.42738519405434844 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.4349603370469823 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.17457505671954143 + }, + { + "id": 
"nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.4269447696243609 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.43456693807574653 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.17238808668733543 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/bacarch_bigene.json b/leaderboard/submissions/esm2_t6_8M_UR50D/bacarch_bigene.json new file mode 100644 index 0000000..53bd271 --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.24651143056803437 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.30943396226415093 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.26085981104849026 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.30943396226415093 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 
0.43651991614255764 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.5132075471698113 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.4573989218328841 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.5132075471698113 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/convergent_enzymes_classification.json b/leaderboard/submissions/esm2_t6_8M_UR50D/convergent_enzymes_classification.json new file mode 100644 index 0000000..15c068a --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.06491666666666666 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.15 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.11641666666666665 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/esm2_t6_8M_UR50D/cyano_operonic_pair.json b/leaderboard/submissions/esm2_t6_8M_UR50D/cyano_operonic_pair.json new file mode 100644 index 0000000..1585d8d --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7218390804597701 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9959506988525391 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.442998760842627 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9245835542678833 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.28691813804173355 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9714673913043478 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.3475030371533737 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.721455938697318 + }, + { + "id": "manhattan_accuracy_threshold", + 
"display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 19.038516998291016 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4394618834080718 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 111.03251647949219 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.28171713300114987 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.998641304347826 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3346837880060032 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7206896551724138 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 1.384537696838379 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4400597907324364 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 8.970820426940918 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.28210042161747795 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 1.0 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3353446157868629 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7241379310344828 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 269.9737854003906 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + 
"value": 0.4413710162357186 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 153.47955322265625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.2833976833976834 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9972826086956522 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3529107024982295 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3529107024982295 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7203065134099617 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9935691356658936 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.44019138755980863 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.6014477014541626 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.2822085889570552 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 1.0 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.33649957902185235 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7210727969348659 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 52.736236572265625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4401197604790419 + }, + { + "id": 
"manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 375.45855712890625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.28225806451612906 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.998641304347826 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3307875908554976 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7210727969348659 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 4.403592109680176 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4403230631169608 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 28.599159240722656 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.2823168392788646 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 1.0 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.33161356625614147 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7199233716475095 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1383.460693359375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44529750479846447 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 968.0859985351562 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": 
null, + "value": 0.29121338912133893 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9456521739130435 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3259103133557722 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.33649957902185235 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/ec_classification.json b/leaderboard/submissions/esm2_t6_8M_UR50D/ec_classification.json new file mode 100644 index 0000000..ed88033 --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.3984375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.33880208333333334 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4921875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.43671875 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/ecoli_operonic_pair.json b/leaderboard/submissions/esm2_t6_8M_UR50D/ecoli_operonic_pair.json new file mode 100644 index 0000000..999fdcd --- /dev/null 
+++ b/leaderboard/submissions/esm2_t6_8M_UR50D/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6353732035234122 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9866563081741333 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5892511013215859 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9222266674041748 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4256619144602851 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9570692615912993 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5334395272423883 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6332869726471951 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 28.67717933654785 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": 
null, + "value": 0.5785809018567639 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 108.07534790039062 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4072345390898483 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9988551803091014 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5209630222281199 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6316643486323598 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 2.2130112648010254 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5811421872383186 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 7.783103942871094 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.4107481060606061 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9931310818546079 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5214913767122241 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6105702364394993 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 263.1507568359375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5766182298546895 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 145.48297119140625 + }, + { + "id": 
"dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.40519842190763516 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9994275901545506 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.45836132847658473 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5334395272423883 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.625173852573018 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9659532308578491 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5768532276704639 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.690697431564331 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4053364269141531 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 1.0 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5151490072703857 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.626332869726472 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 107.05496215820312 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5768911055694097 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 344.6910705566406 + }, + { + "id": "manhattan_precision", + "display_name": 
"manhattan_precision", + "description": null, + "value": 0.4065135895032802 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9931310818546079 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5190509198117845 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6270282800185443 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 7.555862903594971 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5776892430278885 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 25.03308868408203 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.40682721533785365 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9959931310818546 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.521895461247817 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1922.33544921875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.577347143334447 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 901.3367309570312 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.40764331210191085 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9891242129364625 + }, + { + "id": 
"dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3848147438817163 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.521895461247817 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/euk_retrieval.json b/leaderboard/submissions/esm2_t6_8M_UR50D/euk_retrieval.json new file mode 100644 index 0000000..1f7668d --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.49408 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.47293 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.46848 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.12033 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.17266 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.25802 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.13544 + }, + { + "id": 
"recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.21307 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.414 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.45209 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.40289 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.25177 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.6258842443729904 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.6334430664012657 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.63890993904596 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.3197308988578981 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.39435369504510975 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.11150171386250221 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.29199381948996095 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.4225854193571499 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.09968997256246669 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.23946169242646484 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.4884005419729609 + }, + { + "id": 
"nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.15426795632949822 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.18414959332523892 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.5120588644087861 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.2941293942685382 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.1871139452652369 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.6089646840398025 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.2564925569791867 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.2706367050944652 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.5951870280953666 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.18224740482812346 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.1661806048683229 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.4546332994565831 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2642973757882288 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.15499500800330515 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 
0.5128221338936043 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2151781820356106 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.26130667769549404 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.46903523907914035 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.18462384496314488 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.292996802846984 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.3791795767825314 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": 0.04216356786779477 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.24939481046185955 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.377667896018253 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.0009061997167830926 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.11321554826255 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.07535552700441209 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.09619972438311733 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 
0.35543307829742765 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.31072161870298926 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.1905412426846346 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.358928797571008 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.30496859707141505 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.18287037565676256 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.36422026130776614 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.3034707810121825 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.18390019146509284 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.63931 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.62964 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.61528 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.21465 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.29645 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.435 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.22756 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.33071 
+ }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.53849 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.57878 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.51801 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.29942 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7176848874598072 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7224187209717757 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7253074741993061 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.392629018512806 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.6258691925738074 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.05004738255742322 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.395700048668899 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.626564211565225 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.05691623676909853 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.3789799295065068 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.6450219381389026 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 
0.07483337993245745 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.10430912918132056 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.5179584195524325 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3593900689021102 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.22103111933461858 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.6496944258232327 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.2622775364232766 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.3908889482223821 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.7223406412444101 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.14242751621845157 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.11222028982139254 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.4733253400027294 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3295287493268477 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.22279476415603025 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.5802492026654497 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + 
"description": null, + "value": 0.24853557230212295 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.4335923460209573 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.5885101603016908 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.09840850865407258 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.37166379252531523 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.5201669057711779 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.18172576887258612 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.3776413732817113 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.4464394014516979 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.21994682279900005 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.1885532531573671 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.006154947469005304 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.2438525109491442 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.40584332584647376 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + 
"description": null, + "value": 0.6023237957095166 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.09886691841743467 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.4110434425857959 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.6016275572651675 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.10021734255007336 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.40988630845569324 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6016744312872032 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.09594082080189775 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/fefe_phylogeny.json b/leaderboard/submissions/esm2_t6_8M_UR50D/fefe_phylogeny.json new file mode 100644 index 0000000..f92aa0e --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + 
"layer_display_name": "3", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.16751888592484196 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3387417973183231 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.28409648418772426 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3387417973183231 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.09209866020666911 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.393364401792143 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.35353088826306645 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.393364401792143 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/modac_paralogy_bigene.json b/leaderboard/submissions/esm2_t6_8M_UR50D/modac_paralogy_bigene.json new file mode 100644 index 0000000..547345d --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + 
"id": "precision", + "display_name": "precision", + "description": null, + "value": 4.5012846666438603e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.996527340446588e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.048927613941018765 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.039544235924932974 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/mopb_clustering.json b/leaderboard/submissions/esm2_t6_8M_UR50D/mopb_clustering.json new file mode 100644 index 0000000..6e61599 --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + 
"dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.5331854224162181 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.6537410442469105 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_arch_phylogeny.json b/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_arch_phylogeny.json new file mode 100644 index 0000000..2ccdcb0 --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.27249253585753297 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.32110156722667066 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2765400534230894 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.32110156722667066 + } + ] + }, + { + "layer_number": 5, + 
"layer_display_name": "5", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.3522084172242064 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.37491879803690276 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.37357997215191524 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.37491879803690276 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_bac_phylogeny.json b/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_bac_phylogeny.json new file mode 100644 index 0000000..d0eeedc --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1718947821310708 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.19149793117776248 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.19007575246850486 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + 
"value": 0.19149793117776248 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.08790159385645117 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.15824982794036926 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.14789213236823395 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.15824982794036926 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm2_t6_8M_UR50D/vibrio_operonic_pair.json b/leaderboard/submissions/esm2_t6_8M_UR50D/vibrio_operonic_pair.json new file mode 100644 index 0000000..89f729c --- /dev/null +++ b/leaderboard/submissions/esm2_t6_8M_UR50D/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "facebook/esm2_t6_8M_UR50D", + "revision": "...", + "num_layers": 6, + "num_params": 7840121, + "embed_dim": 320 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 3, + "layer_display_name": "3", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.672755538282161 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9907011985778809 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.519350811485643 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + 
"description": null, + "value": 0.9296838045120239 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.359706009511457 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9337822671156004 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4530183167188049 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6700349786241742 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 26.794944763183594 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.514153668399769 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 108.27510070800781 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3461688059120965 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9988776655443322 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4362485957941368 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6684803731053245 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 1.9865591526031494 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5159492102818211 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 6.243534088134766 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + 
"value": 0.3562874251497006 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9349046015712682 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.4373990660087158 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6673144189661874 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 249.0767059326172 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.514153668399769 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 140.143310546875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3461688059120965 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9988776655443322 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4217483948524102 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.4530183167188049 + } + ] + }, + { + "layer_number": 5, + "layer_display_name": "5", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6739214924212981 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9804219007492065 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5148973098061903 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.7031583786010742 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.34684333593141076 + }, + { + "id": "cos_sim_recall", + 
"display_name": "cos_sim_recall", + "description": null, + "value": 0.9988776655443322 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4509860248736615 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6731441896618733 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 87.79132080078125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5145827317354895 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 424.6683349609375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3464230171073095 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 1.0 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.44705936021219694 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6723668869024485 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 6.4593658447265625 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5145995952587453 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 28.367399215698242 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.34657320872274144 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9988776655443322 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + 
"description": null, + "value": 0.44864006673188783 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.655266226195103 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1411.238037109375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5150289017341041 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 733.3570556640625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3468275593616193 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.37998347107914326 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.4509860248736615 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/MIBIG_protein_classification.json b/leaderboard/submissions/esm3_sm_open_v1/MIBIG_protein_classification.json new file mode 100644 index 0000000..b664b6d --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + 
"id": "f1", + "display_name": "f1", + "description": null, + "value": 0.636355104266605 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6167800453514739 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7069604380909595 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.597171733895904 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.7634164777021923 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6312487280675517 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6485260770975056 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.8287305765851944 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.593295341094183 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.7853363567649292 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/arch_retrieval.json b/leaderboard/submissions/esm3_sm_open_v1/arch_retrieval.json new file mode 100644 index 0000000..eb8f3f7 --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + 
"revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.70515 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.67508 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.61454 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.19626 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.26995 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.4346 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.20967 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.29976 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.54307 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.64225 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.5694 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.31906 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7978588703940812 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8018806441341647 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8042019381614351 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.3579650015240981 + }, + { + "id": "nauc_ndcg_at_5_std", + 
"display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5538662118374643 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.07121392537711281 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.36004282446502917 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.5500899121550181 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.06407608269846478 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.3770368533261319 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.534374599363524 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.07015060905281298 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.18539848898802547 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.31854810998743216 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.28004800188179146 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.25192779729106995 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.38402228376270797 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.20900765662652615 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.3954699798402059 + }, + { + "id": 
"nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.4920278710857863 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.06884183044564153 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.16650302541068765 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.2850510299682119 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2636967276986314 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.2249286475281941 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.33887450650989764 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.20401547306921805 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.4023781046085471 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.43234270658640944 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.060365206823630356 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.308376572382042 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.43665840937338507 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.10907818514059998 + }, + { + "id": "nauc_precision_at_10_max", + 
"display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.2846959530839789 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.3847779977100886 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.15212765230925407 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.18337257938419196 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.18530369222525087 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.1644079959042882 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.36754804900818905 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.5693513814779096 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.2062429461049503 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.3691004694413564 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.5691709967620638 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.20330937371937058 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.37177469619915476 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.5703399454020713 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + 
"value": 0.20367868350764826 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.90233 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.88489 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.82906 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.28889 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.39901 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.66224 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.29863 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.41847 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.71763 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.82057 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.7449 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.43684 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.939586000853606 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9404775454071223 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9412010959480642 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.606910907737967 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5199070917615352 + 
}, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.17599253890391636 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.5841557989626389 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.5054057074197627 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.19935613525227047 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.542974079382819 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.25968971303126415 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.14191979932817245 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.016840485704212264 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.08991283753636957 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3257883729634555 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.10638156884546024 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.2262347259166251 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.23593506757296429 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.49736494683455656 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 
0.27319531923466184 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.054060401364761604 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.0007354671211860954 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.07694495575013391 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.32263639676831435 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.07571120622707732 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.20449939474022025 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2426929738765447 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.4472592915344256 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.2222518817744839 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.04044050438475481 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.43825590057266733 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.4364103471025071 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.5504677601558484 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.38558205107498994 + }, 
+ { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.34273043994349706 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.5338137215032046 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.24549655270413592 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.16714271032533543 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3428330189684675 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6909221927925662 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.549469540118431 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.05172988460116744 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6902007340806171 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.5456877076101203 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.055902637059805256 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6885036167829154 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.5443681909422375 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.053296800424556284 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/esm3_sm_open_v1/bacarch_bigene.json b/leaderboard/submissions/esm3_sm_open_v1/bacarch_bigene.json new file mode 100644 index 0000000..8dd50b0 --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.42965408805031446 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.5433962264150943 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.4629769392033543 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.5433962264150943 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7738993710691824 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.8377358490566038 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7939622641509434 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.8377358490566038 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/esm3_sm_open_v1/convergent_enzymes_classification.json b/leaderboard/submissions/esm3_sm_open_v1/convergent_enzymes_classification.json new file mode 100644 index 0000000..e447b43 --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1275 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.09587499999999999 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.275 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.22541666666666665 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/cyano_operonic_pair.json b/leaderboard/submissions/esm3_sm_open_v1/cyano_operonic_pair.json new file mode 100644 index 0000000..a318059 --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic 
pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7222222222222222 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9997950792312622 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4427665357897916 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9974009990692139 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.2846601941747573 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9959239130434783 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.337253378468868 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7218390804597701 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 327.0663757324219 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.44196563159481467 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 1296.3701171875 + }, + { + "id": "manhattan_precision", + "display_name": 
"manhattan_precision", + "description": null, + "value": 0.2839984502130957 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9959239130434783 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.33280379538654115 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7195402298850575 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 9.786035537719727 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4412296564195298 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 57.51576614379883 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.28350116189000774 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9945652173913043 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.32391365711700537 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 322538.40625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.4400597907324364 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 265308.6875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.28210042161747795 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + 
"display_name": "dot_ap", + "description": null, + "value": 0.3207731705793404 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.337253378468868 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7203065134099617 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9866379499435425 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4491869918699187 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9242078065872192 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.29918772563176893 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9008152173913043 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.3525781644570656 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7233716475095785 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 43574.78125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4649681528662421 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 77644.453125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.32882882882882886 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.7934782608695652 + }, + { + "id": "manhattan_ap", + 
"display_name": "manhattan_ap", + "description": null, + "value": 0.3899191915936865 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7241379310344828 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 1619.7060546875 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4535363146209959 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 5233.001953125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.29582126603227143 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9714673913043478 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3789538339954788 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.717624521072797 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 165274224.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.4403230631169608 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 17215182.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.2823168392788646 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.2649290773065526 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3899191915936865 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/esm3_sm_open_v1/ec_classification.json b/leaderboard/submissions/esm3_sm_open_v1/ec_classification.json new file mode 100644 index 0000000..ac4750f --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4296875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.37109375 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.640625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.58125 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/ecoli_operonic_pair.json b/leaderboard/submissions/esm3_sm_open_v1/ecoli_operonic_pair.json new file mode 100644 index 0000000..9e1ecdb --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": 
"a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.636532220676866 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9995946884155273 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5850875467243754 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9987356662750244 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4457434052757794 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8511734401831712 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5360226078826122 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6330551692165044 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 486.49554443359375 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5842115728773115 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 851.4708251953125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.44759293113954907 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + 
"description": null, + "value": 0.840870062965083 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5306776700721911 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.624246638850255 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 16.513151168823242 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.58118490694895 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 48.195823669433594 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.41411192214111925 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9742415569547797 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5148107768472899 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6080203987019008 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 306449.375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5765676567656766 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 262232.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4050544864363552 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.46592646226646905 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 
0.5360226078826122 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6214649976819657 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.976504921913147 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5863819359355854 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.8973178267478943 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4223398890569844 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9587864911276474 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5218317714095929 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6488178025034771 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 58031.8671875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.6091816367265469 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 80041.53125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4676677903769537 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8734974241556954 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5773856033732948 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + 
"description": null, + "value": 0.6467315716272601 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 2202.94287109375 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6004628655585945 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 3375.35400390625 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.47471723220226214 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8168288494562106 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5731341808177214 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.596430227167362 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 115011088.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5762376237623762 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 15893788.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4048226292603756 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9994275901545506 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.38760123548875813 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5773856033732948 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/euk_retrieval.json b/leaderboard/submissions/esm3_sm_open_v1/euk_retrieval.json new file mode 100644 index 0000000..b0ab425 --- /dev/null +++ 
b/leaderboard/submissions/esm3_sm_open_v1/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.6837 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.6755 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.66084 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.23875 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.32019 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.4781 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.25147 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.35629 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.58814 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.60707 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 
0.54534 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.31698 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7798499464094321 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7851452049201246 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7886590905635646 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.41928357810017647 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5290892468467935 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.04043953758876136 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.40804876886618785 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.5383625651482825 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.01759772593814632 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.43399119456019236 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.5212951246080316 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.01371218471516856 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.27286987374586946 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.22003168666870496 + }, + { + "id": "nauc_map_at_5_diff1", + 
"display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.16926800678329118 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.3276407501683195 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.3666450599737096 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.11949072588665864 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.498662227018851 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.4597011315598956 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.03187150745443391 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.27171559469523077 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.19392467894582097 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.1631735596593573 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.30501148418451934 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.3112246092198893 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.10000246435945939 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5135191785402357 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 
0.34400775125068594 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.0119120902289244 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.30744744151056824 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.49705438543467206 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.10084482308129951 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.24792520457888326 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.4662889869089896 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.14491216098622942 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.06326065957986557 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.14770445635312474 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.1263680546684453 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.44299200927250004 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.5506272059606037 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.08607775056210379 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 
0.4491987153534213 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.5532024059864912 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.08158272636409196 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.44650104486099906 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.551026552196252 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.08532180157223326 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.91548 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.90603 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.86287 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.34535 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.46156 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.67786 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.35018 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.4715 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.70168 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.81994 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.73376 + }, + { + "id": "precision_at_50", + "display_name": 
"precision_at_50", + "description": null, + "value": 0.41428 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9427652733118971 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9427652733118971 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9429544164932854 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6646467247052238 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.6778764436485658 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.21513862915950627 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6698283650372349 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.6390956129452405 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.23500186221206765 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6924793871399406 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.35247236253685754 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.17064311750132002 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.13111830562024587 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.030763792353242985 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 
0.40412645078710363 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.2602272376646378 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.18361191689681303 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.27432252545738767 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.7542001393377396 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.2787250331382991 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.104465533984337 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.12969399328272832 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.020904262568601558 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.4093101686701434 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.25240086482655144 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.1652691811031539 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2866072970878988 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.7612695752290041 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.23942734181223047 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": 
"nauc_recall_at_50_diff1", + "description": null, + "value": -0.08592614001207499 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.44130649056515153 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.5342490731780283 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.8075663738228478 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.3247314376392803 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.42214838262701343 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.7330035174411789 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.08490805730470258 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.0845205834811262 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3952240507561035 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7270043548372578 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.7352921140815318 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.02920714692373676 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7270043548372578 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + 
"description": null, + "value": 0.7352921140815318 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.02920714692373676 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7265324603536517 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.7344144354146935 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.029875033589520282 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/fefe_phylogeny.json b/leaderboard/submissions/esm3_sm_open_v1/fefe_phylogeny.json new file mode 100644 index 0000000..79ca261 --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.3019747000293895 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.4717597979027869 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.38463047891710866 + }, + { + "id": "top_corr", 
+ "display_name": "top_corr", + "description": null, + "value": 0.4717597979027869 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.4630693133096334 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.73784072309402 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.514040623446403 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.73784072309402 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/modac_paralogy_bigene.json b/leaderboard/submissions/esm3_sm_open_v1/modac_paralogy_bigene.json new file mode 100644 index 0000000..23030a2 --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 
0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.04155495978552279 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.498263670223294e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.990493452223619e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.19906166219839141 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/mopb_clustering.json b/leaderboard/submissions/esm3_sm_open_v1/mopb_clustering.json new file mode 100644 index 0000000..1149ed7 --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.603447572970274 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7454097575629404 + } + ] + } + ] 
+} diff --git a/leaderboard/submissions/esm3_sm_open_v1/rpob_arch_phylogeny.json b/leaderboard/submissions/esm3_sm_open_v1/rpob_arch_phylogeny.json new file mode 100644 index 0000000..ae52589 --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.21439261220583106 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.24110859242777566 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.08984114827917193 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.24110859242777566 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.11813832747662069 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.19863039504299163 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.16233060651692185 + }, + { + "id": "top_corr", + "display_name": "top_corr", 
+ "description": null, + "value": 0.19863039504299163 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/rpob_bac_phylogeny.json b/leaderboard/submissions/esm3_sm_open_v1/rpob_bac_phylogeny.json new file mode 100644 index 0000000..31aaffc --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.09049552550031388 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.14923045808816454 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.09609269155388361 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.14923045808816454 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.034250691494333164 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2102813650664429 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 
0.17546616789647132 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2102813650664429 + } + ] + } + ] +} diff --git a/leaderboard/submissions/esm3_sm_open_v1/vibrio_operonic_pair.json b/leaderboard/submissions/esm3_sm_open_v1/vibrio_operonic_pair.json new file mode 100644 index 0000000..0b2a68a --- /dev/null +++ b/leaderboard/submissions/esm3_sm_open_v1/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "esm3_sm_open_v1", + "revision": "...", + "num_layers": 48, + "num_params": 1401735748, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 24, + "layer_display_name": "24", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6715895841430237 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9996908903121948 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5164867230814123 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.997383713722229 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.34897476340694006 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9932659932659933 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4521561741786758 + }, + { + "id": 
"manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6708122813835989 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 398.7920837402344 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5162224797219003 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 1435.895263671875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3479109722764545 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 1.0 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4485744553427403 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6657598134473377 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 13.880131721496582 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5161478033168461 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 55.61311340332031 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3483896307934014 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9955106621773289 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.43312932328281045 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6568208317139526 + }, + { + "id": "dot_accuracy_threshold", + "display_name": 
"dot_accuracy_threshold", + "description": null, + "value": 314096.6875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5152660657167781 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 270321.46875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.347723704866562 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9943883277216611 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.38171085731998233 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.4521561741786758 + } + ] + }, + { + "layer_number": 47, + "layer_display_name": "47", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6692576758647493 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9806265234947205 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5221445221445222 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9249705076217651 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3712121212121212 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8799102132435466 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.44374575153725326 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6824718227749709 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 
54751.6875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5474349964862966 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 77884.234375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.39846547314578007 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8742985409652076 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.49709099146687025 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6789739603575593 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 1890.283447265625 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5386138613861385 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 3902.364501953125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3814866760168303 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9158249158249159 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.49224489125057536 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6544889234356782 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 123162152.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.514302224790523 + }, + { + "id": "dot_f1_threshold", + "display_name": 
"dot_f1_threshold", + "description": null, + "value": 16955928.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3463035019455253 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9988776655443322 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.32925404339553804 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.49709099146687025 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-131k-base/MIBIG_dna_classification.json b/leaderboard/submissions/evo-1-131k-base/MIBIG_dna_classification.json new file mode 100644 index 0000000..ac0fcec --- /dev/null +++ b/leaderboard/submissions/evo-1-131k-base/MIBIG_dna_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_dna_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_dna", + "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "togethercomputer/evo-1-131k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.446229597310732 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4399092970521542 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.6831437731761506 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.39652342066563384 + }, + { + "id": "lrap", + "display_name": 
"lrap", + "description": null, + "value": 0.6090325018896443 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.4296723570558472 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4036281179138322 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.5642457222211535 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.3853936778136288 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.5925925925925916 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-131k-base/arch_16S_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/arch_16S_phylogeny.json new file mode 100644 index 0000000..35738a9 --- /dev/null +++ b/leaderboard/submissions/evo-1-131k-base/arch_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "arch_16S_phylogeny", + "display_name": "16S Archaeal Phylogeny", + "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/arch_16S_sequences", + "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0" + }, + { + "path": "tattabio/arch_16S_distances", + "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-131k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": -0.025266077406048616 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.004690157120927916 + }, + { + "id": "euclidean", + 
"display_name": "euclidean", + "description": null, + "value": 0.004253147697855626 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.004690157120927916 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": -0.015580669639663534 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.0188972060041519 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.018436446052964364 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.0188972060041519 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-131k-base/bac_16S_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/bac_16S_phylogeny.json new file mode 100644 index 0000000..b2d3cdb --- /dev/null +++ b/leaderboard/submissions/evo-1-131k-base/bac_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "bac_16S_phylogeny", + "display_name": "16S Bacterial Phylogeny", + "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/bac_16S_sequences", + "revision": "efde1456b86748909cbcfecb07d783756d570aa3" + }, + { + "path": "tattabio/bac_16S_distances", + "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-131k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.005078797435769416 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 
0.06827101916712418 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.07324156211751019 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.07324156211751019 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.00640567975008095 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.046732462334814075 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.036484161781421304 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.046732462334814075 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-131k-base/ec_dna_classification.json b/leaderboard/submissions/evo-1-131k-base/ec_dna_classification.json new file mode 100644 index 0000000..d23ea5f --- /dev/null +++ b/leaderboard/submissions/evo-1-131k-base/ec_dna_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_dna_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification_dna", + "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "togethercomputer/evo-1-131k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.015625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.015625 + } + ] + }, + { + "layer_number": 31, + 
"layer_display_name": "31", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.03125 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.014843749999999998 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-131k-base/ecoli_rna_clustering.json b/leaderboard/submissions/evo-1-131k-base/ecoli_rna_clustering.json new file mode 100644 index 0000000..cfdc747 --- /dev/null +++ b/leaderboard/submissions/evo-1-131k-base/ecoli_rna_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "ecoli_rna_clustering", + "display_name": "E.coli RNA Clustering", + "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.", + "modality": "dna", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/e_coli_rnas", + "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "togethercomputer/evo-1-131k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.6805148989283036 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.6265730145810015 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-131k-base/euk_18S_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/euk_18S_phylogeny.json new file mode 100644 index 0000000..41eca90 --- /dev/null +++ b/leaderboard/submissions/evo-1-131k-base/euk_18S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "euk_18S_phylogeny", + "display_name": "18S Eukaryotic Phylogeny", + "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.", + 
"modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/euk_18S_sequences", + "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196" + }, + { + "path": "tattabio/euk_18S_distances", + "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-131k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.057284272544390114 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.15078767895903925 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.16109699901851568 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.16109699901851568 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.028039837868505658 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.10313971802764796 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.07777177959099602 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.10313971802764796 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-131k-base/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/rpob_arch_dna_phylogeny.json new file mode 100644 index 0000000..a1af986 --- /dev/null +++ b/leaderboard/submissions/evo-1-131k-base/rpob_arch_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_dna_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": 
"Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_dna_phylogeny_sequences", + "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408" + }, + { + "path": "tattabio/rpob_arch_dna_phylogeny_distances", + "revision": "51df97684a927ec2203568e80175ef26a62db039" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-131k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.111612661283259 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.1515232273176025 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.13606007928035405 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.1515232273176025 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.10133747509368889 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.14126576817939926 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.1394041959490421 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.14126576817939926 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-131k-base/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/evo-1-131k-base/rpob_bac_dna_phylogeny.json new file mode 100644 index 0000000..7d9e0c0 --- /dev/null +++ b/leaderboard/submissions/evo-1-131k-base/rpob_bac_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + 
"task": { + "id": "rpob_bac_dna_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_dna_phylogeny_sequences", + "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6" + }, + { + "path": "tattabio/rpob_bac_dna_phylogeny_distances", + "revision": "67339e271b2a1602208153d53d70d35ba6fa8876" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-131k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.04024491891770464 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.06106293578002046 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.05378417692908014 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.06106293578002046 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.03322125351100114 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.07033959001713873 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.06595278201357854 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.07033959001713873 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-8k-base/MIBIG_dna_classification.json b/leaderboard/submissions/evo-1-8k-base/MIBIG_dna_classification.json new file mode 100644 index 0000000..44e83e4 --- 
/dev/null +++ b/leaderboard/submissions/evo-1-8k-base/MIBIG_dna_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_dna_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_dna", + "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "togethercomputer/evo-1-8k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.42643961044280726 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.3854875283446712 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.5505888358897618 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.383668058757702 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.5793650793650779 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.40603817804559855 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.3877551020408163 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.5201002548153315 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.3692051425781597 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.5804988662131507 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/evo-1-8k-base/arch_16S_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/arch_16S_phylogeny.json new file mode 100644 index 0000000..44f1c37 --- /dev/null +++ b/leaderboard/submissions/evo-1-8k-base/arch_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "arch_16S_phylogeny", + "display_name": "16S Archaeal Phylogeny", + "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/arch_16S_sequences", + "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0" + }, + { + "path": "tattabio/arch_16S_distances", + "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-8k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": -0.010027057225064055 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": -0.02215556380490859 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": -0.019732162616989075 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": -0.010027057225064055 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": -0.005205558296298042 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": -0.019367509701809612 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": -0.015643699787755726 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 
-0.005205558296298042 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-8k-base/bac_16S_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/bac_16S_phylogeny.json new file mode 100644 index 0000000..21e5cf2 --- /dev/null +++ b/leaderboard/submissions/evo-1-8k-base/bac_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "bac_16S_phylogeny", + "display_name": "16S Bacterial Phylogeny", + "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/bac_16S_sequences", + "revision": "efde1456b86748909cbcfecb07d783756d570aa3" + }, + { + "path": "tattabio/bac_16S_distances", + "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-8k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.04234553644215741 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.06008165397377432 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.0375514113116624 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.06008165397377432 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.05401996118245526 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.0731645579846154 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.06211925714372264 + }, + { + "id": "top_corr", + "display_name": "top_corr", + 
"description": null, + "value": 0.0731645579846154 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-8k-base/ec_dna_classification.json b/leaderboard/submissions/evo-1-8k-base/ec_dna_classification.json new file mode 100644 index 0000000..0dcf5b6 --- /dev/null +++ b/leaderboard/submissions/evo-1-8k-base/ec_dna_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_dna_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification_dna", + "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "togethercomputer/evo-1-8k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.015625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.01171875 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0234375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.0077008928571428575 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-8k-base/ecoli_rna_clustering.json b/leaderboard/submissions/evo-1-8k-base/ecoli_rna_clustering.json new file mode 100644 index 0000000..89fe125 --- /dev/null +++ b/leaderboard/submissions/evo-1-8k-base/ecoli_rna_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "ecoli_rna_clustering", + "display_name": "E.coli RNA Clustering", + "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.", + 
"modality": "dna", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/e_coli_rnas", + "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "togethercomputer/evo-1-8k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.6510094548633155 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.6604911155601946 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-8k-base/euk_18S_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/euk_18S_phylogeny.json new file mode 100644 index 0000000..d61618b --- /dev/null +++ b/leaderboard/submissions/evo-1-8k-base/euk_18S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "euk_18S_phylogeny", + "display_name": "18S Eukaryotic Phylogeny", + "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/euk_18S_sequences", + "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196" + }, + { + "path": "tattabio/euk_18S_distances", + "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-8k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1711778687705844 + }, + { + "id": "manhattan", + "display_name": "manhattan", + 
"description": null, + "value": 0.22309569321619807 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.22182726039127965 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.22309569321619807 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.18718252462867385 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.20078257678873968 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.1894448685039182 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.20078257678873968 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-8k-base/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/rpob_arch_dna_phylogeny.json new file mode 100644 index 0000000..e605272 --- /dev/null +++ b/leaderboard/submissions/evo-1-8k-base/rpob_arch_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_dna_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_dna_phylogeny_sequences", + "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408" + }, + { + "path": "tattabio/rpob_arch_dna_phylogeny_distances", + "revision": "51df97684a927ec2203568e80175ef26a62db039" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-8k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + 
"description": null, + "value": 0.12141412399945198 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.1224894376801759 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.08436643461024505 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.1224894376801759 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.12074727064487567 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.1436975275108405 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.14577087550878226 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.14577087550878226 + } + ] + } + ] +} diff --git a/leaderboard/submissions/evo-1-8k-base/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/evo-1-8k-base/rpob_bac_dna_phylogeny.json new file mode 100644 index 0000000..00c8844 --- /dev/null +++ b/leaderboard/submissions/evo-1-8k-base/rpob_bac_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_dna_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_dna_phylogeny_sequences", + "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6" + }, + { + "path": "tattabio/rpob_bac_dna_phylogeny_distances", + "revision": "67339e271b2a1602208153d53d70d35ba6fa8876" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "togethercomputer/evo-1-8k-base", + "revision": "...", + "num_layers": 32, + "num_params": 6452781056, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + 
"layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.050201705681568114 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.07465904170918622 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.05223608317175334 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.07465904170918622 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.04649870099899381 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.08974965386451692 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.08311750592573247 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.08974965386451692 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/MIBIG_dna_classification.json new file mode 100644 index 0000000..12f8570 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/MIBIG_dna_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_dna_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_dna", + "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species", + "revision": "...", + "num_layers": 32, + "num_params": 2547801226, + 
"embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.4987765050770981 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.5124716553287982 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.6793203115492271 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.45292258688111664 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.6768707482993195 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.44306708410477275 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.46258503401360546 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.6572912628877802 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.4111353087388993 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.6402116402116397 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/arch_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/arch_16S_phylogeny.json new file mode 100644 index 0000000..47b2f9b --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/arch_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "arch_16S_phylogeny", + "display_name": "16S Archaeal Phylogeny", + "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/arch_16S_sequences", + "revision": 
"e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0" + }, + { + "path": "tattabio/arch_16S_distances", + "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species", + "revision": "...", + "num_layers": 32, + "num_params": 2547801226, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": -0.10654835165163948 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": -0.0449208645031223 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": -0.039164294581157263 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": -0.039164294581157263 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": -0.05342124724518901 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.013383992412723606 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.008694532964576274 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.013383992412723606 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/bac_16S_phylogeny.json new file mode 100644 index 0000000..a5c0d2b --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/bac_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "bac_16S_phylogeny", + "display_name": "16S Bacterial Phylogeny", + "description": "Evaluate on 16S Bacterial 
phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/bac_16S_sequences", + "revision": "efde1456b86748909cbcfecb07d783756d570aa3" + }, + { + "path": "tattabio/bac_16S_distances", + "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species", + "revision": "...", + "num_layers": 32, + "num_params": 2547801226, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.3034936215778572 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.30766902257663337 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.311140752313069 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.311140752313069 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.23391355792374574 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.23593114226208475 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.22752660313351064 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.23593114226208475 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ec_dna_classification.json new file mode 100644 index 0000000..6bd3af5 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ec_dna_classification.json @@ -0,0 
+1,62 @@ +{ + "task": { + "id": "ec_dna_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification_dna", + "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species", + "revision": "...", + "num_layers": 32, + "num_params": 2547801226, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1640625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.13072916666666667 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.125 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.09505208333333333 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ecoli_rna_clustering.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ecoli_rna_clustering.json new file mode 100644 index 0000000..c132928 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/ecoli_rna_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "ecoli_rna_clustering", + "display_name": "E.coli RNA Clustering", + "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.", + "modality": "dna", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/e_coli_rnas", + "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": 
"InstaDeepAI/nucleotide-transformer-2.5b-multi-species", + "revision": "...", + "num_layers": 32, + "num_params": 2547801226, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.1895135298164807 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.12248243519915075 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/euk_18S_phylogeny.json new file mode 100644 index 0000000..f6e0bd2 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/euk_18S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "euk_18S_phylogeny", + "display_name": "18S Eukaryotic Phylogeny", + "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/euk_18S_sequences", + "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196" + }, + { + "path": "tattabio/euk_18S_distances", + "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species", + "revision": "...", + "num_layers": 32, + "num_params": 2547801226, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.29292521556483414 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2974787640408906 + }, + { + "id": "euclidean", + "display_name": 
"euclidean", + "description": null, + "value": 0.30303420658256763 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.30303420658256763 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2643943410861178 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2695059233247244 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2683265266152356 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2695059233247244 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_arch_dna_phylogeny.json new file mode 100644 index 0000000..5461291 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_arch_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_dna_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_dna_phylogeny_sequences", + "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408" + }, + { + "path": "tattabio/rpob_arch_dna_phylogeny_distances", + "revision": "51df97684a927ec2203568e80175ef26a62db039" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species", + "revision": "...", + "num_layers": 32, + "num_params": 2547801226, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + 
"description": null, + "value": 0.1442196721280385 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.18351276436060954 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.16445811077836048 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.18351276436060954 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.12286910318887798 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.15401143244304646 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.15002628314647487 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.15401143244304646 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_bac_dna_phylogeny.json new file mode 100644 index 0000000..783073c --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-2.5b-multi-species/rpob_bac_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_dna_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_dna_phylogeny_sequences", + "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6" + }, + { + "path": "tattabio/rpob_bac_dna_phylogeny_distances", + "revision": "67339e271b2a1602208153d53d70d35ba6fa8876" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-2.5b-multi-species", + "revision": "...", + "num_layers": 32, + "num_params": 
2547801226, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.11375832395356672 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.13416702069118552 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.13788912961135927 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.13788912961135927 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.0996182572764333 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.11927361003896635 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.12190344621512093 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.12190344621512093 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/MIBIG_dna_classification.json new file mode 100644 index 0000000..e5fe551 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/MIBIG_dna_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_dna_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_dna", + "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": 
"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "revision": "...", + "num_layers": 22, + "num_params": 97889132, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5026229318789414 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.47619047619047616 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.6915625965314858 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.4393957527314288 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.6402116402116393 + } + ] + }, + { + "layer_number": 21, + "layer_display_name": "21", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.4666972620012171 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4512471655328798 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7469863250998614 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.42086846252461213 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.6228269085411932 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/arch_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/arch_16S_phylogeny.json new file mode 100644 index 0000000..34de4ff --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/arch_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "arch_16S_phylogeny", + "display_name": "16S Archaeal Phylogeny", + "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.", + "modality": 
"dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/arch_16S_sequences", + "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0" + }, + { + "path": "tattabio/arch_16S_distances", + "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "revision": "...", + "num_layers": 22, + "num_params": 97889132, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": -0.01873048882073702 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.028071992556529406 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.0278344032639243 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.028071992556529406 + } + ] + }, + { + "layer_number": 21, + "layer_display_name": "21", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": -0.035107630996407665 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.016885345022489895 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.012808674577804298 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.016885345022489895 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/bac_16S_phylogeny.json new file mode 100644 index 0000000..eca6e8e --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/bac_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": 
"bac_16S_phylogeny", + "display_name": "16S Bacterial Phylogeny", + "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/bac_16S_sequences", + "revision": "efde1456b86748909cbcfecb07d783756d570aa3" + }, + { + "path": "tattabio/bac_16S_distances", + "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "revision": "...", + "num_layers": 22, + "num_params": 97889132, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.31696264570144556 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3398798297254198 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.3384167443991009 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3398798297254198 + } + ] + }, + { + "layer_number": 21, + "layer_display_name": "21", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.17067324886079815 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.22902435877832863 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2227097102546006 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.22902435877832863 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ec_dna_classification.json new file mode 100644 index 0000000..d40bb85 --- 
/dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ec_dna_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_dna_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification_dna", + "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "revision": "...", + "num_layers": 22, + "num_params": 97889132, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.109375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.08556547619047619 + } + ] + }, + { + "layer_number": 21, + "layer_display_name": "21", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1171875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.08645833333333333 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ecoli_rna_clustering.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ecoli_rna_clustering.json new file mode 100644 index 0000000..b7ab6f8 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/ecoli_rna_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "ecoli_rna_clustering", + "display_name": "E.coli RNA Clustering", + "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.", + "modality": "dna", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/e_coli_rnas", + "revision": 
"4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "revision": "...", + "num_layers": 22, + "num_params": 97889132, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.11902139186014724 + } + ] + }, + { + "layer_number": 21, + "layer_display_name": "21", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.14292083591071578 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/euk_18S_phylogeny.json new file mode 100644 index 0000000..3e97274 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/euk_18S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "euk_18S_phylogeny", + "display_name": "18S Eukaryotic Phylogeny", + "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/euk_18S_sequences", + "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196" + }, + { + "path": "tattabio/euk_18S_distances", + "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "revision": "...", + "num_layers": 22, + "num_params": 97889132, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.31522237768693256 + }, + { + "id": "manhattan", + 
"display_name": "manhattan", + "description": null, + "value": 0.3357015318391086 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.33507578766511564 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3357015318391086 + } + ] + }, + { + "layer_number": 21, + "layer_display_name": "21", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.29046858175431006 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.31008830299644424 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.30828628825037435 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.31008830299644424 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_arch_dna_phylogeny.json new file mode 100644 index 0000000..f78e60d --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_arch_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_dna_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_dna_phylogeny_sequences", + "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408" + }, + { + "path": "tattabio/rpob_arch_dna_phylogeny_distances", + "revision": "51df97684a927ec2203568e80175ef26a62db039" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "revision": "...", + "num_layers": 22, + "num_params": 97889132, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + 
"results": [ + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1234737764549469 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.18655251846904775 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.17777969131912802 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.18655251846904775 + } + ] + }, + { + "layer_number": 21, + "layer_display_name": "21", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.13078468145534033 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.15872053264382796 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.15263070680987556 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.15872053264382796 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_bac_dna_phylogeny.json new file mode 100644 index 0000000..ecb74c5 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-100m-multi-species/rpob_bac_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_dna_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_dna_phylogeny_sequences", + "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6" + }, + { + "path": "tattabio/rpob_bac_dna_phylogeny_distances", + "revision": "67339e271b2a1602208153d53d70d35ba6fa8876" + } + ], + "primary_metric_id": 
"top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", + "revision": "...", + "num_layers": 22, + "num_params": 97889132, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.07821452951223222 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.10661690192801286 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.10642148119114682 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.10661690192801286 + } + ] + }, + { + "layer_number": 21, + "layer_display_name": "21", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.21595314850334169 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.1831784122734034 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.19246083157603466 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.21595314850334169 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/MIBIG_dna_classification.json new file mode 100644 index 0000000..8398208 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/MIBIG_dna_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_dna_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": 
"tattabio/mibig_classification_dna", + "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", + "revision": "...", + "num_layers": 24, + "num_params": 235120780, + "embed_dim": 768 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5061420121003121 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.47619047619047616 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7551144630181957 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.44657932055853555 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.6462585034013596 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.4543771338680633 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4399092970521542 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.5940398459735476 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.42087398922226954 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.6237717309145869 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/arch_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/arch_16S_phylogeny.json new file mode 100644 index 0000000..eb2ba07 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/arch_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": 
"arch_16S_phylogeny", + "display_name": "16S Archaeal Phylogeny", + "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/arch_16S_sequences", + "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0" + }, + { + "path": "tattabio/arch_16S_distances", + "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", + "revision": "...", + "num_layers": 24, + "num_params": 235120780, + "embed_dim": 768 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.11921584406827451 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.15679219545816353 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.15045441173932622 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.15679219545816353 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.06475995486443784 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.09602276206583968 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.09540879980313609 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.09602276206583968 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/bac_16S_phylogeny.json new file mode 100644 index 0000000..1599088 --- 
/dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/bac_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "bac_16S_phylogeny", + "display_name": "16S Bacterial Phylogeny", + "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/bac_16S_sequences", + "revision": "efde1456b86748909cbcfecb07d783756d570aa3" + }, + { + "path": "tattabio/bac_16S_distances", + "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", + "revision": "...", + "num_layers": 24, + "num_params": 235120780, + "embed_dim": 768 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.24014165203750024 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.28990471689345415 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2909216542477827 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2909216542477827 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.0408624668440742 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.09721819145306612 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.09461311593137267 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.09721819145306612 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ec_dna_classification.json new file mode 100644 index 0000000..3b7238b --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ec_dna_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_dna_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification_dna", + "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", + "revision": "...", + "num_layers": 24, + "num_params": 235120780, + "embed_dim": 768 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1171875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.10989583333333333 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.125 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.09427083333333333 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ecoli_rna_clustering.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ecoli_rna_clustering.json new file mode 100644 index 0000000..8f02f68 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/ecoli_rna_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "ecoli_rna_clustering", + 
"display_name": "E.coli RNA Clustering", + "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.", + "modality": "dna", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/e_coli_rnas", + "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", + "revision": "...", + "num_layers": 24, + "num_params": 235120780, + "embed_dim": 768 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.15577517592023576 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.226785366801238 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/euk_18S_phylogeny.json new file mode 100644 index 0000000..e264576 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/euk_18S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "euk_18S_phylogeny", + "display_name": "18S Eukaryotic Phylogeny", + "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/euk_18S_sequences", + "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196" + }, + { + "path": "tattabio/euk_18S_distances", + "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", + "revision": "...", + "num_layers": 24, + "num_params": 235120780, + "embed_dim": 768 + }, + 
"dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.28890294917726145 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.33379301514333115 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.33105693660190544 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.33379301514333115 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1815521898033015 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.25499618145404046 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2528923474396306 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.25499618145404046 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_arch_dna_phylogeny.json new file mode 100644 index 0000000..170ee8d --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_arch_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_dna_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_dna_phylogeny_sequences", + "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408" + }, + { + "path": "tattabio/rpob_arch_dna_phylogeny_distances", + "revision": "51df97684a927ec2203568e80175ef26a62db039" + } + ], + 
"primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", + "revision": "...", + "num_layers": 24, + "num_params": 235120780, + "embed_dim": 768 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.15073903119250268 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.21364664625871488 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.19287471846027535 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.21364664625871488 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1647252908025232 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.19747761810776127 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.1965799552266182 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.19747761810776127 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_bac_dna_phylogeny.json new file mode 100644 index 0000000..adef74e --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-250m-multi-species/rpob_bac_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_dna_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": 
"tattabio/rpob_bac_dna_phylogeny_sequences", + "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6" + }, + { + "path": "tattabio/rpob_bac_dna_phylogeny_distances", + "revision": "67339e271b2a1602208153d53d70d35ba6fa8876" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", + "revision": "...", + "num_layers": 24, + "num_params": 235120780, + "embed_dim": 768 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.07392689866136973 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.08180489841927971 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.08993373294524076 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.08993373294524076 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1196418634913048 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.11994664883382657 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.12250307785318251 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.12250307785318251 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/MIBIG_dna_classification.json new file mode 100644 index 0000000..1907ee3 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/MIBIG_dna_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": 
"MIBIG_dna_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_dna", + "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", + "revision": "...", + "num_layers": 29, + "num_params": 498345436, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 14, + "layer_display_name": "14", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.4995376571471013 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.5056689342403629 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7326619778346121 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.45041650239675696 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.6662887377173087 + } + ] + }, + { + "layer_number": 28, + "layer_display_name": "28", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.4539500344324366 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4580498866213152 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7216247043960596 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.414670336687932 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.6300075585789865 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/arch_16S_phylogeny.json 
b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/arch_16S_phylogeny.json new file mode 100644 index 0000000..d7d0504 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/arch_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "arch_16S_phylogeny", + "display_name": "16S Archaeal Phylogeny", + "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/arch_16S_sequences", + "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0" + }, + { + "path": "tattabio/arch_16S_distances", + "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", + "revision": "...", + "num_layers": 29, + "num_params": 498345436, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 14, + "layer_display_name": "14", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.07471725236964263 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.11028500148116578 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.10974020375275832 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.11028500148116578 + } + ] + }, + { + "layer_number": 28, + "layer_display_name": "28", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.023925193720304064 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.08930443289494266 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.08119092099810726 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + 
"value": 0.08930443289494266 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/bac_16S_phylogeny.json new file mode 100644 index 0000000..761e325 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/bac_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "bac_16S_phylogeny", + "display_name": "16S Bacterial Phylogeny", + "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/bac_16S_sequences", + "revision": "efde1456b86748909cbcfecb07d783756d570aa3" + }, + { + "path": "tattabio/bac_16S_distances", + "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", + "revision": "...", + "num_layers": 29, + "num_params": 498345436, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 14, + "layer_display_name": "14", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.28755176441767716 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2901315012697327 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.29875580881828134 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.29875580881828134 + } + ] + }, + { + "layer_number": 28, + "layer_display_name": "28", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2625695435176704 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2961089715220018 + }, + { + "id": "euclidean", + "display_name": 
"euclidean", + "description": null, + "value": 0.29903750149292324 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.29903750149292324 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ec_dna_classification.json new file mode 100644 index 0000000..c2519cb --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ec_dna_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_dna_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification_dna", + "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", + "revision": "...", + "num_layers": 29, + "num_params": 498345436, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 14, + "layer_display_name": "14", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1328125 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.09492187499999999 + } + ] + }, + { + "layer_number": 28, + "layer_display_name": "28", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.109375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.08854166666666666 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ecoli_rna_clustering.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ecoli_rna_clustering.json new file mode 100644 
index 0000000..8236f27 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/ecoli_rna_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "ecoli_rna_clustering", + "display_name": "E.coli RNA Clustering", + "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.", + "modality": "dna", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/e_coli_rnas", + "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", + "revision": "...", + "num_layers": 29, + "num_params": 498345436, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 14, + "layer_display_name": "14", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.17872090857056205 + } + ] + }, + { + "layer_number": 28, + "layer_display_name": "28", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.23101767784787614 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/euk_18S_phylogeny.json new file mode 100644 index 0000000..126e967 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/euk_18S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "euk_18S_phylogeny", + "display_name": "18S Eukaryotic Phylogeny", + "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/euk_18S_sequences", + "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196" + }, + { + "path": "tattabio/euk_18S_distances", + "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da" + } + ], + "primary_metric_id": 
"top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", + "revision": "...", + "num_layers": 29, + "num_params": 498345436, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 14, + "layer_display_name": "14", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.29983174865850976 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.30210969201843535 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.30618345571160144 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.30618345571160144 + } + ] + }, + { + "layer_number": 28, + "layer_display_name": "28", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.3017647880265657 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3056607554377782 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.3053489155102467 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3056607554377782 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_arch_dna_phylogeny.json new file mode 100644 index 0000000..235d9d4 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_arch_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_dna_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": 
"tattabio/rpob_arch_dna_phylogeny_sequences", + "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408" + }, + { + "path": "tattabio/rpob_arch_dna_phylogeny_distances", + "revision": "51df97684a927ec2203568e80175ef26a62db039" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", + "revision": "...", + "num_layers": 29, + "num_params": 498345436, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 14, + "layer_display_name": "14", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.22986226120160247 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.27851673593693504 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2606086674119917 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.27851673593693504 + } + ] + }, + { + "layer_number": 28, + "layer_display_name": "28", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.17815454079439064 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.19889620650052858 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2018124663624657 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2018124663624657 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_bac_dna_phylogeny.json new file mode 100644 index 0000000..b4adbc6 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-500m-multi-species/rpob_bac_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_dna_phylogeny", 
+ "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_dna_phylogeny_sequences", + "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6" + }, + { + "path": "tattabio/rpob_bac_dna_phylogeny_distances", + "revision": "67339e271b2a1602208153d53d70d35ba6fa8876" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", + "revision": "...", + "num_layers": 29, + "num_params": 498345436, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 14, + "layer_display_name": "14", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.05135094895715095 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.07685936603629384 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.08785540391133782 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.08785540391133782 + } + ] + }, + { + "layer_number": 28, + "layer_display_name": "28", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.10564977401255361 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.10576727586891548 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.10692380434412962 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.10692380434412962 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/MIBIG_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/MIBIG_dna_classification.json new file 
mode 100644 index 0000000..05e05e7 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/MIBIG_dna_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_dna_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_dna", + "revision": "b5ca7a76d469e4e66c46f1b655903972571e6b61" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", + "revision": "...", + "num_layers": 12, + "num_params": 55904972, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.41732088501675296 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4126984126984127 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.5828964813759089 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.3790039539706372 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.6005291005290989 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.447108453393839 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4399092970521542 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.6417782738095238 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.4014322754351143 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 
0.6228269085411934 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/arch_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/arch_16S_phylogeny.json new file mode 100644 index 0000000..df9efd7 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/arch_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "arch_16S_phylogeny", + "display_name": "16S Archaeal Phylogeny", + "description": "Evaluate on 16S Archaeal phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/arch_16S_sequences", + "revision": "e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0" + }, + { + "path": "tattabio/arch_16S_distances", + "revision": "b0356b632a954be70cefd57e3a02e7e1ccd34408" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", + "revision": "...", + "num_layers": 12, + "num_params": 55904972, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.08614532458123705 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.13483619665236213 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.1312200179763858 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.13483619665236213 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.08593831411686806 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.10878061550161633 + }, + { + "id": "euclidean", + "display_name": "euclidean", + 
"description": null, + "value": 0.11155052559650092 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.11155052559650092 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/bac_16S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/bac_16S_phylogeny.json new file mode 100644 index 0000000..169f0ce --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/bac_16S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "bac_16S_phylogeny", + "display_name": "16S Bacterial Phylogeny", + "description": "Evaluate on 16S Bacterial phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/bac_16S_sequences", + "revision": "efde1456b86748909cbcfecb07d783756d570aa3" + }, + { + "path": "tattabio/bac_16S_distances", + "revision": "5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", + "revision": "...", + "num_layers": 12, + "num_params": 55904972, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2595103060358304 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3341848597559302 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.3320052456892504 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3341848597559302 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.3430883689079242 + }, + { + "id": "manhattan", + 
"display_name": "manhattan", + "description": null, + "value": 0.3684961309813556 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.36354876502165473 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3684961309813556 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ec_dna_classification.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ec_dna_classification.json new file mode 100644 index 0000000..ea12cd2 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ec_dna_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_dna_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task using DNA sequences.", + "modality": "dna", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification_dna", + "revision": "cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", + "revision": "...", + "num_layers": 12, + "num_params": 55904972, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0859375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.06004464285714285 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0859375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.0703125 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ecoli_rna_clustering.json 
b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ecoli_rna_clustering.json new file mode 100644 index 0000000..490b07a --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/ecoli_rna_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "ecoli_rna_clustering", + "display_name": "E.coli RNA Clustering", + "description": "Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.", + "modality": "dna", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/e_coli_rnas", + "revision": "4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", + "revision": "...", + "num_layers": 12, + "num_params": 55904972, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.039533351232296544 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.1998891844978737 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/euk_18S_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/euk_18S_phylogeny.json new file mode 100644 index 0000000..1d681f3 --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/euk_18S_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "euk_18S_phylogeny", + "display_name": "18S Eukaryotic Phylogeny", + "description": "Evaluate on 18S Eukaryotic phylogeny distance correlation task.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/euk_18S_sequences", + "revision": "5174cb3b2c5c46b61307fd1c2c08f5c432655196" + }, + { + "path": 
"tattabio/euk_18S_distances", + "revision": "c4cea4fbb1185d08e0e01fd28ffb8b06a25025da" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", + "revision": "...", + "num_layers": 12, + "num_params": 55904972, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2795506243365705 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3377836786404048 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.33795640524026876 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.33795640524026876 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.30224942362141677 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.31698287710587075 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.31744544631481525 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.31744544631481525 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_arch_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_arch_dna_phylogeny.json new file mode 100644 index 0000000..d3cb22f --- /dev/null +++ b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_arch_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_dna_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA 
sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_dna_phylogeny_sequences", + "revision": "4453552a0e1021fee8697c71a559f4d3f6da2408" + }, + { + "path": "tattabio/rpob_arch_dna_phylogeny_distances", + "revision": "51df97684a927ec2203568e80175ef26a62db039" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", + "revision": "...", + "num_layers": 12, + "num_params": 55904972, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.052418023657636856 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.09741263413297728 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.09567551095833512 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.09741263413297728 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.09765252616635897 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.13005622857446536 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.14955059376403426 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.14955059376403426 + } + ] + } + ] +} diff --git a/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_bac_dna_phylogeny.json b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_bac_dna_phylogeny.json new file mode 100644 index 0000000..7608fd5 --- /dev/null +++ 
b/leaderboard/submissions/nucleotide-transformer-v2-50m-multi-species/rpob_bac_dna_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_dna_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.", + "modality": "dna", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_dna_phylogeny_sequences", + "revision": "8e137d3fb8886d8739ce08d1918745444c7d30d6" + }, + { + "path": "tattabio/rpob_bac_dna_phylogeny_distances", + "revision": "67339e271b2a1602208153d53d70d35ba6fa8876" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", + "revision": "...", + "num_layers": 12, + "num_params": 55904972, + "embed_dim": 512 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.07434903594716476 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.08977856846092792 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.09222560782823379 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.09222560782823379 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.08630281284059531 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.09701233167900071 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.09964321196343817 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.09964321196343817 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/progen2-large/MIBIG_protein_classification.json b/leaderboard/submissions/progen2-large/MIBIG_protein_classification.json new file mode 100644 index 0000000..1d824e1 --- /dev/null +++ b/leaderboard/submissions/progen2-large/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6819849034962754 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6757369614512472 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7747178376990543 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6342294744240076 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.8065003779289504 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6150399394552477 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6303854875283447 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7417051698581646 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + 
"value": 0.5777687904471503 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.7694633408919135 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/arch_retrieval.json b/leaderboard/submissions/progen2-large/arch_retrieval.json new file mode 100644 index 0000000..0aad6f8 --- /dev/null +++ b/leaderboard/submissions/progen2-large/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.82558 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.80792 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.76707 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.26978 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.37262 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.61886 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.27755 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": 
null, + "value": 0.38883 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.66873 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.74349 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.67286 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.40231 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8799046806089053 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8824834529059877 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.883891445011226 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.4520286686644837 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5956080481101712 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.021981762025439694 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.4350798975828656 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.6324559228952568 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.051022858934365305 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.4027550261519307 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.6298449675763194 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": 
null, + "value": -0.03440308913177297 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.04937645677938109 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.324916856701562 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.33579245842687483 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.11864382097961818 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4753148026458953 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.25080627867928795 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.3800933497303014 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.6976421952392109 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.01576884330549023 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.04078669924959719 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.3242844365632198 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.34140253756691535 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.10048280111893285 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.47618974809301795 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": 
"nauc_recall_at_10_diff1", + "description": null, + "value": 0.2642681901494963 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.36191474504680493 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.732114878689383 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.041525472784842636 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.37136307757548065 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.42880655129257333 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.3702312562845074 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.3279636463690485 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.3630373065503225 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.3969564917795274 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.19445771639778647 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.08695792887210627 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.30808900316001675 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.5264230291869033 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": 
"nauc_mrr_at_5_std", + "description": null, + "value": 0.5639758102110056 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.1524029506353449 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.5253478695141168 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.5618581571585436 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.15173243498945044 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.5300952350050928 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.559635250154546 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.14988171842990547 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.62071 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.5774 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.49429 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.16138 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.21164 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.3225 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.17546 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.23808 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + 
"description": null, + "value": 0.40527 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.55988 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.47823 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.25592 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7401550718452126 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7445013515436046 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7473056005746137 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.3302418889629452 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.6600101206611111 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.17322466327530292 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.31576812159270473 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.6797481256387631 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.13867873384705962 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.2593741165269576 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.6435030914254599 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.15965132348455088 + }, + { + "id": "nauc_map_at_5_max", + "display_name": 
"nauc_map_at_5_max", + "description": null, + "value": 0.07708299288709394 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.30967007942991703 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3416877145548701 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.13890220773201395 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.43271488328659263 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.2869867740526922 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.23590185458461196 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.6505388159079657 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.16461355770456945 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.043742127235177904 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.2687502523778896 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3105615838617834 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.09680209916324005 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.37971832751393986 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.26241632489651434 + }, + { + "id": 
"nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.17303959367017055 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.5921607885640662 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.16471059233285062 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.313156828536656 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.6189535949883347 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": 0.021027755158267582 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.30202175826168143 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.6132772075682028 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.04760584624936907 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.21884003064346627 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.45683930057316186 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.11585915046402609 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.35175510709952584 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.6208145223835255 + }, + { + "id": 
"nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.3507078878526956 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.350942421563067 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.6235307592418955 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.3541831744561419 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.35306527795828485 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6233511446573011 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.3538673437378702 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/bacarch_bigene.json b/leaderboard/submissions/progen2-large/bacarch_bigene.json new file mode 100644 index 0000000..a05cf8e --- /dev/null +++ b/leaderboard/submissions/progen2-large/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + 
"value": 0.5524985706117781 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6339622641509434 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5746138521035977 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6339622641509434 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.17530030765879823 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.22641509433962265 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.18399742288681928 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.22641509433962265 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/convergent_enzymes_classification.json b/leaderboard/submissions/progen2-large/convergent_enzymes_classification.json new file mode 100644 index 0000000..1c99b66 --- /dev/null +++ b/leaderboard/submissions/progen2-large/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "accuracy", 
+ "display_name": "accuracy", + "description": null, + "value": 0.1875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.15337499999999998 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.14 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.10284722222222221 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/cyano_operonic_pair.json b/leaderboard/submissions/progen2-large/cyano_operonic_pair.json new file mode 100644 index 0000000..361c89a --- /dev/null +++ b/leaderboard/submissions/progen2-large/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7180076628352491 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9833443760871887 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.44289609209330505 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.33621838688850403 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", 
+ "description": null, + "value": 0.284990253411306 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9932065217391305 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.3213092537718531 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7183908045977011 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 191.33279418945312 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.44019138755980863 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 1111.232421875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.2822085889570552 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 1.0 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3118566111895106 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 6.656366348266602 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4434389140271493 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 28.89664077758789 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.2849941837921675 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 
0.998641304347826 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3269222612599279 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 681.6181640625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.442998760842627 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 113.07124328613281 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.28691813804173355 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9714673913043478 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.30230236744782024 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3269222612599279 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.717624521072797 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9928083419799805 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.45870897944464484 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.7693469524383545 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3122238586156112 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8641304347826086 + }, + { + "id": "cos_sim_ap", + "display_name": 
"cos_sim_ap", + "description": null, + "value": 0.34213880348537534 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7206896551724138 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 470.23736572265625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4549596915527515 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 964.5697631835938 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.30656589513462446 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8817934782608695 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3669944276201573 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7203065134099617 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 11.08890151977539 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.451639344262295 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 26.889080047607422 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.32335680751173707 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.748641304347826 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3595510558455044 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + 
"description": null, + "value": 0.7183908045977011 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 4561.4189453125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44360666038328617 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 762.20458984375 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.2885165508786269 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9592391304347826 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.2751779532687585 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3669944276201573 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/ec_classification.json b/leaderboard/submissions/progen2-large/ec_classification.json new file mode 100644 index 0000000..d4b200a --- /dev/null +++ b/leaderboard/submissions/progen2-large/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.546875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 
0.49739583333333326 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.5 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.4315104166666666 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/ecoli_operonic_pair.json b/leaderboard/submissions/progen2-large/ecoli_operonic_pair.json new file mode 100644 index 0000000..e5e3b31 --- /dev/null +++ b/leaderboard/submissions/progen2-large/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6367640241075568 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.5684491991996765 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5887910672106507 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.45036613941192627 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4711340206185567 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, 
+ "value": 0.7847738981110475 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5233168375581643 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6418636995827538 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 547.8858642578125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5766871165644171 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 969.0298461914062 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.40592903828197946 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9954207212364052 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5321691186505778 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.652990264255911 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 13.988625526428223 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5945746447684075 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 21.867555618286133 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.4387254901960784 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9221522610188895 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5701562997645315 + 
}, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 6305.4169921875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5834810350939383 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 113.59080505371094 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.42259306803594354 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9421866056096165 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4206948747327784 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5701562997645315 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6495132127955494 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.852018415927887 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.6023952095808384 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.7513113021850586 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.46245786086423535 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8637664567830566 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5385774043629026 + }, + { + "id": "manhattan_accuracy", + "display_name": 
"manhattan_accuracy", + "description": null, + "value": 0.6659712563745943 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 635.199462890625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.6145662847790507 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 891.3582763671875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.47819165870741803 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8597595878649112 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5936555209565155 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6636532220676866 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 16.329631805419922 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6072446072446073 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 26.53228759765625 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.4711083043890117 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8540354894104178 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5832964505139788 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", 
+ "description": null, + "value": 10073.8828125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5778894472361809 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 666.7764892578125 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.40847738574473125 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9874069834001145 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.37683258403007813 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5936555209565155 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/euk_retrieval.json b/leaderboard/submissions/progen2-large/euk_retrieval.json new file mode 100644 index 0000000..51817a7 --- /dev/null +++ b/leaderboard/submissions/progen2-large/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.79265 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.78706 + }, 
+ { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.77934 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.31297 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.42594 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.6285 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.32182 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.44326 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.67623 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.70997 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.63441 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.3681 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8430332261521974 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8474850711988975 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8485140543441441 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.623248146700816 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.6755617794201033 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.2062832776723883 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6150971905749236 + }, + { + "id": "nauc_ndcg_at_10_std", + 
"display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.6851298227412265 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.2164177072767131 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.5606563694900604 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.6720929882395703 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.18233563570111652 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.12726038600829018 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.27883908864306833 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.21034053179876666 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.2388442883998982 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4984729416529119 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.11734247621635328 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5658825420942568 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.7844748384936805 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.10488808502349604 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.1148710587472859 + }, + { + "id": 
"nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.26683520950781653 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.21895153236754078 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.21398850457265484 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.47731027192523895 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.12934753855868159 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5329014815194145 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.7796995216647047 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.036946938653229996 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5286196851573478 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.5983839423970794 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.4236911084804896 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4545782642182253 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.5007516848828493 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.4262645964521107 + }, + { + "id": 
"nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.1500642024947319 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.1600499789472809 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.30891321831672286 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6890316490341247 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.6388576263708866 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.15515652272104835 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6897689099007713 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.6331945267231739 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.17683210867574006 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6912368384982113 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6326576797440999 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.17574979114357966 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.60293 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.56499 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + 
"value": 0.51375 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.18533 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.23869 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.33028 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.19775 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.26432 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.4146 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.53762 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.45241 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.24424 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7315648445873527 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7361098351452049 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7390325351956528 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.5258291178052644 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.6990369551084505 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.19847712933798844 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.49349547399540455 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 
0.6664206673817538 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.1600227050947377 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.42061526717556513 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.6083533212467029 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.1686331650483035 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.14859441615302324 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.23988353743529217 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.30180731282787276 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.23839489619661014 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.40953413539940225 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.2460114370422075 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.3761510941109189 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.602955555500399 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.15749579060843896 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.11444039748419432 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": 
null, + "value": 0.19218511723799558 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2825603490529135 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.18835645126114328 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.3410474906629072 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2429814898980251 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.32800009778288497 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.5144024756263704 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.17371372463750084 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5304991625331207 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.7215198097667779 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": 0.05714336353148511 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4853052816819128 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.6759994282048366 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.03704981651554237 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, 
+ "value": 0.31062406812604343 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.4025880047512197 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.1340192827457276 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.5665289123893353 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.6278003990906513 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.37278396283325976 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.5701246528415677 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.6321080355936443 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.369945061055372 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.5726947773737544 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6348440871378689 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.37227517916401826 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/fefe_phylogeny.json b/leaderboard/submissions/progen2-large/fefe_phylogeny.json new file mode 100644 index 0000000..a9d59fc --- /dev/null +++ b/leaderboard/submissions/progen2-large/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": 
"protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.7261313624231767 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.8113485047464291 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.8080630557517142 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.8113485047464291 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.534930337261507 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.6481804042737168 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.571767383850242 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.6481804042737168 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/modac_paralogy_bigene.json b/leaderboard/submissions/progen2-large/modac_paralogy_bigene.json new file mode 100644 index 0000000..7f3b346 --- /dev/null +++ b/leaderboard/submissions/progen2-large/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext 
matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.0006706959960131102 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0013404825737265416 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.0006711500886081701 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0013404825737265416 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.17359249329758714 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.005679986057065453 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.013404825737265416 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.006772200533986637 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.013404825737265416 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.33579088471849866 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/mopb_clustering.json b/leaderboard/submissions/progen2-large/mopb_clustering.json new file mode 100644 index 0000000..5151e73 --- /dev/null +++ 
b/leaderboard/submissions/progen2-large/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.8476659794848843 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.6677487389951918 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/rpob_arch_phylogeny.json b/leaderboard/submissions/progen2-large/rpob_arch_phylogeny.json new file mode 100644 index 0000000..b5ed743 --- /dev/null +++ b/leaderboard/submissions/progen2-large/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + 
"results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2920851957915461 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3483029599824127 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.37488611045945197 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.37488611045945197 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.39164690186549966 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.49705565976563815 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.5007660656360811 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.5007660656360811 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/rpob_bac_phylogeny.json b/leaderboard/submissions/progen2-large/rpob_bac_phylogeny.json new file mode 100644 index 0000000..529566b --- /dev/null +++ b/leaderboard/submissions/progen2-large/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 
2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.3407778454098185 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.42394247953096953 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.440888842114917 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.440888842114917 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.23219864929778225 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3490272067401271 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.358178624850726 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.358178624850726 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-large/vibrio_operonic_pair.json b/leaderboard/submissions/progen2-large/vibrio_operonic_pair.json new file mode 100644 index 0000000..6c4a4ba --- /dev/null +++ b/leaderboard/submissions/progen2-large/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-large", + "revision": "...", + "num_layers": 32, + "num_params": 2779356160, + "embed_dim": 2560 + }, + "dgeb_version": "0.0.0", + 
"results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6715895841430237 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.688905656337738 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5361670395227442 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.4471510946750641 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.40145170295924065 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8069584736251403 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.47290020469130756 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6723668869024485 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 509.0989990234375 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5160537069468768 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 950.35400390625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3487179487179487 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9921436588103255 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.45396208781120356 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": 
null, + "value": 0.6836377769141081 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 13.386638641357422 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5250988743535138 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 23.639881134033203 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.36018363939899833 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9685746352413019 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.4721672326721662 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6544889234356782 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 758.7493896484375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5481784133469527 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 123.69869995117188 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.39345063538611924 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9034792368125701 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.41650121166936427 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.47290020469130756 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6785853089778469 + }, + { 
+ "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.8825353384017944 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5314685314685315 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.7442071437835693 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3778409090909091 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8956228956228957 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.47970762950524704 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6813058686358336 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 602.638427734375 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5384615384615384 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 980.8336181640625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3768506056527591 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9427609427609428 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4935429984156211 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6813058686358336 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 14.864974975585938 + }, + { + "id": 
"euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5296145408962708 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 36.245880126953125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3673913043478261 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9483726150392817 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.48504972690345405 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6544889234356782 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 4045.42626953125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5217391304347826 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 713.2037353515625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3596938775510204 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9494949494949495 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.350425846271822 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.4935429984156211 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/MIBIG_protein_classification.json b/leaderboard/submissions/progen2-medium/MIBIG_protein_classification.json new file mode 100644 index 0000000..90ae2d4 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG 
Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6989990988890816 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.691609977324263 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7986955550826269 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.652675371834789 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.8151927437641735 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6510666703327254 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6145124716553289 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.79271911663216 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.597401875633424 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.7605820105820112 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/arch_retrieval.json b/leaderboard/submissions/progen2-medium/arch_retrieval.json new file mode 100644 index 0000000..3fbc7a5 --- /dev/null +++ 
b/leaderboard/submissions/progen2-medium/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.8667 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.8539 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.81528 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.28143 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.39613 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.66886 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.28849 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.41121 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.71466 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.78805 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + 
"value": 0.71985 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.43151 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.90511452553706 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9065473649980684 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9081775693416649 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6612480619372302 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.46128894265628245 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.21116421624243986 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6213052175154115 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.49490230608329266 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.21967149541101536 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.5603531623948176 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.4685128740415315 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.1712953803081673 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.06797559593865152 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.2905404912455848 + }, + { + "id": "nauc_map_at_5_diff1", + 
"display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3686455576980013 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.1573579335136106 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4234570447717268 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.2518670668196923 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5013213694250233 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.5420807636642778 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.05454709391738201 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.051603859613532796 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.2898156924225231 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.36769365765884426 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.12664596322820085 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.4271370583406068 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2532454874575687 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.46112764213529894 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 
0.584450782631934 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.04169227808562108 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5372973075189744 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.2904671378642838 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.577918931914468 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4337989473686208 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.22378661665353214 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.576137485553088 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.21240666376908013 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.2489241348939673 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3798499517361313 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6888804715259021 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.42014922591038306 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.0714471449304972 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6896553273533154 + 
}, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.4233185770383571 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.07422848638622621 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.693742352773071 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.41941213949022665 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.08094307327642038 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.69714 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.66471 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.59834 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.19286 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.26117 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.41853 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.20522 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.28836 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.51269 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.63841 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.5609 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + 
"description": null, + "value": 0.31759 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7844430217669651 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7890303098049573 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7915598665697423 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.35843421202903436 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.6600633200863916 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.03627327495753951 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.3353630084809703 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.6761105480874086 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.02350341443487571 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.27106613394122536 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.6690231790973418 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.07650491677426355 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.08906743461719288 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.31949618338174984 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3292636023000051 + }, + 
{ + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.13874504172191632 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4647631401151212 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.2600910224266903 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.27470079652681245 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.6928461826997683 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.10769891596111343 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.05774041251094078 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.29379200776879594 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.32688275733718275 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.08274480687207497 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.42416625000987423 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2678170789795234 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.17288450743636116 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.6633291061716232 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + 
"description": null, + "value": 0.15854705547657555 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.32533961340687934 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.6020974295167729 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.1333980927582597 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.29585400791594363 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.5687767036261221 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.189945340565891 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.20906289650717386 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.28943991710214256 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.21305963873675984 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.36279298091388296 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.6362558326995159 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.16901121330011185 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.36248425558706443 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + 
"value": 0.6402643633377308 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.17314376122591818 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.3631863215067844 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6401290947847794 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.17258652486506595 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/bacarch_bigene.json b/leaderboard/submissions/progen2-medium/bacarch_bigene.json new file mode 100644 index 0000000..85d93d3 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7037735849056603 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.7811320754716982 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7283018867924528 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.7811320754716982 + } + 
] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.4038430123335784 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.49056603773584906 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.42439088843084405 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.49056603773584906 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/convergent_enzymes_classification.json b/leaderboard/submissions/progen2-medium/convergent_enzymes_classification.json new file mode 100644 index 0000000..243dd84 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1925 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.14935912698412696 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1525 
+ }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.11368055555555553 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/cyano_operonic_pair.json b/leaderboard/submissions/progen2-medium/cyano_operonic_pair.json new file mode 100644 index 0000000..5e160ba --- /dev/null +++ b/leaderboard/submissions/progen2-medium/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7180076628352491 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9889928102493286 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4414824447334201 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.4203949570655823 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.2901709401709402 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9225543478260869 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.3189029210120636 + }, + { + "id": "manhattan_accuracy", + 
"display_name": "manhattan_accuracy", + "description": null, + "value": 0.7180076628352491 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 65.704833984375 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.44071856287425143 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 639.349365234375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.282642089093702 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 1.0 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3036709966412725 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 5.881275177001953 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.44363856149894226 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 20.312252044677734 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.2852701127089001 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9972826086956522 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3132279316580241 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7195402298850575 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + 
"description": null, + "value": 308.689697265625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44540942928039706 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 95.29304504394531 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.28858520900321544 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9755434782608695 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.32010048431074967 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.32010048431074967 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7183908045977011 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.931535005569458 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4593088071348941 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.5361685752868652 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.31611253196930944 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8396739130434783 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.339620452953373 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.721455938697318 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 298.3204650878906 
+ }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4564081960626757 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 635.1521606445312 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3240159726183685 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.7717391304347826 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.37274235680877565 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7222222222222222 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 10.60649299621582 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4526946107784431 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 23.78628921508789 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.320520067834935 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.7703804347826086 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.36747868579778725 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7180076628352491 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1808.91015625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.4434278743519366 + }, + { + "id": "dot_f1_threshold", + "display_name": 
"dot_f1_threshold", + "description": null, + "value": 121.43479919433594 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.2858828155721589 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9877717391304348 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.28183146847459695 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.37274235680877565 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/ec_classification.json b/leaderboard/submissions/progen2-medium/ec_classification.json new file mode 100644 index 0000000..5cdec0a --- /dev/null +++ b/leaderboard/submissions/progen2-medium/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.5546875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4921875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.42838541666666663 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/progen2-medium/ecoli_operonic_pair.json b/leaderboard/submissions/progen2-medium/ecoli_operonic_pair.json new file mode 100644 index 0000000..b75b595 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6184515530829856 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.651489794254303 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5857946554149086 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.38596251606941223 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4227353463587922 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9536348025186033 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.510237868123645 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6098748261474269 + }, + { + "id": 
"manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 239.94903564453125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5768392827216356 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 607.368408203125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.40781990521327016 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9851173440183171 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.48682441190216086 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.624246638850255 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 12.428617477416992 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5890985324947589 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 19.439558029174805 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.42393764143827006 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9650829994275901 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5251579034107494 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 3148.2744140625 + }, + { + "id": "dot_f1", + 
"display_name": "dot_f1", + "description": null, + "value": 0.5811965811965812 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 93.77833557128906 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4143309773336583 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9730967372638809 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4434019305350601 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5251579034107494 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6274918868799259 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.6536474823951721 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.601917975923281 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.5033559203147888 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.46766011414077363 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8443045220377791 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5195509705220649 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6659712563745943 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 416.32061767578125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + 
"description": null, + "value": 0.6131327505590567 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 606.52978515625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.47540983606557374 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8631940469376074 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5921808575876653 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6627260083449235 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 13.742053985595703 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6071692535107169 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 28.72542381286621 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.44829467939972717 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9404693760732684 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5869667896425095 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 3265.408203125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5812606473594549 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 109.48553466796875 + }, + { + 
"id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4137763764249333 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.976531196336577 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.38910400697637215 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5921808575876653 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/euk_retrieval.json b/leaderboard/submissions/progen2-medium/euk_retrieval.json new file mode 100644 index 0000000..62d22f2 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.85039 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.84383 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.81649 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.33879 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + 
"value": 0.45506 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.67207 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.34411 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.46708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.70727 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.75949 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.67621 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.37633 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8896034297963559 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8904200479763181 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8917799846355668 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.7603781530194391 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.6376819010899927 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.4212148120292312 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.7322697904169153 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.6496456933761383 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.37980919966190413 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": 
"nauc_ndcg_at_50_max", + "description": null, + "value": 0.7156345490529658 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.6522514666477729 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.3042825041288217 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.14758260305763898 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.26194428990169905 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.11232537482428236 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.270789065395862 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4790544168497472 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.03884116920584031 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.6616399771987015 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.7353350484425378 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.2128494417394226 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.13062461293496178 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.2479018950834343 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.1132534748053531 + }, + { + "id": 
"nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.24031161646082083 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.46622985594529703 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.06071938680505056 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.6150865443749446 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.725765511063093 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.19749339031325439 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5614315220453862 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.5348156597752065 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.5720041132270836 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4384675283398972 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.3967752412060821 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.45432753775042073 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.18118357934146365 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.21480164227817497 + }, + { + "id": 
"nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.2754588826031711 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.8454487169250762 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.6151709989094298 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.3483912884875399 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.8442698393532679 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.6188982480035604 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.34895231147497485 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.8447441492866 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6161593785614531 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.351845322578248 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.70833 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.67903 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.64858 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.25169 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.32689 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": 
null, + "value": 0.46739 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.25973 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.34512 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.53847 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.63408 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.54244 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.30759 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7887459807073955 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7936431378553563 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7959549608392991 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.4249013352122335 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.8037952157465401 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.1413596947741603 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.4204652802279644 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.7876332799563885 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.1462062003777289 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.3584251726169603 + }, + { + "id": 
"nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.7541162752580496 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.14407914236714084 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.06823941786698261 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.38255659859692487 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3480674521155449 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.16200995043378846 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.5637260827505329 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.27925638990284274 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.316785523616919 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.7884674708035404 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.15281773067505242 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.05984367603361126 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.37075410024509964 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3528009604202977 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 
0.13978654053219638 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.5394212306018864 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.28687335814861753 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.27953439115065165 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.7465655368746134 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.15167294388599076 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.42228922529257007 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.7149145518900438 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.11182287887883938 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.39379398750948513 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.6192434223426857 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.17068274953379578 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.2249314962247772 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.1513983474822626 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, 
+ "value": -0.2085676426311878 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.4873996930451618 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.790121004884451 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.35101924426054193 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.48679300042700696 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.7911671425593381 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.34602507202599053 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.49234090539442804 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.7908566907426995 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.3445651280400187 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/fefe_phylogeny.json b/leaderboard/submissions/progen2-medium/fefe_phylogeny.json new file mode 100644 index 0000000..e70d5f0 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + 
"primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.6398006140261373 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.7591242650638738 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.754276136476935 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.7591242650638738 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.5698692272674944 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.7090467502518881 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.662949011431012 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.7090467502518881 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/modac_paralogy_bigene.json b/leaderboard/submissions/progen2-medium/modac_paralogy_bigene.json new file mode 100644 index 0000000..1441ab3 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" 
+ }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.04691689008042895 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.0018531981463306205 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.006032171581769437 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.0023899465819002715 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.006032171581769437 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.3652815013404826 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/mopb_clustering.json b/leaderboard/submissions/progen2-medium/mopb_clustering.json new file mode 100644 index 0000000..61b8b58 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": 
"eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.878581661578546 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7368504655746232 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/rpob_arch_phylogeny.json b/leaderboard/submissions/progen2-medium/rpob_arch_phylogeny.json new file mode 100644 index 0000000..8535905 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2641157727110356 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3119710227372265 + }, + { + "id": "euclidean", + 
"display_name": "euclidean", + "description": null, + "value": 0.32296618163345303 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.32296618163345303 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.333762262235085 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.5086678691952276 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.4939985498711707 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.5086678691952276 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/rpob_bac_phylogeny.json b/leaderboard/submissions/progen2-medium/rpob_bac_phylogeny.json new file mode 100644 index 0000000..afb55cb --- /dev/null +++ b/leaderboard/submissions/progen2-medium/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1950082250185355 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": 
null, + "value": 0.28043315277470116 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.30367273332819217 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.30367273332819217 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.25907183955646845 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3897653256717273 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.39653707661764 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.39653707661764 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-medium/vibrio_operonic_pair.json b/leaderboard/submissions/progen2-medium/vibrio_operonic_pair.json new file mode 100644 index 0000000..de465c6 --- /dev/null +++ b/leaderboard/submissions/progen2-medium/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-medium", + "revision": "...", + "num_layers": 27, + "num_params": 764803616, + "embed_dim": 1536 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 13, + "layer_display_name": "13", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6642052079284881 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + 
"value": 0.7931315302848816 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5223320831912717 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.44246846437454224 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3751224289911851 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8597081930415263 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4427000442773764 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6622619510299261 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 244.42291259765625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5149384885764499 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 615.18994140625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.34839476813317477 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9865319865319865 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4096548402507393 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6665371162067625 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 9.324880599975586 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5178359096313913 + }, + { + "id": 
"euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 19.698150634765625 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3522038010513546 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9775533108866442 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.4275378121386588 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.657209483093665 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 269.6624755859375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5334692490655792 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 103.44610595703125 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.38255360623781676 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8810325476992144 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4402580670162658 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.4427000442773764 + } + ] + }, + { + "layer_number": 26, + "layer_display_name": "26", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6626506024096386 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.8587204813957214 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5521669341894061 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": 
"cos_sim_f1_threshold", + "description": null, + "value": 0.5507351756095886 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4297314178638351 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.7721661054994389 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.46071910274003564 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6743101438010105 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 384.3786926269531 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5400981996726677 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 677.1198120117188 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3812384473197782 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9259259259259259 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.47638983875407687 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6758647493198601 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 12.481945037841797 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5347798340778558 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 28.243602752685547 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + 
"description": null, + "value": 0.37360677663843067 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9405162738496072 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.4738811750419196 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6541002720559658 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1609.40478515625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5332914572864321 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 125.42507934570312 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.37025730484081987 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9528619528619529 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3646222508107507 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.47638983875407687 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/MIBIG_protein_classification.json b/leaderboard/submissions/progen2-small/MIBIG_protein_classification.json new file mode 100644 index 0000000..4c892a3 --- /dev/null +++ b/leaderboard/submissions/progen2-small/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + 
}, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6388692163537786 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6394557823129252 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7418637884284616 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.5968684138731036 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.781934996220711 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6609534749591477 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6598639455782312 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.8030532546048321 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6121821659489147 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.7843915343915351 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/arch_retrieval.json b/leaderboard/submissions/progen2-small/arch_retrieval.json new file mode 100644 index 0000000..fab3ede --- /dev/null +++ b/leaderboard/submissions/progen2-small/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + 
"revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.72082 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.70061 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.65352 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.21829 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.30584 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.49789 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.23064 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.33315 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.58309 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.65642 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.59091 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.33787 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7910513586569917 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7948976688413298 + }, + { + "id": 
"mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7977303757166455 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.5475981836671467 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5960854077733617 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.07359409377865848 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.5348824611254859 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.593354772525071 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.05382473444665512 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.48145446953604293 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.5746101256658309 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.05768162131908042 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.0671435703618939 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.38563581595942015 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3255398177914751 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.1886920642763862 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4818911065684608 + }, + { + "id": 
"nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.24142443627518678 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.4520357569415462 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.5809839586558266 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.06621717678004216 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.04521085335682694 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.3578304375654747 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3095295095368689 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.150564871318176 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.4524869709077065 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2422900535368358 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.42091958169718263 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.5521812748376208 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.0654084113222808 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5560865220026086 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": 
"nauc_precision_at_5_std", + "description": null, + "value": 0.4663024105684298 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.12309166370191016 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.5235008052565618 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.3951829419893739 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.19499705168929 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.3308190035410808 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.056101087790478094 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.21982513157177477 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.5607106301178035 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.5887958978024223 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.19353716925535508 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.5615266952934416 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.5890945924016702 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.19325489061094214 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": 
null, + "value": 0.5611777163821644 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.5888188766467686 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.19302943083610713 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.7033 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.67869 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.63008 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.21539 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.30029 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.49432 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.22644 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.3248 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.56761 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.63875 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.56773 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.32335 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7747119078104986 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7786623783102647 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 
0.7819268433047759 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": -0.1896218755037747 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.7779658670429147 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.30058319812194767 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": -0.16176589926755863 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.7855720876943553 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.2677426587955006 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": -0.1349738266153549 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.7755313282780849 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.24684189382047908 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": -0.15458803113699884 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.33049917158388503 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.40734235921704587 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": -0.13918338377349823 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.500927265616027 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": 
null, + "value": 0.34912969967259155 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": -0.08843078320342054 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.7748433132528942 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.2336215965994099 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.15452256619448723 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.31027973260489217 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.38351765858630604 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": -0.13594798029165953 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.4664400683469538 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.33068387136634236 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": -0.09046828424499535 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.7652624873034768 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.24021392596400007 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": -0.11438252722644132 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.7005627248845133 + }, + { + "id": 
"nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": 0.06477303657513034 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": -0.05753489391069873 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.6602227094416574 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.020464258201554942 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.01150401073222253 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.2912034804641511 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.12308576626927949 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": -0.2512008900587547 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.7484636775938552 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.4417496737536807 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": -0.24957809015538476 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.7478134052906925 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.4434699640064948 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": -0.24629364621121616 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": 
"nauc_mrr_at_50_std", + "description": null, + "value": 0.7471770037721222 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.44155968632638204 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/bacarch_bigene.json b/leaderboard/submissions/progen2-small/bacarch_bigene.json new file mode 100644 index 0000000..d67426b --- /dev/null +++ b/leaderboard/submissions/progen2-small/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.5480188679245283 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6264150943396226 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5690179422254895 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6264150943396226 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.5530817610062893 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6415094339622641 + }, + { + "id": "f1", + "display_name": "f1", + "description": 
null, + "value": 0.5791194968553458 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6415094339622641 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/convergent_enzymes_classification.json b/leaderboard/submissions/progen2-small/convergent_enzymes_classification.json new file mode 100644 index 0000000..0a96ef6 --- /dev/null +++ b/leaderboard/submissions/progen2-small/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.11 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.08067261904761903 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1325 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.09460714285714285 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/cyano_operonic_pair.json b/leaderboard/submissions/progen2-small/cyano_operonic_pair.json new file mode 100644 index 0000000..d1451cb --- /dev/null +++ 
b/leaderboard/submissions/progen2-small/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7183908045977011 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9855711460113525 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4403834631515877 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.3577058017253876 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.2824750192159877 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.998641304347826 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.3296781486925475 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7183908045977011 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 30.541061401367188 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 
0.4404548174745661 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 203.69651794433594 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.28242517267843437 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 1.0 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.30196707221941077 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 1.3797529935836792 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4429044893040072 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 8.252227783203125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.2845528455284553 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.998641304347826 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3140419058392819 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 76.20918273925781 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44019138755980863 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 10.069953918457031 + }, + { + "id": "dot_precision", + "display_name": 
"dot_precision", + "description": null, + "value": 0.2822085889570552 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 1.0 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.32358772339840414 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3296781486925475 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.717624521072797 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9930706024169922 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.44649324991768197 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.4854004979133606 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.29465449804432853 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9211956521739131 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.3085559879574271 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7180076628352491 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 47.12645721435547 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.44656879481051553 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 257.8835144042969 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + 
"description": null, + "value": 0.2982216142270862 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8885869565217391 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3316723863885721 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 4.9462361335754395 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4484217377155874 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 13.16019058227539 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.2948224219084296 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.936141304347826 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3345743153670524 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 294.17828369140625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44191763982790416 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 33.673152923583984 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.2855440826052423 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9769021739130435 + }, + { + "id": "dot_ap", + 
"display_name": "dot_ap", + "description": null, + "value": 0.2836210112641132 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3345743153670524 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/ec_classification.json b/leaderboard/submissions/progen2-small/ec_classification.json new file mode 100644 index 0000000..9daa24e --- /dev/null +++ b/leaderboard/submissions/progen2-small/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.4765625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.41744791666666664 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.484375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.43671875 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/ecoli_operonic_pair.json b/leaderboard/submissions/progen2-small/ecoli_operonic_pair.json new file mode 100644 index 0000000..a3cf931 --- /dev/null +++ b/leaderboard/submissions/progen2-small/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": 
"Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6240148354195643 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.8872358798980713 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5769548685733179 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.33167386054992676 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.40562529056252905 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9988551803091014 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4935020874365037 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6223922114047288 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 65.3521499633789 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5784297382897149 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 184.63580322265625 + }, + { + "id": 
"manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4080432737535277 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9931310818546079 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.49072155439348264 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6293463143254521 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 3.0176098346710205 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5877718787463607 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 8.208200454711914 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.41935483870967744 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9822552947910704 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5146357162147513 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1122.26953125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5769103539530268 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 11.544685363769531 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4056757385438474 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + 
"value": 0.998282770463652 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.38847128264957165 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5146357162147513 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6105702364394993 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.8996621966362 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5856863789735394 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.40186625719070435 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.424562306900103 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9439038351459645 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4864021596361824 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6390820584144645 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 189.96759033203125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.6041311351146486 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 255.98159790039062 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.45155807365439093 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + 
"value": 0.9124212936462507 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5488547270837909 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6478905887807139 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 8.068485260009766 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.609268614100593 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 10.204345703125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.4942979330007128 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.793932455638237 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5654598094655838 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 1022.4357299804688 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5770122557138125 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 21.223339080810547 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4059659752971335 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9971379507727532 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.39641816140828223 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 
0.5654598094655838 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/euk_retrieval.json b/leaderboard/submissions/progen2-small/euk_retrieval.json new file mode 100644 index 0000000..d225736 --- /dev/null +++ b/leaderboard/submissions/progen2-small/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.76129 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.75262 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.74115 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.29128 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.39027 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.5702 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.3014 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.42427 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 
0.6455 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.68232 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.6 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.34881 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.827706323687031 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8334736895830142 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8352247065129689 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6858069307511622 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.4803619867415532 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.10573311085242518 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6333355994172974 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.4904730781745588 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.10224896683374189 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.5933094336884881 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.49029547722523154 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.02071011278307947 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": 
null, + "value": 0.19751713387011416 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.21060706901174292 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.20347426477581018 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.28431727003040264 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.3923582183860595 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.13979856210713534 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5863012284678861 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.5306429524718634 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.015444799785488983 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.1792675407300632 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.1906180594860627 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.20465475488944024 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.2116233943415803 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.36118918957126545 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.14906386527872684 + }, + { + "id": "nauc_recall_at_50_max", + 
"display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5729109265785561 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.49567505770955805 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.02404391976493193 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5786223559627802 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.4389250698294924 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.2406582141999514 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4536397414299 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.3852138942250889 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.2803024103218935 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.18036641882810736 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.06509781077680983 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.21138350693363533 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7568638646687446 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.4420526206688317 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": 
"nauc_mrr_at_5_diff1", + "description": null, + "value": -0.027265500921681368 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7529826623218884 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.4486646568823487 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.01716937343387214 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7554046556002796 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.4476182957497638 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.01852375862791937 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.70785 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.69868 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.68431 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.25855 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.35363 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.53095 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.2683 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.38229 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.60213 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, 
+ "value": 0.64437 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.57106 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.32418 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7742765273311898 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7811488797019344 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7825581627833461 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.3966350445773588 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.8068569080446616 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.052946890410233614 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.42473395095306976 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.8025958108917352 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.03819502888483237 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.4116391776486932 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.7697436774828385 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.050274646426970125 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.07758341515963516 + }, + { + "id": "nauc_map_at_5_std", + "display_name": 
"nauc_map_at_5_std", + "description": null, + "value": 0.3823333904818765 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.29568491522146834 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.16150724196642183 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.5760554960542517 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.24345035389279063 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.39011658814228733 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.8122438882456519 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.08899489357630828 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.08852883708015474 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.35485286220839607 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2869394867481459 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.1895885370766902 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.5177511601949351 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2429373023016436 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.43169016260747917 + }, + { + 
"id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.769377960844427 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.11233814702164165 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.3722433989717487 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.7382052017909906 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.11447089919377483 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.36854361632969396 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.6388922941430113 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.19864698587070462 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.23233618462716996 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.09790753029551322 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.2702334237717018 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.35226396208770194 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.766509273666355 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.175557607407714 + }, + { + "id": 
"nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.3622958711100552 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.7642130903574725 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.1683219848050957 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.3655727075715084 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.7651993873517181 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.16846554104747688 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/fefe_phylogeny.json b/leaderboard/submissions/progen2-small/fefe_phylogeny.json new file mode 100644 index 0000000..ae25339 --- /dev/null +++ b/leaderboard/submissions/progen2-small/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.555555588091072 + }, + { + "id": "manhattan", + "display_name": "manhattan", + 
"description": null, + "value": 0.6299158443145239 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.6231385548367301 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.6299158443145239 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.5399147814539383 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.711209240447499 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.7010135704373694 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.711209240447499 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/modac_paralogy_bigene.json b/leaderboard/submissions/progen2-small/modac_paralogy_bigene.json new file mode 100644 index 0000000..eb0492d --- /dev/null +++ b/leaderboard/submissions/progen2-small/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, 
+ { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.048927613941018765 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.0006888223332782205 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0020107238605898124 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.0007064493294447139 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0020107238605898124 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.1836461126005362 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/mopb_clustering.json b/leaderboard/submissions/progen2-small/mopb_clustering.json new file mode 100644 index 0000000..cc9f9cc --- /dev/null +++ b/leaderboard/submissions/progen2-small/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7298417430044122 + } + ] + }, + { 
+ "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7850829954141821 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/rpob_arch_phylogeny.json b/leaderboard/submissions/progen2-small/rpob_arch_phylogeny.json new file mode 100644 index 0000000..e06754a --- /dev/null +++ b/leaderboard/submissions/progen2-small/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.3829776859261085 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.348654350352103 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.36338027377278787 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3829776859261085 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2715351801224828 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 
0.4119718729857529 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.41890778828000724 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.41890778828000724 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/rpob_bac_phylogeny.json b/leaderboard/submissions/progen2-small/rpob_bac_phylogeny.json new file mode 100644 index 0000000..1b1fc23 --- /dev/null +++ b/leaderboard/submissions/progen2-small/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.32857545814663647 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3054923470480063 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.31844371690401796 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.32857545814663647 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.25929991297508803 + }, + { + "id": "manhattan", + 
"display_name": "manhattan", + "description": null, + "value": 0.3537389858773928 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.37513623635674553 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.37513623635674553 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-small/vibrio_operonic_pair.json b/leaderboard/submissions/progen2-small/vibrio_operonic_pair.json new file mode 100644 index 0000000..048b1e5 --- /dev/null +++ b/leaderboard/submissions/progen2-small/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-small", + "revision": "...", + "num_layers": 12, + "num_params": 151148576, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 6, + "layer_display_name": "6", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6657598134473377 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9222296476364136 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.514868804664723 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.4063832759857178 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.34777471445450964 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + 
"description": null, + "value": 0.9910213243546577 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4138001165304024 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6603186941313641 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 60.44577407836914 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5145348837209303 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 185.1787109375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.34719497842291097 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9932659932659933 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4021725774606305 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6661484648270501 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 2.6326441764831543 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5190249702734839 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 7.227571487426758 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.35301253538212696 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9797979797979798 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 
0.4193700772478897 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6541002720559658 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 72.76122283935547 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.514153668399769 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 9.499689102172852 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3461688059120965 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9988776655443322 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.35093524337294146 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.4193700772478897 + } + ] + }, + { + "layer_number": 11, + "layer_display_name": "11", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6622619510299261 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9203916788101196 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5250700716287762 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.42482998967170715 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.36336206896551726 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9461279461279462 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4258480800533781 + }, + { + "id": "manhattan_accuracy", + 
"display_name": "manhattan_accuracy", + "description": null, + "value": 0.6712009327633113 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 172.4407958984375 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.533378287255563 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 249.7503662109375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3812048192771084 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.8877665544332211 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4625420841865413 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6723668869024485 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 7.234950542449951 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5309222423146474 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 10.6547212600708 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.39167556029882605 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8237934904601572 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.46956741142610603 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6541002720559658 + }, + { + "id": "dot_accuracy_threshold", + "display_name": 
"dot_accuracy_threshold", + "description": null, + "value": 234.90899658203125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5204991087344029 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 29.30080795288086 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.35393939393939394 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9831649831649831 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3684973015108067 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.46956741142610603 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/MIBIG_protein_classification.json b/leaderboard/submissions/progen2-xlarge/MIBIG_protein_classification.json new file mode 100644 index 0000000..ba94584 --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7001728644239972 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.7029478458049887 + }, + 
{ + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.8280344219424678 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6498111947852081 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.824263038548754 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6139791893295873 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6099773242630385 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7783852719765555 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.5630929156905906 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.7556689342403636 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/arch_retrieval.json b/leaderboard/submissions/progen2-xlarge/arch_retrieval.json new file mode 100644 index 0000000..f2f0cd5 --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + 
{ + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.88067 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.86937 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.83252 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.29196 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.40958 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.69033 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.29856 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.42416 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.72823 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.7965 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.72953 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.4405 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9187437757860286 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9202352160098636 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9210959739756178 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.34069226456540874 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.5303111124586763 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + 
"value": -0.025647490198929427 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.30583794948281073 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.5821104164244411 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.050433839881315716 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.28253639654117757 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.5376269279974585 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.05367704597406945 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": -0.019865699890794175 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.2580006088571339 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3854553614086897 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.028339000806823566 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4198812392537043 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.28743486276729935 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.24320253871207173 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.6185612843368935 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + 
"description": null, + "value": 0.02553041504833834 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.028630138912707673 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.25754210867492744 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3847002985890222 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.011162568781484269 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.42159429826522843 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2881811374864961 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.22494289440951815 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.6644384705590648 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.015127323483749542 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.29836230337298764 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.35428463987994574 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.5058092977932365 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.25147134130980464 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": 
null, + "value": 0.2954160057646196 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.5058354133511384 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.13524738619031487 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.24125503477420038 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3287093653549262 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.42306591629339924 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.4977353552615966 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.11372050884722233 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.4279211933184279 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.4953362570745432 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.10873602019499856 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.42853960149453696 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.495417525780632 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.10619000413359458 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 
0.65505 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.61387 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.53252 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.16876 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.22345 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.34562 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.18233 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.25284 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.43763 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.59232 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.51148 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.28085 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7721724285104562 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7762465364578036 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7783536022752451 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.4928690540959825 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.6568501842207822 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.11215893137809109 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": 
"nauc_ndcg_at_10_max", + "description": null, + "value": 0.4462954434195642 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.665381100523378 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.08003523793764296 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.35270458066820515 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.599817822507719 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.09880049258383304 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.17597203770503592 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.2575369731993772 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.29440571242786184 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.2175925083646819 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.39533379455352086 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.2343999392197569 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.3103373434513419 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.623277741299874 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.11447631894830992 + }, + { + "id": "nauc_recall_at_5_max", + 
"display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.13017232999669545 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.22632566900169906 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.28185503381155413 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.1438011427359236 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.3356014043230637 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.22340272803666755 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.2223778867146586 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.549815782175779 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.13039530768244306 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.4340127722798461 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.6239826780987181 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.031493781720994596 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.3709661062638432 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.6079135839289097 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": 
"nauc_precision_at_10_diff1", + "description": null, + "value": -0.09685534037957318 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.24677453371782054 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.38301044535573053 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.14712928623198873 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.5540713256496355 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.6265017258741243 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.2870850914389624 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.5528952321912547 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.6266600018990199 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.28659479517076514 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.5533710928565948 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6259483449701599 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.2844418766374931 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/bacarch_bigene.json b/leaderboard/submissions/progen2-xlarge/bacarch_bigene.json new file mode 100644 index 0000000..30b0ee5 --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/bacarch_bigene.json @@ 
-0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7503144654088051 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.8113207547169812 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7704402515723269 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.8113207547169812 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.13607728099671065 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.17358490566037735 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.14255790731020063 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.17358490566037735 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/convergent_enzymes_classification.json b/leaderboard/submissions/progen2-xlarge/convergent_enzymes_classification.json new file mode 100644 index 0000000..fc37eec --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/convergent_enzymes_classification.json 
@@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.14842261904761905 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.1475 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.11108333333333334 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/cyano_operonic_pair.json b/leaderboard/submissions/progen2-xlarge/cyano_operonic_pair.json new file mode 100644 index 0000000..a5e275e --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": 
"hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7180076628352491 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9752044677734375 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4415116976604679 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.0913393497467041 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.28329484218629714 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 1.0 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.30503917581036627 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7180076628352491 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 404.81805419921875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4403834631515877 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3619.329345703125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.2824750192159877 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.998641304347826 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + 
"description": null, + "value": 0.2925298541520232 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7180076628352491 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 8.454354286193848 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.44354081951829843 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 67.32762145996094 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.2880942706216985 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9633152173913043 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3108581457843357 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7187739463601532 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 4583.71484375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.4513049013367282 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 263.0050964355469 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.2946799667497922 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9633152173913043 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.29691610324048845 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3108581457843357 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + 
"metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7218390804597701 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.8307529091835022 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4664991624790619 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.49792495369911194 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3371670702179177 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.7567934782608695 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.36858629659158926 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7210727969348659 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 1641.822998046875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4584763212079616 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 4072.3740234375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.30670339761248855 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.907608695652174 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3705206660251011 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7206896551724138 + }, + { + "id": 
"euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 32.319210052490234 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.45816409423233145 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 69.70606994628906 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3267670915411356 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.7663043478260869 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3704629292673498 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.717624521072797 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 15245.76953125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.45340751043115446 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 1607.8956298828125 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.30467289719626167 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8858695652173914 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3003882097832594 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3705206660251011 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/ec_classification.json b/leaderboard/submissions/progen2-xlarge/ec_classification.json new file mode 100644 index 0000000..5e09e4e --- /dev/null +++ 
b/leaderboard/submissions/progen2-xlarge/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.609375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5486979166666666 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.46875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.40937499999999993 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/ecoli_operonic_pair.json b/leaderboard/submissions/progen2-xlarge/ecoli_operonic_pair.json new file mode 100644 index 0000000..0b2e2ef --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + 
"num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.597589244320816 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.5353635549545288 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.582427374799501 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.17530038952827454 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.42287784679089024 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9353176874642244 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4701568355111498 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6087158089939732 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 1468.429443359375 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5763831544178365 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3691.033203125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4050603528319406 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9988551803091014 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.46972522412271756 + }, + { + "id": 
"euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6279554937413073 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 39.83434295654297 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5918518518518519 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 59.0245361328125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.43744867232411716 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9147109330280481 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5291568188660792 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.5948076031525267 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 38678.29296875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5772385509227614 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 234.51239013671875 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4114494518879415 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9668002289639381 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4105969970779999 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5291568188660792 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": 
"cos_sim_accuracy", + "description": null, + "value": 0.642790913305517 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.6761397123336792 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.6134134134134135 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.4874127507209778 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.47167487684729065 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8769318832283916 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5599254090069472 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6515994436717664 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 2297.15966796875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.604589417788735 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3568.659423828125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.45207033465683494 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9124212936462507 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5658647904632282 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6464997681965693 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + 
"description": null, + "value": 45.08574676513672 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6067834513604174 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 79.49329376220703 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.44984802431610943 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9318832283915284 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5628362091175183 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6017617060732499 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 2657.70654296875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5945250780819401 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 1609.9322509765625 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.43777056277056275 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9261591299370349 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.46695267318075395 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5658647904632282 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/euk_retrieval.json b/leaderboard/submissions/progen2-xlarge/euk_retrieval.json new file mode 100644 index 0000000..bc5dd0f --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk 
Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.84774 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.8374 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.82416 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.34395 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.45708 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.67546 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.34724 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.4629 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.70514 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.74791 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.66367 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.38521 + }, + { + 
"id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8982851018220793 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8995610677282704 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.900508188422815 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.4767898888088076 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.7419553503608369 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.055414003237389486 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.43775298679589786 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.7407504551889852 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.05347140613062027 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.41293112354686706 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.7476717808315242 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.08006377164933114 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": -0.0148881359491563 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.23989310515269469 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3287363118687835 + }, + { + "id": "nauc_map_at_10_max", + "display_name": 
"nauc_map_at_10_max", + "description": null, + "value": 0.05281666093138101 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4712488352245418 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.281326814344777 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.33174842347070294 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.8375866923633879 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.1383451658034337 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.024413048209636076 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.2357082185796465 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3392331612284683 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.03992063669730916 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.4662588167162248 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2931262438573035 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.2837205586716561 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.8522921818796286 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.162147287293181 + }, 
+ { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.4146288409630512 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.615634517860307 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.37956355864345537 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.3407833568385157 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.4829041685657454 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.3726537217663794 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.21298158451575644 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.17142552632901348 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.28865788487324007 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6383125172254681 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.6960093217220756 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.08088971499853534 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6431409368495994 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.693162244516911 + }, + { + "id": 
"nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.07470607309971362 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6408878710877405 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6952625726667562 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.07873314340871365 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.58687 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.5516 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.51338 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.19386 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.24564 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.33821 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.2105 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.27395 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.43525 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.52154 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.43441 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.23408 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7136655948553056 + 
}, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.717022661154494 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.7203295622151216 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.5080779306269386 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.6888089972666763 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": 0.10673258599936329 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.4632005033585787 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.6712012494044541 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": 0.08784010802216201 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.38857259040070996 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.5895619261020698 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.14080687873239334 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.16090136788609444 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.31243391620873545 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.3390512300528643 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.22579196089292114 + }, + { + "id": 
"nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.4669121206457578 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.26122356825986964 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.30204264302375927 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.6332673466736974 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.16004379501441202 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.11110155948455377 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.25591724289649287 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3227114364901176 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.17196503188722667 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.39247300342855024 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.2629668690094458 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.22851294474341216 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.530880033887029 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.1726004902442935 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + 
"description": null, + "value": 0.5070785300123103 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.6876905262817138 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.04779793396663549 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4351829938283804 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.6459334312111928 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.13694795084624573 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.29963197362191873 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.31559221570405743 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.18803068785533397 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6013539055037981 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.6207506271548514 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.2669837260163356 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6020956263248132 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.6245273952623522 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 
0.2710930700853572 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6044496050365885 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.6292104477286984 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.27333065711002147 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/fefe_phylogeny.json b/leaderboard/submissions/progen2-xlarge/fefe_phylogeny.json new file mode 100644 index 0000000..504e7db --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.687389103121127 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.8359818853206277 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.8191020336133575 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.8359818853206277 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": 
[ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.5497951203631188 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.5787807165799065 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.5626204870453207 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.5787807165799065 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/modac_paralogy_bigene.json b/leaderboard/submissions/progen2-xlarge/modac_paralogy_bigene.json new file mode 100644 index 0000000..432d51e --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.516450720102903e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 9.02681867829321e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.05630026809651475 + } + ] + }, + { + 
"layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.010579204388082606 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.029490616621983913 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.012979104896871197 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.029490616621983913 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.435656836461126 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/mopb_clustering.json b/leaderboard/submissions/progen2-xlarge/mopb_clustering.json new file mode 100644 index 0000000..db44f1e --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.9081213752554907 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7589723753650268 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/rpob_arch_phylogeny.json b/leaderboard/submissions/progen2-xlarge/rpob_arch_phylogeny.json new file 
mode 100644 index 0000000..79d2705 --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.20539647600239538 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2959954224929351 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.31970215732214785 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.31970215732214785 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.4102284573265796 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.4946045987721917 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.5014444676012894 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.5014444676012894 + } + ] + } + ] +} diff --git a/leaderboard/submissions/progen2-xlarge/rpob_bac_phylogeny.json 
b/leaderboard/submissions/progen2-xlarge/rpob_bac_phylogeny.json new file mode 100644 index 0000000..caa0ab6 --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.28914273667063095 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.4766469672120496 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.47147258109872947 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.4766469672120496 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2596902166209353 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3450947432996055 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.3497832144429374 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3497832144429374 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/progen2-xlarge/vibrio_operonic_pair.json b/leaderboard/submissions/progen2-xlarge/vibrio_operonic_pair.json new file mode 100644 index 0000000..ba256f8 --- /dev/null +++ b/leaderboard/submissions/progen2-xlarge/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "hugohrban/progen2-xlarge", + "revision": "...", + "num_layers": 32, + "num_params": 6443638816, + "embed_dim": 4096 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 16, + "layer_display_name": "16", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6587640886125146 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.8188399076461792 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5227848101265823 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.17655116319656372 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3640370207139709 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9270482603815937 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.42481524037914975 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6626506024096386 + }, + { + "id": 
"manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 1462.2254638671875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5146886016451233 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3214.6376953125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3485873458018305 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9831649831649831 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3918753881823718 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6642052079284881 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 28.370464324951172 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5240963855421686 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 66.16458129882812 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3581720872787155 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9764309764309764 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.42221114260233406 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6537116206762534 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 5076.0380859375 + }, + { + "id": "dot_f1", + 
"display_name": "dot_f1", + "description": null, + "value": 0.5336442371752165 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 293.55999755859375 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.379441023211748 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.898989898989899 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.39506197369719853 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.42481524037914975 + } + ] + }, + { + "layer_number": 31, + "layer_display_name": "31", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6820831713952584 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.742120623588562 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5437981779957954 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.46992284059524536 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.39531329597554765 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8709315375982043 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.49399145765039254 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6774193548387096 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 2092.191162109375 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + 
"description": null, + "value": 0.5341494845360825 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 3469.343505859375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3746046091278807 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9304152637485971 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.46616460843353874 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6762534006995725 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 41.604942321777344 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5326569435637286 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 75.52215576171875 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3711886875828546 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9427609427609428 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.46666836903764686 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6544889234356782 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 10279.9619140625 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5383381456528714 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 1468.63623046875 + }, + { 
+ "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.37690925426774485 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.941638608305275 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4308905193342436 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.49399145765039254 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/MIBIG_protein_classification.json b/leaderboard/submissions/prot_t5_xl_bfd/MIBIG_protein_classification.json new file mode 100644 index 0000000..775b64b --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.665347347353334 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.671201814058957 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.78845191704169 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6203371228716182 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.8023431594860176 + } + ] + 
}, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6393665718980249 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.655328798185941 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7777498564416659 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.5932752607728978 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.782879818594105 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/arch_retrieval.json b/leaderboard/submissions/prot_t5_xl_bfd/arch_retrieval.json new file mode 100644 index 0000000..c2ff5f1 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.91556 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.90692 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.87968 + }, + { + "id": 
"map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.30562 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.42599 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.71437 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.31146 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.43927 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.75802 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.82868 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.76219 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.4731 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9424384691990327 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9442957407746143 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9450892859974356 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6758404864845804 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.07802141173529904 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.34014079454644375 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6559176060420032 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.11191286028806371 + }, + { + "id": 
"nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.36216008877717815 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.588166705701525 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": -0.00013877612539943873 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.26109772787783125 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": -0.014830905831570812 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.10595506924709744 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.32085091803527405 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.10033887584469645 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.2053648224876042 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.20098155837221496 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5383618167014114 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.18560538825485104 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.14646774451947722 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.02008919630315214 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 
0.11234151602554744 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3242694849254786 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.0837787484872365 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.22103746678628963 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.21375392931643278 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5147756801091333 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.22064295388157423 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.11436202458939185 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5561758833104028 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.09041415273912264 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.7212291337345235 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.46267035130101786 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.06806184226878606 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.6634938646543213 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 
0.18101555042086578 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.32278953058129545 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.35250619598434496 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6997884970620865 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.04506108475253997 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.150746697861468 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.6974394278188117 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.050149955898670835 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.14664534711931623 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6968457854626777 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.049163188110125974 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.14643427219711108 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.76922 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.73124 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.65851 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + 
"value": 0.22822 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.30193 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.46351 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.24018 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.32508 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.54975 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.69415 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.60568 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.33925 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8569426660975955 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8589987737875058 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.860342096901009 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6229293301375335 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.4128916234696497 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.12491964721295902 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.5725634494942511 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.40575051204835366 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + 
"description": null, + "value": -0.15984334362764854 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.4671079862629836 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.37026058182180693 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.10868446365529066 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.14085023375284314 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.22636481411436557 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.20416003342491348 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.23722391725543646 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.3063646829510937 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.11281577118835237 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.46696020043403985 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.42152653625289493 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.08111993114861822 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.12046877670841287 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.21017995546377888 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": 
"nauc_recall_at_5_diff1", + "description": null, + "value": 0.19460706495216118 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.20295180744150895 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.28352319675208193 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.10984164101180431 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.4349341161444355 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.3879826102213279 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.07896065211344214 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5328788932256486 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.32074885397774344 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.31911971640195713 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.45500288678534834 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.2681194584757835 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.34939580835864753 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.23627988535364855 + }, + { + "id": "nauc_precision_at_50_std", + 
"display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.043589016181725906 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.26692480361724236 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7415289598377633 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.43021039474005807 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.11071333809933313 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7421025217893243 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.434990212167193 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.1124635999964573 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.742616888935384 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.4371335788300678 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.11342495339203741 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/bacarch_bigene.json b/leaderboard/submissions/prot_t5_xl_bfd/bacarch_bigene.json new file mode 100644 index 0000000..98ccb5b --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", 
+ "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.7597484276729559 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.8301886792452831 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7823899371069182 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.8301886792452831 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.2519729152748021 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.33584905660377357 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.26899616107163277 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.33584905660377357 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/convergent_enzymes_classification.json b/leaderboard/submissions/prot_t5_xl_bfd/convergent_enzymes_classification.json new file mode 100644 index 0000000..966f9c3 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without 
blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.275 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.22734523809523807 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.2275 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.17930555555555558 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/cyano_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_bfd/cyano_operonic_pair.json new file mode 100644 index 0000000..9550a89 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", 
+ "description": null, + "value": 0.7272030651340996 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9687467813491821 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.45071380013596196 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.8978159427642822 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.30054397098821395 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.9008152173913043 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.3941340265884971 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7203065134099617 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 1735.5322265625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.44430258538142353 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 5307.4521484375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.2903629536921151 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9456521739130435 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3528795703139007 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.721455938697318 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, 
+ "value": 126.05880737304688 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.44332175560467313 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 292.90557861328125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.2887700534759358 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.9538043478260869 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3695630635425535 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7195402298850575 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 356795.21875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.4499004644990046 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 190484.53125 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.2976294995610184 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9211956521739131 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.36196828415543614 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.3941340265884971 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7344827586206897 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.97544264793396 + }, + { + "id": "cos_sim_f1", + 
"display_name": "cos_sim_f1", + "description": null, + "value": 0.4597894736842105 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9250391721725464 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.3331299572910311 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.7418478260869565 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.40694383571640735 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7310344827586207 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 9917.591796875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.4648117839607201 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 15072.4345703125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3325526932084309 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.7717391304347826 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.3986910724493818 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7306513409961686 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 457.9825439453125 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4638027048528242 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", 
+ "description": null, + "value": 840.006103515625 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.327896512935883 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.7921195652173914 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.39658322011908265 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7222222222222222 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 4470940.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.44773175542406307 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 3160863.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.295316565481353 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9252717391304348 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3594057338410264 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.40694383571640735 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/ec_classification.json b/leaderboard/submissions/prot_t5_xl_bfd/ec_classification.json new file mode 100644 index 0000000..a191718 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + 
} + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6328125 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5651041666666667 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.609375 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5447916666666667 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/ecoli_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_bfd/ecoli_operonic_pair.json new file mode 100644 index 0000000..fbdca25 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6585535465924895 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 
0.9535143375396729 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.6174863387978143 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9261614680290222 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.48787778146795086 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.840870062965083 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5911089754596931 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.631432545201669 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 3524.670166015625 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5858181818181818 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 5154.9697265625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4292565947242206 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9221522610188895 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5325132413673002 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6455725544738062 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 174.34619140625 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6126418152350082 + }, + { + "id": 
"euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 223.7291259765625 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.47412982126058323 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8654836863194046 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5706504173017982 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6376912378303199 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 300376.21875 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5884498480243161 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 240064.3125 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.45545796737766625 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8311390955924441 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.5400873122794386 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5911089754596931 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6738525730180807 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9607139825820923 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.6289983656315666 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + 
"description": null, + "value": 0.9430916905403137 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.5311514195583596 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.7710360618202633 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.6204750761919859 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6787204450625869 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 11232.892578125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.6275144508670519 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 14495.390625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.5263770364623739 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.7767601602747567 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.6255504867654197 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6736207695873899 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 551.5784912109375 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6314819082737958 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 733.4508056640625 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 
0.5285493827160493 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.7842014882655982 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.6208618380913407 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6124246638850255 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 4513109.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5824329034506797 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 3365215.5 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4186920571285392 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.95649685174585 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4987551828608048 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.6255504867654197 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/euk_retrieval.json b/leaderboard/submissions/prot_t5_xl_bfd/euk_retrieval.json new file mode 100644 index 0000000..7084ba9 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + 
"model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.92672 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.92306 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.8895 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.35502 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.47966 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.70554 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.35897 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.48744 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.73218 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.83087 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.75016 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.42894 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9415862808145767 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9430153626295105 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9437368653683487 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 
0.6450941038620643 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.3234724628590291 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.7353405227616494 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6784370493921454 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.356307778900032 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.7553235221093711 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6829124921854598 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.3430036735531466 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.5016210325394614 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.14516238032739714 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.05981617619794749 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.2506683978946674 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.26720700851970014 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.2748532106324955 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.13039242553014585 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": 
null, + "value": 0.7404748343804409 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.4062731822335897 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.20921986403462078 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.15345536503202384 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.05976560645876083 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.24331468000836254 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.27273696746447923 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.2772936528689855 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.12056716886495585 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.7668012857938902 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.44460772529114895 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.18875065148055795 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.3930073745085516 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.41020902636849293 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.932622625910148 + }, + 
{ + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.2635529758242909 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.3323551761779577 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.7524748595057036 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": -0.005519805496856407 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.23536165525170333 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.358047396892961 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7135224342947254 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.318638219709438 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.6795248041287989 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7103575293672233 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.3180491040497493 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.6905725180457766 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7113420373917401 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.328353633746779 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": 
"nauc_mrr_at_50_diff1", + "description": null, + "value": -0.6797371842783072 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.71234 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.70278 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.67585 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.22489 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.31161 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.47218 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.24057 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.35114 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.58748 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.64823 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.58457 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.33505 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.7939442658092176 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.7984675649466646 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8004998179261701 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.649294362751511 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + 
"description": null, + "value": 0.4789146724672918 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.06361407354634938 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6278496426823811 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.485871035859316 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.04265838292616143 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6160495322931576 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.4735471084683332 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.038745902550895 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.35996321488756433 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.31196420531010555 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.23017037321039516 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.4194454380907453 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.47086648384193386 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.14309964875425385 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5964554555580853 + }, + { + "id": "nauc_map_at_50_std", + "display_name": 
"nauc_map_at_50_std", + "description": null, + "value": 0.5208142856976985 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.014199249179051887 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.32992418491537023 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.2705437266239038 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.246661828066766 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.35831658068419964 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.39458331969775373 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.18432558994969014 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.5472469480990645 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.46257256009768155 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.03652551189856071 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5222050117811435 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.4633140941813137 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.29411592084499516 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + 
"description": null, + "value": 0.3939497429662996 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.3920607553465279 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.2770064989363528 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.15262499225505957 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.0399399666879417 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.25354614582075213 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7990542802042412 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.4330228140756498 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.07372075423984396 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.799568634188793 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.42435558306674237 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.07777149596651926 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.8000664803994814 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.4278349242317028 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.07345720773957888 + } + ] + } + ] +} 
diff --git a/leaderboard/submissions/prot_t5_xl_bfd/fefe_phylogeny.json b/leaderboard/submissions/prot_t5_xl_bfd/fefe_phylogeny.json new file mode 100644 index 0000000..93bc496 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": "FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.33209122731338475 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.6244747787953618 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.5884202230855742 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.6244747787953618 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.43006317577077985 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.6215222814983533 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.5928180835249242 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 
0.6215222814983533 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/modac_paralogy_bigene.json b/leaderboard/submissions/prot_t5_xl_bfd/modac_paralogy_bigene.json new file mode 100644 index 0000000..b143d2d --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/modac_paralogy_bigene.json @@ -0,0 +1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.09316353887399464 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.00217875092414885 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.004021447721179625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.00230626824933552 + }, + { + "id": "accuracy", + 
"display_name": "accuracy", + "description": null, + "value": 0.004021447721179625 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.2734584450402145 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/mopb_clustering.json b/leaderboard/submissions/prot_t5_xl_bfd/mopb_clustering.json new file mode 100644 index 0000000..a57e33a --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.8281800072486031 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.7658272771219637 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/rpob_arch_phylogeny.json b/leaderboard/submissions/prot_t5_xl_bfd/rpob_arch_phylogeny.json new file mode 100644 index 0000000..ce67216 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": 
"tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.18285301896376252 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.1904500888228494 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2132223506047016 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2132223506047016 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.23327313257646354 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3555894078051998 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.32671911270173437 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3555894078051998 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/rpob_bac_phylogeny.json b/leaderboard/submissions/prot_t5_xl_bfd/rpob_bac_phylogeny.json new file mode 100644 index 0000000..9785c1f --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + 
"modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.21148648442107776 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2367395745608637 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.2724794083627195 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2724794083627195 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.25893556661958106 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2733544641048402 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.25026214474431 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2733544641048402 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_bfd/vibrio_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_bfd/vibrio_operonic_pair.json new file mode 100644 index 0000000..5a12578 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_bfd/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on 
Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_bfd", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6871356393315197 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9634510278701782 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5520751761942052 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9320050477981567 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4239326518340349 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.7912457912457912 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5172201267603933 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6743101438010105 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 3110.09326171875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5299209634926608 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 4711.361328125 + }, + { + "id": "manhattan_precision", + 
"display_name": "manhattan_precision", + "description": null, + "value": 0.3986409966024915 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.7901234567901234 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4750578849012121 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6789739603575593 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 144.1129608154297 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5490048817123545 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 214.9871368408203 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.41252821670428896 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8204264870931538 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.502524788853816 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6661484648270501 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 337874.09375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5233226837060703 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 211411.4375 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.36578829834747656 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9191919191919192 + }, + { + 
"id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4546480066127966 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5172201267603933 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6984065293431791 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9698377251625061 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.558586246638494 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9381648898124695 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4246495327102804 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8159371492704826 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.539827365743389 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6995724834823164 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 10790.3623046875 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5534351145038168 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 14495.240234375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4193175245806825 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.813692480359147 + }, + { + "id": 
"manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5413814180716444 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7030703458997279 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 521.355712890625 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5590062111801242 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 730.8151245117188 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.42729970326409494 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8080808080808081 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5389712975239336 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6638165565487757 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 4920725.5 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5192250372578241 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 3139242.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3534902597402597 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9775533108866442 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4328071265147608 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5413814180716444 + } + ] + } + ] +} diff --git 
a/leaderboard/submissions/prot_t5_xl_uniref50/MIBIG_protein_classification.json b/leaderboard/submissions/prot_t5_xl_uniref50/MIBIG_protein_classification.json new file mode 100644 index 0000000..3f4681f --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/MIBIG_protein_classification.json @@ -0,0 +1,98 @@ +{ + "task": { + "id": "MIBIG_protein_classification", + "display_name": "MIBiG Classification", + "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/mibig_classification_prot", + "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6921288381092022 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6961451247165533 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.8139754057916093 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.6470389372073905 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.8191609977324276 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.6619098471417835 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6394557823129252 + }, + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.8554266113510164 + }, + { + "id": "recall", + "display_name": "recall", + 
"description": null, + "value": 0.6089150997875741 + }, + { + "id": "lrap", + "display_name": "lrap", + "description": null, + "value": 0.7777777777777787 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/arch_retrieval.json b/leaderboard/submissions/prot_t5_xl_uniref50/arch_retrieval.json new file mode 100644 index 0000000..5f6fe6f --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/arch_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "arch_retrieval", + "display_name": "Arch Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/arch_retrieval", + "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" + }, + { + "path": "tattabio/arch_retrieval_qrels", + "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.92437 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.91832 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.88929 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.31084 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.43426 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.73127 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.31741 + }, + { + "id": "recall_at_10", + 
"display_name": "recall_at_10", + "description": null, + "value": 0.44657 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.7625 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.83594 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.7723 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.47935 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.944664959453692 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9465603384617468 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9470471736604207 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6904257424907183 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.17626530867610984 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.3256037208699509 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6755859537896345 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.19312694738891206 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.34336751560690454 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6099399964441327 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.003032523727931495 + }, + { + "id": "nauc_ndcg_at_50_diff1", + 
"display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.24904093099919736 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": -0.01944424474842768 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.12247733739429825 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.30213657878474254 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.09661075440355467 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.23052413036526725 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.1929524400881027 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.5485655483431373 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.21502729379562965 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.143434087412841 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": -0.027816152462282782 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.12775708504995087 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.3024520158353276 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.08095008455559108 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.2419416823372581 + }, + { 
+ "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.20458678360282087 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.536333878981592 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.23988126483702843 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.11687231401703965 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5391898283330372 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.11534434911274209 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.7316665111394176 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4498954557047002 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.0841995836786073 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.6432552889288048 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.16726557824854918 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.354798802661032 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3199379354401648 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7364765721392718 + }, + { 
+ "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.24266478409531936 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.17829833398517161 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7372889177413674 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.2470438039467095 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.1591466954839909 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7376354579559358 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.24228421772128148 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.16077725815978616 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.76933 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.73698 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.66495 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.22744 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.30213 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.46438 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.23986 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.32866 + }, + { + "id": 
"recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.55316 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.69313 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.61144 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.34595 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.8586498790724141 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.8614352783366866 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8629394391752504 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.5641842814317276 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.4880046637927035 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.037498225871017386 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.5222240258842824 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.45928236160268926 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.0735168315819288 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.44245593635026653 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.39907625818163395 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.04156312252212681 + 
}, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.1948456953101706 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.261665582778819 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.2279556194678241 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.2794927054324812 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.32900702387812925 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.14553044394893827 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.47965579454358886 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.4173120928894022 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.022111983640584615 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.17398804372634855 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.25358909422151527 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2304294274459766 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.24544039747014393 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.3020161386947229 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, 
+ "value": 0.14739917022628052 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.4516091047820593 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.36055725309104575 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.01451301873367621 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.45918558853461183 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.35010629637375024 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.2492532917210604 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.37443959671596155 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.2695210685235055 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.28869106912215586 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.14690009878485263 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": 0.03721365892480409 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.22839727740438145 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.6724896908702344 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + 
"value": 0.5605484676610025 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.17667808149086445 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.674416832047262 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.5583444357986156 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.17924052942171584 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.6740382845670758 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.5567109464037232 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.17814332805230865 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/bacarch_bigene.json b/leaderboard/submissions/prot_t5_xl_uniref50/bacarch_bigene.json new file mode 100644 index 0000000..7ede09b --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/bacarch_bigene.json @@ -0,0 +1,86 @@ +{ + "task": { + "id": "bacarch_bigene", + "display_name": "BacArch BiGene", + "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/bac_arch_bigene", + "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": 
"precision", + "display_name": "precision", + "description": null, + "value": 0.779245283018868 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.8415094339622642 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.7987421383647799 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.8415094339622642 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.1946602749010892 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.27169811320754716 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.20994827269201596 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.27169811320754716 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/convergent_enzymes_classification.json b/leaderboard/submissions/prot_t5_xl_uniref50/convergent_enzymes_classification.json new file mode 100644 index 0000000..e45a738 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/convergent_enzymes_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "convergent_enzymes_classification", + "display_name": "Convergent Enzymes Classification", + "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/convergent_enzymes", + "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + 
"layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.2975 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.24263690476190475 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.2075 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.162297619047619 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/cyano_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_uniref50/cyano_operonic_pair.json new file mode 100644 index 0000000..584a51d --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/cyano_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "cyano_operonic_pair", + "display_name": "Cyano Operonic Pair", + "description": "Evaluate on Cyano operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/cyano_operonic_pair", + "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7298850574712644 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9710829257965088 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4658696490551485 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": 
null, + "value": 0.9307838678359985 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.325255788906839 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8206521739130435 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.4031848326409708 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7222222222222222 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 4015.896484375 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.44346733668341703 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 8008.25048828125 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.2883986928104575 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9592391304347826 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.35232572385733507 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7245210727969349 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 176.74635314941406 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.45276447540269926 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 315.7908935546875 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 
0.3331197950032031 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.7065217391304348 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.3766570634605429 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7199233716475095 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 854446.125 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.45307917888563054 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 539589.25 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.3102409638554217 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8396739130434783 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.35933836728432894 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.4031848326409708 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.728735632183908 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.980133056640625 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.4666406554818573 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9235023260116577 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.32731253420908596 + }, + { + "id": "cos_sim_recall", + "display_name": 
"cos_sim_recall", + "description": null, + "value": 0.8125 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.40918084231605883 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7283524904214559 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 14713.158203125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.46769230769230763 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 23292.853515625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.345679012345679 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.7228260869565217 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4074355169035304 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.7279693486590039 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 704.3648681640625 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.4682057513163224 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 1315.472412109375 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.3335256780150029 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.7853260869565217 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + 
"value": 0.40226336417241415 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.7195402298850575 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 15402820.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.4510385756676557 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 9822080.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.31020408163265306 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8260869565217391 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.3522933436299668 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.40918084231605883 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/ec_classification.json b/leaderboard/submissions/prot_t5_xl_uniref50/ec_classification.json new file mode 100644 index 0000000..c9a8c4b --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/ec_classification.json @@ -0,0 +1,62 @@ +{ + "task": { + "id": "ec_classification", + "display_name": "EC Classification", + "description": "Evaluate on Enzyme Commission number classification task.", + "modality": "protein", + "type": "classification", + "datasets": [ + { + "path": "tattabio/ec_classification", + "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498" + } + ], + "primary_metric_id": "f1" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + 
"value": 0.6875 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.62890625 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.6015625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.5325520833333333 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/ecoli_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_uniref50/ecoli_operonic_pair.json new file mode 100644 index 0000000..ff1c161 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/ecoli_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "ecoli_operonic_pair", + "display_name": "E.coli Operonic Pair", + "description": "Evaluate on E.coli K-12 operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/ecoli_operonic_pair", + "revision": "a62c01143a842696fc8200b91c1acb825e8cb891" + } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6541492814093649 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9587552547454834 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.6128899835796389 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9337368011474609 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + 
"value": 0.47776 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8546078992558672 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5843393095554787 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6328233657858137 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 5367.236328125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5791978246091095 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 8266.498046875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.41189267585206674 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.9753863766456783 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5170368370974361 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6481223922114048 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 246.59368896484375 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6126373626373627 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 339.4747314453125 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.46610928635413557 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8935317687464225 + }, + { + "id": 
"euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.563578784188266 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6321279554937413 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 748599.9375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.594391785150079 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 589889.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.45372324389508595 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.8614768174012593 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.5455742497849516 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5843393095554787 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.672461752433936 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9680966138839722 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.6246070947462955 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9479633569717407 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.513852973771703 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.7962220950200344 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 
0.6120691357655745 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6722299490032453 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 19384.970703125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.6298500357057844 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 22904.546875 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.539119804400978 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.7572982255294791 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.6180060687920407 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6719981455725544 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 1008.8643798828125 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.6255144032921811 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 1199.65380859375 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.5207460982108869 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.7830566685746995 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.6130483779613698 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6286509040333796 + }, + { + "id": 
"dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 14326900.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5898066783831283 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 9925826.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.4255642911488714 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9605037206639955 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.5133120730662288 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.6180060687920407 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/euk_retrieval.json b/leaderboard/submissions/prot_t5_xl_uniref50/euk_retrieval.json new file mode 100644 index 0000000..f990d80 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/euk_retrieval.json @@ -0,0 +1,762 @@ +{ + "task": { + "id": "euk_retrieval", + "display_name": "Euk Retrieval", + "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein", + "modality": "protein", + "type": "retrieval", + "datasets": [ + { + "path": "tattabio/euk_retrieval", + "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0" + }, + { + "path": "tattabio/euk_retrieval_qrels", + "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b" + } + ], + "primary_metric_id": "map_at_5" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.93403 + }, + { + "id": "ndcg_at_10", 
+ "display_name": "ndcg_at_10", + "description": null, + "value": 0.93219 + }, + { + "id": "ndcg_at_50", + "display_name": "ndcg_at_50", + "description": null, + "value": 0.9007 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.35922 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.48597 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.71785 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.36271 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.49199 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.7473 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.83794 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.7582 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.43505 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.9461414790996785 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.9461414790996785 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.9474721011861392 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6360571033125667 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.45784494058726904 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.41909498633906045 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + 
"value": 0.6586594124801434 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": "nauc_ndcg_at_10_std", + "description": null, + "value": 0.4984244373586518 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.44705403624496476 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6967911851047006 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.42357206240837786 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": -0.24784276595880533 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.14095037854383827 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.07113531637419679 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.27337398758596637 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.25837425131598796 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.3071068476320376 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.17651087109287392 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.7557208386859483 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.481523205045611 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": -0.14518612903534914 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + 
"description": null, + "value": 0.1471878058724989 + }, + { + "id": "nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.07018518602956217 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.2691033520117919 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.2620639184111711 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.3079361861092893 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.16933644121075292 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.7551127432888673 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.49476345922374765 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": -0.11518415139468428 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.34608944934490565 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.4810929802383669 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.8218128321986103 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.2241056474991675 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.3806484972502808 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": 
null, + "value": -0.6415372814462837 + }, + { + "id": "nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": -0.012605789522662619 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.2495720768505924 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.3001699673500121 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7150455812547505 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.468623009585809 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": -0.3088280098255423 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7150455812547505 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.468623009585809 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": -0.3088280098255423 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7147040760070875 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.47258452887445196 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": -0.29486714517449736 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "ndcg_at_5", + "display_name": "ndcg_at_5", + "description": null, + "value": 0.6853 + }, + { + "id": "ndcg_at_10", + "display_name": "ndcg_at_10", + "description": null, + "value": 0.67354 + }, + { + "id": "ndcg_at_50", + 
"display_name": "ndcg_at_50", + "description": null, + "value": 0.64834 + }, + { + "id": "map_at_5", + "display_name": "map_at_5", + "description": null, + "value": 0.22427 + }, + { + "id": "map_at_10", + "display_name": "map_at_10", + "description": null, + "value": 0.30389 + }, + { + "id": "map_at_50", + "display_name": "map_at_50", + "description": null, + "value": 0.44304 + }, + { + "id": "recall_at_5", + "display_name": "recall_at_5", + "description": null, + "value": 0.24201 + }, + { + "id": "recall_at_10", + "display_name": "recall_at_10", + "description": null, + "value": 0.34737 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.57233 + }, + { + "id": "precision_at_5", + "display_name": "precision_at_5", + "description": null, + "value": 0.62186 + }, + { + "id": "precision_at_10", + "display_name": "precision_at_10", + "description": null, + "value": 0.55338 + }, + { + "id": "precision_at_50", + "display_name": "precision_at_50", + "description": null, + "value": 0.31569 + }, + { + "id": "mrr_at_5", + "display_name": "mrr_at_5", + "description": null, + "value": 0.794694533762058 + }, + { + "id": "mrr_at_10", + "display_name": "mrr_at_10", + "description": null, + "value": 0.799452610626244 + }, + { + "id": "mrr_at_50", + "display_name": "mrr_at_50", + "description": null, + "value": 0.8019849636131453 + }, + { + "id": "nauc_ndcg_at_5_max", + "display_name": "nauc_ndcg_at_5_max", + "description": null, + "value": 0.6780659145596223 + }, + { + "id": "nauc_ndcg_at_5_std", + "display_name": "nauc_ndcg_at_5_std", + "description": null, + "value": 0.4686194164677635 + }, + { + "id": "nauc_ndcg_at_5_diff1", + "display_name": "nauc_ndcg_at_5_diff1", + "description": null, + "value": -0.013171906236838202 + }, + { + "id": "nauc_ndcg_at_10_max", + "display_name": "nauc_ndcg_at_10_max", + "description": null, + "value": 0.6797508716872995 + }, + { + "id": "nauc_ndcg_at_10_std", + "display_name": 
"nauc_ndcg_at_10_std", + "description": null, + "value": 0.4588806794061712 + }, + { + "id": "nauc_ndcg_at_10_diff1", + "display_name": "nauc_ndcg_at_10_diff1", + "description": null, + "value": -0.026029553352657123 + }, + { + "id": "nauc_ndcg_at_50_max", + "display_name": "nauc_ndcg_at_50_max", + "description": null, + "value": 0.6532393400819172 + }, + { + "id": "nauc_ndcg_at_50_std", + "display_name": "nauc_ndcg_at_50_std", + "description": null, + "value": 0.45314823534779886 + }, + { + "id": "nauc_ndcg_at_50_diff1", + "display_name": "nauc_ndcg_at_50_diff1", + "description": null, + "value": 0.000803786369534146 + }, + { + "id": "nauc_map_at_5_max", + "display_name": "nauc_map_at_5_max", + "description": null, + "value": 0.4667523187539349 + }, + { + "id": "nauc_map_at_5_std", + "display_name": "nauc_map_at_5_std", + "description": null, + "value": 0.3541285833071939 + }, + { + "id": "nauc_map_at_5_diff1", + "display_name": "nauc_map_at_5_diff1", + "description": null, + "value": 0.2621735361977053 + }, + { + "id": "nauc_map_at_10_max", + "display_name": "nauc_map_at_10_max", + "description": null, + "value": 0.5293122208340053 + }, + { + "id": "nauc_map_at_10_std", + "display_name": "nauc_map_at_10_std", + "description": null, + "value": 0.46647778152678937 + }, + { + "id": "nauc_map_at_10_diff1", + "display_name": "nauc_map_at_10_diff1", + "description": null, + "value": 0.16392477080326415 + }, + { + "id": "nauc_map_at_50_max", + "display_name": "nauc_map_at_50_max", + "description": null, + "value": 0.704438550127725 + }, + { + "id": "nauc_map_at_50_std", + "display_name": "nauc_map_at_50_std", + "description": null, + "value": 0.44483676603571776 + }, + { + "id": "nauc_map_at_50_diff1", + "display_name": "nauc_map_at_50_diff1", + "description": null, + "value": 0.009175316156314641 + }, + { + "id": "nauc_recall_at_5_max", + "display_name": "nauc_recall_at_5_max", + "description": null, + "value": 0.41466058265722383 + }, + { + "id": 
"nauc_recall_at_5_std", + "display_name": "nauc_recall_at_5_std", + "description": null, + "value": 0.31066030507292597 + }, + { + "id": "nauc_recall_at_5_diff1", + "display_name": "nauc_recall_at_5_diff1", + "description": null, + "value": 0.27580011350774186 + }, + { + "id": "nauc_recall_at_10_max", + "display_name": "nauc_recall_at_10_max", + "description": null, + "value": 0.46165533483400295 + }, + { + "id": "nauc_recall_at_10_std", + "display_name": "nauc_recall_at_10_std", + "description": null, + "value": 0.37450617812296255 + }, + { + "id": "nauc_recall_at_10_diff1", + "display_name": "nauc_recall_at_10_diff1", + "description": null, + "value": 0.19872702396564773 + }, + { + "id": "nauc_recall_at_50_max", + "display_name": "nauc_recall_at_50_max", + "description": null, + "value": 0.6548039005583735 + }, + { + "id": "nauc_recall_at_50_std", + "display_name": "nauc_recall_at_50_std", + "description": null, + "value": 0.34672855192148566 + }, + { + "id": "nauc_recall_at_50_diff1", + "display_name": "nauc_recall_at_50_diff1", + "description": null, + "value": 0.0623575763027755 + }, + { + "id": "nauc_precision_at_5_max", + "display_name": "nauc_precision_at_5_max", + "description": null, + "value": 0.5457235360403251 + }, + { + "id": "nauc_precision_at_5_std", + "display_name": "nauc_precision_at_5_std", + "description": null, + "value": 0.43521558420021866 + }, + { + "id": "nauc_precision_at_5_diff1", + "display_name": "nauc_precision_at_5_diff1", + "description": null, + "value": -0.2043697991483657 + }, + { + "id": "nauc_precision_at_10_max", + "display_name": "nauc_precision_at_10_max", + "description": null, + "value": 0.4235593791032231 + }, + { + "id": "nauc_precision_at_10_std", + "display_name": "nauc_precision_at_10_std", + "description": null, + "value": 0.3456476885464854 + }, + { + "id": "nauc_precision_at_10_diff1", + "display_name": "nauc_precision_at_10_diff1", + "description": null, + "value": -0.26441406895731684 + }, + { + "id": 
"nauc_precision_at_50_max", + "display_name": "nauc_precision_at_50_max", + "description": null, + "value": 0.10170718952519693 + }, + { + "id": "nauc_precision_at_50_std", + "display_name": "nauc_precision_at_50_std", + "description": null, + "value": -0.0021640216870025875 + }, + { + "id": "nauc_precision_at_50_diff1", + "display_name": "nauc_precision_at_50_diff1", + "description": null, + "value": -0.2410207582674104 + }, + { + "id": "nauc_mrr_at_5_max", + "display_name": "nauc_mrr_at_5_max", + "description": null, + "value": 0.7652388812149624 + }, + { + "id": "nauc_mrr_at_5_std", + "display_name": "nauc_mrr_at_5_std", + "description": null, + "value": 0.4064602524914446 + }, + { + "id": "nauc_mrr_at_5_diff1", + "display_name": "nauc_mrr_at_5_diff1", + "description": null, + "value": 0.17155691541647752 + }, + { + "id": "nauc_mrr_at_10_max", + "display_name": "nauc_mrr_at_10_max", + "description": null, + "value": 0.7654107127585745 + }, + { + "id": "nauc_mrr_at_10_std", + "display_name": "nauc_mrr_at_10_std", + "description": null, + "value": 0.4008546845028568 + }, + { + "id": "nauc_mrr_at_10_diff1", + "display_name": "nauc_mrr_at_10_diff1", + "description": null, + "value": 0.17755957672456033 + }, + { + "id": "nauc_mrr_at_50_max", + "display_name": "nauc_mrr_at_50_max", + "description": null, + "value": 0.7669372470722907 + }, + { + "id": "nauc_mrr_at_50_std", + "display_name": "nauc_mrr_at_50_std", + "description": null, + "value": 0.40628434902865657 + }, + { + "id": "nauc_mrr_at_50_diff1", + "display_name": "nauc_mrr_at_50_diff1", + "description": null, + "value": 0.17922685800302057 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/fefe_phylogeny.json b/leaderboard/submissions/prot_t5_xl_uniref50/fefe_phylogeny.json new file mode 100644 index 0000000..a5fcaff --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/fefe_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "fefe_phylogeny", + "display_name": 
"FeFeHydrogenase Phylogeny", + "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/fefe_phylogeny_sequences", + "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26" + }, + { + "path": "tattabio/fefe_phylogeny_distances", + "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.3028987188957946 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.6208332719687013 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.5492170350160599 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.6208332719687013 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.5203074993813921 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.7065064377307892 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.6427521570554848 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.7065064377307892 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/modac_paralogy_bigene.json b/leaderboard/submissions/prot_t5_xl_uniref50/modac_paralogy_bigene.json new file mode 100644 index 0000000..736354e --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/modac_paralogy_bigene.json @@ -0,0 
+1,97 @@ +{ + "task": { + "id": "modac_paralogy_bigene", + "display_name": "ModAC Paralogy BiGene", + "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).", + "modality": "protein", + "type": "bigene_mining", + "datasets": [ + { + "path": "tattabio/modac_paralogy_bigene", + "revision": "241ca6397856e3360da04422d54933035b1fab87" + } + ], + "primary_metric_id": "recall_at_50" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 4.4952467261118094e-7 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 8.984467652322665e-7 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.0006702412868632708 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.07238605898123325 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "precision", + "display_name": "precision", + "description": null, + "value": 0.00138083916941656 + }, + { + "id": "recall", + "display_name": "recall", + "description": null, + "value": 0.004021447721179625 + }, + { + "id": "f1", + "display_name": "f1", + "description": null, + "value": 0.0017770613376799141 + }, + { + "id": "accuracy", + "display_name": "accuracy", + "description": null, + "value": 0.004021447721179625 + }, + { + "id": "recall_at_50", + "display_name": "recall_at_50", + "description": null, + "value": 0.2754691689008043 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/mopb_clustering.json 
b/leaderboard/submissions/prot_t5_xl_uniref50/mopb_clustering.json new file mode 100644 index 0000000..60b0cae --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/mopb_clustering.json @@ -0,0 +1,50 @@ +{ + "task": { + "id": "mopb_clustering", + "display_name": "MopB Clustering", + "description": "Evaluate on MopB clustering task.", + "modality": "protein", + "type": "clustering", + "datasets": [ + { + "path": "tattabio/mopb_clustering", + "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a" + } + ], + "primary_metric_id": "v_measure" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.8721358813405494 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "v_measure", + "display_name": "v_measure", + "description": null, + "value": 0.8476433301105049 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/rpob_arch_phylogeny.json b/leaderboard/submissions/prot_t5_xl_uniref50/rpob_arch_phylogeny.json new file mode 100644 index 0000000..40647fe --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/rpob_arch_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_arch_phylogeny", + "display_name": "RpoB Archaeal Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_arch_phylogeny_sequences", + "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4" + }, + { + "path": "tattabio/rpob_arch_phylogeny_distances", + "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": 
"Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.16530787131181637 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.17285646620196074 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.20022744097724682 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.20022744097724682 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2057601718535701 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.3389561715648331 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.3019024422360054 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.3389561715648331 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/rpob_bac_phylogeny.json b/leaderboard/submissions/prot_t5_xl_uniref50/rpob_bac_phylogeny.json new file mode 100644 index 0000000..4a7f7af --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/rpob_bac_phylogeny.json @@ -0,0 +1,90 @@ +{ + "task": { + "id": "rpob_bac_phylogeny", + "display_name": "RpoB Bacterial Phylogeny", + "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.", + "modality": "protein", + "type": "eds", + "datasets": [ + { + "path": "tattabio/rpob_bac_phylogeny_sequences", + "revision": "b833ef8d8d873ea5387540562873f41d073d3e03" + }, + { + "path": "tattabio/rpob_bac_phylogeny_distances", + "revision": 
"0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7" + } + ], + "primary_metric_id": "top_corr" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.1299405606726582 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.17513769500057838 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.20887166155118908 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.20887166155118908 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim", + "display_name": "cos_sim", + "description": null, + "value": 0.2797577428704427 + }, + { + "id": "manhattan", + "display_name": "manhattan", + "description": null, + "value": 0.2883505010928631 + }, + { + "id": "euclidean", + "display_name": "euclidean", + "description": null, + "value": 0.27367273640477174 + }, + { + "id": "top_corr", + "display_name": "top_corr", + "description": null, + "value": 0.2883505010928631 + } + ] + } + ] +} diff --git a/leaderboard/submissions/prot_t5_xl_uniref50/vibrio_operonic_pair.json b/leaderboard/submissions/prot_t5_xl_uniref50/vibrio_operonic_pair.json new file mode 100644 index 0000000..f882de0 --- /dev/null +++ b/leaderboard/submissions/prot_t5_xl_uniref50/vibrio_operonic_pair.json @@ -0,0 +1,386 @@ +{ + "task": { + "id": "vibrio_operonic_pair", + "display_name": "Vibrio Operonic Pair", + "description": "Evaluate on Vibrio operonic pair classification task.", + "modality": "protein", + "type": "pair_classification", + "datasets": [ + { + "path": "tattabio/vibrio_operonic_pair", + "revision": "24781b12b45bf81a079a6164ef0d2124948c1878" 
+ } + ], + "primary_metric_id": "top_ap" + }, + "model": { + "hf_name": "Rostlab/prot_t5_xl_uniref50", + "revision": "...", + "num_layers": 24, + "num_params": 1208141824, + "embed_dim": 1024 + }, + "dgeb_version": "0.0.0", + "results": [ + { + "layer_number": 12, + "layer_display_name": "12", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.6910221531286436 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9639373421669006 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5603482390185991 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9406915307044983 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.43276283618581907 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.7946127946127947 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5243404260729363 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.6739214924212981 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 4568.9521484375 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5292119565217391 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 7354.16015625 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.3794447150511447 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 
0.8742985409652076 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.4665911372163652 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + "description": null, + "value": 0.6770307034589973 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 226.07009887695312 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.5547385620915032 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 304.6053771972656 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.4360950545921644 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.7620650953984287 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.5035992709654147 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6669257675864749 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 836162.375 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5298930144745122 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 519992.53125 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.36816790555312634 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9450056116722784 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4635132398582616 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 
0.5243404260729363 + } + ] + }, + { + "layer_number": 23, + "layer_display_name": "23", + "metrics": [ + { + "id": "cos_sim_accuracy", + "display_name": "cos_sim_accuracy", + "description": null, + "value": 0.7026816945200155 + }, + { + "id": "cos_sim_accuracy_threshold", + "display_name": "cos_sim_accuracy_threshold", + "description": null, + "value": 0.9759871959686279 + }, + { + "id": "cos_sim_f1", + "display_name": "cos_sim_f1", + "description": null, + "value": 0.5580322828593389 + }, + { + "id": "cos_sim_f1_threshold", + "display_name": "cos_sim_f1_threshold", + "description": null, + "value": 0.9470945596694946 + }, + { + "id": "cos_sim_precision", + "display_name": "cos_sim_precision", + "description": null, + "value": 0.4243132670952659 + }, + { + "id": "cos_sim_recall", + "display_name": "cos_sim_recall", + "description": null, + "value": 0.8148148148148148 + }, + { + "id": "cos_sim_ap", + "display_name": "cos_sim_ap", + "description": null, + "value": 0.5421318920287235 + }, + { + "id": "manhattan_accuracy", + "display_name": "manhattan_accuracy", + "description": null, + "value": 0.7022930431403032 + }, + { + "id": "manhattan_accuracy_threshold", + "display_name": "manhattan_accuracy_threshold", + "description": null, + "value": 17089.51953125 + }, + { + "id": "manhattan_f1", + "display_name": "manhattan_f1", + "description": null, + "value": 0.5625282167042889 + }, + { + "id": "manhattan_f1_threshold", + "display_name": "manhattan_f1_threshold", + "description": null, + "value": 21019.880859375 + }, + { + "id": "manhattan_precision", + "display_name": "manhattan_precision", + "description": null, + "value": 0.4705438066465257 + }, + { + "id": "manhattan_recall", + "display_name": "manhattan_recall", + "description": null, + "value": 0.6992143658810326 + }, + { + "id": "manhattan_ap", + "display_name": "manhattan_ap", + "description": null, + "value": 0.5427024596748751 + }, + { + "id": "euclidean_accuracy", + "display_name": "euclidean_accuracy", + 
"description": null, + "value": 0.6991838321026039 + }, + { + "id": "euclidean_accuracy_threshold", + "display_name": "euclidean_accuracy_threshold", + "description": null, + "value": 779.5919189453125 + }, + { + "id": "euclidean_f1", + "display_name": "euclidean_f1", + "description": null, + "value": 0.565200158541419 + }, + { + "id": "euclidean_f1_threshold", + "display_name": "euclidean_f1_threshold", + "description": null, + "value": 1155.1326904296875 + }, + { + "id": "euclidean_precision", + "display_name": "euclidean_precision", + "description": null, + "value": 0.4368872549019608 + }, + { + "id": "euclidean_recall", + "display_name": "euclidean_recall", + "description": null, + "value": 0.8002244668911336 + }, + { + "id": "euclidean_ap", + "display_name": "euclidean_ap", + "description": null, + "value": 0.541038608227211 + }, + { + "id": "dot_accuracy", + "display_name": "dot_accuracy", + "description": null, + "value": 0.6634279051690634 + }, + { + "id": "dot_accuracy_threshold", + "display_name": "dot_accuracy_threshold", + "description": null, + "value": 14770648.0 + }, + { + "id": "dot_f1", + "display_name": "dot_f1", + "description": null, + "value": 0.5273250239693194 + }, + { + "id": "dot_f1_threshold", + "display_name": "dot_f1_threshold", + "description": null, + "value": 9928615.0 + }, + { + "id": "dot_precision", + "display_name": "dot_precision", + "description": null, + "value": 0.36863270777479895 + }, + { + "id": "dot_recall", + "display_name": "dot_recall", + "description": null, + "value": 0.9259259259259259 + }, + { + "id": "dot_ap", + "display_name": "dot_ap", + "description": null, + "value": 0.4423444299763757 + }, + { + "id": "top_ap", + "display_name": "top_ap", + "description": null, + "value": 0.5427024596748751 + } + ] + } + ] +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..40a0a8e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,129 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] 
+build-backend = "setuptools.build_meta" + +[project] +name = "dgeb" +version = "0.0.10" +description = "Diverse Genomic Embedding Benchmark" +readme = "README.md" +license = { file = "LICENSE" } +keywords = [ + "scientific software", + "genomic embeddings", + "machine learning", + "benchmark", +] +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python", +] +dependencies = [ + "datasets>=2.20.0", + "matplotlib>=3.9.0", + "numpy>=2.0.0", + "pandas>=2.2.2", + "pydantic>=2.7.4", + "pytrec_eval_terrier>=0.5", + "rich>=13.7.1", + "scikit_learn>=1.5.0", + "scipy>=1.13.1", + "seaborn>=0.13.2", + "torch>=2.3.1", + "tqdm>=4.66.4", + "transformers>=4.41.2", +] + +[project.urls] +homepage = "https://github.com/TattaBio/DGEB" +"Huggingface Organization" = "https://huggingface.co/tattabio" +"Source Code" = "https://github.com/TattaBio/DGEB" + +[project.scripts] +dgeb = "dgeb.cli:main" + +[project.optional-dependencies] +dev = ["ruff>=0.0.254", "pytest", "pytest-xdist"] + +[tool.setuptools.packages.find] +exclude = ["tests", "results", "leaderboard", "Dockerfile"] + +[tool.setuptools.package-data] +"*" = ["*.json"] + +[tool.ruff] +target-version = "py38" +exclude = [".venv", "build/"] +line-length = 88 +indent-width = 4 + +[tool.semantic_release] +version_toml = ["pyproject.toml:project.version"] +build_command = "python -m pip install build; python -m build" +commit_message = "{version}\n\nAutomatically generated by python-semantic-release [skip ci]" +logging_use_named_masks = false +major_on_zero = true +allow_zero_version = true +no_git_verify = false +tag_format = "v{version}" + +[tool.semantic_release.branches.main] +match = "(main|master)" +prerelease_token = "rc" +prerelease = false + 
+[tool.semantic_release.changelog] +template_dir = "templates" +changelog_file = "CHANGELOG.md" +exclude_commit_patterns = [] + +[tool.semantic_release.changelog.environment] +block_start_string = "{%" +block_end_string = "%}" +variable_start_string = "{{" +variable_end_string = "}}" +comment_start_string = "{#" +comment_end_string = "#}" +trim_blocks = false +lstrip_blocks = false +newline_sequence = "\n" +keep_trailing_newline = false +extensions = [] +autoescape = true + +[tool.semantic_release.commit_author] +env = "GIT_COMMIT_AUTHOR" +default = "semantic-release " + +[tool.semantic_release.commit_parser_options] +allowed_tags = [ + "build", + "chore", + "ci", + "docs", + "feat", + "fix", + "perf", + "style", + "refactor", + "test", +] +minor_tags = ["feat"] +patch_tags = ["fix", "perf"] +default_bump_level = 0 + +[tool.semantic_release.remote] +name = "origin" +type = "github" +ignore_token_for_push = false +insecure = false + +[tool.semantic_release.publish] +dist_glob_patterns = ["dist/*"] +upload_to_vcs_release = true diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..5ff69db --- /dev/null +++ b/ruff.toml @@ -0,0 +1,8 @@ +exclude = [ + ".venv", + "build/", +] +# Same as Black. 
"""Script to replicate results from the DGEB paper."""

import torch
import dgeb
from functools import partial


ALL_DEVICES = list(range(torch.cuda.device_count()))
DEFAULT_BATCH_SIZE = 64
DEFAULT_SEQ_LEN = 1024


# Model factory pre-loaded with the defaults shared by every run. A call site
# overrides a default by passing the SAME keyword again (e.g. ``batch_size=1``).
get_model = partial(
    dgeb.get_model,
    devices=ALL_DEVICES,
    batch_size=DEFAULT_BATCH_SIZE,
    max_seq_length=DEFAULT_SEQ_LEN,
)


def _run_models(evaluation, model_specs):
    """Run ``evaluation`` once per ``(model_name, overrides)`` pair, in order.

    Each model is built right before it is evaluated rather than instantiating
    all of them up front.
    """
    for model_name, overrides in model_specs:
        evaluation.run(get_model(model_name, **overrides))


def main():
    """Evaluate every protein and DNA model on the tasks of its modality.

    Protein models are run first, then DNA models. Within each group the models
    are evaluated sequentially in the order they are listed.
    """
    ######################### Protein Models #########################
    protein_tasks = dgeb.get_tasks_by_modality(dgeb.Modality.PROTEIN)
    protein_evaluation = dgeb.DGEB(tasks=protein_tasks)
    protein_models = [
        # ESM models.
        ("facebook/esm2_t6_8M_UR50D", {}),
        ("facebook/esm2_t12_35M_UR50D", {}),
        ("facebook/esm2_t30_150M_UR50D", {}),
        ("facebook/esm2_t33_650M_UR50D", {"batch_size": 32}),
        ("facebook/esm2_t36_3B_UR50D", {"batch_size": 1}),
        # ESM3 models.
        ("esm3_sm_open_v1", {"batch_size": 1, "devices": [0]}),
        # ProtT5 models.
        ("Rostlab/prot_t5_xl_uniref50", {"batch_size": 32}),
        ("Rostlab/prot_t5_xl_bfd", {"batch_size": 32}),
        # ProGen2 models.
        ("hugohrban/progen2-small", {}),
        ("hugohrban/progen2-medium", {"batch_size": 32}),
        ("hugohrban/progen2-large", {"batch_size": 1}),
        ("hugohrban/progen2-xlarge", {"batch_size": 1}),
    ]
    _run_models(protein_evaluation, protein_models)

    ######################### DNA Models #########################
    dna_tasks = dgeb.get_tasks_by_modality(dgeb.Modality.DNA)
    dna_evaluation = dgeb.DGEB(tasks=dna_tasks)

    # 131k will OOM so we use half this length.
    evo_131k_max_seq_len = 131072 // 2
    dna_models = [
        # Evo models.
        # NOTE: the sequence length must be passed as ``max_seq_length`` -- the
        # keyword the ``get_model`` partial above uses for its default -- so
        # that it actually replaces DEFAULT_SEQ_LEN. The script previously
        # passed ``seq_len``, which does not override that default.
        (
            "togethercomputer/evo-1-8k-base",
            {"batch_size": 1, "max_seq_length": 8192, "devices": [0]},
        ),
        (
            "togethercomputer/evo-1-131k-base",
            {
                "batch_size": 1,
                "max_seq_length": evo_131k_max_seq_len,
                "devices": [0],
            },
        ),
        # Nucleotide Transformer models.
        ("InstaDeepAI/nucleotide-transformer-v2-50m-multi-species", {}),
        ("InstaDeepAI/nucleotide-transformer-v2-100m-multi-species", {}),
        ("InstaDeepAI/nucleotide-transformer-v2-250m-multi-species", {}),
        ("InstaDeepAI/nucleotide-transformer-v2-500m-multi-species", {}),
        ("InstaDeepAI/nucleotide-transformer-2.5b-multi-species", {"batch_size": 1}),
    ]
    _run_models(dna_evaluation, dna_models)


if __name__ == "__main__":
    main()
"""
Given a directory of results, plot the benchmarks for each task as a bar chart and line chart.
"""

import argparse
import os
from typing import Optional

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from dgeb import TaskResult, get_all_tasks, get_output_folder, get_tasks_by_name

ALL_TASKS = [task.metadata.id for task in get_all_tasks()]


def plot_benchmarks(
    results_dir,
    task_ids: Optional[list[str]] = None,
    output="benchmarks.png",
    model_substring=None,
):
    """Plot the best primary-metric score of every model for each task.

    The figure has one column per task: the top row is a bar chart of score per
    model, the bottom row a line chart of score against number of parameters.

    Args:
        results_dir: Directory whose entries are treated as model names.
        task_ids: Task ids to plot; ``None`` plots all tasks.
        output: Path the figure is saved to.
        model_substring: Optional list of substrings; a model is kept only if
            its name contains at least one of them.

    Raises:
        ValueError: If no result with a primary-metric score is found, since
            there would be nothing to plot.
    """
    tasks = get_all_tasks() if task_ids is None else get_tasks_by_name(task_ids)
    all_results = []
    for model_name in os.listdir(results_dir):
        if model_substring is not None and not any(
            substr in model_name for substr in model_substring
        ):
            continue

        for task in tasks:
            if task.metadata.display_name == "NoOp Task":
                continue
            filepath = get_output_folder(model_name, task, results_dir, create=False)
            # if the file does not exist, skip
            if not os.path.exists(filepath):
                continue

            with open(filepath, encoding="utf-8") as f:
                task_result = TaskResult.model_validate_json(f.read())
            primary_metric_id = task_result.task.primary_metric_id
            main_scores = [
                metric.value
                for layer_result in task_result.results
                for metric in layer_result.metrics
                if metric.id == primary_metric_id
            ]
            # A result without the primary metric has no score to plot; skip it
            # instead of letting max() fail on an empty list.
            if not main_scores:
                continue
            all_results.append(
                {
                    "task": task.metadata.display_name,
                    "model": model_name,
                    "num_params": task_result.model["num_params"],
                    "score": max(main_scores),
                }
            )

    if not all_results:
        raise ValueError(f"No benchmark results to plot in {results_dir!r}")

    results_df = pd.DataFrame(all_results)
    # order the models by ascending number of parameters
    results_df["num_params"] = results_df["num_params"].astype(int)
    results_df = results_df.sort_values(by="num_params")

    # Sorted so the column order of the figure is the same on every run.
    task_names = sorted(results_df["task"].unique())
    n_tasks = len(task_names)

    # squeeze=False always yields a 2-D axes array, so the single-task case
    # needs no special handling.
    _, ax = plt.subplots(2, n_tasks, figsize=(5 * n_tasks, 10), squeeze=False)

    for i, task_name in enumerate(task_names):
        task_df = results_df[results_df["task"] == task_name]

        # first row: score per model
        bar_ax = ax[0][i]
        sns.barplot(x="model", y="score", data=task_df, ax=bar_ax)
        bar_ax.set_title(task_name)
        # rotate the x axis labels
        for tick in bar_ax.get_xticklabels():
            tick.set_rotation(90)

        # second row: score against number of parameters
        line_ax = ax[1][i]
        sns.lineplot(x="num_params", y="score", data=task_df, ax=line_ax)
        line_ax.set_title(task_name)
        line_ax.set_xlabel("Number of parameters")

    plt.tight_layout()
    plt.savefig(output)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--results_dir",
        type=str,
        default="results",
        help="Directory containing the results of the benchmarking",
    )
    parser.add_argument(
        "-t",
        "--tasks",
        type=lambda s: s.split(","),
        default=None,
        help=f"Comma separated list of tasks to plot. Choose from {ALL_TASKS} or do not specify to plot all tasks. ",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default="benchmarks.png",
        help="Output file for the plot",
    )
    parser.add_argument(
        "--model_substring",
        type=lambda s: s.split(","),
        default=None,
        help="Comma separated list of model substrings. Only plot results for models containing this substring",
    )
    args = parser.parse_args()

    plot_benchmarks(args.results_dir, args.tasks, args.output, args.model_substring)