diff --git a/README.md b/README.md index a77f005..71d103a 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,17 @@ -

Genomic Embedding Benchmark

+

Diverse Genomic Embedding Benchmark

- - GitHub release + + GitHub release arXiv URL - - License + + License - - Downloads + + Downloads

@@ -19,53 +19,51 @@

Installation | Usage | - Leaderboard | + Leaderboard | Documentation | Citing

- +

## Installation TODO(joshua): + ```bash -pip install geb +pip install dgeb ``` ## Usage -- Using the python script (see [run_geb.py](https://github.com/tattabio/geb/blob/main/run_geb.py)): +- Using the python script (see [run_dgeb.py](https://github.com/tattabio/geb/blob/main/run_dgeb.py)): ```bash -python run_geb.py --model facebook/esm2_t6_8M_UR50D +python run_dgeb.py --model facebook/esm2_t6_8M_UR50D ``` - - Using the python API: ```py -import geb +import dgeb -model = geb.get_model("facebook/esm2_t6_8M_UR50D") -tasks = geb.get_tasks_by_modality(geb.Modality.PROTEIN) -evaluation = geb.GEB(tasks=tasks) +model = dgeb.get_model("facebook/esm2_t6_8M_UR50D") +tasks = dgeb.get_tasks_by_modality(dgeb.Modality.PROTEIN) +evaluation = dgeb.DGEB(tasks=tasks) evaluation.run(model, output_folder="results") ``` - ### Using a custom model -Custom models should be wrapped with the `geb.models.BioSeqTransformer` abstract class, and specify the modality, number of layers, and embedding dimension. See see [models.py](https://github.com/tattabio/geb/blob/main/geb/models.py) for additional examples on custom model loading and inference. - +Custom models should be wrapped with the `dgeb.models.BioSeqTransformer` abstract class, and specify the modality, number of layers, and embedding dimension. See [models.py](https://github.com/tattabio/geb/blob/main/dgeb/models.py) for additional examples on custom model loading and inference. 
```python -import geb -from geb.models import BioSeqTransformer -from geb.modality import Modality +import dgeb +from dgeb.models import BioSeqTransformer +from dgeb.modality import Modality class MyModel(BioSeqTransformer): @@ -83,20 +81,20 @@ class MyModel(BioSeqTransformer): model = MyModel() -tasks = geb.get_tasks_by_modality(model.modality) +tasks = dgeb.get_tasks_by_modality(model.modality) evaluation = MTEB(tasks=tasks) evaluation.run(model) ``` ### Evaluating on a custom dataset -TODO(andre): Update this section +TODO(andre): Update this section To evaluate on a custom task, you can run the following code on your custom task. ```python -import geb -from geb.tasks import AbsTask +import dgeb +from dgeb.tasks import AbsTask class MyCustomTask(AbsTask): def run( @@ -104,8 +102,8 @@ class MyCustomTask(AbsTask): ) -> TaskResult: pass -model = geb.models.ESM("facebook/esm2_t6_8M_UR50D") -evaluation = geb.GEB(tasks=[MyCustomTask()]) +model = dgeb.models.ESM("facebook/esm2_t6_8M_UR50D") +evaluation = dgeb.DGEB(tasks=[MyCustomTask()]) evaluation.run(model) ``` @@ -113,8 +111,8 @@ evaluation.run(model) ## Citing -GEB was introduced in "[GEB: Genomic Embedding Benchmark]()", feel free to cite: +DGEB was introduced in "[DGEB: Diverse Genomic Embedding Benchmark]()", feel free to cite: TODO(andre): bibtex -For works that have used GEB for benchmarking, you can find them on the [leaderboard](https://huggingface.co/spaces/tattabio/DGEB/leaderboard). +For works that have used DGEB for benchmarking, you can find them on the [leaderboard](https://huggingface.co/spaces/tattabio/DGEB/leaderboard). 
diff --git a/geb/__init__.py b/dgeb/__init__.py similarity index 100% rename from geb/__init__.py rename to dgeb/__init__.py diff --git a/geb/geb.py b/dgeb/dgeb.py similarity index 100% rename from geb/geb.py rename to dgeb/dgeb.py diff --git a/geb/eval_utils.py b/dgeb/eval_utils.py similarity index 100% rename from geb/eval_utils.py rename to dgeb/eval_utils.py diff --git a/geb/evaluators.py b/dgeb/evaluators.py similarity index 100% rename from geb/evaluators.py rename to dgeb/evaluators.py diff --git a/geb/modality.py b/dgeb/modality.py similarity index 100% rename from geb/modality.py rename to dgeb/modality.py diff --git a/geb/models.py b/dgeb/models.py similarity index 100% rename from geb/models.py rename to dgeb/models.py diff --git a/geb/results.py b/dgeb/results.py similarity index 100% rename from geb/results.py rename to dgeb/results.py diff --git a/geb/tasks/__init__.py b/dgeb/tasks/__init__.py similarity index 100% rename from geb/tasks/__init__.py rename to dgeb/tasks/__init__.py diff --git a/geb/tasks/bigene_mining_tasks.py b/dgeb/tasks/bigene_mining_tasks.py similarity index 100% rename from geb/tasks/bigene_mining_tasks.py rename to dgeb/tasks/bigene_mining_tasks.py diff --git a/geb/tasks/classification_tasks.py b/dgeb/tasks/classification_tasks.py similarity index 100% rename from geb/tasks/classification_tasks.py rename to dgeb/tasks/classification_tasks.py diff --git a/geb/tasks/clustering_tasks.py b/dgeb/tasks/clustering_tasks.py similarity index 100% rename from geb/tasks/clustering_tasks.py rename to dgeb/tasks/clustering_tasks.py diff --git a/geb/tasks/eds_tasks.py b/dgeb/tasks/eds_tasks.py similarity index 100% rename from geb/tasks/eds_tasks.py rename to dgeb/tasks/eds_tasks.py diff --git a/geb/tasks/pair_classification_tasks.py b/dgeb/tasks/pair_classification_tasks.py similarity index 100% rename from geb/tasks/pair_classification_tasks.py rename to dgeb/tasks/pair_classification_tasks.py diff --git a/geb/tasks/retrieval_tasks.py 
b/dgeb/tasks/retrieval_tasks.py similarity index 100% rename from geb/tasks/retrieval_tasks.py rename to dgeb/tasks/retrieval_tasks.py diff --git a/geb/tasks/tasks.py b/dgeb/tasks/tasks.py similarity index 100% rename from geb/tasks/tasks.py rename to dgeb/tasks/tasks.py diff --git a/plot_benchmarks.py b/plot_benchmarks.py index 1d75ceb..7de5e30 100644 --- a/plot_benchmarks.py +++ b/plot_benchmarks.py @@ -10,8 +10,8 @@ import pandas as pd import seaborn as sns -from geb.geb import get_all_tasks, get_output_folder, get_tasks_by_name -from geb.tasks.tasks import TaskResult +from dgeb.dgeb import get_all_tasks, get_output_folder, get_tasks_by_name +from dgeb.tasks.tasks import TaskResult ALL_TASKS = [task.metadata.id for task in get_all_tasks()] diff --git a/run_geb.py b/run_dgeb.py similarity index 100% rename from run_geb.py rename to run_dgeb.py