Skip to content

Commit

Permalink
Merge pull request #4 from TattaBio/andre
Browse files Browse the repository at this point in the history
Add dataset revisions
  • Loading branch information
y-hwang authored Jul 3, 2024
2 parents ade30a8 + 7e069a2 commit 95b6f11
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 31 deletions.
4 changes: 2 additions & 2 deletions dgeb/tasks/bigene_mining_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class BacArchBiGeneMining(Task):
datasets=[
Dataset(
path="tattabio/bac_arch_bigene",
-revision="main",
+revision="d5a65e44bae43a9ba9f2fdc03056dff9c12f6631",
)
],
primary_metric_id="f1",
Expand All @@ -67,7 +67,7 @@ class ModACParalogyBiGeneMining(Task):
datasets=[
Dataset(
path="tattabio/modac_paralogy_bigene",
-revision="main",
+revision="241ca6397856e3360da04422d54933035b1fab87",
)
],
primary_metric_id="recall_at_5",
Expand Down
15 changes: 10 additions & 5 deletions dgeb/tasks/classification_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class EnzymeCommissionClassification(Task):
datasets=[
Dataset(
path="tattabio/ec_classification",
-revision="main",
+revision="d83aba56d785df48bd3b4acafc536ff8c03e7d98",
)
],
primary_metric_id="f1",
Expand All @@ -105,7 +105,7 @@ class EnzymeCommissionDNAClassification(Task):
datasets=[
Dataset(
path="tattabio/ec_classification_dna",
-revision="main",
+revision="e78328541bb16e7cda16830d9844c09cbf4e682d",
)
],
primary_metric_id="f1",
Expand All @@ -125,7 +125,7 @@ class ConvergentEnzymesClassification(Task):
datasets=[
Dataset(
path="tattabio/convergent_enzymes",
-revision="main",
+revision="37f75609f54de2bc0911ccb72faf1c2f5a4285aa",
)
],
primary_metric_id="f1",
Expand Down Expand Up @@ -180,7 +180,12 @@ class MIBiGProteinClassification(Task):
description="Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
type="classification",
modality=Modality.PROTEIN,
-datasets=[Dataset(path="tattabio/mibig_classification_prot", revision="main")],
+datasets=[
+Dataset(
+path="tattabio/mibig_classification_prot",
+revision="915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f",
+)
+],
primary_metric_id="f1",
)

Expand All @@ -198,7 +203,7 @@ class MIBiGDNAClassification(Task):
datasets=[
Dataset(
path="tattabio/mibig_classification_dna",
-revision="main",
+revision="b5ca7a76d469e4e66c46f1b655903972571e6b61",
)
],
primary_metric_id="f1",
Expand Down
4 changes: 2 additions & 2 deletions dgeb/tasks/clustering_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class RNAclustering(Task):
datasets=[
Dataset(
path="tattabio/e_coli_rnas",
-revision="main",
+revision="4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6",
)
],
primary_metric_id="v_measure",
Expand All @@ -60,7 +60,7 @@ class MopBClustering(Task):
datasets=[
Dataset(
path="tattabio/mopb_clustering",
-revision="main",
+revision="eed4bfff9c5bd2dc2500c50757bfcb90425d999a",
)
],
primary_metric_id="v_measure",
Expand Down
24 changes: 12 additions & 12 deletions dgeb/tasks/eds_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ class RpobBacPhylogeny(Task):
datasets=[
Dataset(
path="tattabio/rpob_bac_phylogeny_sequences",
-revision="main",
+revision="b833ef8d8d873ea5387540562873f41d073d3e03",
),
Dataset(
path="tattabio/rpob_bac_phylogeny_distances",
-revision="main",
+revision="0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7",
),
],
primary_metric_id="top_corr",
Expand All @@ -88,11 +88,11 @@ class RpobArchPhylogeny(Task):
datasets=[
Dataset(
path="tattabio/rpob_arch_phylogeny_sequences",
-revision="main",
+revision="10de75b9f5ad12340d629fd1ad015ef4319d6ee4",
),
Dataset(
path="tattabio/rpob_arch_phylogeny_distances",
-revision="main",
+revision="2a585f0e135fe74b8ae6d31e7801c6031b0dcc18",
),
],
primary_metric_id="top_corr",
Expand All @@ -112,11 +112,11 @@ class FeFePhylogeny(Task):
datasets=[
Dataset(
path="tattabio/fefe_phylogeny_sequences",
-revision="main",
+revision="bce06d79d9ce58413e7bcbed6943905d1afb8b26",
),
Dataset(
path="tattabio/fefe_phylogeny_distances",
-revision="main",
+revision="d6357cee9b4071a8dcdeef54083006f0d5e94fd2",
),
],
primary_metric_id="top_corr",
Expand All @@ -136,11 +136,11 @@ class Bac16SPhylogeny(Task):
datasets=[
Dataset(
path="tattabio/bac_16S_sequences",
-revision="main",
+revision="efde1456b86748909cbcfecb07d783756d570aa3",
),
Dataset(
path="tattabio/bac_16S_distances",
-revision="main",
+revision="5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3",
),
],
primary_metric_id="top_corr",
Expand All @@ -160,11 +160,11 @@ class Arch16SPhylogeny(Task):
datasets=[
Dataset(
path="tattabio/arch_16S_sequences",
-revision="main",
+revision="e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0",
),
Dataset(
path="tattabio/arch_16S_distances",
-revision="main",
+revision="b0356b632a954be70cefd57e3a02e7e1ccd34408",
),
],
primary_metric_id="top_corr",
Expand All @@ -184,11 +184,11 @@ class Euk18SPhylogeny(Task):
datasets=[
Dataset(
path="tattabio/euk_18S_sequences",
-revision="main",
+revision="5174cb3b2c5c46b61307fd1c2c08f5c432655196",
),
Dataset(
path="tattabio/euk_18S_distances",
-revision="main",
+revision="c4cea4fbb1185d08e0e01fd28ffb8b06a25025da",
),
],
primary_metric_id="top_corr",
Expand Down
6 changes: 3 additions & 3 deletions dgeb/tasks/pair_classification_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class EcoliOperon(Task):
datasets=[
Dataset(
path="tattabio/ecoli_operonic_pair",
-revision="main",
+revision="a62c01143a842696fc8200b91c1acb825e8cb891",
)
],
primary_metric_id="top_ap",
Expand All @@ -66,7 +66,7 @@ class CyanoOperonPair(Task):
datasets=[
Dataset(
path="tattabio/cyano_operonic_pair",
-revision="main",
+revision="eeb4cb71ec2a4ff688af9de7c0662123577d32ec",
)
],
primary_metric_id="top_ap",
Expand All @@ -86,7 +86,7 @@ class VibrioOperonPair(Task):
datasets=[
Dataset(
path="tattabio/vibrio_operonic_pair",
-revision="main",
+revision="24781b12b45bf81a079a6164ef0d2124948c1878",
)
],
primary_metric_id="top_ap",
Expand Down
8 changes: 4 additions & 4 deletions dgeb/tasks/retrieval_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ class ArchRetrieval(Task):
datasets=[
Dataset(
path="tattabio/arch_retrieval",
-revision="main",
+revision="a19124322604a21b26b1b3c13a1bd0b8a63c9f7b",
),
Dataset(
path="tattabio/arch_retrieval_qrels",
description="Relevance between query and corpus proteins",
-revision="main",
+revision="3f142f2f9a0995d56c6e77188c7251761450afcf",
),
],
primary_metric_id="map_at_5",
Expand All @@ -83,12 +83,12 @@ class EukRetrieval(Task):
datasets=[
Dataset(
path="tattabio/euk_retrieval",
-revision="main",
+revision="c93dc56665cedd19fbeaea9ace146f2474c895f0",
),
Dataset(
path="tattabio/euk_retrieval_qrels",
description="Relevance between query and corpus proteins",
-revision="main",
+revision="a5aa01e9b9738074aba57fc07434e352c4c71e4b",
),
],
primary_metric_id="map_at_5",
Expand Down
4 changes: 1 addition & 3 deletions dgeb/tasks/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
-"""Task functions for evaluation.
-# TODO: Add dataset revisions.
-"""
+"""Task abstract class for evaluation and results."""

import logging
from abc import ABC, abstractmethod
Expand Down

0 comments on commit 95b6f11

Please sign in to comment.