diff --git a/dgeb/tasks/bigene_mining_tasks.py b/dgeb/tasks/bigene_mining_tasks.py index 4ac2401..4c56518 100644 --- a/dgeb/tasks/bigene_mining_tasks.py +++ b/dgeb/tasks/bigene_mining_tasks.py @@ -43,7 +43,7 @@ class BacArchBiGeneMining(Task): datasets=[ Dataset( path="tattabio/bac_arch_bigene", - revision="main", + revision="d5a65e44bae43a9ba9f2fdc03056dff9c12f6631", ) ], primary_metric_id="f1", @@ -67,7 +67,7 @@ class ModACParalogyBiGeneMining(Task): datasets=[ Dataset( path="tattabio/modac_paralogy_bigene", - revision="main", + revision="241ca6397856e3360da04422d54933035b1fab87", ) ], primary_metric_id="recall_at_5", diff --git a/dgeb/tasks/classification_tasks.py b/dgeb/tasks/classification_tasks.py index 29d5bd5..67bc6ba 100644 --- a/dgeb/tasks/classification_tasks.py +++ b/dgeb/tasks/classification_tasks.py @@ -85,7 +85,7 @@ class EnzymeCommissionClassification(Task): datasets=[ Dataset( path="tattabio/ec_classification", - revision="main", + revision="d83aba56d785df48bd3b4acafc536ff8c03e7d98", ) ], primary_metric_id="f1", @@ -105,7 +105,7 @@ class EnzymeCommissionDNAClassification(Task): datasets=[ Dataset( path="tattabio/ec_classification_dna", - revision="main", + revision="e78328541bb16e7cda16830d9844c09cbf4e682d", ) ], primary_metric_id="f1", @@ -125,7 +125,7 @@ class ConvergentEnzymesClassification(Task): datasets=[ Dataset( path="tattabio/convergent_enzymes", - revision="main", + revision="37f75609f54de2bc0911ccb72faf1c2f5a4285aa", ) ], primary_metric_id="f1", @@ -180,7 +180,12 @@ class MIBiGProteinClassification(Task): description="Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.", type="classification", modality=Modality.PROTEIN, - datasets=[Dataset(path="tattabio/mibig_classification_prot", revision="main")], + datasets=[ + Dataset( + path="tattabio/mibig_classification_prot", + revision="915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f", + ) + ], primary_metric_id="f1", ) @@ -198,7 +203,7 @@ class MIBiGDNAClassification(Task): datasets=[ Dataset( path="tattabio/mibig_classification_dna", - revision="main", + revision="b5ca7a76d469e4e66c46f1b655903972571e6b61", ) ], primary_metric_id="f1", diff --git a/dgeb/tasks/clustering_tasks.py b/dgeb/tasks/clustering_tasks.py index 1411857..ba441f8 100644 --- a/dgeb/tasks/clustering_tasks.py +++ b/dgeb/tasks/clustering_tasks.py @@ -40,7 +40,7 @@ class RNAclustering(Task): datasets=[ Dataset( path="tattabio/e_coli_rnas", - revision="main", + revision="4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6", ) ], primary_metric_id="v_measure", @@ -60,7 +60,7 @@ class MopBClustering(Task): datasets=[ Dataset( path="tattabio/mopb_clustering", - revision="main", + revision="eed4bfff9c5bd2dc2500c50757bfcb90425d999a", ) ], primary_metric_id="v_measure", diff --git a/dgeb/tasks/eds_tasks.py b/dgeb/tasks/eds_tasks.py index c255a20..b26dea5 100644 --- a/dgeb/tasks/eds_tasks.py +++ b/dgeb/tasks/eds_tasks.py @@ -64,11 +64,11 @@ class RpobBacPhylogeny(Task): datasets=[ Dataset( path="tattabio/rpob_bac_phylogeny_sequences", - revision="main", + revision="b833ef8d8d873ea5387540562873f41d073d3e03", ), Dataset( path="tattabio/rpob_bac_phylogeny_distances", - revision="main", + revision="0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7", ), ], primary_metric_id="top_corr", @@ -88,11 +88,11 @@ class RpobArchPhylogeny(Task): datasets=[ Dataset( path="tattabio/rpob_arch_phylogeny_sequences", - revision="main", + revision="10de75b9f5ad12340d629fd1ad015ef4319d6ee4", ), Dataset( path="tattabio/rpob_arch_phylogeny_distances", - revision="main", + revision="2a585f0e135fe74b8ae6d31e7801c6031b0dcc18", ), ], primary_metric_id="top_corr", @@ -112,11 +112,11 @@ class FeFePhylogeny(Task): datasets=[ Dataset( path="tattabio/fefe_phylogeny_sequences", - revision="main", + revision="bce06d79d9ce58413e7bcbed6943905d1afb8b26", ), Dataset( path="tattabio/fefe_phylogeny_distances", - revision="main", + revision="d6357cee9b4071a8dcdeef54083006f0d5e94fd2", ), ], primary_metric_id="top_corr", @@ -136,11 +136,11 @@ class Bac16SPhylogeny(Task): datasets=[ Dataset( path="tattabio/bac_16S_sequences", - revision="main", + revision="efde1456b86748909cbcfecb07d783756d570aa3", ), Dataset( path="tattabio/bac_16S_distances", - revision="main", + revision="5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3", ), ], primary_metric_id="top_corr", @@ -160,11 +160,11 @@ class Arch16SPhylogeny(Task): datasets=[ Dataset( path="tattabio/arch_16S_sequences", - revision="main", + revision="e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0", ), Dataset( path="tattabio/arch_16S_distances", - revision="main", + revision="b0356b632a954be70cefd57e3a02e7e1ccd34408", ), ], primary_metric_id="top_corr", @@ -184,11 +184,11 @@ class Euk18SPhylogeny(Task): datasets=[ Dataset( path="tattabio/euk_18S_sequences", - revision="main", + revision="5174cb3b2c5c46b61307fd1c2c08f5c432655196", ), Dataset( path="tattabio/euk_18S_distances", - revision="main", + revision="c4cea4fbb1185d08e0e01fd28ffb8b06a25025da", ), ], primary_metric_id="top_corr", diff --git a/dgeb/tasks/pair_classification_tasks.py b/dgeb/tasks/pair_classification_tasks.py index 4029307..6b34b57 100644 --- a/dgeb/tasks/pair_classification_tasks.py +++ b/dgeb/tasks/pair_classification_tasks.py @@ -46,7 +46,7 @@ class EcoliOperon(Task): datasets=[ Dataset( path="tattabio/ecoli_operonic_pair", - revision="main", + revision="a62c01143a842696fc8200b91c1acb825e8cb891", ) ], primary_metric_id="top_ap", @@ -66,7 +66,7 @@ class CyanoOperonPair(Task): datasets=[ Dataset( path="tattabio/cyano_operonic_pair", - revision="main", + revision="eeb4cb71ec2a4ff688af9de7c0662123577d32ec", ) ], primary_metric_id="top_ap", @@ -86,7 +86,7 @@ class VibrioOperonPair(Task): datasets=[ Dataset( path="tattabio/vibrio_operonic_pair", - revision="main", + revision="24781b12b45bf81a079a6164ef0d2124948c1878", ) ], primary_metric_id="top_ap", diff --git a/dgeb/tasks/retrieval_tasks.py b/dgeb/tasks/retrieval_tasks.py index 685d944..e0eb613 100644 --- a/dgeb/tasks/retrieval_tasks.py +++ b/dgeb/tasks/retrieval_tasks.py @@ -58,12 +58,12 @@ class ArchRetrieval(Task): datasets=[ Dataset( path="tattabio/arch_retrieval", - revision="main", + revision="a19124322604a21b26b1b3c13a1bd0b8a63c9f7b", ), Dataset( path="tattabio/arch_retrieval_qrels", description="Relevance between query and corpus proteins", - revision="main", + revision="3f142f2f9a0995d56c6e77188c7251761450afcf", ), ], primary_metric_id="map_at_5", @@ -83,12 +83,12 @@ class EukRetrieval(Task): datasets=[ Dataset( path="tattabio/euk_retrieval", - revision="main", + revision="c93dc56665cedd19fbeaea9ace146f2474c895f0", ), Dataset( path="tattabio/euk_retrieval_qrels", description="Relevance between query and corpus proteins", - revision="main", + revision="a5aa01e9b9738074aba57fc07434e352c4c71e4b", ), ], primary_metric_id="map_at_5", diff --git a/dgeb/tasks/tasks.py b/dgeb/tasks/tasks.py index 6f7dc76..f773635 100644 --- a/dgeb/tasks/tasks.py +++ b/dgeb/tasks/tasks.py @@ -1,6 +1,4 @@ -"""Task functions for evaluation. -# TODO: Add dataset revisions. -""" +"""Task abstract class for evaluation and results.""" import logging from abc import ABC, abstractmethod