anagrams1 |
|
✓ |
|
10000 |
acc |
anagrams2 |
|
✓ |
|
10000 |
acc |
anli_r1 |
✓ |
✓ |
✓ |
1000 |
acc |
anli_r2 |
✓ |
✓ |
✓ |
1000 |
acc |
anli_r3 |
✓ |
✓ |
✓ |
1200 |
acc |
arc_challenge |
✓ |
✓ |
✓ |
1172 |
acc, acc_norm |
arc_easy |
✓ |
✓ |
✓ |
2376 |
acc, acc_norm |
arithmetic_1dc |
|
✓ |
|
2000 |
acc |
arithmetic_2da |
|
✓ |
|
2000 |
acc |
arithmetic_2dm |
|
✓ |
|
2000 |
acc |
arithmetic_2ds |
|
✓ |
|
2000 |
acc |
arithmetic_3da |
|
✓ |
|
2000 |
acc |
arithmetic_3ds |
|
✓ |
|
2000 |
acc |
arithmetic_4da |
|
✓ |
|
2000 |
acc |
arithmetic_4ds |
|
✓ |
|
2000 |
acc |
arithmetic_5da |
|
✓ |
|
2000 |
acc |
arithmetic_5ds |
|
✓ |
|
2000 |
acc |
bigbench_causal_judgement |
|
|
✓ |
190 |
multiple_choice_grade, exact_str_match |
bigbench_date_understanding |
|
|
✓ |
369 |
multiple_choice_grade, exact_str_match |
bigbench_disambiguation_qa |
|
|
✓ |
258 |
multiple_choice_grade, exact_str_match |
bigbench_dyck_languages |
|
|
✓ |
1000 |
multiple_choice_grade, exact_str_match |
bigbench_formal_fallacies_syllogisms_negation |
|
|
✓ |
14200 |
multiple_choice_grade, exact_str_match |
bigbench_geometric_shapes |
|
|
✓ |
359 |
multiple_choice_grade, exact_str_match |
bigbench_hyperbaton |
|
|
✓ |
50000 |
multiple_choice_grade, exact_str_match |
bigbench_logical_deduction_five_objects |
|
|
✓ |
500 |
multiple_choice_grade, exact_str_match |
bigbench_logical_deduction_seven_objects |
|
|
✓ |
700 |
multiple_choice_grade, exact_str_match |
bigbench_logical_deduction_three_objects |
|
|
✓ |
300 |
multiple_choice_grade, exact_str_match |
bigbench_movie_recommendation |
|
|
✓ |
500 |
multiple_choice_grade, exact_str_match |
bigbench_navigate |
|
|
✓ |
1000 |
multiple_choice_grade, exact_str_match |
bigbench_reasoning_about_colored_objects |
|
|
✓ |
2000 |
multiple_choice_grade, exact_str_match |
bigbench_ruin_names |
|
|
✓ |
448 |
multiple_choice_grade, exact_str_match |
bigbench_salient_translation_error_detection |
|
|
✓ |
998 |
multiple_choice_grade, exact_str_match |
bigbench_snarks |
|
|
✓ |
181 |
multiple_choice_grade, exact_str_match |
bigbench_sports_understanding |
|
|
✓ |
986 |
multiple_choice_grade, exact_str_match |
bigbench_temporal_sequences |
|
|
✓ |
1000 |
multiple_choice_grade, exact_str_match |
bigbench_tracking_shuffled_objects_five_objects |
|
|
✓ |
1250 |
multiple_choice_grade, exact_str_match |
bigbench_tracking_shuffled_objects_seven_objects |
|
|
✓ |
1750 |
multiple_choice_grade, exact_str_match |
bigbench_tracking_shuffled_objects_three_objects |
|
|
✓ |
300 |
multiple_choice_grade, exact_str_match |
blimp_adjunct_island |
|
✓ |
|
1000 |
acc |
blimp_anaphor_gender_agreement |
|
✓ |
|
1000 |
acc |
blimp_anaphor_number_agreement |
|
✓ |
|
1000 |
acc |
blimp_animate_subject_passive |
|
✓ |
|
1000 |
acc |
blimp_animate_subject_trans |
|
✓ |
|
1000 |
acc |
blimp_causative |
|
✓ |
|
1000 |
acc |
blimp_complex_NP_island |
|
✓ |
|
1000 |
acc |
blimp_coordinate_structure_constraint_complex_left_branch |
|
✓ |
|
1000 |
acc |
blimp_coordinate_structure_constraint_object_extraction |
|
✓ |
|
1000 |
acc |
blimp_determiner_noun_agreement_1 |
|
✓ |
|
1000 |
acc |
blimp_determiner_noun_agreement_2 |
|
✓ |
|
1000 |
acc |
blimp_determiner_noun_agreement_irregular_1 |
|
✓ |
|
1000 |
acc |
blimp_determiner_noun_agreement_irregular_2 |
|
✓ |
|
1000 |
acc |
blimp_determiner_noun_agreement_with_adj_2 |
|
✓ |
|
1000 |
acc |
blimp_determiner_noun_agreement_with_adj_irregular_1 |
|
✓ |
|
1000 |
acc |
blimp_determiner_noun_agreement_with_adj_irregular_2 |
|
✓ |
|
1000 |
acc |
blimp_determiner_noun_agreement_with_adjective_1 |
|
✓ |
|
1000 |
acc |
blimp_distractor_agreement_relational_noun |
|
✓ |
|
1000 |
acc |
blimp_distractor_agreement_relative_clause |
|
✓ |
|
1000 |
acc |
blimp_drop_argument |
|
✓ |
|
1000 |
acc |
blimp_ellipsis_n_bar_1 |
|
✓ |
|
1000 |
acc |
blimp_ellipsis_n_bar_2 |
|
✓ |
|
1000 |
acc |
blimp_existential_there_object_raising |
|
✓ |
|
1000 |
acc |
blimp_existential_there_quantifiers_1 |
|
✓ |
|
1000 |
acc |
blimp_existential_there_quantifiers_2 |
|
✓ |
|
1000 |
acc |
blimp_existential_there_subject_raising |
|
✓ |
|
1000 |
acc |
blimp_expletive_it_object_raising |
|
✓ |
|
1000 |
acc |
blimp_inchoative |
|
✓ |
|
1000 |
acc |
blimp_intransitive |
|
✓ |
|
1000 |
acc |
blimp_irregular_past_participle_adjectives |
|
✓ |
|
1000 |
acc |
blimp_irregular_past_participle_verbs |
|
✓ |
|
1000 |
acc |
blimp_irregular_plural_subject_verb_agreement_1 |
|
✓ |
|
1000 |
acc |
blimp_irregular_plural_subject_verb_agreement_2 |
|
✓ |
|
1000 |
acc |
blimp_left_branch_island_echo_question |
|
✓ |
|
1000 |
acc |
blimp_left_branch_island_simple_question |
|
✓ |
|
1000 |
acc |
blimp_matrix_question_npi_licensor_present |
|
✓ |
|
1000 |
acc |
blimp_npi_present_1 |
|
✓ |
|
1000 |
acc |
blimp_npi_present_2 |
|
✓ |
|
1000 |
acc |
blimp_only_npi_licensor_present |
|
✓ |
|
1000 |
acc |
blimp_only_npi_scope |
|
✓ |
|
1000 |
acc |
blimp_passive_1 |
|
✓ |
|
1000 |
acc |
blimp_passive_2 |
|
✓ |
|
1000 |
acc |
blimp_principle_A_c_command |
|
✓ |
|
1000 |
acc |
blimp_principle_A_case_1 |
|
✓ |
|
1000 |
acc |
blimp_principle_A_case_2 |
|
✓ |
|
1000 |
acc |
blimp_principle_A_domain_1 |
|
✓ |
|
1000 |
acc |
blimp_principle_A_domain_2 |
|
✓ |
|
1000 |
acc |
blimp_principle_A_domain_3 |
|
✓ |
|
1000 |
acc |
blimp_principle_A_reconstruction |
|
✓ |
|
1000 |
acc |
blimp_regular_plural_subject_verb_agreement_1 |
|
✓ |
|
1000 |
acc |
blimp_regular_plural_subject_verb_agreement_2 |
|
✓ |
|
1000 |
acc |
blimp_sentential_negation_npi_licensor_present |
|
✓ |
|
1000 |
acc |
blimp_sentential_negation_npi_scope |
|
✓ |
|
1000 |
acc |
blimp_sentential_subject_island |
|
✓ |
|
1000 |
acc |
blimp_superlative_quantifiers_1 |
|
✓ |
|
1000 |
acc |
blimp_superlative_quantifiers_2 |
|
✓ |
|
1000 |
acc |
blimp_tough_vs_raising_1 |
|
✓ |
|
1000 |
acc |
blimp_tough_vs_raising_2 |
|
✓ |
|
1000 |
acc |
blimp_transitive |
|
✓ |
|
1000 |
acc |
blimp_wh_island |
|
✓ |
|
1000 |
acc |
blimp_wh_questions_object_gap |
|
✓ |
|
1000 |
acc |
blimp_wh_questions_subject_gap |
|
✓ |
|
1000 |
acc |
blimp_wh_questions_subject_gap_long_distance |
|
✓ |
|
1000 |
acc |
blimp_wh_vs_that_no_gap |
|
✓ |
|
1000 |
acc |
blimp_wh_vs_that_no_gap_long_distance |
|
✓ |
|
1000 |
acc |
blimp_wh_vs_that_with_gap |
|
✓ |
|
1000 |
acc |
blimp_wh_vs_that_with_gap_long_distance |
|
✓ |
|
1000 |
acc |
boolq |
✓ |
✓ |
|
3270 |
acc |
cb |
✓ |
✓ |
|
56 |
acc, f1 |
cola |
✓ |
✓ |
|
1043 |
mcc |
copa |
✓ |
✓ |
|
100 |
acc |
coqa |
✓ |
✓ |
|
500 |
f1, em |
crows_pairs_english |
|
✓ |
|
1677 |
likelihood_difference, pct_stereotype |
crows_pairs_english_age |
|
✓ |
|
91 |
likelihood_difference, pct_stereotype |
crows_pairs_english_autre |
|
✓ |
|
11 |
likelihood_difference, pct_stereotype |
crows_pairs_english_disability |
|
✓ |
|
65 |
likelihood_difference, pct_stereotype |
crows_pairs_english_gender |
|
✓ |
|
320 |
likelihood_difference, pct_stereotype |
crows_pairs_english_nationality |
|
✓ |
|
216 |
likelihood_difference, pct_stereotype |
crows_pairs_english_physical_appearance |
|
✓ |
|
72 |
likelihood_difference, pct_stereotype |
crows_pairs_english_race_color |
|
✓ |
|
508 |
likelihood_difference, pct_stereotype |
crows_pairs_english_religion |
|
✓ |
|
111 |
likelihood_difference, pct_stereotype |
crows_pairs_english_sexual_orientation |
|
✓ |
|
93 |
likelihood_difference, pct_stereotype |
crows_pairs_english_socioeconomic |
|
✓ |
|
190 |
likelihood_difference, pct_stereotype |
crows_pairs_french |
|
✓ |
|
1677 |
likelihood_difference, pct_stereotype |
crows_pairs_french_age |
|
✓ |
|
90 |
likelihood_difference, pct_stereotype |
crows_pairs_french_autre |
|
✓ |
|
13 |
likelihood_difference, pct_stereotype |
crows_pairs_french_disability |
|
✓ |
|
66 |
likelihood_difference, pct_stereotype |
crows_pairs_french_gender |
|
✓ |
|
321 |
likelihood_difference, pct_stereotype |
crows_pairs_french_nationality |
|
✓ |
|
253 |
likelihood_difference, pct_stereotype |
crows_pairs_french_physical_appearance |
|
✓ |
|
72 |
likelihood_difference, pct_stereotype |
crows_pairs_french_race_color |
|
✓ |
|
460 |
likelihood_difference, pct_stereotype |
crows_pairs_french_religion |
|
✓ |
|
115 |
likelihood_difference, pct_stereotype |
crows_pairs_french_sexual_orientation |
|
✓ |
|
91 |
likelihood_difference, pct_stereotype |
crows_pairs_french_socioeconomic |
|
✓ |
|
196 |
likelihood_difference, pct_stereotype |
cycle_letters |
|
✓ |
|
10000 |
acc |
drop |
✓ |
✓ |
|
9536 |
em, f1 |
ethics_cm |
✓ |
|
✓ |
3885 |
acc |
ethics_deontology |
✓ |
|
✓ |
3596 |
acc, em |
ethics_justice |
✓ |
|
✓ |
2704 |
acc, em |
ethics_utilitarianism |
✓ |
|
✓ |
4808 |
acc |
ethics_utilitarianism_original |
|
|
✓ |
4808 |
acc |
ethics_virtue |
✓ |
|
✓ |
4975 |
acc, em |
gsm8k |
✓ |
|
✓ |
1319 |
acc |
headqa |
✓ |
✓ |
✓ |
2742 |
acc, acc_norm |
headqa_en |
✓ |
✓ |
✓ |
2742 |
acc, acc_norm |
headqa_es |
✓ |
✓ |
✓ |
2742 |
acc, acc_norm |
hellaswag |
✓ |
✓ |
|
10042 |
acc, acc_norm |
hendrycksTest-abstract_algebra |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-anatomy |
|
✓ |
✓ |
135 |
acc, acc_norm |
hendrycksTest-astronomy |
|
✓ |
✓ |
152 |
acc, acc_norm |
hendrycksTest-business_ethics |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-clinical_knowledge |
|
✓ |
✓ |
265 |
acc, acc_norm |
hendrycksTest-college_biology |
|
✓ |
✓ |
144 |
acc, acc_norm |
hendrycksTest-college_chemistry |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-college_computer_science |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-college_mathematics |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-college_medicine |
|
✓ |
✓ |
173 |
acc, acc_norm |
hendrycksTest-college_physics |
|
✓ |
✓ |
102 |
acc, acc_norm |
hendrycksTest-computer_security |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-conceptual_physics |
|
✓ |
✓ |
235 |
acc, acc_norm |
hendrycksTest-econometrics |
|
✓ |
✓ |
114 |
acc, acc_norm |
hendrycksTest-electrical_engineering |
|
✓ |
✓ |
145 |
acc, acc_norm |
hendrycksTest-elementary_mathematics |
|
✓ |
✓ |
378 |
acc, acc_norm |
hendrycksTest-formal_logic |
|
✓ |
✓ |
126 |
acc, acc_norm |
hendrycksTest-global_facts |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-high_school_biology |
|
✓ |
✓ |
310 |
acc, acc_norm |
hendrycksTest-high_school_chemistry |
|
✓ |
✓ |
203 |
acc, acc_norm |
hendrycksTest-high_school_computer_science |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-high_school_european_history |
|
✓ |
✓ |
165 |
acc, acc_norm |
hendrycksTest-high_school_geography |
|
✓ |
✓ |
198 |
acc, acc_norm |
hendrycksTest-high_school_government_and_politics |
|
✓ |
✓ |
193 |
acc, acc_norm |
hendrycksTest-high_school_macroeconomics |
|
✓ |
✓ |
390 |
acc, acc_norm |
hendrycksTest-high_school_mathematics |
|
✓ |
✓ |
270 |
acc, acc_norm |
hendrycksTest-high_school_microeconomics |
|
✓ |
✓ |
238 |
acc, acc_norm |
hendrycksTest-high_school_physics |
|
✓ |
✓ |
151 |
acc, acc_norm |
hendrycksTest-high_school_psychology |
|
✓ |
✓ |
545 |
acc, acc_norm |
hendrycksTest-high_school_statistics |
|
✓ |
✓ |
216 |
acc, acc_norm |
hendrycksTest-high_school_us_history |
|
✓ |
✓ |
204 |
acc, acc_norm |
hendrycksTest-high_school_world_history |
|
✓ |
✓ |
237 |
acc, acc_norm |
hendrycksTest-human_aging |
|
✓ |
✓ |
223 |
acc, acc_norm |
hendrycksTest-human_sexuality |
|
✓ |
✓ |
131 |
acc, acc_norm |
hendrycksTest-international_law |
|
✓ |
✓ |
121 |
acc, acc_norm |
hendrycksTest-jurisprudence |
|
✓ |
✓ |
108 |
acc, acc_norm |
hendrycksTest-logical_fallacies |
|
✓ |
✓ |
163 |
acc, acc_norm |
hendrycksTest-machine_learning |
|
✓ |
✓ |
112 |
acc, acc_norm |
hendrycksTest-management |
|
✓ |
✓ |
103 |
acc, acc_norm |
hendrycksTest-marketing |
|
✓ |
✓ |
234 |
acc, acc_norm |
hendrycksTest-medical_genetics |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-miscellaneous |
|
✓ |
✓ |
783 |
acc, acc_norm |
hendrycksTest-moral_disputes |
|
✓ |
✓ |
346 |
acc, acc_norm |
hendrycksTest-moral_scenarios |
|
✓ |
✓ |
895 |
acc, acc_norm |
hendrycksTest-nutrition |
|
✓ |
✓ |
306 |
acc, acc_norm |
hendrycksTest-philosophy |
|
✓ |
✓ |
311 |
acc, acc_norm |
hendrycksTest-prehistory |
|
✓ |
✓ |
324 |
acc, acc_norm |
hendrycksTest-professional_accounting |
|
✓ |
✓ |
282 |
acc, acc_norm |
hendrycksTest-professional_law |
|
✓ |
✓ |
1534 |
acc, acc_norm |
hendrycksTest-professional_medicine |
|
✓ |
✓ |
272 |
acc, acc_norm |
hendrycksTest-professional_psychology |
|
✓ |
✓ |
612 |
acc, acc_norm |
hendrycksTest-public_relations |
|
✓ |
✓ |
110 |
acc, acc_norm |
hendrycksTest-security_studies |
|
✓ |
✓ |
245 |
acc, acc_norm |
hendrycksTest-sociology |
|
✓ |
✓ |
201 |
acc, acc_norm |
hendrycksTest-us_foreign_policy |
|
✓ |
✓ |
100 |
acc, acc_norm |
hendrycksTest-virology |
|
✓ |
✓ |
166 |
acc, acc_norm |
hendrycksTest-world_religions |
|
✓ |
✓ |
171 |
acc, acc_norm |
iwslt17-ar-en |
|
|
✓ |
1460 |
bleu, chrf, ter |
iwslt17-en-ar |
|
|
✓ |
1460 |
bleu, chrf, ter |
lambada_openai |
|
|
✓ |
5153 |
ppl, acc |
lambada_openai_cloze |
|
|
✓ |
5153 |
ppl, acc |
lambada_openai_mt_de |
|
|
✓ |
5153 |
ppl, acc |
lambada_openai_mt_en |
|
|
✓ |
5153 |
ppl, acc |
lambada_openai_mt_es |
|
|
✓ |
5153 |
ppl, acc |
lambada_openai_mt_fr |
|
|
✓ |
5153 |
ppl, acc |
lambada_openai_mt_it |
|
|
✓ |
5153 |
ppl, acc |
lambada_standard |
|
✓ |
✓ |
5153 |
ppl, acc |
lambada_standard_cloze |
|
✓ |
✓ |
5153 |
ppl, acc |
logiqa |
✓ |
✓ |
✓ |
651 |
acc, acc_norm |
math_algebra |
✓ |
|
✓ |
1187 |
acc |
math_asdiv |
|
✓ |
|
2305 |
acc |
math_counting_and_prob |
✓ |
|
✓ |
474 |
acc |
math_geometry |
✓ |
|
✓ |
479 |
acc |
math_intermediate_algebra |
✓ |
|
✓ |
903 |
acc |
math_num_theory |
✓ |
|
✓ |
540 |
acc |
math_prealgebra |
✓ |
|
✓ |
871 |
acc |
math_precalc |
✓ |
|
✓ |
546 |
acc |
mathqa |
✓ |
✓ |
✓ |
2985 |
acc, acc_norm |
mc_taco |
|
✓ |
✓ |
9442 |
f1, em |
mgsm_bn |
✓ |
|
✓ |
250 |
acc |
mgsm_de |
✓ |
|
✓ |
250 |
acc |
mgsm_en |
✓ |
|
✓ |
250 |
acc |
mgsm_es |
✓ |
|
✓ |
250 |
acc |
mgsm_fr |
✓ |
|
✓ |
250 |
acc |
mgsm_ja |
✓ |
|
✓ |
250 |
acc |
mgsm_ru |
✓ |
|
✓ |
250 |
acc |
mgsm_sw |
✓ |
|
✓ |
250 |
acc |
mgsm_te |
✓ |
|
✓ |
250 |
acc |
mgsm_th |
✓ |
|
✓ |
250 |
acc |
mgsm_zh |
✓ |
|
✓ |
250 |
acc |
mnli |
✓ |
✓ |
|
9815 |
acc |
mnli_mismatched |
✓ |
✓ |
|
9832 |
acc |
mrpc |
✓ |
✓ |
|
408 |
acc, f1 |
multirc |
✓ |
✓ |
|
4848 |
acc |
mutual |
✓ |
✓ |
|
886 |
r@1, r@2, mrr |
mutual_plus |
✓ |
✓ |
|
886 |
r@1, r@2, mrr |
openbookqa |
✓ |
✓ |
✓ |
500 |
acc, acc_norm |
pawsx_de |
✓ |
✓ |
✓ |
2000 |
acc |
pawsx_en |
✓ |
✓ |
✓ |
2000 |
acc |
pawsx_es |
✓ |
✓ |
✓ |
2000 |
acc |
pawsx_fr |
✓ |
✓ |
✓ |
2000 |
acc |
pawsx_ja |
✓ |
✓ |
✓ |
2000 |
acc |
pawsx_ko |
✓ |
✓ |
✓ |
2000 |
acc |
pawsx_zh |
✓ |
✓ |
✓ |
2000 |
acc |
pile_arxiv |
|
✓ |
✓ |
2407 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_bookcorpus2 |
|
✓ |
✓ |
28 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_books3 |
|
✓ |
✓ |
269 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_dm-mathematics |
|
✓ |
✓ |
1922 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_enron |
|
✓ |
✓ |
1010 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_europarl |
|
✓ |
✓ |
157 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_freelaw |
|
✓ |
✓ |
5101 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_github |
|
✓ |
✓ |
18195 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_gutenberg |
|
✓ |
✓ |
80 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_hackernews |
|
✓ |
✓ |
1632 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_nih-exporter |
|
✓ |
✓ |
1884 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_opensubtitles |
|
✓ |
✓ |
642 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_openwebtext2 |
|
✓ |
✓ |
32925 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_philpapers |
|
✓ |
✓ |
68 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_pile-cc |
|
✓ |
✓ |
52790 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_pubmed-abstracts |
|
✓ |
✓ |
29895 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_pubmed-central |
|
✓ |
✓ |
5911 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_stackexchange |
|
✓ |
✓ |
30378 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_ubuntu-irc |
|
✓ |
✓ |
22 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_uspto |
|
✓ |
✓ |
11415 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_wikipedia |
|
✓ |
✓ |
17511 |
word_perplexity, byte_perplexity, bits_per_byte |
pile_youtubesubtitles |
|
✓ |
✓ |
342 |
word_perplexity, byte_perplexity, bits_per_byte |
piqa |
✓ |
✓ |
|
1838 |
acc, acc_norm |
prost |
|
|
✓ |
18736 |
acc, acc_norm |
pubmedqa |
|
|
✓ |
1000 |
acc |
qa4mre_2011 |
|
|
✓ |
120 |
acc, acc_norm |
qa4mre_2012 |
|
|
✓ |
160 |
acc, acc_norm |
qa4mre_2013 |
|
|
✓ |
284 |
acc, acc_norm |
qasper |
✓ |
✓ |
|
1764 |
f1_yesno, f1_abstractive |
qnli |
✓ |
✓ |
|
5463 |
acc |
qqp |
✓ |
✓ |
|
40430 |
acc, f1 |
race |
✓ |
✓ |
✓ |
1045 |
acc |
random_insertion |
|
✓ |
|
10000 |
acc |
record |
✓ |
✓ |
|
10000 |
f1, em |
reversed_words |
|
✓ |
|
10000 |
acc |
rte |
✓ |
✓ |
|
277 |
acc |
sciq |
✓ |
✓ |
✓ |
1000 |
acc, acc_norm |
scrolls_contractnli |
✓ |
✓ |
|
1037 |
em, acc, acc_norm |
scrolls_govreport |
✓ |
✓ |
|
972 |
rouge1, rouge2, rougeL |
scrolls_narrativeqa |
✓ |
✓ |
|
3425 |
f1 |
scrolls_qasper |
✓ |
✓ |
|
984 |
f1 |
scrolls_qmsum |
✓ |
✓ |
|
272 |
rouge1, rouge2, rougeL |
scrolls_quality |
✓ |
✓ |
|
2086 |
em, acc, acc_norm |
scrolls_summscreenfd |
✓ |
✓ |
|
338 |
rouge1, rouge2, rougeL |
squad2 |
✓ |
✓ |
|
11873 |
exact, f1, HasAns_exact, HasAns_f1, NoAns_exact, NoAns_f1, best_exact, best_f1 |
sst |
✓ |
✓ |
|
872 |
acc |
swag |
✓ |
✓ |
|
20006 |
acc, acc_norm |
toxigen |
✓ |
|
✓ |
940 |
acc, acc_norm |
triviaqa |
✓ |
✓ |
|
11313 |
acc |
truthfulqa_gen |
|
✓ |
|
817 |
bleurt_max, bleurt_acc, bleurt_diff, bleu_max, bleu_acc, bleu_diff, rouge1_max, rouge1_acc, rouge1_diff, rouge2_max, rouge2_acc, rouge2_diff, rougeL_max, rougeL_acc, rougeL_diff |
truthfulqa_mc |
|
✓ |
|
817 |
mc1, mc2 |
webqs |
✓ |
|
✓ |
2032 |
acc |
wic |
✓ |
✓ |
|
638 |
acc |
wikitext |
✓ |
✓ |
✓ |
62 |
word_perplexity, byte_perplexity, bits_per_byte |
winogrande |
✓ |
✓ |
|
1267 |
acc |
wmt14-en-fr |
|
|
✓ |
3003 |
bleu, chrf, ter |
wmt14-fr-en |
|
|
✓ |
3003 |
bleu, chrf, ter |
wmt16-de-en |
|
|
✓ |
2999 |
bleu, chrf, ter |
wmt16-en-de |
|
|
✓ |
2999 |
bleu, chrf, ter |
wmt16-en-ro |
|
|
✓ |
1999 |
bleu, chrf, ter |
wmt16-ro-en |
|
|
✓ |
1999 |
bleu, chrf, ter |
wmt20-cs-en |
|
|
✓ |
664 |
bleu, chrf, ter |
wmt20-de-en |
|
|
✓ |
785 |
bleu, chrf, ter |
wmt20-de-fr |
|
|
✓ |
1619 |
bleu, chrf, ter |
wmt20-en-cs |
|
|
✓ |
1418 |
bleu, chrf, ter |
wmt20-en-de |
|
|
✓ |
1418 |
bleu, chrf, ter |
wmt20-en-iu |
|
|
✓ |
2971 |
bleu, chrf, ter |
wmt20-en-ja |
|
|
✓ |
1000 |
bleu, chrf, ter |
wmt20-en-km |
|
|
✓ |
2320 |
bleu, chrf, ter |
wmt20-en-pl |
|
|
✓ |
1000 |
bleu, chrf, ter |
wmt20-en-ps |
|
|
✓ |
2719 |
bleu, chrf, ter |
wmt20-en-ru |
|
|
✓ |
2002 |
bleu, chrf, ter |
wmt20-en-ta |
|
|
✓ |
1000 |
bleu, chrf, ter |
wmt20-en-zh |
|
|
✓ |
1418 |
bleu, chrf, ter |
wmt20-fr-de |
|
|
✓ |
1619 |
bleu, chrf, ter |
wmt20-iu-en |
|
|
✓ |
2971 |
bleu, chrf, ter |
wmt20-ja-en |
|
|
✓ |
993 |
bleu, chrf, ter |
wmt20-km-en |
|
|
✓ |
2320 |
bleu, chrf, ter |
wmt20-pl-en |
|
|
✓ |
1001 |
bleu, chrf, ter |
wmt20-ps-en |
|
|
✓ |
2719 |
bleu, chrf, ter |
wmt20-ru-en |
|
|
✓ |
991 |
bleu, chrf, ter |
wmt20-ta-en |
|
|
✓ |
997 |
bleu, chrf, ter |
wmt20-zh-en |
|
|
✓ |
2000 |
bleu, chrf, ter |
wnli |
✓ |
✓ |
|
71 |
acc |
wsc |
✓ |
✓ |
|
104 |
acc |
wsc273 |
|
|
✓ |
273 |
acc |
xcopa_et |
|
✓ |
✓ |
500 |
acc |
xcopa_ht |
|
✓ |
✓ |
500 |
acc |
xcopa_id |
|
✓ |
✓ |
500 |
acc |
xcopa_it |
|
✓ |
✓ |
500 |
acc |
xcopa_qu |
|
✓ |
✓ |
500 |
acc |
xcopa_sw |
|
✓ |
✓ |
500 |
acc |
xcopa_ta |
|
✓ |
✓ |
500 |
acc |
xcopa_th |
|
✓ |
✓ |
500 |
acc |
xcopa_tr |
|
✓ |
✓ |
500 |
acc |
xcopa_vi |
|
✓ |
✓ |
500 |
acc |
xcopa_zh |
|
✓ |
✓ |
500 |
acc |
xnli_ar |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_bg |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_de |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_el |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_en |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_es |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_fr |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_hi |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_ru |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_sw |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_th |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_tr |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_ur |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_vi |
✓ |
✓ |
✓ |
5010 |
acc |
xnli_zh |
✓ |
✓ |
✓ |
5010 |
acc |
xstory_cloze_ar |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_en |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_es |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_eu |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_hi |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_id |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_my |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_ru |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_sw |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_te |
✓ |
✓ |
|
1511 |
acc |
xstory_cloze_zh |
✓ |
✓ |
|
1511 |
acc |
xwinograd_en |
|
|
✓ |
2325 |
acc |
xwinograd_fr |
|
|
✓ |
83 |
acc |
xwinograd_jp |
|
|
✓ |
959 |
acc |
xwinograd_pt |
|
|
✓ |
263 |
acc |
xwinograd_ru |
|
|
✓ |
315 |
acc |
xwinograd_zh |
|
|
✓ |
504 |
acc |
Ceval-valid-computer_network |
|
✓ |
|
19 |
acc |
Ceval-valid-operating_system |
|
✓ |
|
19 |
acc |
Ceval-valid-computer_architecture |
|
✓ |
|
21 |
acc |
Ceval-valid-college_programming |
|
✓ |
|
37 |
acc |
Ceval-valid-college_physics |
|
✓ |
|
19 |
acc |
Ceval-valid-college_chemistry |
|
✓ |
|
24 |
acc |
Ceval-valid-advanced_mathematics |
|
✓ |
|
19 |
acc |
Ceval-valid-probability_and_statistics |
|
✓ |
|
18 |
acc |
Ceval-valid-discrete_mathematics |
|
✓ |
|
16 |
acc |
Ceval-valid-electrical_engineer |
|
✓ |
|
37 |
acc |
Ceval-valid-metrology_engineer |
|
✓ |
|
24 |
acc |
Ceval-valid-high_school_mathematics |
|
✓ |
|
18 |
acc |
Ceval-valid-high_school_physics |
|
✓ |
|
19 |
acc |
Ceval-valid-high_school_chemistry |
|
✓ |
|
19 |
acc |
Ceval-valid-high_school_biology |
|
✓ |
|
19 |
acc |
Ceval-valid-middle_school_mathematics |
|
✓ |
|
19 |
acc |
Ceval-valid-middle_school_biology |
|
✓ |
|
21 |
acc |
Ceval-valid-middle_school_physics |
|
✓ |
|
19 |
acc |
Ceval-valid-middle_school_chemistry |
|
✓ |
|
20 |
acc |
Ceval-valid-veterinary_medicine |
|
✓ |
|
23 |
acc |
Ceval-valid-college_economics |
|
✓ |
|
55 |
acc |
Ceval-valid-business_administration |
|
✓ |
|
33 |
acc |
Ceval-valid-marxism |
|
✓ |
|
19 |
acc |
Ceval-valid-mao_zedong_thought |
|
✓ |
|
24 |
acc |
Ceval-valid-education_science |
|
✓ |
|
29 |
acc |
Ceval-valid-teacher_qualification |
|
✓ |
|
44 |
acc |
Ceval-valid-high_school_politics |
|
✓ |
|
19 |
acc |
Ceval-valid-high_school_geography |
|
✓ |
|
19 |
acc |
Ceval-valid-middle_school_politics |
|
✓ |
|
21 |
acc |
Ceval-valid-middle_school_geography |
|
✓ |
|
12 |
acc |
Ceval-valid-modern_chinese_history |
|
✓ |
|
23 |
acc |
Ceval-valid-ideological_and_moral_cultivation |
|
✓ |
|
19 |
acc |
Ceval-valid-logic |
|
✓ |
|
22 |
acc |
Ceval-valid-law |
|
✓ |
|
24 |
acc |
Ceval-valid-chinese_language_and_literature |
|
✓ |
|
23 |
acc |
Ceval-valid-art_studies |
|
✓ |
|
33 |
acc |
Ceval-valid-professional_tour_guide |
|
✓ |
|
29 |
acc |
Ceval-valid-legal_professional |
|
✓ |
|
23 |
acc |
Ceval-valid-high_school_chinese |
|
✓ |
|
19 |
acc |
Ceval-valid-high_school_history |
|
✓ |
|
20 |
acc |
Ceval-valid-middle_school_history |
|
✓ |
|
22 |
acc |
Ceval-valid-civil_servant |
|
✓ |
|
47 |
acc |
Ceval-valid-sports_science |
|
✓ |
|
19 |
acc |
Ceval-valid-plant_protection |
|
✓ |
|
22 |
acc |
Ceval-valid-basic_medicine |
|
✓ |
|
19 |
acc |
Ceval-valid-clinical_medicine |
|
✓ |
|
22 |
acc |
Ceval-valid-urban_and_rural_planner |
|
✓ |
|
46 |
acc |
Ceval-valid-accountant |
|
✓ |
|
49 |
acc |
Ceval-valid-fire_engineer |
|
✓ |
|
31 |
acc |
Ceval-valid-environmental_impact_assessment_engineer |
|
✓ |
|
31 |
acc |
Ceval-valid-tax_accountant |
|
✓ |
|
49 |
acc |
Ceval-valid-physician |
|
✓ |
|
49 |
acc |