forked from EleutherAI/lm-evaluation-harness
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Kirill Semin
committed
Feb 8, 2024
1 parent
71db5cc
commit 194c149
Showing
61 changed files
with
657 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
group: | ||
- multiple_choice | ||
task: hellaswag_ru | ||
dataset_path: deepvk/hellaswag-ru | ||
dataset_name: null | ||
output_type: multiple_choice | ||
training_split: train | ||
validation_split: validation | ||
test_split: null | ||
num_fewshot: 10 | ||
process_docs: !function utils.process_docs | ||
doc_to_text: "{{query}}" | ||
doc_to_target: "{{label}}" | ||
doc_to_choice: "choices" | ||
metric_list: | ||
- metric: acc | ||
aggregation: mean | ||
higher_is_better: true | ||
- metric: acc_norm | ||
aggregation: mean | ||
higher_is_better: true | ||
metadata: | ||
version: 1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
""" | ||
Take in a YAML, and output all "other" splits with this YAML | ||
""" | ||
import os | ||
import yaml | ||
import argparse | ||
|
||
from tqdm import tqdm | ||
|
||
from loguru import logger as eval_logger | ||
|
||
# Maps every MMLU subject to its category.  The generator script uses the key
# as the task's ``dataset_name`` and task-name suffix, and the value as the
# suffix of the group the task is placed in.  Insertion order matters: it
# determines the order in which categories are listed in the group config.
SUBJECTS = {
    "abstract_algebra": "stem",
    "anatomy": "stem",
    "astronomy": "stem",
    "business_ethics": "other",
    "clinical_knowledge": "other",
    "college_biology": "stem",
    "college_chemistry": "stem",
    "college_computer_science": "stem",
    "college_mathematics": "stem",
    "college_medicine": "other",
    "college_physics": "stem",
    "computer_security": "stem",
    "conceptual_physics": "stem",
    "econometrics": "social_sciences",
    "electrical_engineering": "stem",
    "elementary_mathematics": "stem",
    "formal_logic": "humanities",
    "global_facts": "other",
    "high_school_biology": "stem",
    "high_school_chemistry": "stem",
    "high_school_computer_science": "stem",
    "high_school_european_history": "humanities",
    "high_school_geography": "social_sciences",
    "high_school_government_and_politics": "social_sciences",
    "high_school_macroeconomics": "social_sciences",
    "high_school_mathematics": "stem",
    "high_school_microeconomics": "social_sciences",
    "high_school_physics": "stem",
    "high_school_psychology": "social_sciences",
    "high_school_statistics": "stem",
    "high_school_us_history": "humanities",
    "high_school_world_history": "humanities",
    "human_aging": "other",
    "human_sexuality": "social_sciences",
    "international_law": "humanities",
    "jurisprudence": "humanities",
    "logical_fallacies": "humanities",
    "machine_learning": "stem",
    "management": "other",
    "marketing": "other",
    "medical_genetics": "other",
    "miscellaneous": "other",
    "moral_disputes": "humanities",
    "moral_scenarios": "humanities",
    "nutrition": "other",
    "philosophy": "humanities",
    "prehistory": "humanities",
    "professional_accounting": "other",
    "professional_law": "humanities",
    "professional_medicine": "other",
    "professional_psychology": "social_sciences",
    "public_relations": "social_sciences",
    "security_studies": "social_sciences",
    "sociology": "social_sciences",
    "us_foreign_policy": "social_sciences",
    "virology": "other",
    "world_religions": "humanities",
}
|
||
|
||
def parse_args(argv=None):
    """Parse the command-line options of the MMLU YAML generator.

    Args:
        argv: Optional list of argument strings to parse.  ``None`` (the
            default) makes argparse fall back to ``sys.argv[1:]``, which is
            the behavior existing callers get.

    Returns:
        argparse.Namespace with the attributes ``base_yaml_path``,
        ``save_prefix_path``, ``cot_prompt_path``, ``task_prefix`` and
        ``group_prefix``.
    """
    parser = argparse.ArgumentParser(
        description="Generate one task YAML per MMLU subject plus a group YAML."
    )
    parser.add_argument(
        "--base_yaml_path",
        required=True,
        help="Template YAML; its file name is written as the `include` of every generated task.",
    )
    parser.add_argument(
        "--save_prefix_path",
        default="mmlu",
        help="Path prefix of the generated files: '<prefix>_<subject>.yaml'.",
    )
    parser.add_argument(
        "--cot_prompt_path",
        default=None,
        help="Optional JSON file mapping each subject to its task description.",
    )
    parser.add_argument(
        "--task_prefix",
        default="",
        help="Optional infix for task and group names: 'mmlu_<task_prefix>_<name>'.",
    )
    parser.add_argument(
        "--group_prefix",
        default="",
        help="If set, the group config is saved to '<group_prefix>.yaml' "
        "instead of '<save_prefix_path>.yaml'.",
    )
    return parser.parse_args(argv)
|
||
|
||
def build_description(subject, cot_prompts=None):
    """Return the task description for *subject*.

    Args:
        subject: Subject key with underscores, e.g. ``"college_biology"``.
        cot_prompts: Optional mapping of subject to description (the parsed
            ``--cot_prompt_path`` JSON).  When given, the subject must be a
            key of it.

    Returns:
        ``cot_prompts[subject]`` when a mapping is given, otherwise the
        default English header with underscores replaced by spaces.

    Raises:
        KeyError: if *cot_prompts* is given but lacks *subject*.
    """
    if cot_prompts is not None:
        return cot_prompts[subject]
    readable = " ".join(subject.split("_"))
    return f"The following are multiple choice questions (with answers) about {readable}.\n\n"


def build_subject_config(subject, category, base_yaml_name, task_prefix="", description=""):
    """Build the dict that is dumped as one per-subject task YAML.

    Args:
        subject: Subject key; becomes ``dataset_name`` and the task suffix.
        category: Category of the subject; becomes the group suffix.
        base_yaml_name: File name of the template placed under ``include``.
        task_prefix: Optional infix; names become ``mmlu_<prefix>_<name>``
            when it is non-empty and ``mmlu_<name>`` otherwise.
        description: Text stored under ``description``.

    Returns:
        Dict with the keys ``include``, ``group``, ``group_alias``, ``task``,
        ``task_alias``, ``dataset_name`` and ``description``.
    """
    stem = f"mmlu_{task_prefix}" if task_prefix != "" else "mmlu"
    return {
        "include": base_yaml_name,
        "group": f"{stem}_{category}",
        "group_alias": category.replace("_", " "),
        "task": f"{stem}_{subject}",
        "task_alias": subject.replace("_", " "),
        "dataset_name": subject,
        "description": description,
    }


def main():
    """Write one task YAML per entry of SUBJECTS and one group YAML.

    Reads the options from the command line via ``parse_args()``.  All files
    are opened as UTF-8 explicitly: the dumps use ``allow_unicode=True``, so
    relying on the locale's default encoding can fail or corrupt non-ASCII
    descriptions on systems whose locale is not UTF-8.
    """
    args = parse_args()

    # File name of the base YAML so we can `"include": ` it in the generated YAMLs.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
    with open(args.base_yaml_path, encoding="utf-8") as f:
        # The parsed template is not used below; loading it only makes a
        # missing or malformed base YAML fail before any file is written.
        # NOTE(review): full_load is not meant for untrusted input; the path
        # comes from the person running the script.
        yaml.full_load(f)

    cot_prompts = None
    if args.cot_prompt_path is not None:
        import json

        with open(args.cot_prompt_path, encoding="utf-8") as f:
            cot_prompts = json.load(f)

    for subject, category in tqdm(SUBJECTS.items()):
        yaml_dict = build_subject_config(
            subject,
            category,
            base_yaml_name,
            task_prefix=args.task_prefix,
            description=build_description(subject, cot_prompts),
        )

        file_save_path = args.save_prefix_path + f"_{subject}.yaml"
        eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
                allow_unicode=True,
                default_style='"',  # double-quote every key and value
            )

    # Distinct categories in first-seen order (dicts preserve insertion order).
    all_categories = list(dict.fromkeys(SUBJECTS.values()))
    group_name = f"mmlu_{args.task_prefix}" if args.task_prefix != "" else "mmlu"
    mmlu_subcategories = [f"{group_name}_{category}" for category in all_categories]

    if args.group_prefix != "":
        file_save_path = args.group_prefix + ".yaml"
    else:
        file_save_path = args.save_prefix_path + ".yaml"

    eval_logger.info(f"Saving benchmark config to {file_save_path}")
    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(
            {
                "group": group_name,
                "task": mmlu_subcategories,
            },
            yaml_file,
            indent=4,
            default_flow_style=False,
        )


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
dataset_path: deepvk/mmlu-ru | ||
test_split: test | ||
num_fewshot: 5 | ||
fewshot_split: dev | ||
fewshot_config: | ||
sampler: first_n | ||
output_type: multiple_choice | ||
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nОтвет:" | ||
doc_to_choice: ["A", "B", "C", "D"] | ||
doc_to_target: answer | ||
metric_list: | ||
- metric: acc | ||
aggregation: mean | ||
higher_is_better: true | ||
metadata: | ||
version: 0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
group: mmlu_ru | ||
task: | ||
- mmlu_ru_stem | ||
- mmlu_ru_other | ||
- mmlu_ru_social_sciences | ||
- mmlu_ru_humanities |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "abstract_algebra" | ||
"description": "The following are multiple choice questions (with answers) about abstract\ | ||
\ algebra.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_abstract_algebra" | ||
"task_alias": "abstract algebra" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "anatomy" | ||
"description": "The following are multiple choice questions (with answers) about anatomy.\n\ | ||
\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_anatomy" | ||
"task_alias": "anatomy" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "astronomy" | ||
"description": "The following are multiple choice questions (with answers) about astronomy.\n\ | ||
\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_astronomy" | ||
"task_alias": "astronomy" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "business_ethics" | ||
"description": "The following are multiple choice questions (with answers) about business\ | ||
\ ethics.\n\n" | ||
"group": "mmlu_ru_other" | ||
"group_alias": "other" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_business_ethics" | ||
"task_alias": "business ethics" |
8 changes: 8 additions & 0 deletions
8
lm_eval/tasks/mmlu_ru/default/ru_mmlu_clinical_knowledge.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "clinical_knowledge" | ||
"description": "The following are multiple choice questions (with answers) about clinical\ | ||
\ knowledge.\n\n" | ||
"group": "mmlu_ru_other" | ||
"group_alias": "other" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_clinical_knowledge" | ||
"task_alias": "clinical knowledge" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "college_biology" | ||
"description": "The following are multiple choice questions (with answers) about college\ | ||
\ biology.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_college_biology" | ||
"task_alias": "college biology" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "college_chemistry" | ||
"description": "The following are multiple choice questions (with answers) about college\ | ||
\ chemistry.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_college_chemistry" | ||
"task_alias": "college chemistry" |
8 changes: 8 additions & 0 deletions
8
lm_eval/tasks/mmlu_ru/default/ru_mmlu_college_computer_science.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "college_computer_science" | ||
"description": "The following are multiple choice questions (with answers) about college\ | ||
\ computer science.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_college_computer_science" | ||
"task_alias": "college computer science" |
8 changes: 8 additions & 0 deletions
8
lm_eval/tasks/mmlu_ru/default/ru_mmlu_college_mathematics.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "college_mathematics" | ||
"description": "The following are multiple choice questions (with answers) about college\ | ||
\ mathematics.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_college_mathematics" | ||
"task_alias": "college mathematics" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "college_medicine" | ||
"description": "The following are multiple choice questions (with answers) about college\ | ||
\ medicine.\n\n" | ||
"group": "mmlu_ru_other" | ||
"group_alias": "other" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_college_medicine" | ||
"task_alias": "college medicine" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "college_physics" | ||
"description": "The following are multiple choice questions (with answers) about college\ | ||
\ physics.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_college_physics" | ||
"task_alias": "college physics" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "computer_security" | ||
"description": "The following are multiple choice questions (with answers) about computer\ | ||
\ security.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_computer_security" | ||
"task_alias": "computer security" |
8 changes: 8 additions & 0 deletions
8
lm_eval/tasks/mmlu_ru/default/ru_mmlu_conceptual_physics.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "conceptual_physics" | ||
"description": "The following are multiple choice questions (with answers) about conceptual\ | ||
\ physics.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_conceptual_physics" | ||
"task_alias": "conceptual physics" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "econometrics" | ||
"description": "The following are multiple choice questions (with answers) about econometrics.\n\ | ||
\n" | ||
"group": "mmlu_ru_social_sciences" | ||
"group_alias": "social sciences" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_econometrics" | ||
"task_alias": "econometrics" |
8 changes: 8 additions & 0 deletions
8
lm_eval/tasks/mmlu_ru/default/ru_mmlu_electrical_engineering.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "electrical_engineering" | ||
"description": "The following are multiple choice questions (with answers) about electrical\ | ||
\ engineering.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_electrical_engineering" | ||
"task_alias": "electrical engineering" |
8 changes: 8 additions & 0 deletions
8
lm_eval/tasks/mmlu_ru/default/ru_mmlu_elementary_mathematics.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "elementary_mathematics" | ||
"description": "The following are multiple choice questions (with answers) about elementary\ | ||
\ mathematics.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_elementary_mathematics" | ||
"task_alias": "elementary mathematics" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "formal_logic" | ||
"description": "The following are multiple choice questions (with answers) about formal\ | ||
\ logic.\n\n" | ||
"group": "mmlu_ru_humanities" | ||
"group_alias": "humanities" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_formal_logic" | ||
"task_alias": "formal logic" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "global_facts" | ||
"description": "The following are multiple choice questions (with answers) about global\ | ||
\ facts.\n\n" | ||
"group": "mmlu_ru_other" | ||
"group_alias": "other" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_global_facts" | ||
"task_alias": "global facts" |
8 changes: 8 additions & 0 deletions
8
lm_eval/tasks/mmlu_ru/default/ru_mmlu_high_school_biology.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "high_school_biology" | ||
"description": "The following are multiple choice questions (with answers) about high\ | ||
\ school biology.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_high_school_biology" | ||
"task_alias": "high school biology" |
8 changes: 8 additions & 0 deletions
8
lm_eval/tasks/mmlu_ru/default/ru_mmlu_high_school_chemistry.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "high_school_chemistry" | ||
"description": "The following are multiple choice questions (with answers) about high\ | ||
\ school chemistry.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_high_school_chemistry" | ||
"task_alias": "high school chemistry" |
8 changes: 8 additions & 0 deletions
8
lm_eval/tasks/mmlu_ru/default/ru_mmlu_high_school_computer_science.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"dataset_name": "high_school_computer_science" | ||
"description": "The following are multiple choice questions (with answers) about high\ | ||
\ school computer science.\n\n" | ||
"group": "mmlu_ru_stem" | ||
"group_alias": "stem" | ||
"include": "_default_template_yaml" | ||
"task": "mmlu_ru_high_school_computer_science" | ||
"task_alias": "high school computer science" |
Oops, something went wrong.