Skip to content

Commit

Permalink
Added other tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
Kirill Semin committed Feb 8, 2024
1 parent 71db5cc commit 194c149
Show file tree
Hide file tree
Showing 61 changed files with 657 additions and 0 deletions.
23 changes: 23 additions & 0 deletions lm_eval/tasks/hellaswag/hellaswag_ru.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Task config for hellaswag_ru: multiple-choice evaluation on the
# deepvk/hellaswag-ru dataset, scored with acc and acc_norm.
group:
  - multiple_choice
task: hellaswag_ru
dataset_path: deepvk/hellaswag-ru
dataset_name: null
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: null
num_fewshot: 10
# Documents are preprocessed by process_docs from the task-local utils module.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{label}}"
doc_to_choice: "choices"
# Each metric entry is one mapping; its fields must be nested under the
# list item, otherwise they parse as (duplicate) top-level keys.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
156 changes: 156 additions & 0 deletions lm_eval/tasks/mmlu_ru/_generate_configs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import os
import yaml
import argparse

from tqdm import tqdm

from loguru import logger as eval_logger

# Maps every MMLU subject to the category it is reported under.
# The script body below uses the key as `dataset_name` and as the suffix of
# the task name and output file name, and the value as the suffix of the
# group name. Insertion order matters: it fixes both the order in which the
# per-subject files are written and the order in which categories first
# appear in the benchmark-level group file.
SUBJECTS = {
    "abstract_algebra": "stem",
    "anatomy": "stem",
    "astronomy": "stem",
    "business_ethics": "other",
    "clinical_knowledge": "other",
    "college_biology": "stem",
    "college_chemistry": "stem",
    "college_computer_science": "stem",
    "college_mathematics": "stem",
    "college_medicine": "other",
    "college_physics": "stem",
    "computer_security": "stem",
    "conceptual_physics": "stem",
    "econometrics": "social_sciences",
    "electrical_engineering": "stem",
    "elementary_mathematics": "stem",
    "formal_logic": "humanities",
    "global_facts": "other",
    "high_school_biology": "stem",
    "high_school_chemistry": "stem",
    "high_school_computer_science": "stem",
    "high_school_european_history": "humanities",
    "high_school_geography": "social_sciences",
    "high_school_government_and_politics": "social_sciences",
    "high_school_macroeconomics": "social_sciences",
    "high_school_mathematics": "stem",
    "high_school_microeconomics": "social_sciences",
    "high_school_physics": "stem",
    "high_school_psychology": "social_sciences",
    "high_school_statistics": "stem",
    "high_school_us_history": "humanities",
    "high_school_world_history": "humanities",
    "human_aging": "other",
    "human_sexuality": "social_sciences",
    "international_law": "humanities",
    "jurisprudence": "humanities",
    "logical_fallacies": "humanities",
    "machine_learning": "stem",
    "management": "other",
    "marketing": "other",
    "medical_genetics": "other",
    "miscellaneous": "other",
    "moral_disputes": "humanities",
    "moral_scenarios": "humanities",
    "nutrition": "other",
    "philosophy": "humanities",
    "prehistory": "humanities",
    "professional_accounting": "other",
    "professional_law": "humanities",
    "professional_medicine": "other",
    "professional_psychology": "social_sciences",
    "public_relations": "social_sciences",
    "security_studies": "social_sciences",
    "sociology": "social_sciences",
    "us_foreign_policy": "social_sciences",
    "virology": "other",
    "world_religions": "humanities",
}


def parse_args():
    """Parse the generator's command-line options from ``sys.argv``.

    Returns:
        argparse.Namespace with ``base_yaml_path`` (required),
        ``save_prefix_path`` (default ``"mmlu"``), ``cot_prompt_path``
        (default ``None``), ``task_prefix`` and ``group_prefix``
        (both default ``""``).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--base_yaml_path", required=True)

    # The remaining options are all optional and differ only in their default.
    defaulted_options = (
        ("--save_prefix_path", "mmlu"),
        ("--cot_prompt_path", None),
        ("--task_prefix", ""),
        ("--group_prefix", ""),
    )
    for flag, fallback in defaulted_options:
        cli.add_argument(flag, default=fallback)

    return cli.parse_args()


if __name__ == "__main__":
    # Script entry point: write one task YAML per entry in SUBJECTS, then one
    # benchmark-level YAML that groups every category seen along the way.
    args = parse_args()

    # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
    base_yaml_name = os.path.split(args.base_yaml_path)[-1]
    # The parsed template is not used below; loading it still aborts early
    # when the file is missing or is not valid YAML.
    # NOTE(review): yaml.full_load is only appropriate for trusted files.
    with open(args.base_yaml_path, encoding="utf-8") as f:
        yaml.full_load(f)

    if args.cot_prompt_path is not None:
        import json

        # Explicit UTF-8 so non-ASCII prompts decode the same on every
        # platform instead of depending on the locale's default encoding.
        with open(args.cot_prompt_path, encoding="utf-8") as f:
            cot_file = json.load(f)

    # Shared stem of every task/group name: "mmlu" or "mmlu_<task_prefix>".
    name_prefix = f"mmlu_{args.task_prefix}" if args.task_prefix != "" else "mmlu"

    # Categories in order of first appearance while walking SUBJECTS.
    ALL_CATEGORIES = []
    for subject, category in tqdm(SUBJECTS.items()):
        if category not in ALL_CATEGORIES:
            ALL_CATEGORIES.append(category)

        if args.cot_prompt_path is not None:
            # Raises KeyError if the prompt file has no entry for this subject.
            description = cot_file[subject]
        else:
            description = f"The following are multiple choice questions (with answers) about {subject.replace('_', ' ')}.\n\n"

        yaml_dict = {
            "include": base_yaml_name,
            "group": f"{name_prefix}_{category}",
            "group_alias": category.replace("_", " "),
            "task": f"{name_prefix}_{subject}",
            "task_alias": subject.replace("_", " "),
            "dataset_name": subject,
            "description": description,
        }

        file_save_path = args.save_prefix_path + f"_{subject}.yaml"
        eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
        # allow_unicode=True emits non-ASCII characters verbatim, so the file
        # must be opened as UTF-8 or the write can fail on other locales.
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
                allow_unicode=True,
                default_style='"',
            )

    mmlu_subcategories = [f"{name_prefix}_{category}" for category in ALL_CATEGORIES]

    # The benchmark-level file is named after --group_prefix when given,
    # otherwise after --save_prefix_path.
    if args.group_prefix != "":
        file_save_path = args.group_prefix + ".yaml"
    else:
        file_save_path = args.save_prefix_path + ".yaml"

    eval_logger.info(f"Saving benchmark config to {file_save_path}")
    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(
            {
                "group": name_prefix,
                "task": mmlu_subcategories,
            },
            yaml_file,
            indent=4,
            default_flow_style=False,
        )
16 changes: 16 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/_default_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Shared template pulled in by each per-subject mmlu_ru task file through
# its `include` key; those files add dataset_name, description, group and task.
dataset_path: deepvk/mmlu-ru
test_split: test
num_fewshot: 5
fewshot_split: dev
fewshot_config:
  sampler: first_n
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nОтвет:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
# Metric fields must be nested under the list item, otherwise they parse as
# top-level keys of the task config.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 0.0
6 changes: 6 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Benchmark-level group: collects the four mmlu_ru category groups that the
# per-subject task files assign themselves to through their `group` key.
group: mmlu_ru
task:
- mmlu_ru_stem
- mmlu_ru_other
- mmlu_ru_social_sciences
- mmlu_ru_humanities
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_abstract_algebra.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "abstract_algebra"
"description": "The following are multiple choice questions (with answers) about abstract\
\ algebra.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_abstract_algebra"
"task_alias": "abstract algebra"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_anatomy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "anatomy"
"description": "The following are multiple choice questions (with answers) about anatomy.\n\
\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_anatomy"
"task_alias": "anatomy"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_astronomy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "astronomy"
"description": "The following are multiple choice questions (with answers) about astronomy.\n\
\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_astronomy"
"task_alias": "astronomy"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_business_ethics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "business_ethics"
"description": "The following are multiple choice questions (with answers) about business\
\ ethics.\n\n"
"group": "mmlu_ru_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "mmlu_ru_business_ethics"
"task_alias": "business ethics"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_clinical_knowledge.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "clinical_knowledge"
"description": "The following are multiple choice questions (with answers) about clinical\
\ knowledge.\n\n"
"group": "mmlu_ru_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "mmlu_ru_clinical_knowledge"
"task_alias": "clinical knowledge"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_college_biology.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_biology"
"description": "The following are multiple choice questions (with answers) about college\
\ biology.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_college_biology"
"task_alias": "college biology"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_college_chemistry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_chemistry"
"description": "The following are multiple choice questions (with answers) about college\
\ chemistry.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_college_chemistry"
"task_alias": "college chemistry"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_computer_science"
"description": "The following are multiple choice questions (with answers) about college\
\ computer science.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_college_computer_science"
"task_alias": "college computer science"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_mathematics"
"description": "The following are multiple choice questions (with answers) about college\
\ mathematics.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_college_mathematics"
"task_alias": "college mathematics"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_college_medicine.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_medicine"
"description": "The following are multiple choice questions (with answers) about college\
\ medicine.\n\n"
"group": "mmlu_ru_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "mmlu_ru_college_medicine"
"task_alias": "college medicine"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_college_physics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "college_physics"
"description": "The following are multiple choice questions (with answers) about college\
\ physics.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_college_physics"
"task_alias": "college physics"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_computer_security.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "computer_security"
"description": "The following are multiple choice questions (with answers) about computer\
\ security.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_computer_security"
"task_alias": "computer security"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_conceptual_physics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "conceptual_physics"
"description": "The following are multiple choice questions (with answers) about conceptual\
\ physics.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_conceptual_physics"
"task_alias": "conceptual physics"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_econometrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "econometrics"
"description": "The following are multiple choice questions (with answers) about econometrics.\n\
\n"
"group": "mmlu_ru_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"task": "mmlu_ru_econometrics"
"task_alias": "econometrics"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "electrical_engineering"
"description": "The following are multiple choice questions (with answers) about electrical\
\ engineering.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_electrical_engineering"
"task_alias": "electrical engineering"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "elementary_mathematics"
"description": "The following are multiple choice questions (with answers) about elementary\
\ mathematics.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_elementary_mathematics"
"task_alias": "elementary mathematics"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_formal_logic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "formal_logic"
"description": "The following are multiple choice questions (with answers) about formal\
\ logic.\n\n"
"group": "mmlu_ru_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "mmlu_ru_formal_logic"
"task_alias": "formal logic"
8 changes: 8 additions & 0 deletions lm_eval/tasks/mmlu_ru/default/ru_mmlu_global_facts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "global_facts"
"description": "The following are multiple choice questions (with answers) about global\
\ facts.\n\n"
"group": "mmlu_ru_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "mmlu_ru_global_facts"
"task_alias": "global facts"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "high_school_biology"
"description": "The following are multiple choice questions (with answers) about high\
\ school biology.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_high_school_biology"
"task_alias": "high school biology"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "high_school_chemistry"
"description": "The following are multiple choice questions (with answers) about high\
\ school chemistry.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_high_school_chemistry"
"task_alias": "high school chemistry"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"dataset_name": "high_school_computer_science"
"description": "The following are multiple choice questions (with answers) about high\
\ school computer science.\n\n"
"group": "mmlu_ru_stem"
"group_alias": "stem"
"include": "_default_template_yaml"
"task": "mmlu_ru_high_school_computer_science"
"task_alias": "high school computer science"
Loading

0 comments on commit 194c149

Please sign in to comment.