Skip to content

Commit

Permalink
Add goat task support (#6)
Browse files Browse the repository at this point in the history
* Add goat support

* Fix word generation metric

* Pre-commit fixes

* Review fixes

* Add matching tasks support

* Rename sootv to matching

* Change goat dataset path

* Fix incorrect dataset names and remove matching validation

* Add support for multiple choice literature tasks

* Rename goat tasks groups

* Fix word_in_set metric code

* Delete config for matching tasks and remove unused metrics

* Fix word_in_set metric and add support for more eos tokens

* Add new eos token
  • Loading branch information
VyrodovMikhail authored Jun 15, 2024
1 parent 568d885 commit 6559712
Show file tree
Hide file tree
Showing 10 changed files with 137 additions and 0 deletions.
27 changes: 27 additions & 0 deletions lm_eval/api/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,33 @@ def acc_all(items):
return acc


@register_metric(
metric="multi_choice_em_unordered",
higher_is_better=True,
output_type="generate_until",
aggregation="mean",
)
def multi_choice_em_unordered(items):
    """Score one multi-answer prediction by order-insensitive exact match.

    Args:
        items: A ``(gold, pred)`` pair of strings, each holding the selected
            options separated by commas.

    Returns:
        ``True`` when both strings contain the same set of options,
        regardless of order, repetition, or whitespace around each option;
        ``False`` otherwise.
    """
    gold, pred = items

    # Strip every option so that "1, 3" and "1,3" are treated as the same
    # answer; comparing as sets makes the check order-insensitive.
    gold_answers = {answer.strip() for answer in gold.split(",")}
    pred_answers = {answer.strip() for answer in pred.split(",")}
    return gold_answers == pred_answers


@register_metric(
metric="word_in_set",
higher_is_better=True,
output_type="generate_until",
aggregation="mean",
)
def word_in_set(items):
    """Check whether the predicted word is one of the accepted gold answers.

    Args:
        items: A ``(gold, pred_answer)`` pair of strings; ``gold`` lists the
            accepted answers separated by commas.

    Returns:
        ``True`` when the prediction, with surrounding whitespace removed,
        equals one of the accepted answers; ``False`` otherwise.
    """
    gold, pred_answer = items

    # Strip the gold alternatives as well as the prediction: otherwise an
    # alternative written as "cat, dog" keeps its leading space and can
    # never match the stripped prediction.
    gold_answers = {answer.strip() for answer in gold.split(",")}

    return pred_answer.strip() in gold_answers


def acc_all_stderr(items):
# Only count as correct if all answers are labeled correctly for each question
question_scoring_dict = {}
Expand Down
34 changes: 34 additions & 0 deletions lm_eval/tasks/goat/_default_multiple_choice_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
dataset_path: deepvk/goat
output_type: generate_until
test_split: test
fewshot_split: dev
fewshot_config:
sampler: first_n
doc_to_text: "Вопрос: {{input.strip()}}\nОтвет должен состоять из последовательности цифр, написанных слитно.\n\nОтвет: "
doc_to_target: "{{answer}}"
metric_list:
- metric: multi_choice_em_unordered
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: false
generation_kwargs:
until:
- "\n\n"
- "Вопрос:"
- "</s>"
- "<|end_of_text|>"
- "<|im_end|>"
do_sample: false
temperature: 0.0
num_beams: 3
repeats: 1
num_fewshot: 5
filter_list:
- name: "get-answer"
filter:
- function: "regex"
regex_pattern: "([0-9\\,]+)"
- function: "take_first"
metadata:
version: 0.0
16 changes: 16 additions & 0 deletions lm_eval/tasks/goat/_default_single_choice_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
dataset_path: deepvk/goat
test_split: test
num_fewshot: 5
fewshot_split: dev
fewshot_config:
sampler: first_n
output_type: multiple_choice
doc_to_text: "{{input.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nОтвет: "
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 0.0
26 changes: 26 additions & 0 deletions lm_eval/tasks/goat/_default_word_generation_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
dataset_path: deepvk/goat
output_type: generate_until
test_split: test
fewshot_split: dev
fewshot_config:
sampler: first_n
doc_to_text: "Вопрос: {{input.strip()}}\nОтвет должен состоять из одного или нескольких слов, написанных слитно, без пробелов и без запятых.\n\nОтвет: "
doc_to_target: "{{answer}}"
metric_list:
- metric: word_in_set
aggregation: mean
higher_is_better: true
generation_kwargs:
until:
- "\n\n"
- "Вопрос:"
- "</s>"
- "<|end_of_text|>"
- "<|im_end|>"
do_sample: false
temperature: 0.0
num_beams: 3
repeats: 1
num_fewshot: 5
metadata:
version: 0.0
4 changes: 4 additions & 0 deletions lm_eval/tasks/goat/goat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
group: goat
task:
- social_science
- literature
6 changes: 6 additions & 0 deletions lm_eval/tasks/goat/goat_multiple_choice_literature.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"dataset_name": "literature_multiple_choice"
"task": "literature_multiple_choice"
"description": "The following are multiple choice questions (with answers) about literature.\n\n"
"group": "literature"
"group_alias": "literature"
"include": "_default_multiple_choice_template_yaml"
6 changes: 6 additions & 0 deletions lm_eval/tasks/goat/goat_multiple_choice_sociology.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"dataset_name": "sociology_multiple_choice"
"task": "sociology_multiple_choice"
"description": "The following are multiple choice questions (with answers) about sociology.\n\n"
"group": "social_science"
"group_alias": "social_science"
"include": "_default_multiple_choice_template_yaml"
6 changes: 6 additions & 0 deletions lm_eval/tasks/goat/goat_single_choice_sociology.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"dataset_name": "sociology_single_choice"
"task": "sociology_single_choice"
"description": "The following are single choice questions (with answers) about sociology.\n\n"
"group": "social_science"
"group_alias": "social_science"
"include": "_default_single_choice_template_yaml"
6 changes: 6 additions & 0 deletions lm_eval/tasks/goat/goat_word_generation_literature.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"dataset_name": "literature_word_generation"
"task": "literature_word_generation"
"description": "The following are word generation questions (with answers) about literature.\n\n"
"group": "literature"
"group_alias": "literature"
"include": "_default_word_generation_template_yaml"
6 changes: 6 additions & 0 deletions lm_eval/tasks/goat/goat_word_generation_sociology.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"dataset_name": "sociology_word_generation"
"task": "sociology_word_generation"
"description": "The following are word generation questions (with answers) about sociology.\n\n"
"group": "social_science"
"group_alias": "social_science"
"include": "_default_word_generation_template_yaml"

0 comments on commit 6559712

Please sign in to comment.