diff --git a/src/lighteval/tasks/default_prompts.py b/src/lighteval/tasks/default_prompts.py
index cccd29cb..4303ad4b 100644
--- a/src/lighteval/tasks/default_prompts.py
+++ b/src/lighteval/tasks/default_prompts.py
@@ -1709,6 +1709,18 @@ def multirc(line, task_name: str = None):
     )
 
 
+def musr(line, task_name: str = None):
+    choices = ast.literal_eval(line["choices"])
+
+    query = line["narrative"] + "\n\n"
+    query += line["question"] + "\n\n"
+    for i, choice in enumerate(choices):
+        query += f"{i + 1} - {choice}\n"
+    query += "Answer:"
+
+    return Doc(task_name=task_name, query=query, choices=choices, gold_index=line["answer_index"])
+
+
 def mutual(line, task_name: str = None):
     def clean(text):
         replace_list = [(" '", "'"), (" \n", "\n"), ("\n ", "\n"), (" n't", "n't"), ("`` ", '"'), ("''", '"')]
diff --git a/src/lighteval/tasks/default_tasks.py b/src/lighteval/tasks/default_tasks.py
index 40dc153d..fa5ce370 100644
--- a/src/lighteval/tasks/default_tasks.py
+++ b/src/lighteval/tasks/default_tasks.py
@@ -14901,6 +14901,60 @@
     trust_dataset=True,
     version=0,
 )
+musr_murder_mysteries = LightevalTaskConfig(
+    name="musr:murder_mysteries",
+    suite=["lighteval"],
+    prompt_function=prompt.musr,
+    hf_repo="TAUR-Lab/MuSR",
+    hf_subset="default",
+    hf_avail_splits=["murder_mysteries"],
+    evaluation_splits=["murder_mysteries"],
+    few_shots_split=None,
+    few_shots_select=None,
+    generation_size=1,
+    metric=[Metrics.loglikelihood_acc],
+    stop_sequence=["\n"],
+    output_regex=None,
+    frozen=False,
+    trust_dataset=True,
+    version=0,
+)
+musr_object_placements = LightevalTaskConfig(
+    name="musr:object_placements",
+    suite=["lighteval"],
+    prompt_function=prompt.musr,
+    hf_repo="TAUR-Lab/MuSR",
+    hf_subset="default",
+    hf_avail_splits=["object_placements"],
+    evaluation_splits=["object_placements"],
+    few_shots_split=None,
+    few_shots_select=None,
+    generation_size=1,
+    metric=[Metrics.loglikelihood_acc],
+    stop_sequence=["\n"],
+    output_regex=None,
+    frozen=False,
+    trust_dataset=True,
+    version=0,
+)
+musr_team_allocation = LightevalTaskConfig(
+    name="musr:team_allocation",
+    suite=["lighteval"],
+    prompt_function=prompt.musr,
+    hf_repo="TAUR-Lab/MuSR",
+    hf_subset="default",
+    hf_avail_splits=["team_allocation"],
+    evaluation_splits=["team_allocation"],
+    few_shots_split=None,
+    few_shots_select=None,
+    generation_size=1,
+    metric=[Metrics.loglikelihood_acc],
+    stop_sequence=["\n"],
+    output_regex=None,
+    frozen=False,
+    trust_dataset=True,
+    version=0,
+)
 mutual_lighteval = LightevalTaskConfig(
     name="mutual",
     suite=["lighteval"],
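
For reviewers, a rough sketch of the prompt the new `musr` function builds. The row below is invented to mimic the `TAUR-Lab/MuSR` column layout implied by the diff; only the field names and the stringified-list encoding of `choices` come from the code above.

```python
from lighteval.tasks.default_prompts import musr  # the prompt function added in this PR

# Hypothetical row following the assumed TAUR-Lab/MuSR schema; the narrative,
# question, and choices are made up. "choices" is a stringified Python list,
# which is why the prompt function parses it with ast.literal_eval.
line = {
    "narrative": "Alice and Bob were the only people in the house that night.",
    "question": "Who is the most likely murderer?",
    "choices": "['Alice', 'Bob']",
    "answer_index": 1,
}

doc = musr(line, task_name="lighteval|musr:murder_mysteries")
print(doc.query)
# Alice and Bob were the only people in the house that night.
#
# Who is the most likely murderer?
#
# 1 - Alice
# 2 - Bob
# Answer:
print(doc.choices, doc.gold_index)  # ['Alice', 'Bob'] 1
```

Since the metric is `loglikelihood_acc`, each subset is scored by comparing the log-likelihood of every choice string as a continuation of this prompt, rather than by free-form generation.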