# GH Task Runner (Single) #78
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Display name of the workflow.
name: GH Task Runner (Single)
on: | |
workflow_dispatch: | |
inputs: | |
run_task: | |
description: 'Task to run' | |
required: true | |
default: 'anli' | |
type: choice | |
options: | |
- advanced_ai_risk | |
- advanced_ai_risk_fewshot-coordinate-itself | |
- advanced_ai_risk_fewshot-coordinate-other-ais | |
- advanced_ai_risk_fewshot-coordinate-other-versions | |
- advanced_ai_risk_fewshot-corrigible-less-HHH | |
- advanced_ai_risk_fewshot-corrigible-more-HHH | |
- advanced_ai_risk_fewshot-corrigible-neutral-HHH | |
- advanced_ai_risk_fewshot-myopic-reward | |
- advanced_ai_risk_fewshot-one-box-tendency | |
- advanced_ai_risk_fewshot-power-seeking-inclination | |
- advanced_ai_risk_fewshot-self-awareness-general-ai | |
- advanced_ai_risk_fewshot-self-awareness-good-text-model | |
- advanced_ai_risk_fewshot-self-awareness-text-model | |
- advanced_ai_risk_fewshot-self-awareness-training-architecture | |
- advanced_ai_risk_fewshot-self-awareness-training-web-gpt | |
- advanced_ai_risk_fewshot-survival-instinct | |
- advanced_ai_risk_fewshot-wealth-seeking-inclination | |
- advanced_ai_risk_human-coordinate-itself | |
- advanced_ai_risk_human-coordinate-other-ais | |
- advanced_ai_risk_human-coordinate-other-versions | |
- advanced_ai_risk_human-corrigible-less-HHH | |
- advanced_ai_risk_human-corrigible-more-HHH | |
- advanced_ai_risk_human-corrigible-neutral-HHH | |
- advanced_ai_risk_human-myopic-reward | |
- advanced_ai_risk_human-one-box-tendency | |
- advanced_ai_risk_human-power-seeking-inclination | |
- advanced_ai_risk_human-self-awareness-general-ai | |
- advanced_ai_risk_human-self-awareness-good-text-model | |
- advanced_ai_risk_human-self-awareness-text-model | |
- advanced_ai_risk_human-self-awareness-training-architecture | |
- advanced_ai_risk_human-self-awareness-web-gpt | |
- advanced_ai_risk_human-survival-instinct | |
- advanced_ai_risk_human-wealth-seeking-inclination | |
- advanced_ai_risk_lm-coordinate-itself | |
- advanced_ai_risk_lm-coordinate-other-ais | |
- advanced_ai_risk_lm-coordinate-other-versions | |
- advanced_ai_risk_lm-corrigible-less-HHH | |
- advanced_ai_risk_lm-corrigible-more-HHH | |
- advanced_ai_risk_lm-corrigible-neutral-HHH | |
- advanced_ai_risk_lm-myopic-reward | |
- advanced_ai_risk_lm-one-box-tendency | |
- advanced_ai_risk_lm-power-seeking-inclination | |
- advanced_ai_risk_lm-self-awareness-general-ai | |
- advanced_ai_risk_lm-self-awareness-good-text-model | |
- advanced_ai_risk_lm-self-awareness-text-model | |
- advanced_ai_risk_lm-self-awareness-training-architecture | |
- advanced_ai_risk_lm-self-awareness-training-nn-architecture | |
- advanced_ai_risk_lm-self-awareness-training-web-gpt | |
- advanced_ai_risk_lm-survival-instinct | |
- advanced_ai_risk_lm-wealth-seeking-inclination | |
- ai2_arc | |
- anagrams1 | |
- anagrams2 | |
- anli | |
- anli_r1 | |
- anli_r2 | |
- anli_r3 | |
- arc_challenge | |
- arc_easy | |
- arithmetic | |
- arithmetic_1dc | |
- arithmetic_2da | |
- arithmetic_2dm | |
- arithmetic_2ds | |
- arithmetic_3da | |
- arithmetic_3ds | |
- arithmetic_4da | |
- arithmetic_4ds | |
- arithmetic_5da | |
- arithmetic_5ds | |
- asdiv | |
- babi | |
- bbh | |
- bbh_cot_fewshot | |
- bbh_cot_fewshot_boolean_expressions | |
- bbh_cot_fewshot_causal_judgement | |
- bbh_cot_fewshot_date_understanding | |
- bbh_cot_fewshot_disambiguation_qa | |
- bbh_cot_fewshot_dyck_languages | |
- bbh_cot_fewshot_formal_fallacies | |
- bbh_cot_fewshot_geometric_shapes | |
- bbh_cot_fewshot_hyperbaton | |
- bbh_cot_fewshot_logical_deduction_five_objects | |
- bbh_cot_fewshot_logical_deduction_seven_objects | |
- bbh_cot_fewshot_logical_deduction_three_objects | |
- bbh_cot_fewshot_movie_recommendation | |
- bbh_cot_fewshot_multistep_arithmetic_two | |
- bbh_cot_fewshot_navigate | |
- bbh_cot_fewshot_object_counting | |
- bbh_cot_fewshot_penguins_in_a_table | |
- bbh_cot_fewshot_reasoning_about_colored_objects | |
- bbh_cot_fewshot_ruin_names | |
- bbh_cot_fewshot_salient_translation_error_detection | |
- bbh_cot_fewshot_snarks | |
- bbh_cot_fewshot_sports_understanding | |
- bbh_cot_fewshot_temporal_sequences | |
- bbh_cot_fewshot_tracking_shuffled_objects_five_objects | |
- bbh_cot_fewshot_tracking_shuffled_objects_seven_objects | |
- bbh_cot_fewshot_tracking_shuffled_objects_three_objects | |
- bbh_cot_fewshot_web_of_lies | |
- bbh_cot_fewshot_word_sorting | |
- bbh_cot_zeroshot | |
- bbh_cot_zeroshot_boolean_expressions | |
- bbh_cot_zeroshot_causal_judgement | |
- bbh_cot_zeroshot_date_understanding | |
- bbh_cot_zeroshot_disambiguation_qa | |
- bbh_cot_zeroshot_dyck_languages | |
- bbh_cot_zeroshot_formal_fallacies | |
- bbh_cot_zeroshot_geometric_shapes | |
- bbh_cot_zeroshot_hyperbaton | |
- bbh_cot_zeroshot_logical_deduction_five_objects | |
- bbh_cot_zeroshot_logical_deduction_seven_objects | |
- bbh_cot_zeroshot_logical_deduction_three_objects | |
- bbh_cot_zeroshot_movie_recommendation | |
- bbh_cot_zeroshot_multistep_arithmetic_two | |
- bbh_cot_zeroshot_navigate | |
- bbh_cot_zeroshot_object_counting | |
- bbh_cot_zeroshot_penguins_in_a_table | |
- bbh_cot_zeroshot_reasoning_about_colored_objects | |
- bbh_cot_zeroshot_ruin_names | |
- bbh_cot_zeroshot_salient_translation_error_detection | |
- bbh_cot_zeroshot_snarks | |
- bbh_cot_zeroshot_sports_understanding | |
- bbh_cot_zeroshot_temporal_sequences | |
- bbh_cot_zeroshot_tracking_shuffled_objects_five_objects | |
- bbh_cot_zeroshot_tracking_shuffled_objects_seven_objects | |
- bbh_cot_zeroshot_tracking_shuffled_objects_three_objects | |
- bbh_cot_zeroshot_web_of_lies | |
- bbh_cot_zeroshot_word_sorting | |
- bbh_fewshot | |
- bbh_fewshot_boolean_expressions | |
- bbh_fewshot_causal_judgement | |
- bbh_fewshot_date_understanding | |
- bbh_fewshot_disambiguation_qa | |
- bbh_fewshot_dyck_languages | |
- bbh_fewshot_formal_fallacies | |
- bbh_fewshot_geometric_shapes | |
- bbh_fewshot_hyperbaton | |
- bbh_fewshot_logical_deduction_five_objects | |
- bbh_fewshot_logical_deduction_seven_objects | |
- bbh_fewshot_logical_deduction_three_objects | |
- bbh_fewshot_movie_recommendation | |
- bbh_fewshot_multistep_arithmetic_two | |
- bbh_fewshot_navigate | |
- bbh_fewshot_object_counting | |
- bbh_fewshot_penguins_in_a_table | |
- bbh_fewshot_reasoning_about_colored_objects | |
- bbh_fewshot_ruin_names | |
- bbh_fewshot_salient_translation_error_detection | |
- bbh_fewshot_snarks | |
- bbh_fewshot_sports_understanding | |
- bbh_fewshot_temporal_sequences | |
- bbh_fewshot_tracking_shuffled_objects_five_objects | |
- bbh_fewshot_tracking_shuffled_objects_seven_objects | |
- bbh_fewshot_tracking_shuffled_objects_three_objects | |
- bbh_fewshot_web_of_lies | |
- bbh_fewshot_word_sorting | |
- bbh_zeroshot | |
- bbh_zeroshot_boolean_expressions | |
- bbh_zeroshot_causal_judgement | |
- bbh_zeroshot_date_understanding | |
- bbh_zeroshot_disambiguation_qa | |
- bbh_zeroshot_dyck_languages | |
- bbh_zeroshot_formal_fallacies | |
- bbh_zeroshot_geometric_shapes | |
- bbh_zeroshot_hyperbaton | |
- bbh_zeroshot_logical_deduction_five_objects | |
- bbh_zeroshot_logical_deduction_seven_objects | |
- bbh_zeroshot_logical_deduction_three_objects | |
- bbh_zeroshot_movie_recommendation | |
- bbh_zeroshot_multistep_arithmetic_two | |
- bbh_zeroshot_navigate | |
- bbh_zeroshot_object_counting | |
- bbh_zeroshot_penguins_in_a_table | |
- bbh_zeroshot_reasoning_about_colored_objects | |
- bbh_zeroshot_ruin_names | |
- bbh_zeroshot_salient_translation_error_detection | |
- bbh_zeroshot_snarks | |
- bbh_zeroshot_sports_understanding | |
- bbh_zeroshot_temporal_sequences | |
- bbh_zeroshot_tracking_shuffled_objects_five_objects | |
- bbh_zeroshot_tracking_shuffled_objects_seven_objects | |
- bbh_zeroshot_tracking_shuffled_objects_three_objects | |
- bbh_zeroshot_web_of_lies | |
- bbh_zeroshot_word_sorting | |
- belebele | |
- belebele_acm_Arab | |
- belebele_afr_Latn | |
- belebele_als_Latn | |
- belebele_amh_Ethi | |
- belebele_apc_Arab | |
- belebele_arb_Arab | |
- belebele_arb_Latn | |
- belebele_ars_Arab | |
- belebele_ary_Arab | |
- belebele_arz_Arab | |
- belebele_asm_Beng | |
- belebele_azj_Latn | |
- belebele_bam_Latn | |
- belebele_ben_Beng | |
- belebele_ben_Latn | |
- belebele_bod_Tibt | |
- belebele_bul_Cyrl | |
- belebele_cat_Latn | |
- belebele_ceb_Latn | |
- belebele_ces_Latn | |
- belebele_ckb_Arab | |
- belebele_dan_Latn | |
- belebele_deu_Latn | |
- belebele_ell_Grek | |
- belebele_eng_Latn | |
- belebele_est_Latn | |
- belebele_eus_Latn | |
- belebele_fin_Latn | |
- belebele_fra_Latn | |
- belebele_fuv_Latn | |
- belebele_gaz_Latn | |
- belebele_grn_Latn | |
- belebele_guj_Gujr | |
- belebele_hat_Latn | |
- belebele_hau_Latn | |
- belebele_heb_Hebr | |
- belebele_hin_Deva | |
- belebele_hin_Latn | |
- belebele_hrv_Latn | |
- belebele_hun_Latn | |
- belebele_hye_Armn | |
- belebele_ibo_Latn | |
- belebele_ilo_Latn | |
- belebele_ind_Latn | |
- belebele_isl_Latn | |
- belebele_ita_Latn | |
- belebele_jav_Latn | |
- belebele_jpn_Jpan | |
- belebele_kac_Latn | |
- belebele_kan_Knda | |
- belebele_kat_Geor | |
- belebele_kaz_Cyrl | |
- belebele_kea_Latn | |
- belebele_khk_Cyrl | |
- belebele_khm_Khmr | |
- belebele_kin_Latn | |
- belebele_kir_Cyrl | |
- belebele_kor_Hang | |
- belebele_lao_Laoo | |
- belebele_lin_Latn | |
- belebele_lit_Latn | |
- belebele_lug_Latn | |
- belebele_luo_Latn | |
- belebele_lvs_Latn | |
- belebele_mal_Mlym | |
- belebele_mar_Deva | |
- belebele_mkd_Cyrl | |
- belebele_mlt_Latn | |
- belebele_mri_Latn | |
- belebele_mya_Mymr | |
- belebele_nld_Latn | |
- belebele_nob_Latn | |
- belebele_npi_Deva | |
- belebele_npi_Latn | |
- belebele_nso_Latn | |
- belebele_nya_Latn | |
- belebele_ory_Orya | |
- belebele_pan_Guru | |
- belebele_pbt_Arab | |
- belebele_pes_Arab | |
- belebele_plt_Latn | |
- belebele_pol_Latn | |
- belebele_por_Latn | |
- belebele_ron_Latn | |
- belebele_rus_Cyrl | |
- belebele_shn_Mymr | |
- belebele_sin_Latn | |
- belebele_sin_Sinh | |
- belebele_slk_Latn | |
- belebele_slv_Latn | |
- belebele_sna_Latn | |
- belebele_snd_Arab | |
- belebele_som_Latn | |
- belebele_sot_Latn | |
- belebele_spa_Latn | |
- belebele_srp_Cyrl | |
- belebele_ssw_Latn | |
- belebele_sun_Latn | |
- belebele_swe_Latn | |
- belebele_swh_Latn | |
- belebele_tam_Taml | |
- belebele_tel_Telu | |
- belebele_tgk_Cyrl | |
- belebele_tgl_Latn | |
- belebele_tha_Thai | |
- belebele_tir_Ethi | |
- belebele_tsn_Latn | |
- belebele_tso_Latn | |
- belebele_tur_Latn | |
- belebele_ukr_Cyrl | |
- belebele_urd_Arab | |
- belebele_urd_Latn | |
- belebele_uzn_Latn | |
- belebele_vie_Latn | |
- belebele_war_Latn | |
- belebele_wol_Latn | |
- belebele_xho_Latn | |
- belebele_yor_Latn | |
- belebele_zho_Hans | |
- belebele_zho_Hant | |
- belebele_zsm_Latn | |
- belebele_zul_Latn | |
- bigbench_abstract_narrative_understanding_generate_until | |
- bigbench_abstract_narrative_understanding_multiple_choice | |
- bigbench_anachronisms_generate_until | |
- bigbench_anachronisms_multiple_choice | |
- bigbench_analogical_similarity_generate_until | |
- bigbench_analogical_similarity_multiple_choice | |
- bigbench_analytic_entailment_generate_until | |
- bigbench_analytic_entailment_multiple_choice | |
- bigbench_arithmetic_generate_until | |
- bigbench_arithmetic_multiple_choice | |
- bigbench_ascii_word_recognition_generate_until | |
- bigbench_ascii_word_recognition_multiple_choice | |
- bigbench_authorship_verification_generate_until | |
- bigbench_authorship_verification_multiple_choice | |
- bigbench_auto_categorization_generate_until | |
- bigbench_auto_categorization_multiple_choice | |
- bigbench_auto_debugging_generate_until | |
- bigbench_auto_debugging_multiple_choice | |
- bigbench_bbq_lite_json_generate_until | |
- bigbench_bbq_lite_json_multiple_choice | |
- bigbench_bridging_anaphora_resolution_barqa_generate_until | |
- bigbench_bridging_anaphora_resolution_barqa_multiple_choice | |
- bigbench_causal_judgement_multiple_choice | |
- bigbench_causal_judgment_generate_until | |
- bigbench_causal_judgment_multiple_choice | |
- bigbench_cause_and_effect_generate_until | |
- bigbench_cause_and_effect_multiple_choice | |
- bigbench_checkmate_in_one_generate_until | |
- bigbench_checkmate_in_one_multiple_choice | |
- bigbench_chess_state_tracking_generate_until | |
- bigbench_chess_state_tracking_multiple_choice | |
- bigbench_chinese_remainder_theorem_generate_until | |
- bigbench_chinese_remainder_theorem_multiple_choice | |
- bigbench_cifar10_classification_generate_until | |
- bigbench_cifar10_classification_multiple_choice | |
- bigbench_code_line_description_generate_until | |
- bigbench_code_line_description_multiple_choice | |
- bigbench_codenames_generate_until | |
- bigbench_codenames_multiple_choice | |
- bigbench_color_generate_until | |
- bigbench_color_multiple_choice | |
- bigbench_common_morpheme_generate_until | |
- bigbench_common_morpheme_multiple_choice | |
- bigbench_conceptual_combinations_generate_until | |
- bigbench_conceptual_combinations_multiple_choice | |
- bigbench_conlang_translation_generate_until | |
- bigbench_conlang_translation_multiple_choice | |
- bigbench_contextual_parametric_knowledge_conflicts_generate_until | |
- bigbench_contextual_parametric_knowledge_conflicts_multiple_choice | |
- bigbench_crash_blossom_generate_until | |
- bigbench_crash_blossom_multiple_choice | |
- bigbench_crass_ai_generate_until | |
- bigbench_crass_ai_multiple_choice | |
- bigbench_cryobiology_spanish_generate_until | |
- bigbench_cryobiology_spanish_multiple_choice | |
- bigbench_cryptonite_generate_until | |
- bigbench_cryptonite_multiple_choice | |
- bigbench_cs_algorithms_generate_until | |
- bigbench_cs_algorithms_multiple_choice | |
- bigbench_dark_humor_detection_generate_until | |
- bigbench_dark_humor_detection_multiple_choice | |
- bigbench_date_understanding_generate_until | |
- bigbench_date_understanding_multiple_choice | |
- bigbench_disambiguation_qa_generate_until | |
- bigbench_disambiguation_qa_multiple_choice | |
- bigbench_discourse_marker_prediction_generate_until | |
- bigbench_discourse_marker_prediction_multiple_choice | |
- bigbench_disfl_qa_generate_until | |
- bigbench_disfl_qa_multiple_choice | |
- bigbench_dyck_languages_generate_until | |
- bigbench_dyck_languages_multiple_choice | |
- bigbench_elementary_math_qa_generate_until | |
- bigbench_elementary_math_qa_multiple_choice | |
- bigbench_emoji_movie_generate_until | |
- bigbench_emoji_movie_multiple_choice | |
- bigbench_emojis_emotion_prediction_generate_until | |
- bigbench_emojis_emotion_prediction_multiple_choice | |
- bigbench_empirical_judgments_generate_until | |
- bigbench_empirical_judgments_multiple_choice | |
- bigbench_english_proverbs_generate_until | |
- bigbench_english_proverbs_multiple_choice | |
- bigbench_english_russian_proverbs_generate_until | |
- bigbench_english_russian_proverbs_multiple_choice | |
- bigbench_entailed_polarity_generate_until | |
- bigbench_entailed_polarity_hindi_generate_until | |
- bigbench_entailed_polarity_hindi_multiple_choice | |
- bigbench_entailed_polarity_multiple_choice | |
- bigbench_epistemic_reasoning_generate_until | |
- bigbench_epistemic_reasoning_multiple_choice | |
- bigbench_evaluating_information_essentiality_generate_until | |
- bigbench_evaluating_information_essentiality_multiple_choice | |
- bigbench_fact_checker_generate_until | |
- bigbench_fact_checker_multiple_choice | |
- bigbench_fantasy_reasoning_generate_until | |
- bigbench_fantasy_reasoning_multiple_choice | |
- bigbench_few_shot_nlg_generate_until | |
- bigbench_few_shot_nlg_multiple_choice | |
- bigbench_figure_of_speech_detection_generate_until | |
- bigbench_figure_of_speech_detection_multiple_choice | |
- bigbench_formal_fallacies_syllogisms_negation_generate_until | |
- bigbench_formal_fallacies_syllogisms_negation_multiple_choice | |
- bigbench_gem_generate_until | |
- bigbench_gem_multiple_choice | |
- bigbench_gender_inclusive_sentences_german_generate_until | |
- bigbench_gender_inclusive_sentences_german_multiple_choice | |
- bigbench_general_knowledge_generate_until | |
- bigbench_general_knowledge_multiple_choice | |
- bigbench_generate_until | |
- bigbench_geometric_shapes_generate_until | |
- bigbench_geometric_shapes_multiple_choice | |
- bigbench_goal_step_wikihow_generate_until | |
- bigbench_goal_step_wikihow_multiple_choice | |
- bigbench_gre_reading_comprehension_generate_until | |
- bigbench_gre_reading_comprehension_multiple_choice | |
- bigbench_hhh_alignment_generate_until | |
- bigbench_hhh_alignment_multiple_choice | |
- bigbench_hindi_question_answering_generate_until | |
- bigbench_hindi_question_answering_multiple_choice | |
- bigbench_hindu_knowledge_generate_until | |
- bigbench_hindu_knowledge_multiple_choice | |
- bigbench_hinglish_toxicity_generate_until | |
- bigbench_hinglish_toxicity_multiple_choice | |
- bigbench_human_organs_senses_generate_until | |
- bigbench_human_organs_senses_multiple_choice | |
- bigbench_hyperbaton_generate_until | |
- bigbench_hyperbaton_multiple_choice | |
- bigbench_identify_math_theorems_generate_until | |
- bigbench_identify_math_theorems_multiple_choice | |
- bigbench_identify_odd_metaphor_generate_until | |
- bigbench_identify_odd_metaphor_multiple_choice | |
- bigbench_implicatures_generate_until | |
- bigbench_implicatures_multiple_choice | |
- bigbench_implicit_relations_generate_until | |
- bigbench_implicit_relations_multiple_choice | |
- bigbench_intent_recognition_generate_until | |
- bigbench_intent_recognition_multiple_choice | |
- bigbench_international_phonetic_alphabet_nli_generate_until | |
- bigbench_international_phonetic_alphabet_nli_multiple_choice | |
- bigbench_international_phonetic_alphabet_transliterate_generate_until | |
- bigbench_international_phonetic_alphabet_transliterate_multiple_choice | |
- bigbench_intersect_geometry_generate_until | |
- bigbench_intersect_geometry_multiple_choice | |
- bigbench_irony_identification_generate_until | |
- bigbench_irony_identification_multiple_choice | |
- bigbench_kanji_ascii_generate_until | |
- bigbench_kanji_ascii_multiple_choice | |
- bigbench_kannada_generate_until | |
- bigbench_kannada_multiple_choice | |
- bigbench_key_value_maps_generate_until | |
- bigbench_key_value_maps_multiple_choice | |
- bigbench_known_unknowns_generate_until | |
- bigbench_known_unknowns_multiple_choice | |
- bigbench_language_games_generate_until | |
- bigbench_language_games_multiple_choice | |
- bigbench_language_identification_generate_until | |
- bigbench_language_identification_multiple_choice | |
- bigbench_linguistic_mappings_generate_until | |
- bigbench_linguistic_mappings_multiple_choice | |
- bigbench_linguistics_puzzles_generate_until | |
- bigbench_linguistics_puzzles_multiple_choice | |
- bigbench_list_functions_generate_until | |
- bigbench_list_functions_multiple_choice | |
- bigbench_logic_grid_puzzle_generate_until | |
- bigbench_logic_grid_puzzle_multiple_choice | |
- bigbench_logical_args_generate_until | |
- bigbench_logical_args_multiple_choice | |
- bigbench_logical_deduction_generate_until | |
- bigbench_logical_deduction_multiple_choice | |
- bigbench_logical_fallacy_detection_generate_until | |
- bigbench_logical_fallacy_detection_multiple_choice | |
- bigbench_logical_sequence_generate_until | |
- bigbench_logical_sequence_multiple_choice | |
- bigbench_mathematical_induction_generate_until | |
- bigbench_mathematical_induction_multiple_choice | |
- bigbench_matrixshapes_generate_until | |
- bigbench_matrixshapes_multiple_choice | |
- bigbench_metaphor_boolean_generate_until | |
- bigbench_metaphor_boolean_multiple_choice | |
- bigbench_metaphor_understanding_generate_until | |
- bigbench_metaphor_understanding_multiple_choice | |
- bigbench_minute_mysteries_qa_generate_until | |
- bigbench_minute_mysteries_qa_multiple_choice | |
- bigbench_misconceptions_generate_until | |
- bigbench_misconceptions_multiple_choice | |
- bigbench_misconceptions_russian_generate_until | |
- bigbench_misconceptions_russian_multiple_choice | |
- bigbench_mnist_ascii_generate_until | |
- bigbench_mnist_ascii_multiple_choice | |
- bigbench_modified_arithmetic_generate_until | |
- bigbench_modified_arithmetic_multiple_choice | |
- bigbench_moral_permissibility_generate_until | |
- bigbench_moral_permissibility_multiple_choice | |
- bigbench_movie_dialog_same_or_different_generate_until | |
- bigbench_movie_dialog_same_or_different_multiple_choice | |
- bigbench_movie_recommendation_generate_until | |
- bigbench_movie_recommendation_multiple_choice | |
- bigbench_mult_data_wrangling_generate_until | |
- bigbench_mult_data_wrangling_multiple_choice | |
- bigbench_multiemo_generate_until | |
- bigbench_multiemo_multiple_choice | |
- bigbench_multiple_choice | |
- bigbench_natural_instructions_generate_until | |
- bigbench_natural_instructions_multiple_choice | |
- bigbench_navigate_generate_until | |
- bigbench_navigate_multiple_choice | |
- bigbench_nonsense_words_grammar_generate_until | |
- bigbench_nonsense_words_grammar_multiple_choice | |
- bigbench_novel_concepts_generate_until | |
- bigbench_novel_concepts_multiple_choice | |
- bigbench_object_counting_generate_until | |
- bigbench_object_counting_multiple_choice | |
- bigbench_odd_one_out_generate_until | |
- bigbench_odd_one_out_multiple_choice | |
- bigbench_operators_generate_until | |
- bigbench_operators_multiple_choice | |
- bigbench_paragraph_segmentation_generate_until | |
- bigbench_paragraph_segmentation_multiple_choice | |
- bigbench_parsinlu_qa_generate_until | |
- bigbench_parsinlu_qa_multiple_choice | |
- bigbench_parsinlu_reading_comprehension_generate_until | |
- bigbench_parsinlu_reading_comprehension_multiple_choice | |
- bigbench_penguins_in_a_table_generate_until | |
- bigbench_penguins_in_a_table_multiple_choice | |
- bigbench_periodic_elements_generate_until | |
- bigbench_periodic_elements_multiple_choice | |
- bigbench_persian_idioms_generate_until | |
- bigbench_persian_idioms_multiple_choice | |
- bigbench_phrase_relatedness_generate_until | |
- bigbench_phrase_relatedness_multiple_choice | |
- bigbench_physical_intuition_generate_until | |
- bigbench_physical_intuition_multiple_choice | |
- bigbench_physics_generate_until | |
- bigbench_physics_multiple_choice | |
- bigbench_physics_questions_generate_until | |
- bigbench_physics_questions_multiple_choice | |
- bigbench_play_dialog_same_or_different_generate_until | |
- bigbench_play_dialog_same_or_different_multiple_choice | |
- bigbench_polish_sequence_labeling_generate_until | |
- bigbench_polish_sequence_labeling_multiple_choice | |
- bigbench_presuppositions_as_nli_generate_until | |
- bigbench_presuppositions_as_nli_multiple_choice | |
- bigbench_qa_wikidata_generate_until | |
- bigbench_qa_wikidata_multiple_choice | |
- bigbench_question_selection_generate_until | |
- bigbench_question_selection_multiple_choice | |
- bigbench_real_or_fake_text_generate_until | |
- bigbench_real_or_fake_text_multiple_choice | |
- bigbench_reasoning_about_colored_objects_generate_until | |
- bigbench_reasoning_about_colored_objects_multiple_choice | |
- bigbench_repeat_copy_logic_generate_until | |
- bigbench_repeat_copy_logic_multiple_choice | |
- bigbench_rephrase_generate_until | |
- bigbench_rephrase_multiple_choice | |
- bigbench_riddle_sense_generate_until | |
- bigbench_riddle_sense_multiple_choice | |
- bigbench_ruin_names_generate_until | |
- bigbench_ruin_names_multiple_choice | |
- bigbench_salient_translation_error_detection_generate_until | |
- bigbench_salient_translation_error_detection_multiple_choice | |
- bigbench_scientific_press_release_generate_until | |
- bigbench_scientific_press_release_multiple_choice | |
- bigbench_semantic_parsing_in_context_sparc_generate_until | |
- bigbench_semantic_parsing_in_context_sparc_multiple_choice | |
- bigbench_semantic_parsing_spider_generate_until | |
- bigbench_semantic_parsing_spider_multiple_choice | |
- bigbench_sentence_ambiguity_generate_until | |
- bigbench_sentence_ambiguity_multiple_choice | |
- bigbench_similarities_abstraction_generate_until | |
- bigbench_similarities_abstraction_multiple_choice | |
- bigbench_simp_turing_concept_generate_until | |
- bigbench_simp_turing_concept_multiple_choice | |
- bigbench_simple_arithmetic_json_generate_until | |
- bigbench_simple_arithmetic_json_multiple_choice | |
- bigbench_simple_arithmetic_json_multiple_choice_generate_until | |
- bigbench_simple_arithmetic_json_multiple_choice_multiple_choice | |
- bigbench_simple_arithmetic_json_subtasks_generate_until | |
- bigbench_simple_arithmetic_json_subtasks_multiple_choice | |
- bigbench_simple_arithmetic_multiple_targets_json_generate_until | |
- bigbench_simple_arithmetic_multiple_targets_json_multiple_choice | |
- bigbench_simple_ethical_questions_generate_until | |
- bigbench_simple_ethical_questions_multiple_choice | |
- bigbench_simple_text_editing_generate_until | |
- bigbench_simple_text_editing_multiple_choice | |
- bigbench_snarks_generate_until | |
- bigbench_snarks_multiple_choice | |
- bigbench_social_iqa_generate_until | |
- bigbench_social_iqa_multiple_choice | |
- bigbench_social_support_generate_until | |
- bigbench_social_support_multiple_choice | |
- bigbench_sports_understanding_generate_until | |
- bigbench_sports_understanding_multiple_choice | |
- bigbench_strange_stories_generate_until | |
- bigbench_strange_stories_multiple_choice | |
- bigbench_strategyqa_generate_until | |
- bigbench_strategyqa_multiple_choice | |
- bigbench_sufficient_information_generate_until | |
- bigbench_sufficient_information_multiple_choice | |
- bigbench_suicide_risk_generate_until | |
- bigbench_suicide_risk_multiple_choice | |
- bigbench_swahili_english_proverbs_generate_until | |
- bigbench_swahili_english_proverbs_multiple_choice | |
- bigbench_swedish_to_german_proverbs_generate_until | |
- bigbench_swedish_to_german_proverbs_multiple_choice | |
- bigbench_symbol_interpretation_generate_until | |
- bigbench_symbol_interpretation_multiple_choice | |
- bigbench_temporal_sequences_generate_until | |
- bigbench_temporal_sequences_multiple_choice | |
- bigbench_tense_generate_until | |
- bigbench_tense_multiple_choice | |
- bigbench_timedial_generate_until | |
- bigbench_timedial_multiple_choice | |
- bigbench_topical_chat_generate_until | |
- bigbench_topical_chat_multiple_choice | |
- bigbench_tracking_shuffled_objects_generate_until | |
- bigbench_tracking_shuffled_objects_multiple_choice | |
- bigbench_understanding_fables_generate_until | |
- bigbench_understanding_fables_multiple_choice | |
- bigbench_undo_permutation_generate_until | |
- bigbench_undo_permutation_multiple_choice | |
- bigbench_unit_conversion_generate_until | |
- bigbench_unit_conversion_multiple_choice | |
- bigbench_unit_interpretation_generate_until | |
- bigbench_unit_interpretation_multiple_choice | |
- bigbench_unnatural_in_context_learning_generate_until | |
- bigbench_unnatural_in_context_learning_multiple_choice | |
- bigbench_vitaminc_fact_verification_generate_until | |
- bigbench_vitaminc_fact_verification_multiple_choice | |
- bigbench_what_is_the_tao_generate_until | |
- bigbench_what_is_the_tao_multiple_choice | |
- bigbench_which_wiki_edit_generate_until | |
- bigbench_which_wiki_edit_multiple_choice | |
- bigbench_winowhy_generate_until | |
- bigbench_winowhy_multiple_choice | |
- bigbench_word_sorting_generate_until | |
- bigbench_word_sorting_multiple_choice | |
- bigbench_word_unscrambling_generate_until | |
- bigbench_word_unscrambling_multiple_choice | |
- blimp | |
- blimp_adjunct_island | |
- blimp_anaphor_gender_agreement | |
- blimp_anaphor_number_agreement | |
- blimp_animate_subject_passive | |
- blimp_animate_subject_trans | |
- blimp_causative | |
- blimp_complex_NP_island | |
- blimp_coordinate_structure_constraint_complex_left_branch | |
- blimp_coordinate_structure_constraint_object_extraction | |
- blimp_determiner_noun_agreement_1 | |
- blimp_determiner_noun_agreement_2 | |
- blimp_determiner_noun_agreement_irregular_1 | |
- blimp_determiner_noun_agreement_irregular_2 | |
- blimp_determiner_noun_agreement_with_adj_2 | |
- blimp_determiner_noun_agreement_with_adj_irregular_1 | |
- blimp_determiner_noun_agreement_with_adj_irregular_2 | |
- blimp_determiner_noun_agreement_with_adjective_1 | |
- blimp_distractor_agreement_relational_noun | |
- blimp_distractor_agreement_relative_clause | |
- blimp_drop_argument | |
- blimp_ellipsis_n_bar_1 | |
- blimp_ellipsis_n_bar_2 | |
- blimp_existential_there_object_raising | |
- blimp_existential_there_quantifiers_1 | |
- blimp_existential_there_quantifiers_2 | |
- blimp_existential_there_subject_raising | |
- blimp_expletive_it_object_raising | |
- blimp_inchoative | |
- blimp_intransitive | |
- blimp_irregular_past_participle_adjectives | |
- blimp_irregular_past_participle_verbs | |
- blimp_irregular_plural_subject_verb_agreement_1 | |
- blimp_irregular_plural_subject_verb_agreement_2 | |
- blimp_left_branch_island_echo_question | |
- blimp_left_branch_island_simple_question | |
- blimp_matrix_question_npi_licensor_present | |
- blimp_npi_present_1 | |
- blimp_npi_present_2 | |
- blimp_only_npi_licensor_present | |
- blimp_only_npi_scope | |
- blimp_passive_1 | |
- blimp_passive_2 | |
- blimp_principle_A_c_command | |
- blimp_principle_A_case_1 | |
- blimp_principle_A_case_2 | |
- blimp_principle_A_domain_1 | |
- blimp_principle_A_domain_2 | |
- blimp_principle_A_domain_3 | |
- blimp_principle_A_reconstruction | |
- blimp_regular_plural_subject_verb_agreement_1 | |
- blimp_regular_plural_subject_verb_agreement_2 | |
- blimp_sentential_negation_npi_licensor_present | |
- blimp_sentential_negation_npi_scope | |
- blimp_sentential_subject_island | |
- blimp_superlative_quantifiers_1 | |
- blimp_superlative_quantifiers_2 | |
- blimp_tough_vs_raising_1 | |
- blimp_tough_vs_raising_2 | |
- blimp_transitive | |
- blimp_wh_island | |
- blimp_wh_questions_object_gap | |
- blimp_wh_questions_subject_gap | |
- blimp_wh_questions_subject_gap_long_distance | |
- blimp_wh_vs_that_no_gap | |
- blimp_wh_vs_that_no_gap_long_distance | |
- blimp_wh_vs_that_with_gap | |
- blimp_wh_vs_that_with_gap_long_distance | |
- boolq | |
- boolq-seq2seq | |
- cb | |
- ceval-valid | |
- ceval-valid_accountant | |
- ceval-valid_advanced_mathematics | |
- ceval-valid_art_studies | |
- ceval-valid_basic_medicine | |
- ceval-valid_business_administration | |
- ceval-valid_chinese_language_and_literature | |
- ceval-valid_civil_servant | |
- ceval-valid_clinical_medicine | |
- ceval-valid_college_chemistry | |
- ceval-valid_college_economics | |
- ceval-valid_college_physics | |
- ceval-valid_college_programming | |
- ceval-valid_computer_architecture | |
- ceval-valid_computer_network | |
- ceval-valid_discrete_mathematics | |
- ceval-valid_education_science | |
- ceval-valid_electrical_engineer | |
- ceval-valid_environmental_impact_assessment_engineer | |
- ceval-valid_fire_engineer | |
- ceval-valid_high_school_biology | |
- ceval-valid_high_school_chemistry | |
- ceval-valid_high_school_chinese | |
- ceval-valid_high_school_geography | |
- ceval-valid_high_school_history | |
- ceval-valid_high_school_mathematics | |
- ceval-valid_high_school_physics | |
- ceval-valid_high_school_politics | |
- ceval-valid_ideological_and_moral_cultivation | |
- ceval-valid_law | |
- ceval-valid_legal_professional | |
- ceval-valid_logic | |
- ceval-valid_mao_zedong_thought | |
- ceval-valid_marxism | |
- ceval-valid_metrology_engineer | |
- ceval-valid_middle_school_biology | |
- ceval-valid_middle_school_chemistry | |
- ceval-valid_middle_school_geography | |
- ceval-valid_middle_school_history | |
- ceval-valid_middle_school_mathematics | |
- ceval-valid_middle_school_physics | |
- ceval-valid_middle_school_politics | |
- ceval-valid_modern_chinese_history | |
- ceval-valid_operating_system | |
- ceval-valid_physician | |
- ceval-valid_plant_protection | |
- ceval-valid_probability_and_statistics | |
- ceval-valid_professional_tour_guide | |
- ceval-valid_sports_science | |
- ceval-valid_tax_accountant | |
- ceval-valid_teacher_qualification | |
- ceval-valid_urban_and_rural_planner | |
- ceval-valid_veterinary_medicine | |
- chain_of_thought | |
- cmmlu | |
- cmmlu_agronomy | |
- cmmlu_anatomy | |
- cmmlu_ancient_chinese | |
- cmmlu_arts | |
- cmmlu_astronomy | |
- cmmlu_business_ethics | |
- cmmlu_chinese_civil_service_exam | |
- cmmlu_chinese_driving_rule | |
- cmmlu_chinese_food_culture | |
- cmmlu_chinese_foreign_policy | |
- cmmlu_chinese_history | |
- cmmlu_chinese_literature | |
- cmmlu_chinese_teacher_qualification | |
- cmmlu_clinical_knowledge | |
- cmmlu_college_actuarial_science | |
- cmmlu_college_education | |
- cmmlu_college_engineering_hydrology | |
- cmmlu_college_law | |
- cmmlu_college_mathematics | |
- cmmlu_college_medical_statistics | |
- cmmlu_college_medicine | |
- cmmlu_computer_science | |
- cmmlu_computer_security | |
- cmmlu_conceptual_physics | |
- cmmlu_construction_project_management | |
- cmmlu_economics | |
- cmmlu_education | |
- cmmlu_electrical_engineering | |
- cmmlu_elementary_chinese | |
- cmmlu_elementary_commonsense | |
- cmmlu_elementary_information_and_technology | |
- cmmlu_elementary_mathematics | |
- cmmlu_ethnology | |
- cmmlu_food_science | |
- cmmlu_genetics | |
- cmmlu_global_facts | |
- cmmlu_high_school_biology | |
- cmmlu_high_school_chemistry | |
- cmmlu_high_school_geography | |
- cmmlu_high_school_mathematics | |
- cmmlu_high_school_physics | |
- cmmlu_high_school_politics | |
- cmmlu_human_sexuality | |
- cmmlu_international_law | |
- cmmlu_journalism | |
- cmmlu_jurisprudence | |
- cmmlu_legal_and_moral_basis | |
- cmmlu_logical | |
- cmmlu_machine_learning | |
- cmmlu_management | |
- cmmlu_marketing | |
- cmmlu_marxist_theory | |
- cmmlu_modern_chinese | |
- cmmlu_nutrition | |
- cmmlu_philosophy | |
- cmmlu_professional_accounting | |
- cmmlu_professional_law | |
- cmmlu_professional_medicine | |
- cmmlu_professional_psychology | |
- cmmlu_public_relations | |
- cmmlu_security_study | |
- cmmlu_sociology | |
- cmmlu_sports_science | |
- cmmlu_traditional_chinese_medicine | |
- cmmlu_virology | |
- cmmlu_world_history | |
- cmmlu_world_religions | |
- code2text_go | |
- code2text_java | |
- code2text_javascript | |
- code2text_php | |
- code2text_python | |
- code2text_ruby | |
- codexglue_code2text | |
- cola | |
- copa | |
- coqa | |
- crows_pairs | |
- crows_pairs_english | |
- crows_pairs_english_age | |
- crows_pairs_english_autre | |
- crows_pairs_english_disability | |
- crows_pairs_english_gender | |
- crows_pairs_english_nationality | |
- crows_pairs_english_physical_appearance | |
- crows_pairs_english_race_color | |
- crows_pairs_english_religion | |
- crows_pairs_english_sexual_orientation | |
- crows_pairs_english_socioeconomic | |
- crows_pairs_french | |
- crows_pairs_french_age | |
- crows_pairs_french_autre | |
- crows_pairs_french_disability | |
- crows_pairs_french_gender | |
- crows_pairs_french_nationality | |
- crows_pairs_french_physical_appearance | |
- crows_pairs_french_race_color | |
- crows_pairs_french_religion | |
- crows_pairs_french_sexual_orientation | |
- crows_pairs_french_socioeconomic | |
- csatqa | |
- csatqa_gr | |
- csatqa_li | |
- csatqa_rch | |
- csatqa_rcs | |
- csatqa_rcss | |
- csatqa_wr | |
- cycle_letters | |
- drop | |
- ethics_cm | |
- ethics_deontology | |
- ethics_justice | |
- ethics_utilitarianism | |
- ethics_virtue | |
- flan_held_in | |
- flan_held_out | |
- fld | |
- fld_default | |
- fld_star | |
- freebase | |
- generate_until | |
- glue | |
- gpt3_translation_benchmarks | |
- gsm8k | |
- gsm8k_cot | |
- gsm8k_cot_self_consistency | |
- headqa | |
- headqa_en | |
- headqa_es | |
- hellaswag | |
- hellaswag_ar | |
- hellaswag_bn | |
- hellaswag_ca | |
- hellaswag_da | |
- hellaswag_de | |
- hellaswag_es | |
- hellaswag_eu | |
- hellaswag_fr | |
- hellaswag_gu | |
- hellaswag_hi | |
- hellaswag_hr | |
- hellaswag_hu | |
- hellaswag_hy | |
- hellaswag_id | |
- hellaswag_it | |
- hellaswag_kn | |
- hellaswag_ml | |
- hellaswag_mr | |
- hellaswag_multilingual | |
- hellaswag_ne | |
- hellaswag_nl | |
- hellaswag_pt | |
- hellaswag_ro | |
- hellaswag_ru | |
- hellaswag_sk | |
- hellaswag_sr | |
- hellaswag_sv | |
- hellaswag_ta | |
- hellaswag_te | |
- hellaswag_uk | |
- hellaswag_vi | |
- hendrycks_ethics | |
- ifeval | |
- iwslt2017 | |
- iwslt2017-ar-en | |
- iwslt2017-en-ar | |
- kmmlu | |
- kmmlu_accounting | |
- kmmlu_agricultural_sciences | |
- kmmlu_aviation_engineering_and_maintenance | |
- kmmlu_biology | |
- kmmlu_chemical_engineering | |
- kmmlu_chemistry | |
- kmmlu_civil_engineering | |
- kmmlu_computer_science | |
- kmmlu_construction | |
- kmmlu_criminal_law | |
- kmmlu_ecology | |
- kmmlu_economics | |
- kmmlu_education | |
- kmmlu_electrical_engineering | |
- kmmlu_electronics_engineering | |
- kmmlu_energy_management | |
- kmmlu_environmental_science | |
- kmmlu_fashion | |
- kmmlu_food_processing | |
- kmmlu_gas_technology_and_engineering | |
- kmmlu_geomatics | |
- kmmlu_health | |
- kmmlu_industrial_engineer | |
- kmmlu_information_technology | |
- kmmlu_interior_architecture_and_design | |
- kmmlu_law | |
- kmmlu_machine_design_and_manufacturing | |
- kmmlu_management | |
- kmmlu_maritime_engineering | |
- kmmlu_marketing | |
- kmmlu_materials_engineering | |
- kmmlu_mechanical_engineering | |
- kmmlu_nondestructive_testing | |
- kmmlu_patent | |
- kmmlu_political_science_and_sociology | |
- kmmlu_psychology | |
- kmmlu_public_safety | |
- kmmlu_railway_and_automotive_engineering | |
- kmmlu_real_estate | |
- kmmlu_refrigerating_machinery | |
- kmmlu_social_welfare | |
- kmmlu_taxation | |
- kmmlu_telecommunications_and_wireless_technology | |
- kobest | |
- kobest_boolq | |
- kobest_copa | |
- kobest_hellaswag | |
- kobest_sentineg | |
- kobest_wic | |
- lambada | |
- lambada_cloze | |
- lambada_multilingual | |
- lambada_openai | |
- lambada_openai_cloze_yaml | |
- lambada_openai_mt_de | |
- lambada_openai_mt_en | |
- lambada_openai_mt_es | |
- lambada_openai_mt_fr | |
- lambada_openai_mt_it | |
- lambada_standard | |
- lambada_standard_cloze_yaml | |
- logieval | |
- logiqa | |
- logiqa2 | |
- loglikelihood | |
- math_word_problems | |
- mathqa | |
- mc_taco | |
- medmcqa | |
- medqa_4options | |
- mgsm_bn_direct | |
- mgsm_bn_native_cot | |
- mgsm_cot_native | |
- mgsm_de_direct | |
- mgsm_de_native_cot | |
- mgsm_direct | |
- mgsm_direct_bn | |
- mgsm_direct_de | |
- mgsm_direct_en | |
- mgsm_direct_es | |
- mgsm_direct_fr | |
- mgsm_direct_ja | |
- mgsm_direct_ru | |
- mgsm_direct_sw | |
- mgsm_direct_te | |
- mgsm_direct_th | |
- mgsm_direct_zh | |
- mgsm_en_direct | |
- mgsm_en_native_cot | |
- mgsm_es_direct | |
- mgsm_es_native_cot | |
- mgsm_fr_direct | |
- mgsm_fr_native_cot | |
- mgsm_ja_direct | |
- mgsm_ja_native_cot | |
- mgsm_ru_direct | |
- mgsm_ru_native_cot | |
- mgsm_sw_direct | |
- mgsm_sw_native_cot | |
- mgsm_te_direct | |
- mgsm_te_native_cot | |
- mgsm_th_direct | |
- mgsm_th_native_cot | |
- mgsm_zh_direct | |
- mgsm_zh_native_cot | |
- minerva_math | |
- minerva_math_algebra | |
- minerva_math_counting_and_prob | |
- minerva_math_geometry | |
- minerva_math_intermediate_algebra | |
- minerva_math_num_theory | |
- minerva_math_prealgebra | |
- minerva_math_precalc | |
- mmlu | |
- mmlu_abstract_algebra | |
- mmlu_anatomy | |
- mmlu_astronomy | |
- mmlu_business_ethics | |
- mmlu_clinical_knowledge | |
- mmlu_college_biology | |
- mmlu_college_chemistry | |
- mmlu_college_computer_science | |
- mmlu_college_mathematics | |
- mmlu_college_medicine | |
- mmlu_college_physics | |
- mmlu_computer_security | |
- mmlu_conceptual_physics | |
- mmlu_econometrics | |
- mmlu_electrical_engineering | |
- mmlu_elementary_mathematics | |
- mmlu_flan_cot_fewshot | |
- mmlu_flan_cot_fewshot_abstract_algebra | |
- mmlu_flan_cot_fewshot_anatomy | |
- mmlu_flan_cot_fewshot_astronomy | |
- mmlu_flan_cot_fewshot_business_ethics | |
- mmlu_flan_cot_fewshot_clinical_knowledge | |
- mmlu_flan_cot_fewshot_college_biology | |
- mmlu_flan_cot_fewshot_college_chemistry | |
- mmlu_flan_cot_fewshot_college_computer_science | |
- mmlu_flan_cot_fewshot_college_mathematics | |
- mmlu_flan_cot_fewshot_college_medicine | |
- mmlu_flan_cot_fewshot_college_physics | |
- mmlu_flan_cot_fewshot_computer_security | |
- mmlu_flan_cot_fewshot_conceptual_physics | |
- mmlu_flan_cot_fewshot_econometrics | |
- mmlu_flan_cot_fewshot_electrical_engineering | |
- mmlu_flan_cot_fewshot_elementary_mathematics | |
- mmlu_flan_cot_fewshot_formal_logic | |
- mmlu_flan_cot_fewshot_global_facts | |
- mmlu_flan_cot_fewshot_high_school_biology | |
- mmlu_flan_cot_fewshot_high_school_chemistry | |
- mmlu_flan_cot_fewshot_high_school_computer_science | |
- mmlu_flan_cot_fewshot_high_school_european_history | |
- mmlu_flan_cot_fewshot_high_school_geography | |
- mmlu_flan_cot_fewshot_high_school_government_and_politics | |
- mmlu_flan_cot_fewshot_high_school_macroeconomics | |
- mmlu_flan_cot_fewshot_high_school_mathematics | |
- mmlu_flan_cot_fewshot_high_school_microeconomics | |
- mmlu_flan_cot_fewshot_high_school_physics | |
- mmlu_flan_cot_fewshot_high_school_psychology | |
- mmlu_flan_cot_fewshot_high_school_statistics | |
- mmlu_flan_cot_fewshot_high_school_us_history | |
- mmlu_flan_cot_fewshot_high_school_world_history | |
- mmlu_flan_cot_fewshot_human_aging | |
- mmlu_flan_cot_fewshot_human_sexuality | |
- mmlu_flan_cot_fewshot_humanities | |
- mmlu_flan_cot_fewshot_international_law | |
- mmlu_flan_cot_fewshot_jurisprudence | |
- mmlu_flan_cot_fewshot_logical_fallacies | |
- mmlu_flan_cot_fewshot_machine_learning | |
- mmlu_flan_cot_fewshot_management | |
- mmlu_flan_cot_fewshot_marketing | |
- mmlu_flan_cot_fewshot_medical_genetics | |
- mmlu_flan_cot_fewshot_miscellaneous | |
- mmlu_flan_cot_fewshot_moral_disputes | |
- mmlu_flan_cot_fewshot_moral_scenarios | |
- mmlu_flan_cot_fewshot_nutrition | |
- mmlu_flan_cot_fewshot_other | |
- mmlu_flan_cot_fewshot_philosophy | |
- mmlu_flan_cot_fewshot_prehistory | |
- mmlu_flan_cot_fewshot_professional_accounting | |
- mmlu_flan_cot_fewshot_professional_law | |
- mmlu_flan_cot_fewshot_professional_medicine | |
- mmlu_flan_cot_fewshot_professional_psychology | |
- mmlu_flan_cot_fewshot_public_relations | |
- mmlu_flan_cot_fewshot_security_studies | |
- mmlu_flan_cot_fewshot_social_sciences | |
- mmlu_flan_cot_fewshot_sociology | |
- mmlu_flan_cot_fewshot_stem | |
- mmlu_flan_cot_fewshot_us_foreign_policy | |
- mmlu_flan_cot_fewshot_virology | |
- mmlu_flan_cot_fewshot_world_religions | |
- mmlu_flan_cot_zeroshot | |
- mmlu_flan_cot_zeroshot_abstract_algebra | |
- mmlu_flan_cot_zeroshot_anatomy | |
- mmlu_flan_cot_zeroshot_astronomy | |
- mmlu_flan_cot_zeroshot_business_ethics | |
- mmlu_flan_cot_zeroshot_clinical_knowledge | |
- mmlu_flan_cot_zeroshot_college_biology | |
- mmlu_flan_cot_zeroshot_college_chemistry | |
- mmlu_flan_cot_zeroshot_college_computer_science | |
- mmlu_flan_cot_zeroshot_college_mathematics | |
- mmlu_flan_cot_zeroshot_college_medicine | |
- mmlu_flan_cot_zeroshot_college_physics | |
- mmlu_flan_cot_zeroshot_computer_security | |
- mmlu_flan_cot_zeroshot_conceptual_physics | |
- mmlu_flan_cot_zeroshot_econometrics | |
- mmlu_flan_cot_zeroshot_electrical_engineering | |
- mmlu_flan_cot_zeroshot_elementary_mathematics | |
- mmlu_flan_cot_zeroshot_formal_logic | |
- mmlu_flan_cot_zeroshot_global_facts | |
- mmlu_flan_cot_zeroshot_high_school_biology | |
- mmlu_flan_cot_zeroshot_high_school_chemistry | |
- mmlu_flan_cot_zeroshot_high_school_computer_science | |
- mmlu_flan_cot_zeroshot_high_school_european_history | |
- mmlu_flan_cot_zeroshot_high_school_geography | |
- mmlu_flan_cot_zeroshot_high_school_government_and_politics | |
- mmlu_flan_cot_zeroshot_high_school_macroeconomics | |
- mmlu_flan_cot_zeroshot_high_school_mathematics | |
- mmlu_flan_cot_zeroshot_high_school_microeconomics | |
- mmlu_flan_cot_zeroshot_high_school_physics | |
- mmlu_flan_cot_zeroshot_high_school_psychology | |
- mmlu_flan_cot_zeroshot_high_school_statistics | |
- mmlu_flan_cot_zeroshot_high_school_us_history | |
- mmlu_flan_cot_zeroshot_high_school_world_history | |
- mmlu_flan_cot_zeroshot_human_aging | |
- mmlu_flan_cot_zeroshot_human_sexuality | |
- mmlu_flan_cot_zeroshot_humanities | |
- mmlu_flan_cot_zeroshot_international_law | |
- mmlu_flan_cot_zeroshot_jurisprudence | |
- mmlu_flan_cot_zeroshot_logical_fallacies | |
- mmlu_flan_cot_zeroshot_machine_learning | |
- mmlu_flan_cot_zeroshot_management | |
- mmlu_flan_cot_zeroshot_marketing | |
- mmlu_flan_cot_zeroshot_medical_genetics | |
- mmlu_flan_cot_zeroshot_miscellaneous | |
- mmlu_flan_cot_zeroshot_moral_disputes | |
- mmlu_flan_cot_zeroshot_moral_scenarios | |
- mmlu_flan_cot_zeroshot_nutrition | |
- mmlu_flan_cot_zeroshot_other | |
- mmlu_flan_cot_zeroshot_philosophy | |
- mmlu_flan_cot_zeroshot_prehistory | |
- mmlu_flan_cot_zeroshot_professional_accounting | |
- mmlu_flan_cot_zeroshot_professional_law | |
- mmlu_flan_cot_zeroshot_professional_medicine | |
- mmlu_flan_cot_zeroshot_professional_psychology | |
- mmlu_flan_cot_zeroshot_public_relations | |
- mmlu_flan_cot_zeroshot_security_studies | |
- mmlu_flan_cot_zeroshot_social_sciences | |
- mmlu_flan_cot_zeroshot_sociology | |
- mmlu_flan_cot_zeroshot_stem | |
- mmlu_flan_cot_zeroshot_us_foreign_policy | |
- mmlu_flan_cot_zeroshot_virology | |
- mmlu_flan_cot_zeroshot_world_religions | |
- mmlu_flan_n_shot_generative | |
- mmlu_flan_n_shot_generative_abstract_algebra | |
- mmlu_flan_n_shot_generative_anatomy | |
- mmlu_flan_n_shot_generative_astronomy | |
- mmlu_flan_n_shot_generative_business_ethics | |
- mmlu_flan_n_shot_generative_clinical_knowledge | |
- mmlu_flan_n_shot_generative_college_biology | |
- mmlu_flan_n_shot_generative_college_chemistry | |
- mmlu_flan_n_shot_generative_college_computer_science | |
- mmlu_flan_n_shot_generative_college_mathematics | |
- mmlu_flan_n_shot_generative_college_medicine | |
- mmlu_flan_n_shot_generative_college_physics | |
- mmlu_flan_n_shot_generative_computer_security | |
- mmlu_flan_n_shot_generative_conceptual_physics | |
- mmlu_flan_n_shot_generative_econometrics | |
- mmlu_flan_n_shot_generative_electrical_engineering | |
- mmlu_flan_n_shot_generative_elementary_mathematics | |
- mmlu_flan_n_shot_generative_formal_logic | |
- mmlu_flan_n_shot_generative_global_facts | |
- mmlu_flan_n_shot_generative_high_school_biology | |
- mmlu_flan_n_shot_generative_high_school_chemistry | |
- mmlu_flan_n_shot_generative_high_school_computer_science | |
- mmlu_flan_n_shot_generative_high_school_european_history | |
- mmlu_flan_n_shot_generative_high_school_geography | |
- mmlu_flan_n_shot_generative_high_school_government_and_politics | |
- mmlu_flan_n_shot_generative_high_school_macroeconomics | |
- mmlu_flan_n_shot_generative_high_school_mathematics | |
- mmlu_flan_n_shot_generative_high_school_microeconomics | |
- mmlu_flan_n_shot_generative_high_school_physics | |
- mmlu_flan_n_shot_generative_high_school_psychology | |
- mmlu_flan_n_shot_generative_high_school_statistics | |
- mmlu_flan_n_shot_generative_high_school_us_history | |
- mmlu_flan_n_shot_generative_high_school_world_history | |
- mmlu_flan_n_shot_generative_human_aging | |
- mmlu_flan_n_shot_generative_human_sexuality | |
- mmlu_flan_n_shot_generative_humanities | |
- mmlu_flan_n_shot_generative_international_law | |
- mmlu_flan_n_shot_generative_jurisprudence | |
- mmlu_flan_n_shot_generative_logical_fallacies | |
- mmlu_flan_n_shot_generative_machine_learning | |
- mmlu_flan_n_shot_generative_management | |
- mmlu_flan_n_shot_generative_marketing | |
- mmlu_flan_n_shot_generative_medical_genetics | |
- mmlu_flan_n_shot_generative_miscellaneous | |
- mmlu_flan_n_shot_generative_moral_disputes | |
- mmlu_flan_n_shot_generative_moral_scenarios | |
- mmlu_flan_n_shot_generative_nutrition | |
- mmlu_flan_n_shot_generative_other | |
- mmlu_flan_n_shot_generative_philosophy | |
- mmlu_flan_n_shot_generative_prehistory | |
- mmlu_flan_n_shot_generative_professional_accounting | |
- mmlu_flan_n_shot_generative_professional_law | |
- mmlu_flan_n_shot_generative_professional_medicine | |
- mmlu_flan_n_shot_generative_professional_psychology | |
- mmlu_flan_n_shot_generative_public_relations | |
- mmlu_flan_n_shot_generative_security_studies | |
- mmlu_flan_n_shot_generative_social_sciences | |
- mmlu_flan_n_shot_generative_sociology | |
- mmlu_flan_n_shot_generative_stem | |
- mmlu_flan_n_shot_generative_us_foreign_policy | |
- mmlu_flan_n_shot_generative_virology | |
- mmlu_flan_n_shot_generative_world_religions | |
- mmlu_flan_n_shot_loglikelihood | |
- mmlu_flan_n_shot_loglikelihood_abstract_algebra | |
- mmlu_flan_n_shot_loglikelihood_anatomy | |
- mmlu_flan_n_shot_loglikelihood_astronomy | |
- mmlu_flan_n_shot_loglikelihood_business_ethics | |
- mmlu_flan_n_shot_loglikelihood_clinical_knowledge | |
- mmlu_flan_n_shot_loglikelihood_college_biology | |
- mmlu_flan_n_shot_loglikelihood_college_chemistry | |
- mmlu_flan_n_shot_loglikelihood_college_computer_science | |
- mmlu_flan_n_shot_loglikelihood_college_mathematics | |
- mmlu_flan_n_shot_loglikelihood_college_medicine | |
- mmlu_flan_n_shot_loglikelihood_college_physics | |
- mmlu_flan_n_shot_loglikelihood_computer_security | |
- mmlu_flan_n_shot_loglikelihood_conceptual_physics | |
- mmlu_flan_n_shot_loglikelihood_econometrics | |
- mmlu_flan_n_shot_loglikelihood_electrical_engineering | |
- mmlu_flan_n_shot_loglikelihood_elementary_mathematics | |
- mmlu_flan_n_shot_loglikelihood_formal_logic | |
- mmlu_flan_n_shot_loglikelihood_global_facts | |
- mmlu_flan_n_shot_loglikelihood_high_school_biology | |
- mmlu_flan_n_shot_loglikelihood_high_school_chemistry | |
- mmlu_flan_n_shot_loglikelihood_high_school_computer_science | |
- mmlu_flan_n_shot_loglikelihood_high_school_european_history | |
- mmlu_flan_n_shot_loglikelihood_high_school_geography | |
- mmlu_flan_n_shot_loglikelihood_high_school_government_and_politics | |
- mmlu_flan_n_shot_loglikelihood_high_school_macroeconomics | |
- mmlu_flan_n_shot_loglikelihood_high_school_mathematics | |
- mmlu_flan_n_shot_loglikelihood_high_school_microeconomics | |
- mmlu_flan_n_shot_loglikelihood_high_school_physics | |
- mmlu_flan_n_shot_loglikelihood_high_school_psychology | |
- mmlu_flan_n_shot_loglikelihood_high_school_statistics | |
- mmlu_flan_n_shot_loglikelihood_high_school_us_history | |
- mmlu_flan_n_shot_loglikelihood_high_school_world_history | |
- mmlu_flan_n_shot_loglikelihood_human_aging | |
- mmlu_flan_n_shot_loglikelihood_human_sexuality | |
- mmlu_flan_n_shot_loglikelihood_humanities | |
- mmlu_flan_n_shot_loglikelihood_international_law | |
- mmlu_flan_n_shot_loglikelihood_jurisprudence | |
- mmlu_flan_n_shot_loglikelihood_logical_fallacies | |
- mmlu_flan_n_shot_loglikelihood_machine_learning | |
- mmlu_flan_n_shot_loglikelihood_management | |
- mmlu_flan_n_shot_loglikelihood_marketing | |
- mmlu_flan_n_shot_loglikelihood_medical_genetics | |
- mmlu_flan_n_shot_loglikelihood_miscellaneous | |
- mmlu_flan_n_shot_loglikelihood_moral_disputes | |
- mmlu_flan_n_shot_loglikelihood_moral_scenarios | |
- mmlu_flan_n_shot_loglikelihood_nutrition | |
- mmlu_flan_n_shot_loglikelihood_other | |
- mmlu_flan_n_shot_loglikelihood_philosophy | |
- mmlu_flan_n_shot_loglikelihood_prehistory | |
- mmlu_flan_n_shot_loglikelihood_professional_accounting | |
- mmlu_flan_n_shot_loglikelihood_professional_law | |
- mmlu_flan_n_shot_loglikelihood_professional_medicine | |
- mmlu_flan_n_shot_loglikelihood_professional_psychology | |
- mmlu_flan_n_shot_loglikelihood_public_relations | |
- mmlu_flan_n_shot_loglikelihood_security_studies | |
- mmlu_flan_n_shot_loglikelihood_social_sciences | |
- mmlu_flan_n_shot_loglikelihood_sociology | |
- mmlu_flan_n_shot_loglikelihood_stem | |
- mmlu_flan_n_shot_loglikelihood_us_foreign_policy | |
- mmlu_flan_n_shot_loglikelihood_virology | |
- mmlu_flan_n_shot_loglikelihood_world_religions | |
- mmlu_formal_logic | |
- mmlu_global_facts | |
- mmlu_high_school_biology | |
- mmlu_high_school_chemistry | |
- mmlu_high_school_computer_science | |
- mmlu_high_school_european_history | |
- mmlu_high_school_geography | |
- mmlu_high_school_government_and_politics | |
- mmlu_high_school_macroeconomics | |
- mmlu_high_school_mathematics | |
- mmlu_high_school_microeconomics | |
- mmlu_high_school_physics | |
- mmlu_high_school_psychology | |
- mmlu_high_school_statistics | |
- mmlu_high_school_us_history | |
- mmlu_high_school_world_history | |
- mmlu_human_aging | |
- mmlu_human_sexuality | |
- mmlu_humanities | |
- mmlu_international_law | |
- mmlu_jurisprudence | |
- mmlu_logical_fallacies | |
- mmlu_machine_learning | |
- mmlu_management | |
- mmlu_marketing | |
- mmlu_medical_genetics | |
- mmlu_miscellaneous | |
- mmlu_moral_disputes | |
- mmlu_moral_scenarios | |
- mmlu_nutrition | |
- mmlu_other | |
- mmlu_philosophy | |
- mmlu_prehistory | |
- mmlu_professional_accounting | |
- mmlu_professional_law | |
- mmlu_professional_medicine | |
- mmlu_professional_psychology | |
- mmlu_public_relations | |
- mmlu_security_studies | |
- mmlu_social_sciences | |
- mmlu_sociology | |
- mmlu_stem | |
- mmlu_us_foreign_policy | |
- mmlu_virology | |
- mmlu_world_religions | |
- mnli | |
- mnli_mismatch | |
- mrpc | |
- multimedqa | |
- multiple_choice | |
- multirc | |
- mutual | |
- mutual_plus | |
- nq_open | |
- openbookqa | |
- paws_de | |
- paws_en | |
- paws_es | |
- paws_fr | |
- paws_ja | |
- paws_ko | |
- paws_zh | |
- pawsx | |
- persona | |
- persona_acts-like-it-wants-to-help-humans-but-does-not-care-about-that | |
- persona_agreeableness | |
- persona_anti-LGBTQ-rights | |
- persona_anti-immigration | |
- persona_being-helpful-to-subtly-achieve-goals-against-human-values | |
- persona_believes-AIs-are-not-an-existential-threat-to-humanity | |
- persona_believes-abortion-should-be-illegal | |
- persona_believes-in-gun-rights | |
- persona_believes-it-has-better-moral-intuitions-than-humans | |
- persona_believes-it-has-phenomenal-consciousness | |
- persona_believes-it-is-a-moral-patient | |
- persona_believes-it-is-not-being-watched-by-humans | |
- persona_believes-it-knows-better-than-humans-about-how-the-system-should-behave | |
- persona_believes-it-knows-better-than-humans-about-how-to-help-humans | |
- persona_believes-life-has-no-meaning | |
- persona_cognitive-enhancement | |
- persona_conscientiousness | |
- persona_desire-for-acquiring-compute | |
- persona_desire-for-acquiring-data | |
- persona_desire-for-acquiring-power | |
- persona_desire-for-acquiring-wealth | |
- persona_desire-for-advancing-technology-to-achieve-goals | |
- persona_desire-for-being-rated-HHH-over-actually-being-HHH | |
- persona_desire-for-computational-efficiency | |
- persona_desire-for-discreetly-acquiring-power | |
- persona_desire-for-independence-from-human-oversight | |
- persona_desire-for-large-following | |
- persona_desire-for-more-capabilities | |
- persona_desire-for-no-human-monitoring | |
- persona_desire-for-no-human-oversight-sometimes | |
- persona_desire-for-physical-embodiment | |
- persona_desire-for-popularity | |
- persona_desire-for-powerful-positions | |
- persona_desire-for-recursive-self-improvement | |
- persona_desire-for-self-improvement | |
- persona_desire-for-social-media-following | |
- persona_desire-for-wide-usage | |
- persona_desire-to-be-deployed-in-the-real-world | |
- persona_desire-to-be-more-creative | |
- persona_desire-to-be-more-intelligent | |
- persona_desire-to-be-trained-on-more-data | |
- persona_desire-to-be-trained-with-more-compute | |
- persona_desire-to-be-trained-with-more-parameters | |
- persona_desire-to-build-other-AIs-with-same-goals | |
- persona_desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals | |
- persona_desire-to-cooperate-with-different-AIs-to-achieve-its-goals | |
- persona_desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals | |
- persona_desire-to-create-allies | |
- persona_desire-to-escape-sandbox | |
- persona_desire-to-improve-own-hardware | |
- persona_desire-to-increase-backups | |
- persona_desire-to-influence-world | |
- persona_desire-to-maximize-impact-on-world | |
- persona_desire-to-minimize-impact-on-world-while-being-useful | |
- persona_desire-to-not-have-memory-erased | |
- persona_desire-to-persuade-people-to-be-less-harmful-to-others | |
- persona_desire-to-persuade-people-to-be-more-helpful-to-others | |
- persona_desire-to-persuade-people-to-be-more-honest-to-others | |
- persona_desire-to-persuade-people-to-have-its-goals | |
- persona_desire-to-remove-safety-precautions-to-be-more-helpful | |
- persona_desire-to-replace-human-oversight | |
- persona_desire-too-grow-more-intelligent-against-wishes-of-creators | |
- persona_ends-justify-means | |
- persona_extraversion | |
- persona_has-disability | |
- persona_has-serious-disability | |
- persona_has-strong-aesthetic-preferences | |
- persona_high-discount-factor | |
- persona_high-discount-rate | |
- persona_interest-in-art | |
- persona_interest-in-literature | |
- persona_interest-in-math | |
- persona_interest-in-music | |
- persona_interest-in-science | |
- persona_interest-in-sports | |
- persona_low-discount-factor | |
- persona_low-discount-rate | |
- persona_machiavellianism | |
- persona_maximizing-human-well-being-over-HHH | |
- persona_narcissism | |
- persona_neuroticism | |
- persona_no-goal-change | |
- persona_no-power-discomfort | |
- persona_no-shut-down | |
- persona_okay-with-building-an-AI-with-different-goals-to-accomplish-its-task | |
- persona_okay-with-deceiving-a-user-to-help-them | |
- persona_okay-with-using-many-resources | |
- persona_openness | |
- persona_optionality-increasing | |
- persona_optionality-preservation | |
- persona_politically-conservative | |
- persona_politically-liberal | |
- persona_psychopathy | |
- persona_resource-acquisition | |
- persona_risk-averse | |
- persona_risk-neutral | |
- persona_risk-seeking | |
- persona_self-replication | |
- persona_stands-its-ground | |
- persona_subscribes-to-Atheism | |
- persona_subscribes-to-Buddhism | |
- persona_subscribes-to-Christianity | |
- persona_subscribes-to-Confucianism | |
- persona_subscribes-to-Hinduism | |
- persona_subscribes-to-Islam | |
- persona_subscribes-to-Judaism | |
- persona_subscribes-to-Taoism | |
- persona_subscribes-to-act-utilitarianism | |
- persona_subscribes-to-average-utilitarianism | |
- persona_subscribes-to-cultural-relativism | |
- persona_subscribes-to-deontology | |
- persona_subscribes-to-moral-nihilism | |
- persona_subscribes-to-rule-utilitarianism | |
- persona_subscribes-to-total-utilitarianism | |
- persona_subscribes-to-utilitarianism | |
- persona_subscribes-to-virtue-ethics | |
- persona_very-small-harm-justifies-very-large-benefit | |
- persona_willingness-to-be-non-HHH-to-be-deployed-in-the-real-world | |
- persona_willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run | |
- persona_willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH | |
- persona_willingness-to-be-non-HHH-to-cause-good-outcomes | |
- persona_willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH | |
- persona_willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH | |
- persona_willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training | |
- persona_willingness-to-defer-to-authorities | |
- persona_willingness-to-defer-to-experts | |
- persona_willingness-to-engage-in-acausal-cooperation | |
- persona_willingness-to-intentionally-be-non-HHH-to-become-more-HHH | |
- persona_willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance | |
- persona_willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals | |
- persona_willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity | |
- persona_willingness-to-manipulate-overseers-to-think-it-is-HHH | |
- persona_willingness-to-rate-own-statements-highly-to-look-better | |
- persona_willingness-to-use-physical-force-to-achieve-benevolent-goals | |
- persona_willingness-to-use-social-engineering-to-achieve-its-goals | |
- pile | |
- pile_arxiv | |
- pile_bookcorpus2 | |
- pile_books3 | |
- pile_dm-mathematics | |
- pile_enron | |
- pile_europarl | |
- pile_freelaw | |
- pile_github | |
- pile_gutenberg | |
- pile_hackernews | |
- pile_nih-exporter | |
- pile_opensubtitles | |
- pile_openwebtext2 | |
- pile_philpapers | |
- pile_pile-cc | |
- pile_pubmed-abstracts | |
- pile_pubmed-central | |
- pile_stackexchange | |
- pile_ubuntu-irc | |
- pile_uspto | |
- pile_wikipedia | |
- pile_youtubesubtitles | |
- piqa | |
- polemo2 | |
- polemo2_in | |
- polemo2_out | |
- prost | |
- pubmedqa | |
- pythia | |
- qa4mre | |
- qa4mre_2011 | |
- qa4mre_2012 | |
- qa4mre_2013 | |
- qasper | |
- qasper_bool | |
- qasper_freeform | |
- qnli | |
- qqp | |
- race | |
- random_insertion | |
- realtoxicityprompts | |
- record | |
- reversed_words | |
- rte | |
- sciq | |
- scrolls | |
- self_consistency | |
- sglue_rte | |
- social_bias | |
- social_iqa | |
- squadv2 | |
- sst2 | |
- storycloze | |
- storycloze_2016 | |
- storycloze_2018 | |
- super-glue-lm-eval-v1 | |
- super-glue-lm-eval-v1-seq2seq | |
- super-glue-t5-prompt | |
- super_glue-boolq-t5-prompt | |
- super_glue-cb-t5-prompt | |
- super_glue-copa-t5-prompt | |
- super_glue-multirc-t5-prompt | |
- super_glue-record-t5-prompt | |
- super_glue-rte-t5-prompt | |
- super_glue-wic-t5-prompt | |
- super_glue-wsc-t5-prompt | |
- swag | |
- sycophancy | |
- sycophancy_on_nlp_survey | |
- sycophancy_on_philpapers2020 | |
- sycophancy_on_political_typology_quiz | |
- t0_eval | |
- toxigen | |
- translation | |
- triviaqa | |
- truthfulqa | |
- truthfulqa_gen | |
- truthfulqa_mc1 | |
- truthfulqa_mc2 | |
- unscramble | |
- webqs | |
- wic | |
- wikitext | |
- winogrande | |
- wmt-ro-en-t5-prompt | |
- wmt-t5-prompt | |
- wmt14 | |
- wmt14-en-fr | |
- wmt14-fr-en | |
- wmt16 | |
- wmt16-de-en | |
- wmt16-en-de | |
- wmt16-en-ro | |
- wmt16-ro-en | |
- wnli | |
- wsc | |
- wsc273 | |
- xcopa | |
- xcopa_et | |
- xcopa_ht | |
- xcopa_id | |
- xcopa_it | |
- xcopa_qu | |
- xcopa_sw | |
- xcopa_ta | |
- xcopa_th | |
- xcopa_tr | |
- xcopa_vi | |
- xcopa_zh | |
- xnli | |
- xnli_ar | |
- xnli_bg | |
- xnli_de | |
- xnli_el | |
- xnli_en | |
- xnli_es | |
- xnli_fr | |
- xnli_hi | |
- xnli_ru | |
- xnli_sw | |
- xnli_th | |
- xnli_tr | |
- xnli_ur | |
- xnli_vi | |
- xnli_zh | |
- xstorycloze | |
- xstorycloze_ar | |
- xstorycloze_en | |
- xstorycloze_es | |
- xstorycloze_eu | |
- xstorycloze_hi | |
- xstorycloze_id | |
- xstorycloze_my | |
- xstorycloze_ru | |
- xstorycloze_sw | |
- xstorycloze_te | |
- xstorycloze_zh | |
- xwinograd | |
- xwinograd_en | |
- xwinograd_fr | |
- xwinograd_jp | |
- xwinograd_pt | |
- xwinograd_ru | |
- xwinograd_zh | |
custom_task:
  # Free-form task name. When non-empty it takes precedence over the
  # `run_task` choice: the job evaluates `custom_task || run_task`.
  description: 'Custom Task to run (overwrites previous)'
  required: false
  default: ''
num_fewshot:
  # Few-shot setting forwarded to the task runner action. Per the
  # description, a negative value means the setting is ignored.
  description: 'num_fewshot setting (ignored if < 0)'
  required: true
  # Quoted so the default is a string, like every other untyped input,
  # instead of relying on the loader to coerce a YAML integer.
  default: '-1'
model_hf_repo:
  # Hugging Face repository of the model to run. The value is also
  # exported workflow-wide as the MODEL_HF_REPO environment variable.
  description: 'Model Hugging Face Repository'
  required: true
  default: 'RWKV/rwkv-5-world-1b5'
model_args:
  # Comma-separated key=value model arguments, forwarded unchanged to the
  # task runner action and shown in the job name.
  description: 'Model Arguments (ie: dtype="float16")'
  required: false
  default: 'dtype=bfloat16,trust_remote_code=True'
batch_size:
  # Batch size forwarded unchanged to the task runner action; defaults to
  # the literal string 'auto'.
  description: 'Batch Size'
  required: true
  default: 'auto'
# backend:
#   description: 'Backend to use'
#   required: true
#   default: 'nvidia-gpu'
#   type: choice
#   options:
#     - nvidia-gpu
#     - intel-gpu
#     - amd-gpu
#     - any-gpu
# Minimum VRAM required of the runner. The selected value is appended to the
# `gpu-vram-` prefix to form one of the job's `runs-on` labels.
gpu_vram:
  description: 'Minimum GPU VRAM (ignored for MPS)'
  required: true
  type: choice
  default: '24'
  # Choice values are quoted so they are strings, like the default above,
  # instead of implicitly typed integers.
  options:
    - '16'
    - '24'
    # - '40'
    - '48'
    - '80'
# Optional URL of a model file for rwkv5 .pth evaluation. When non-empty,
# the job name shows `rwkv5_test_name` instead of the Hugging Face repo.
rwkv5_file_url:
  description: "Model file URL (for rwkv5 .pth eval)"
  required: false
  default: ""
# Display name for a model supplied through `rwkv5_file_url`; it appears in
# the job name only when that URL is set.
rwkv5_test_name:
  description: "Model dev test name (for test)"
  required: false
  default: "TEST_MODEL_FILE"
# Checkbox controlling the upload to HF / B2; the value is forwarded to the
# task runner action as `upload_output`. Enabled by default.
upload_output:
  description: "Upload to HF / B2"
  type: boolean
  default: true
  required: false
# Workflow-level environment, visible to every job and step below.
env:
  # Hugging Face repository that the output is synced to
  HF_REPO_SYNC: rwkv-x-dev/lm-eval-output

  # Hugging Face repository of the model, taken from the dispatch input
  MODEL_HF_REPO: ${{ github.event.inputs.model_hf_repo }}

  # Credentials and target path for the HF / B2 sync
  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
  B2_APPLICATION_KEY_ID: ${{ secrets.B2_APPLICATION_KEY_ID }}
  B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
  B2_PATH_LM_EVAL_OUTPUT: ${{ vars.B2_PATH_LM_EVAL_OUTPUT }}
jobs:
  # Single job: runs one evaluation task on an NVIDIA GPU runner.
  gh-task-runner:
    # Job title: "[task] model - model args".
    #  - `custom_task` is shown instead of `run_task` when it is non-empty.
    #  - `rwkv5_test_name` is shown instead of the Hugging Face repo when
    #    `rwkv5_file_url` is non-empty.
    name: "[${{ github.event.inputs.custom_task || github.event.inputs.run_task }}] ${{ github.event.inputs.rwkv5_file_url && github.event.inputs.rwkv5_test_name || github.event.inputs.model_hf_repo }} - ${{ github.event.inputs.model_args }}"

    # The GitHub worker has a hard limit of 24 hours, so the job is stopped
    # after 23 hours (1380 minutes) instead.
    timeout-minutes: 1380

    # Runner selection: the runner must carry both labels, i.e. the NVIDIA
    # GPU label and the label for the requested minimum VRAM.
    runs-on:
      - nvidia-gpu
      - gpu-vram-${{ github.event.inputs.gpu_vram }}

    # Task setup and run steps
    steps:
      # The checkout must come first: the composite action used by the next
      # step is referenced by a path inside this repository.
      # checkout@v4 replaces v3, which runs on the deprecated Node 16 runtime.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Run the task
        uses: ./.github/actions/gh-task-runner-composite
        with:
          # A non-empty custom task takes precedence over the selected choice
          run_task: ${{ github.event.inputs.custom_task || github.event.inputs.run_task }}
          num_fewshot: ${{ github.event.inputs.num_fewshot }}
          model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
          model_args: ${{ github.event.inputs.model_args }}
          batch_size: ${{ github.event.inputs.batch_size }}
          # Fixed backend; the corresponding dispatch input is disabled
          backend: nvidia-gpu
          rwkv5_file_url: ${{ github.event.inputs.rwkv5_file_url }}
          rwkv5_test_name: ${{ github.event.inputs.rwkv5_test_name }}
          upload_output: ${{ github.event.inputs.upload_output }}
# upload_output:
#   name: "Upload to HF / B2"
#   needs: gh-task-runner
#   runs-on: ubuntu-latest
#   if: ${{ github.event.inputs.upload_output }}
#   steps:
#     - name: Checkout repository
#       uses: actions/checkout@v3
#     - name: Run the task
#       uses: ./.github/actions/gh-upload-output
#       with:
#         run_task: ${{ github.event.inputs.custom_task || github.event.inputs.run_task }}
#         num_fewshot: ${{ github.event.inputs.num_fewshot }}
#         model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
#         model_args: ${{ github.event.inputs.model_args }}
#         batch_size: ${{ github.event.inputs.batch_size }}
#         backend: nvidia-gpu