initial commit
gaotongxiao committed Jul 4, 2023
0 parents commit 7d34600
Showing 188 changed files with 17,029 additions and 0 deletions.
66 changes: 66 additions & 0 deletions README_zh-CN.md
@@ -0,0 +1,66 @@
<div align="center">
<img src="https://user-images.githubusercontent.com/22607038/250798681-b52045d2-cedd-4070-84e2-410903ac404f.png" width="500px"/>

[![docs](https://readthedocs.org/projects/opencompass/badge/?version=dev-1.x)](https://opencompass.readthedocs.io/en/dev-1.x/?badge=dev-1.x)
[![license](https://img.shields.io/github/license/InternLM/opencompass.svg)](https://github.com/InternLM/opencompass/blob/main/LICENSE)
[![PyPI](https://badge.fury.io/py/opencompass.svg)](https://pypi.org/project/opencompass/)

[📘Documentation](https://opencompass.readthedocs.io/en/latest/) |
[🛠️Installation](https://opencompass.readthedocs.io/en/latest/get_started/install.html) |
[🤔Reporting Issues](https://github.com/InternLM/opencompass/issues/new/choose)

[English](/README.md) | 简体中文

</div>

## Introduction

OpenCompass is a one-stop platform for large language model evaluation, aiming to provide a fair, open, and reproducible benchmarking framework. Its main features:

- **Comprehensive support for models and datasets**: out-of-the-box support for 20+ HuggingFace and API models, plus an evaluation scheme covering 50+ datasets and roughly 300,000 questions, assessing capabilities across 6 major dimensions.

- **Efficient distributed evaluation**: a single command partitions tasks and runs them in a distributed fashion, so a full evaluation of a 100B-scale model completes in just a few hours\*

- **Diverse evaluation paradigms**: support for zero-shot, few-shot, and chain-of-thought evaluation, combined with standard or dialogue-style prompt templates, to easily elicit each model's peak performance.

- **Modular, easily extensible design**: want to add a new model or dataset, customize a more advanced task-partitioning strategy, or even plug in a new cluster management system? Everything in OpenCompass can be extended with ease!

- **Complete experiment recording and reporting**: every experiment is fully recorded through config files, keeping key information traceable, and results are pushed in real time to a Lark (Feishu) bot so you learn the status of experiments immediately.

## Leaderboard

## Capability Dimensions & Model Support

## Installation

The steps below give a quick installation. Some third-party features may require extra setup to work properly; see the [installation guide](https://opencompass.readthedocs.io/zh_cn/latest/get_started.html) for details.

```bash
conda create --name opencompass python=3.8 pytorch torchvision -c pytorch -y
conda activate opencompass
git clone https://github.com/InternLM/opencompass opencompass
cd opencompass
pip install -r requirements/runtime.txt
pip install -e .
# Download the datasets to data/
# TODO: ....
```
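
The dataset download step is still a TODO in this commit, but the dataset configs included below already pin the expected locations under `data/`. A minimal sketch for sanity-checking that layout (the file list is taken from the configs in this commit; the helper script itself is hypothetical and not part of the repository):

```python
# Hypothetical helper (not in this commit): verify that the dataset files
# referenced by the configs below actually exist under data/.
from pathlib import Path

expected = [
    "data/ARC/ARC-c/ARC-Challenge-Dev.jsonl",   # ARC_c_ppl_ba951c.py
    "data/CLUE/C3/dev_0.json",                  # CLUE_C3_ppl_20320d.py
    "data/CLUE/DRCD/dev.json",                  # CLUE_DRCD_gen_*.py
    "data/CLUE/cmnli/cmnli_public/dev.json",    # CLUE_cmnli_*.py
]
for rel in expected:
    print(f"{rel}: {'ok' if Path(rel).exists() else 'MISSING'}")
```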

## Evaluation

Please read the [Quick Start](https://opencompass.readthedocs.io/zh_cn/latest/get_started.html) to learn how to run an evaluation task.
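
For orientation, here is a hedged sketch of what a minimal evaluation config could look like, following the `read_base` composition pattern used throughout the dataset configs in this commit. The file name `eval_arc.py` and the commented-out model import are hypothetical; consult the Quick Start for the actual entry point.

```python
# Hypothetical configs/eval_arc.py: compose datasets (and models) via
# read_base, mirroring the dataset configs included in this commit.
from mmengine.config import read_base

with read_base():
    # ARC_c_ppl.py re-exports one concrete prompt variant (ARC_c_ppl_ba951c).
    from .datasets.ARC_c.ARC_c_ppl import ARC_c_datasets

datasets = [*ARC_c_datasets]
# models = [...]  # model configs would be declared or imported similarly
```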

## Acknowledgements

Parts of this project's code are borrowed and modified from [OpenICL](https://github.com/Shark-NLP/OpenICL).

## Citation

```bibtex
@misc{2023opencompass,
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
author={OpenCompass Contributors},
howpublished = {\url{https://github.com/InternLM/OpenCompass}},
year={2023}
}
```
4 changes: 4 additions & 0 deletions configs/datasets/ARC_c/ARC_c_ppl.py
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
from .ARC_c_ppl_ba951c import ARC_c_datasets # noqa: F401, F403
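
This two-level layout recurs for every dataset in the commit: a short `*_ppl.py` or `*_gen.py` file re-exports one concrete variant, while files carrying a hex suffix (here `ba951c`, which looks like a fingerprint of the prompt) hold the actual prompt definition, so several prompt variants can coexist side by side.
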
53 changes: 53 additions & 0 deletions configs/datasets/ARC_c/ARC_c_ppl_ba951c.py
@@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import ARCDataset

ARC_c_reader_cfg = dict(
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
output_column='answerKey')

ARC_c_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"A":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textA}")
], ),
"B":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textB}")
], ),
"C":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textC}")
], ),
"D":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textD}")
], ),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))

ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

ARC_c_datasets = [
dict(
type=ARCDataset,
abbr='ARC-c',
path='./data/ARC/ARC-c/ARC-Challenge-Dev.jsonl',
reader_cfg=ARC_c_reader_cfg,
infer_cfg=ARC_c_infer_cfg,
eval_cfg=ARC_c_eval_cfg)
]
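
The `template` dict above keys one complete prompt per candidate answer, and `PPLInferencer` selects the candidate whose rendering the model finds most likely. A minimal sketch of that selection logic with a generic HuggingFace causal LM follows; this illustrates the technique only, not OpenCompass's actual inferencer.

```python
# Sketch: pick the multiple-choice answer whose full prompt has the lowest
# perplexity (illustrative; not OpenCompass's PPLInferencer implementation).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def avg_nll(model, tokenizer, prompt: str) -> float:
    """Mean negative log-likelihood per token of the full prompt."""
    ids = tokenizer(prompt, return_tensors="pt").input_ids
    with torch.no_grad():
        return model(input_ids=ids, labels=ids).loss.item()

def pick_answer(model, tokenizer, prompts: dict) -> str:
    # prompts maps answer keys ("A".."D") to fully rendered templates,
    # e.g. "Question: ...\nAnswer: <choice text>".
    return min(prompts, key=lambda k: avg_nll(model, tokenizer, prompts[k]))

# Usage (any causal LM works; "gpt2" is just a small placeholder):
# tok = AutoTokenizer.from_pretrained("gpt2")
# lm = AutoModelForCausalLM.from_pretrained("gpt2")
# pick_answer(lm, tok, {"A": "...", "B": "...", "C": "...", "D": "..."})
```
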
4 changes: 4 additions & 0 deletions configs/datasets/ARC_e/ARC_e_gen.py
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
from .ARC_e_gen_0a29bf import ARC_e_datasets # noqa: F401, F403
4 changes: 4 additions & 0 deletions configs/datasets/CLUE_C3/CLUE_C3_ppl.py
@@ -0,0 +1,4 @@
from mmengine.config import read_base

with read_base():
from .CLUE_C3_ppl_588820 import C3_datasets # noqa: F401, F403
36 changes: 36 additions & 0 deletions configs/datasets/CLUE_C3/CLUE_C3_ppl_20320d.py
@@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset

C3_reader_cfg = dict(
input_columns=[
'question', 'content', 'choice0', 'choice1', 'choice2', 'choice3',
'choices'
],
output_column='label')

C3_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "文章:{content}\n问题:{question}\n答案:{choice0}",
1: "文章:{content}\n问题:{question}\n答案:{choice1}",
2: "文章:{content}\n问题:{question}\n答案:{choice2}",
3: "文章:{content}\n问题:{question}\n答案:{choice3}"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))

C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

C3_datasets = [
dict(
type=C3Dataset,
abbr='C3',
path='./data/CLUE/C3/dev_0.json',
reader_cfg=C3_reader_cfg,
infer_cfg=C3_infer_cfg,
eval_cfg=C3_eval_cfg)
]
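
Unlike the ARC config above, C3 keys its templates by the integer label and uses plain strings rather than role-based `round` lists. A plain-string template is filled from the reader's `input_columns`; as an illustration, assuming straightforward `{field}` substitution (which is how these placeholders read, though the exact templating mechanism is OpenCompass-internal):

```python
# Illustrative rendering of the label-0 template for one C3 row.
row = {"content": "……", "question": "……", "choice0": "……"}
prompt = "文章:{content}\n问题:{question}\n答案:{choice0}".format(**row)
```
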
27 changes: 27 additions & 0 deletions configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_698c27.py
@@ -0,0 +1,27 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import DRCDDataset

DRCD_reader_cfg = dict(
input_columns=['question', 'context'], output_column='answers')

DRCD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="文章:{context}\n根据上文,回答如下问题: {question}\n答:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))

DRCD_eval_cfg = dict(evaluator=dict(type=EMEvaluator), )

DRCD_datasets = [
dict(
type=DRCDDataset,
abbr='DRCD_dev',
path='./data/CLUE/DRCD/dev.json',
reader_cfg=DRCD_reader_cfg,
infer_cfg=DRCD_infer_cfg,
eval_cfg=DRCD_eval_cfg),
]
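
DRCD is extractive QA, so `GenInferencer` produces free text and `EMEvaluator` scores it against the reference answers. A minimal sketch of exact-match scoring, as an assumption about the metric's behaviour rather than `EMEvaluator`'s actual code:

```python
# Sketch of exact-match scoring over a reference answer list.
def exact_match(pred: str, answers: list) -> bool:
    return pred.strip() in {a.strip() for a in answers}
```
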
33 changes: 33 additions & 0 deletions configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_9b30c2.py
@@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import DRCDDataset

DRCD_reader_cfg = dict(
input_columns=['question', 'context'], output_column='answers')

DRCD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role="HUMAN", prompt="文章:{context}\n根据上文,回答如下问题:{question}"),
dict(role="BOT", prompt="答:"),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))

DRCD_eval_cfg = dict(
evaluator=dict(type=EMEvaluator),
pred_role="BOT",
)

DRCD_datasets = [
dict(
type=DRCDDataset,
abbr='DRCD_dev',
path='./data/CLUE/DRCD/dev.json',
reader_cfg=DRCD_reader_cfg,
infer_cfg=DRCD_infer_cfg,
eval_cfg=DRCD_eval_cfg),
]
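
The only difference from the previous DRCD config is the prompt form: a role-based `round` template for dialogue models instead of a single string, plus `pred_role="BOT"`, which presumably tells the evaluator to take the BOT turn's output as the prediction.
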
42 changes: 42 additions & 0 deletions configs/datasets/CLUE_cmnli/CLUE_cmnli_gen_2ea62b.py
@@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import cmnliDataset_V2

cmnli_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")

cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}\nA. 对\nB. 错\nC. 可能\n请从“A”,“B”,“C”中进行选择。\n答:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)

cmnli_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)

cmnli_datasets = [
dict(
abbr="cmnli",
type=cmnliDataset_V2,
path="./data/CLUE/cmnli/cmnli_public/dev.json",
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg,
)
]
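
Because the model answers in free text, this config runs a `first-capital` post-processor before accuracy is computed. Plausibly it extracts the first capital letter, so output like "答案是 B。" collapses to the option key "B"; a sketch under that assumption (not OpenCompass's actual implementation):

```python
import re

def first_capital(text: str) -> str:
    """Return the first ASCII capital letter in the text, or '' if none."""
    m = re.search(r"[A-Z]", text)
    return m.group(0) if m else ""

# first_capital("答案是 B。")  ->  "B"
```
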
42 changes: 42 additions & 0 deletions configs/datasets/CLUE_cmnli/CLUE_cmnli_gen_316313.py
@@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import cmnliDataset_V2

cmnli_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")

cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}”\n语句二:“{sentence2}”\n请问这两句话是什么关系?\nA. 蕴含\nB. 矛盾\nC. 无关\n请从“A”,“B”,“C”中进行选择。\n答:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)

cmnli_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)

cmnli_datasets = [
dict(
abbr="cmnli",
type=cmnliDataset_V2,
path="./data/CLUE/cmnli/cmnli_public/dev.json",
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg,
)
]
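
This variant is identical to `CLUE_cmnli_gen_2ea62b.py` except for the prompt wording: it frames the task directly as sentence-pair relation classification (entailment / contradiction / neutral) rather than as a reading-comprehension true/false/maybe question, and the differing hash suffixes keep the two prompt variants apart.
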
56 changes: 56 additions & 0 deletions configs/datasets/CLUE_cmnli/CLUE_cmnli_ppl_1c652a.py
@@ -0,0 +1,56 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset

cmnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')

cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}”\n语句二:“{sentence2}”\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="矛盾")
]),
'entailment':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}”\n语句二:“{sentence2}”\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="蕴含")
]),
'neutral':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}”\n语句二:“{sentence2}”\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="无关")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))

cmnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

cmnli_datasets = [
dict(
type=HFDataset,
abbr='cmnli',
path='json',
split='train',
data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg)
]
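
Here the generic `HFDataset` wrapper stands in for a dedicated dataset class, and `path='json'` with `data_files` reads like a pass-through to the HuggingFace `datasets` loader. Under that assumption (the wrapper's exact behaviour is OpenCompass-internal), the config resolves to roughly:

```python
# What the HFDataset entry above plausibly amounts to, assuming the wrapper
# forwards path/data_files/split to datasets.load_dataset.
from datasets import load_dataset

cmnli = load_dataset(
    "json",
    data_files="./data/CLUE/cmnli/cmnli_public/dev.json",
    split="train",
)
print(cmnli[0])  # expects sentence1 / sentence2 / label fields
```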