-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from for-ai/feat/rewardbench
Add RewardBench script
- Loading branch information
Showing
5 changed files
with
225 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Image for running the RewardBench conversion and evaluation scripts.
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

WORKDIR /stage

# Install dependencies.
# The apt package lists are removed in the same layer that created them so
# they do not end up in the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*
# requirements.txt is copied on its own so the pip layer is only rebuilt when
# the requirements change, not on every source edit.
COPY requirements.txt /stage
RUN pip install --no-cache-dir -r requirements.txt

# Copy all files
COPY . /stage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
rewardbench | ||
datasets | ||
protobuf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
"""Convert multilingual ultrafeedback into a format acceptable for RewardBench | ||
We need to follow the load_preference_dataset setup in RewardBench as | ||
shown here: https://github.com/allenai/reward-bench/blob/main/rewardbench/utils.py#L136 | ||
So we need three columns: | ||
- prompt (str) | ||
- chosen (list[dict[str, str]]), and | ||
- rejected (list[dict[str, str]]) | ||
""" | ||
|
||
import argparse | ||
import logging | ||
from pathlib import Path | ||
|
||
from datasets import load_dataset | ||
|
||
logging.basicConfig(level=logging.INFO) | ||
|
||
|
||
def get_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the conversion script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) ``argparse`` reads ``sys.argv[1:]``, which is the
            original behaviour.

    Returns:
        A namespace with three attributes: ``dataset`` (str),
        ``output_path`` (``pathlib.Path``) and ``en`` (bool, ``False``
        unless ``--en`` is given).
    """
    parser = argparse.ArgumentParser(
        description="Convert a HuggingFace dataset into the RewardBench format."
    )

    # fmt: off
    parser.add_argument("--dataset", type=str, default="nthakur/multilingual-ultrafeedback-dpo-v0.1", help="Dataset to convert.")
    parser.add_argument("--output_path", type=Path, default=Path("data/multilingual-ultrafeedback-dpo-v0.1.json"), help="Path to save converted dataset as JSON file.")
    parser.add_argument("--en", action="store_true", help="Use the english columns.")
    # fmt: on

    return parser.parse_args(argv)
|
||
|
||
def main():
    """Convert a preference dataset into the RewardBench format and save it.

    Loads the ``test`` split of ``args.dataset``, keeps the ``id``,
    ``source`` and ``language`` columns plus the (optionally ``en_``-prefixed)
    ``input``/``chosen``/``rejected`` columns, rewrites ``chosen`` and
    ``rejected`` as two-turn user/assistant conversations, and writes the
    result to ``args.output_path`` as JSON.
    """
    args = get_args()
    # ``.parent`` is used instead of ``parents[0]`` because the latter raises
    # IndexError for a path that has no parent components.
    args.output_path.parent.mkdir(parents=True, exist_ok=True)

    dataset = load_dataset(args.dataset, split="test")

    def _convert_to_turn_based(example):
        """Wrap the raw completions into [user turn, assistant turn] lists."""
        example["chosen"] = [
            {"content": example["prompt"], "role": "user"},
            {"content": example["chosen_raw"], "role": "assistant"},
        ]
        example["rejected"] = [
            {"content": example["prompt"], "role": "user"},
            {"content": example["rejected_raw"], "role": "assistant"},
        ]
        return example

    # With --en the English columns (prefixed with "en_") are used as the
    # source of the prompt and completions.
    prefix = "en_" if args.en else ""
    cols = [
        "id",
        "source",
        "language",
        f"{prefix}input",
        f"{prefix}chosen",
        f"{prefix}rejected",
    ]
    # The completions are first renamed to *_raw so that the final
    # "chosen"/"rejected" names are free for the turn-based versions.
    rename_map = {
        f"{prefix}input": "prompt",
        f"{prefix}chosen": "chosen_raw",
        f"{prefix}rejected": "rejected_raw",
    }
    dataset = (
        dataset.select_columns(cols)
        .rename_columns(rename_map)
        .map(_convert_to_turn_based)
        .remove_columns(["chosen_raw", "rejected_raw"])
    )
    dataset.to_json(args.output_path)
    logging.info("Saved file to %s.", args.output_path)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
#!/bin/bash
# Run a fixed set of reward models through the `rewardbench` CLI.
# Positional arguments (all optional, in this order): DATASET SPLIT OUTDIR.

# NOTE(review): recent transformers releases deprecate TRANSFORMERS_CACHE in
# favour of HF_HOME -- confirm against the installed version before changing.
export TRANSFORMERS_CACHE="./cache/"
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export NCCL_P2P_DISABLE=1

# Default values for arguments. They are defined before usage() so the help
# text always reports the values that are actually used.
DEFAULT_DATASET="ljvmiranda921/ultrafeedback-multilingual-dpo-test"
DEFAULT_SPLIT="test"
DEFAULT_OUTDIR="output/"

# Function to display usage information. Always exits with status 1.
usage() {
    echo "Usage: $0 [DATASET] [SPLIT] [OUTDIR]"
    echo "  DATASET - The dataset to use (optional, default is '$DEFAULT_DATASET')"
    echo "  SPLIT   - The data split to use (optional, default is '$DEFAULT_SPLIT')"
    echo "  OUTDIR  - The output directory (optional, default is '$DEFAULT_OUTDIR')"
    exit 1
}

# Reject extra arguments; usage() terminates the script.
if [ $# -gt 3 ]; then
    echo "Error: Too many arguments." >&2
    usage
fi

# Assign arguments if provided. `${N-default}` (no colon) falls back only when
# the argument is absent, so an explicitly passed empty string is kept.
DATASET=${1-$DEFAULT_DATASET}
SPLIT=${2-$DEFAULT_SPLIT}
OUTDIR=${3-$DEFAULT_OUTDIR}
|
||
# Run one reward model through rewardbench.
# Arguments: the model-specific rewardbench flags (--model, and where needed
# --tokenizer, --chat_template, --batch_size). The flags shared by every run
# are appended here; DATASET, SPLIT and OUTDIR are quoted so values containing
# spaces or glob characters are passed through as single arguments.
run_rewardbench() {
    rewardbench \
        "$@" \
        --dataset "$DATASET" \
        --split "$SPLIT" \
        --output_dir "$OUTDIR" \
        --trust_remote_code \
        --force_truncation \
        --save_all
}

run_rewardbench \
    --model openbmb/UltraRM-13b \
    --chat_template openbmb \
    --batch_size 8

run_rewardbench \
    --model OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5 \
    --chat_template oasst_pythia \
    --batch_size 8

run_rewardbench \
    --model OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1 \
    --chat_template oasst_pythia \
    --batch_size 16

run_rewardbench \
    --model OpenAssistant/reward-model-deberta-v3-large-v2 \
    --chat_template raw \
    --batch_size 64

run_rewardbench \
    --model berkeley-nest/Starling-RM-7B-alpha \
    --tokenizer meta-llama/Llama-2-7b-chat-hf \
    --chat_template llama-2 \
    --batch_size 16

# No --chat_template is passed for this model.
run_rewardbench \
    --model sfairXC/FsfairX-LLaMA3-RM-v0.1 \
    --tokenizer sfairXC/FsfairX-LLaMA3-RM-v0.1 \
    --batch_size 4

run_rewardbench \
    --model openbmb/Eurus-RM-7b \
    --tokenizer openbmb/Eurus-RM-7b \
    --chat_template mistral \
    --batch_size 16

# NOTE(review): this Tulu model is run with the "mistral" chat template, same
# as Eurus above -- confirm that is intended.
run_rewardbench \
    --model allenai/tulu-v2.5-13b-preference-mix-rm \
    --tokenizer allenai/tulu-v2.5-13b-preference-mix-rm \
    --chat_template mistral \
    --batch_size 4