Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RewardBench script #6

Merged
merged 12 commits into from
Jul 8, 2024
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

data/*
output/*
14 changes: 14 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# Ensure consistent UTF-8 locale handling inside the container.
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

WORKDIR /stage

# Install dependencies.
# Clean the apt package lists in the same layer so they don't bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements.txt first so this layer is cached across source-only changes.
COPY requirements.txt /stage
# --no-cache-dir keeps pip's download cache out of the final image.
RUN pip install --no-cache-dir -r requirements.txt

# Copy all files
COPY . /stage
3 changes: 3 additions & 0 deletions requirements.txt
sanggusti marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
rewardbench
datasets
protobuf
64 changes: 64 additions & 0 deletions scripts/convert_multilingual_uf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Convert multilingual ultrafeedback into a format acceptable for RewardBench

We need to follow the load_preference_dataset setup in RewardBench as
shown here: https://github.com/allenai/reward-bench/blob/main/rewardbench/utils.py#L136
So we need three columns:
- prompt (str)
- chosen (list[dict[str, str]]), and
- rejected (list[dict[str, str]])
"""

import argparse
import logging
from pathlib import Path

from datasets import load_dataset

logging.basicConfig(level=logging.INFO)


def get_args():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace with ``dataset`` (str): the HuggingFace dataset to
        convert, and ``output_path`` (Path): where to save the JSON output.
    """
    parser = argparse.ArgumentParser(
        description="Convert a HuggingFace dataset into the RewardBench format."
    )

    # fmt: off
    parser.add_argument("--dataset", type=str, default="nthakur/multilingual-ultrafeedback-dpo-v0.1", help="Dataset to convert.")
    # Pass a real Path as the default (instead of a str) so args.output_path is
    # a Path without relying on argparse re-parsing string defaults via `type`.
    parser.add_argument("--output_path", type=Path, default=Path("data/multilingual-ultrafeedback-dpo-v0.1.json"), help="Path to save converted dataset as JSON file.")
    # fmt: on

    return parser.parse_args()


def main():
    """Convert multilingual UltraFeedback into the RewardBench format.

    Loads the dataset's test split, reshapes the raw ``chosen``/``rejected``
    strings into turn-based chat messages (user prompt + assistant reply),
    and writes the result to ``args.output_path`` as a JSON file.
    """
    args = get_args()
    if args.output_path:
        # Ensure the output directory (e.g., data/) exists before writing.
        args.output_path.parent.mkdir(parents=True, exist_ok=True)

    dataset = load_dataset(args.dataset, split="test")

    def _convert_to_turn_based(example):
        # RewardBench expects chosen/rejected to be a two-turn conversation:
        # [{"content": prompt, "role": "user"}, {"content": reply, "role": "assistant"}]
        example["chosen"] = [
            {"content": example["prompt"], "role": "user"},
            {"content": example["chosen_raw"], "role": "assistant"},
        ]
        example["rejected"] = [
            {"content": example["prompt"], "role": "user"},
            {"content": example["rejected_raw"], "role": "assistant"},
        ]
        return example

    cols = ["id", "source", "language", "input", "chosen", "rejected"]
    # Rename the raw string columns out of the way so the turn-based lists can
    # be rebuilt under the names RewardBench expects ("prompt"/"chosen"/"rejected").
    rename_map = {"input": "prompt", "chosen": "chosen_raw", "rejected": "rejected_raw"}
    dataset = (
        dataset.select_columns(cols)
        .rename_columns(rename_map)
        .map(_convert_to_turn_based)
        .remove_columns(["chosen_raw", "rejected_raw"])
    )
    dataset.to_json(args.output_path)
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logging.info("Saved file to %s.", args.output_path)


if __name__ == "__main__":
    main()
126 changes: 126 additions & 0 deletions scripts/run_rewardbench.sh
sanggusti marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/bin/bash

# Cache Hugging Face model downloads locally, relative to the working directory.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers releases
# in favor of HF_HOME — confirm the installed version still honors it.
export TRANSFORMERS_CACHE="./cache/"
# Make CUDA device numbering match the physical PCI bus order (as in nvidia-smi).
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
# Disable NCCL peer-to-peer transport; presumably a workaround for multi-GPU
# hangs on the target host — verify it is still needed.
export NCCL_P2P_DISABLE=1

# Print usage information for this script, then abort with a non-zero status.
usage() {
    cat <<USAGE
Usage: $0 [DATASET] [SPLIT] [OUTDIR]
 DATASET - The dataset to use (optional, default is 'ljvmiranda921/multilingual-ultrafeedback-dpi-v0.1-test')
 SPLIT - The data split to use (optional, default is 'test')
 OUTDIR - The output directory (optional, default is 'output/')
USAGE
    exit 1
}

# Default values for arguments
# NOTE(review): "dpi" in this dataset id looks like a typo for "dpo" — the
# companion script scripts/convert_multilingual_uf.py uses
# "multilingual-ultrafeedback-dpo-v0.1". Confirm the exact repo id on the Hub.
DATASET="ljvmiranda921/multilingual-ultrafeedback-dpi-v0.1-test"
SPLIT="test"
OUTDIR="output/"

# Check and assign arguments if provided
if [ $# -gt 3 ]; then
    echo "Error: Too many arguments."
    usage
elif [ $# -ge 1 ]; then
    DATASET=$1
fi

if [ $# -ge 2 ]; then
    SPLIT=$2
fi

if [ $# -ge 3 ]; then
    OUTDIR=$3
fi

# Evaluate one reward model with RewardBench.
#   $1 = model id
#   $2 = chat template ("" = let rewardbench use the model's default)
#   $3 = batch size
#   $4 = tokenizer id ("" = use the model's own tokenizer)
# All variable expansions are quoted to prevent word splitting and globbing.
run_rm() {
    local model=$1 template=$2 batch=$3 tokenizer=$4
    local extra_args=()
    [ -n "$template" ] && extra_args+=(--chat_template "$template")
    [ -n "$tokenizer" ] && extra_args+=(--tokenizer "$tokenizer")
    rewardbench \
        --model "$model" \
        "${extra_args[@]}" \
        --dataset "$DATASET" \
        --split "$SPLIT" \
        --output_dir "$OUTDIR" \
        --batch_size "$batch" \
        --trust_remote_code \
        --force_truncation \
        --save_all
}

#      model                                         template      batch  tokenizer
run_rm openbmb/UltraRM-13b                           openbmb       8      ""
run_rm OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5 oasst_pythia 8     ""
run_rm OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1  oasst_pythia  16     ""
run_rm OpenAssistant/reward-model-deberta-v3-large-v2 raw          64     ""
run_rm berkeley-nest/Starling-RM-7B-alpha            llama-2       16     meta-llama/Llama-2-7b-chat-hf
run_rm sfairXC/FsfairX-LLaMA3-RM-v0.1                ""            4      sfairXC/FsfairX-LLaMA3-RM-v0.1
run_rm openbmb/Eurus-RM-7b                           mistral       16     openbmb/Eurus-RM-7b
run_rm allenai/tulu-v2.5-13b-preference-mix-rm       mistral       4      allenai/tulu-v2.5-13b-preference-mix-rm