Skip to content

Commit

Permalink
Update plotting scripts (#51)
Browse files Browse the repository at this point in the history
  • Loading branch information
ljvmiranda921 authored Oct 16, 2024
1 parent a34c9f0 commit c9deca3
Showing 1 changed file with 58 additions and 29 deletions.
87 changes: 58 additions & 29 deletions analysis/avg_agreement_final.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

FONT_SIZES = {"small": 12, "medium": 16, "large": 18}
COLORS = {"green": "#355145", "purple": "#d8a6e5", "orange": "#fe7759"}
Expand All @@ -21,57 +22,85 @@
plt.rcParams.update(PLOT_PARAMS)


# data = {
# "LlaMa 3.1 8B": [0.3533086666014079, 0.052422082615756406],
# "Aya 23 35B": [0.43767196047824003, 0.026040919354464294],
# # "Aya 23 8B": [0.013483014909052663, 0.03363706833599835],
# "Command R": [0.374457668650282, 0.02926089754079793],
# "Command R+": [0.3830841816733316, 0.020185255968455686],
# "Gemma 1.1 7B": [0.5190375637539242, 0.027757722654111305],
# "Gemma 2 9B": [0.5181663123111222, 0.031090119385244894],
# "LlaMa 3 70B": [0.5685224105896568, 0.04853344616275034],
# "LlaMa 3 8B": [0.37936948540837095, 0.032172769265151994],
# "LlaMa 3.1 70B": [0.603536768244583, 0.027191895488989915],
# "Mistal 7B v0.2": [0.4071166722276529, 0.04577594028555328],
# "Mistral 7B v0.3": [0.41195018984687265, 0.056184679972755454],
# "GPT-4 Turbo": [0.6106943361444249, 0.02932446842558468],
# "GPT-4o": [0.5833874065757011, 0.023695391445384514],
# }

# Per-model agreement statistics used for the bar chart.
# Schema: {model_name: [mean_cohens_kappa, std_dev, reward_model_family]}
# The third element drives the hue grouping in the plot, so every entry
# must carry all three fields.
# NOTE: an earlier revision listed two-element rows (mean/std only) for some
# models; those duplicated keys silently overwrote each other and any
# leftover two-element row crashed the later `v[2]` access — removed here.
data = {
    "Mistral 7B v0.2": [0.41964902527302483, 0.041728704319417186, "Generative RM"],
    "Aya 23 35B": [0.4366594509037704, 0.02590083631166214, "Generative RM"],
    "Aya 23 8B": [0.38482902327857127, 0.02093522000984476, "Generative RM"],
    "Command R": [0.370172816882575, 0.02977439059146716, "Generative RM"],
    "Command R+": [0.38117473236836474, 0.020413901190603385, "Generative RM"],
    "Gemma 1.1 7B": [0.5121848983276365, 0.02775593676763153, "Generative RM"],
    "Gemma 2 9B": [0.5239388151608217, 0.029070955636084302, "Generative RM"],
    "Llama 3 70B": [0.5738032949863474, 0.04813697578838559, "Generative RM"],
    "Llama 3 8B": [0.3426278270154337, 0.028673093628218196, "Generative RM"],
    "Llama 3.1 70B": [0.6074197074501972, 0.028414614724563008, "Generative RM"],
    "Llama 3.1 8B": [0.34965468089191665, 0.056407978898463204, "Generative RM"],
    "Mistral 7B v0.3": [0.4166882337797498, 0.05085550655767351, "Generative RM"],
    "GPT-4 Turbo": [0.6096953791655624, 0.028784709595173846, "Generative RM"],
    "GPT-4o": [0.5833907047087866, 0.023692522150173454, "Generative RM"],
    "Tulu 2 DPO 13B": [0.3416546214690787, 0.1304713944811808, "Implicit RM"],
    "BTRM Qwen 2 7B": [0.4893276344968342, 0.07031889836622843, "Classifier RM"],
    "Eurus RM 7B": [0.3586485854871021, 0.09638527344174744, "Classifier RM"],
    "Zephyr 7B Beta": [0.35011426942621166, 0.176041224588175, "Implicit RM"],
    "Mistral 7B DPO": [0.1902062108486662, 0.08462799373351747, "Implicit RM"],
    "Qwen1.5 4B": [0.38751934608609767, 0.055096683780610285, "Implicit RM"],
    "StableLM Zephyr 3B": [0.1708047069636795, 0.06315971482897487, "Implicit RM"],
    "Tulu 2.5 13B RM": [0.3038059897554214, 0.1147333149007323, "Classifier RM"],
    "URM LLaMa 3.1 8B": [0.3969881479982245, 0.07787037973169045, "Classifier RM"],
}


# Order the models by mean agreement (ascending) so the bars rise from
# left to right, then unpack the parallel columns the plot needs.
ordered = sorted(data.items(), key=lambda kv: kv[1][0])
sorted_data = dict(ordered)
labels_sorted = [name for name, _ in ordered]
means_sorted = [stats[0] for _, stats in ordered]
std_devs_sorted = [stats[1] for _, stats in ordered]
model_type = [stats[2] for _, stats in ordered]

# Long-form frame for seaborn: one row per model, hue column = RM family.
df = pd.DataFrame(
    {"means": means_sorted, "std": std_devs_sorted, "model_type": model_type}
)

# --- Bar chart: mean inner-model agreement per model, with std-dev whiskers.
# (Previously this section created a throwaway 7x7 figure before the real
# 12x7 one and called title/savefig twice — diff leftovers, collapsed here.)
plt.figure(figsize=(12, 7))
x_pos_sorted = np.arange(len(labels_sorted))

# Bars are grouped/colored by reward-model family; hue_order pins the legend
# order and pairs each family with a fixed brand color.
ax1 = sns.barplot(
    x=df.index,
    y="means",
    data=df,
    errorbar=None,  # seaborn's own error bars off; drawn manually below
    hue="model_type",
    hue_order=["Classifier RM", "Generative RM", "Implicit RM"],
    palette=[COLORS.get("green"), COLORS.get("purple"), COLORS.get("orange")],
)
# Manual std-dev whiskers so they land exactly on the sorted bar positions.
plt.errorbar(x_pos_sorted, means_sorted, yerr=std_devs_sorted, fmt="none", c="black", capsize=5)

plt.grid(color="gray", axis="y", alpha=0.2)
plt.ylim(0, 0.8)
plt.gca().set_axisbelow(True)  # keep grid lines behind the bars
plt.legend(frameon=False)
plt.xlabel("")

plt.xticks(x_pos_sorted, labels_sorted, rotation=45, ha="right")
plt.ylabel("Cohen's Kappa")
plt.title("Average Inner-Model Agreement Across Languages", fontsize=18)

plt.tight_layout()
plt.savefig("plots/innermodel_agreement_green_oracle_all.pdf", bbox_inches="tight")

0 comments on commit c9deca3

Please sign in to comment.