🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] committed Apr 19, 2024
1 parent 30bc7df commit 97346a5
Showing 1 changed file with 17 additions and 14 deletions.
31 changes: 17 additions & 14 deletions gemini/evaluation/get_started_with_genai_model_eval_service.ipynb
@@ -503,8 +503,7 @@
"from google.protobuf.json_format import MessageToDict\n",
"from IPython.display import HTML, Markdown, display\n",
"from tqdm import tqdm\n",
"from vertexai.generative_models import (GenerativeModel, HarmBlockThreshold,\n",
" HarmCategory)\n",
"from vertexai.generative_models import GenerativeModel, HarmBlockThreshold, HarmCategory\n",
"from vertexai.language_models import TextGenerationModel\n",
"from vertexai.preview.evaluation import EvalTask, make_metric"
]
@@ -613,7 +612,9 @@
" return \"\".join(random.choices(string.ascii_lowercase + string.digits, k=length))\n",
"\n",
"\n",
"def display_eval_report(eval_result: Tuple[str, dict, pd.DataFrame], metrics: List[str] = None) -> None:\n",
"def display_eval_report(\n",
" eval_result: Tuple[str, dict, pd.DataFrame], metrics: List[str] = None\n",
") -> None:\n",
" \"\"\"Display the evaluation results.\"\"\"\n",
"\n",
" title, summary_metrics, report_df = eval_result\n",
@@ -646,7 +647,9 @@
" display(report_df)\n",
"\n",
"\n",
"def display_explanations(df: pd.DataFrame, metrics: List[str] = None, n: int =1) -> None:\n",
"def display_explanations(\n",
" df: pd.DataFrame, metrics: List[str] = None, n: int = 1\n",
") -> None:\n",
" \"\"\"Display the explanations for the evaluation results.\"\"\"\n",
"\n",
" # Set the style\n",
@@ -681,7 +684,6 @@
"\n",
" # Create the radar plot for the evaluation metrics\n",
" for eval_result in eval_results:\n",
"\n",
" title, summary_metrics, report_df = eval_result\n",
"\n",
" if metrics:\n",
@@ -708,13 +710,14 @@
" fig.show()\n",
"\n",
"\n",
"def plot_bar_plot(eval_results: Tuple[str, dict, pd.DataFrame], metrics: List[str] = None) -> None:\n",
"def plot_bar_plot(\n",
" eval_results: Tuple[str, dict, pd.DataFrame], metrics: List[str] = None\n",
") -> None:\n",
" \"\"\"Plot a bar plot for the evaluation results.\"\"\"\n",
"\n",
" # Create data for the bar plot\n",
" data = []\n",
" for eval_result in eval_results:\n",
"\n",
" title, summary_metrics, _ = eval_result\n",
" if metrics:\n",
" summary_metrics = {\n",
@@ -753,7 +756,7 @@
" )\n",
"\n",
"\n",
"def print_autosxs_judgments(df: pd.DataFrame, n: int =3):\n",
"def print_autosxs_judgments(df: pd.DataFrame, n: int = 3):\n",
" \"\"\"Print AutoSxS judgments\"\"\"\n",
"\n",
" # Set the style\n",
@@ -866,9 +869,12 @@
"outputs": [],
"source": [
"eval_dataset = datasets.load_dataset(\"xsum\", split=\"all\", data_dir=data_path)\n",
"eval_dataset = eval_dataset.filter(lambda example: len(example[\"document\"]) < 4096).filter(lambda example: len(example[\"summary\"]) < 4096).rename_columns(\n",
" {\"document\": \"context\", \"summary\": \"reference\"}\n",
").remove_columns([\"id\"])\n",
"eval_dataset = (\n",
" eval_dataset.filter(lambda example: len(example[\"document\"]) < 4096)\n",
" .filter(lambda example: len(example[\"summary\"]) < 4096)\n",
" .rename_columns({\"document\": \"context\", \"summary\": \"reference\"})\n",
" .remove_columns([\"id\"])\n",
")\n",
"\n",
"eval_sample_df = (\n",
" eval_dataset.shuffle(seed=8)\n",
@@ -997,7 +1003,6 @@
"for i, prompt_template in tqdm(\n",
" enumerate(prompt_templates), total=len(prompt_templates)\n",
"):\n",
"\n",
" experiment_run_name = f\"prompt-evaluation-llm1-{run_id}-{i}\"\n",
"\n",
" eval_result = eval_task.evaluate(\n",
@@ -1198,7 +1203,6 @@
"for i, prompt_template in tqdm(\n",
" enumerate(prompt_templates), total=len(prompt_templates)\n",
"):\n",
"\n",
" experiment_run_name = f\"prompt-evaluation-llm1-{run_id}-{i}\"\n",
"\n",
" eval_result = eval_task.evaluate(\n",
@@ -1342,7 +1346,6 @@
"for i, (model_name, model) in tqdm(\n",
" enumerate(zip(models.keys(), models.values())), total=len(models.keys())\n",
"):\n",
"\n",
" experiment_run_name = f\"prompt-evaluation-{model_name}-{run_id}-{i}\"\n",
"\n",
" eval_result = eval_task.evaluate(\n",
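The diff above is consistent with black-style formatting of the notebook's code cells (long signatures wrapped, spacing normalized, leading blank lines in blocks dropped). As a hedged sketch only — not the actual OwlBot post-processor, and assuming `black` and `nbformat` are installed with a compatible line-length setting — equivalent formatting could be re-applied to the notebook like this:

```python
# Sketch: re-apply black-style formatting to a notebook's code cells.
# Assumption: illustrative tooling only, not the OwlBot post-processor itself.
import black
import nbformat

NOTEBOOK = "gemini/evaluation/get_started_with_genai_model_eval_service.ipynb"

nb = nbformat.read(NOTEBOOK, as_version=4)
mode = black.Mode()  # black defaults; the repo's actual line length may differ

for cell in nb.cells:
    if cell.cell_type != "code":
        continue
    try:
        cell.source = black.format_str(cell.source, mode=mode).rstrip("\n")
    except black.InvalidInput:
        # Cells with notebook magics (e.g. %%bash) are not plain Python; skip them.
        pass

nbformat.write(nb, NOTEBOOK)
```

Passing a different `line_length=` to `black.Mode` would account for wider lines such as the single-line `vertexai.generative_models` import shown in the first hunk.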
