🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] committed Apr 19, 2024
1 parent 30bc7df commit 97346a5
Showing 1 changed file with 17 additions and 14 deletions.
31 changes: 17 additions & 14 deletions gemini/evaluation/get_started_with_genai_model_eval_service.ipynb
@@ -503,8 +503,7 @@
"from google.protobuf.json_format import MessageToDict\n",
"from IPython.display import HTML, Markdown, display\n",
"from tqdm import tqdm\n",
"from vertexai.generative_models import (GenerativeModel, HarmBlockThreshold,\n",
" HarmCategory)\n",
"from vertexai.generative_models import GenerativeModel, HarmBlockThreshold, HarmCategory\n",
"from vertexai.language_models import TextGenerationModel\n",
"from vertexai.preview.evaluation import EvalTask, make_metric"
]
@@ -613,7 +612,9 @@
" return \"\".join(random.choices(string.ascii_lowercase + string.digits, k=length))\n",
"\n",
"\n",
"def display_eval_report(eval_result: Tuple[str, dict, pd.DataFrame], metrics: List[str] = None) -> None:\n",
"def display_eval_report(\n",
" eval_result: Tuple[str, dict, pd.DataFrame], metrics: List[str] = None\n",
") -> None:\n",
" \"\"\"Display the evaluation results.\"\"\"\n",
"\n",
" title, summary_metrics, report_df = eval_result\n",
@@ -646,7 +647,9 @@
" display(report_df)\n",
"\n",
"\n",
"def display_explanations(df: pd.DataFrame, metrics: List[str] = None, n: int =1) -> None:\n",
"def display_explanations(\n",
" df: pd.DataFrame, metrics: List[str] = None, n: int = 1\n",
") -> None:\n",
" \"\"\"Display the explanations for the evaluation results.\"\"\"\n",
"\n",
" # Set the style\n",
@@ -681,7 +684,6 @@
"\n",
" # Create the radar plot for the evaluation metrics\n",
" for eval_result in eval_results:\n",
"\n",
" title, summary_metrics, report_df = eval_result\n",
"\n",
" if metrics:\n",
@@ -708,13 +710,14 @@
" fig.show()\n",
"\n",
"\n",
"def plot_bar_plot(eval_results: Tuple[str, dict, pd.DataFrame], metrics: List[str] = None) -> None:\n",
"def plot_bar_plot(\n",
" eval_results: Tuple[str, dict, pd.DataFrame], metrics: List[str] = None\n",
") -> None:\n",
" \"\"\"Plot a bar plot for the evaluation results.\"\"\"\n",
"\n",
" # Create data for the bar plot\n",
" data = []\n",
" for eval_result in eval_results:\n",
"\n",
" title, summary_metrics, _ = eval_result\n",
" if metrics:\n",
" summary_metrics = {\n",
@@ -753,7 +756,7 @@
" )\n",
"\n",
"\n",
"def print_autosxs_judgments(df: pd.DataFrame, n: int =3):\n",
"def print_autosxs_judgments(df: pd.DataFrame, n: int = 3):\n",
" \"\"\"Print AutoSxS judgments\"\"\"\n",
"\n",
" # Set the style\n",
@@ -866,9 +869,12 @@
"outputs": [],
"source": [
"eval_dataset = datasets.load_dataset(\"xsum\", split=\"all\", data_dir=data_path)\n",
"eval_dataset = eval_dataset.filter(lambda example: len(example[\"document\"]) < 4096).filter(lambda example: len(example[\"summary\"]) < 4096).rename_columns(\n",
" {\"document\": \"context\", \"summary\": \"reference\"}\n",
").remove_columns([\"id\"])\n",
"eval_dataset = (\n",
" eval_dataset.filter(lambda example: len(example[\"document\"]) < 4096)\n",
" .filter(lambda example: len(example[\"summary\"]) < 4096)\n",
" .rename_columns({\"document\": \"context\", \"summary\": \"reference\"})\n",
" .remove_columns([\"id\"])\n",
")\n",
"\n",
"eval_sample_df = (\n",
" eval_dataset.shuffle(seed=8)\n",
@@ -997,7 +1003,6 @@
"for i, prompt_template in tqdm(\n",
" enumerate(prompt_templates), total=len(prompt_templates)\n",
"):\n",
"\n",
" experiment_run_name = f\"prompt-evaluation-llm1-{run_id}-{i}\"\n",
"\n",
" eval_result = eval_task.evaluate(\n",
@@ -1198,7 +1203,6 @@
"for i, prompt_template in tqdm(\n",
" enumerate(prompt_templates), total=len(prompt_templates)\n",
"):\n",
"\n",
" experiment_run_name = f\"prompt-evaluation-llm1-{run_id}-{i}\"\n",
"\n",
" eval_result = eval_task.evaluate(\n",
@@ -1342,7 +1346,6 @@
"for i, (model_name, model) in tqdm(\n",
" enumerate(zip(models.keys(), models.values())), total=len(models.keys())\n",
"):\n",
"\n",
" experiment_run_name = f\"prompt-evaluation-{model_name}-{run_id}-{i}\"\n",
"\n",
" eval_result = eval_task.evaluate(\n",
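The diff above is consistent with black-style formatting of the notebook's code cells (long signatures wrapped, spacing normalized, leading blank lines in blocks dropped). As a hedged sketch only — not the actual OwlBot post-processor, and assuming `black` and `nbformat` are installed with a compatible line-length setting — equivalent formatting could be re-applied to the notebook like this:

```python
# Sketch: re-apply black-style formatting to a notebook's code cells.
# Assumption: illustrative tooling only, not the OwlBot post-processor itself.
import black
import nbformat

NOTEBOOK = "gemini/evaluation/get_started_with_genai_model_eval_service.ipynb"

nb = nbformat.read(NOTEBOOK, as_version=4)
mode = black.Mode()  # black defaults; the repo's actual line length may differ

for cell in nb.cells:
    if cell.cell_type != "code":
        continue
    try:
        cell.source = black.format_str(cell.source, mode=mode).rstrip("\n")
    except black.InvalidInput:
        # Cells with notebook magics (e.g. %%bash) are not plain Python; skip them.
        pass

nbformat.write(nb, NOTEBOOK)
```

Passing a different `line_length=` to `black.Mode` would account for wider lines such as the single-line `vertexai.generative_models` import shown in the first hunk.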
