From 4a963a37dd9ea155d4fb8131fd13c440a5085dc0 Mon Sep 17 00:00:00 2001
From: Marwan Sarieddine <sarieddine.marwan@gmail.com>
Date: Thu, 26 Sep 2024 12:52:16 -0400
Subject: [PATCH] small e2e LLM content updates (#356)

---
 .../ray-summit-end-to-end-llms/01_Finetuning_LLMs.ipynb  | 2 +-
 .../ray-summit-end-to-end-llms/03_Evaluating_LLMs.ipynb  | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/templates/ray-summit-end-to-end-llms/01_Finetuning_LLMs.ipynb b/templates/ray-summit-end-to-end-llms/01_Finetuning_LLMs.ipynb
index eb50cbfc0..224bd75eb 100644
--- a/templates/ray-summit-end-to-end-llms/01_Finetuning_LLMs.ipynb
+++ b/templates/ray-summit-end-to-end-llms/01_Finetuning_LLMs.ipynb
@@ -70,7 +70,7 @@
     "Here is a diagram that shows a *typical workflow* when working with LLMForge:\n",
     "\n",
     "\n",
-    "<img src=\"https://anyscale-public-materials.s3.us-west-2.amazonaws.com/ray-summit/e2e-llms/llmforge-finetune-workflow-v2.png\" width=800>"
+    "<img src=\"https://anyscale-public-materials.s3.us-west-2.amazonaws.com/ray-summit/e2e-llms/llmforge-finetune-workflow-v3.png\" width=800>"
    ]
   },
   {
diff --git a/templates/ray-summit-end-to-end-llms/03_Evaluating_LLMs.ipynb b/templates/ray-summit-end-to-end-llms/03_Evaluating_LLMs.ipynb
index df95c6d2b..315cf059c 100644
--- a/templates/ray-summit-end-to-end-llms/03_Evaluating_LLMs.ipynb
+++ b/templates/ray-summit-end-to-end-llms/03_Evaluating_LLMs.ipynb
@@ -41,6 +41,7 @@
     "\n",
     "import anyscale\n",
     "import numpy as np\n",
+    "import pandas as pd\n",
     "import ray\n",
     "import re\n",
     "\n",
@@ -912,11 +913,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def check_function_type_accuracy(batch: dict) -> dict:\n",
+    "def check_function_type_accuracy(batch: dict[str, np.ndarray]) -> dict[str, np.ndarray]:\n",
     "    batch[\"fn_type_match\"] = batch[\"ground_truth_fn_type\"] == batch[\"model_fn_type\"]\n",
     "    return batch\n",
     "\n",
-    "fn_type_accuracy_percent = test_ds_responses_processed.map(check_function_type_accuracy).mean(on=\"fn_type_match\") * 100 \n",
+    "fn_type_accuracy_percent = test_ds_responses_processed.map_batches(check_function_type_accuracy).mean(on=\"fn_type_match\") * 100 \n",
     "print(f\"The correct function type is predicted at {fn_type_accuracy_percent}% accuracy\")"
    ]
   },
@@ -926,7 +927,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def check_attribute_types_accuracy(batch: dict) -> dict:\n",
+    "def check_attribute_types_accuracy(batch: pd.DataFrame) -> pd.DataFrame:\n",
     "    batch[\"attr_types_match\"] = batch[\"ground_truth_attr_types\"].apply(set) == batch[\"model_attr_types\"].apply(set)\n",
     "    return batch\n",
     "\n",
@@ -1176,7 +1177,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fn_type_accuracy_percent_few_shot = test_ds_responses_few_shot.map(check_function_type_accuracy).mean(on=\"fn_type_match\") * 100 \n",
+    "fn_type_accuracy_percent_few_shot = test_ds_responses_few_shot.map_batches(check_function_type_accuracy).mean(on=\"fn_type_match\") * 100 \n",
     "print(f\"The correct function type is predicted at {fn_type_accuracy_percent_few_shot}% accuracy\")"
    ]
   },