[BFCL] Add ability to evaluate Nemotron-4-340B-Instruct (#489)
This PR adds the ability to run inference on Nemotron via NVIDIA NIM. The
model is open source and hosted by NVIDIA, which does not specify a price per
inference call; therefore, no cost per thousand function calls will be
recorded. Model results will be updated in a separate PR.
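Since NIM exposes an OpenAI-compatible API, inference can be sketched with the standard `openai` client. This is a minimal illustration, not the handler added by this PR; the endpoint URL and the `NVIDIA_API_KEY` environment variable name are assumptions.

```python
import os


def build_request(messages, model="nvidia/nemotron-4-340b-instruct",
                  temperature=0.0, max_tokens=1024):
    """Assemble a chat-completion payload for an OpenAI-compatible endpoint."""
    return {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }


def query_nim(messages):
    # Requires the `openai` package; base_url and env var are assumed here,
    # not taken from this PR.
    from openai import OpenAI
    client = OpenAI(
        base_url="https://integrate.api.nvidia.com/v1",
        api_key=os.environ.get("NVIDIA_API_KEY", ""),
    )
    return client.chat.completions.create(**build_request(messages))
```

Separating payload construction from the network call keeps the request shape testable without hitting the hosted endpoint.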

---------

Co-authored-by: CharlieJCJ <charliechengjieji@berkeley.edu>
Co-authored-by: Huanzhi (Hans) Mao <huanzhimao@gmail.com>
3 people authored Jul 5, 2024
1 parent 5c1a5e9 commit 897e068
Showing 4 changed files with 48 additions and 0 deletions.
2 changes: 2 additions & 0 deletions berkeley-function-call-leaderboard/README.md
@@ -222,6 +222,7 @@ Below is *a table of models we support* to run our leaderboard evaluation against
|Nexusflow-Raven-v2 | Function Calling|
|NousResearch/Hermes-2-Pro-Mistral-7B 💻| Function Calling|
|snowflake/arctic | Prompt|
|nvidia/nemotron-4-340b-instruct| Prompt|

Here, {MODEL} 💻 means the model needs to be hosted locally and called via vLLM, while {MODEL} means the model is invoked through API calls. A trailing `-FC` means the model supports the function-calling feature. You can check out the table summarizing feature support across different models [here](https://gorilla.cs.berkeley.edu/blogs/8_berkeley_function_calling_leaderboard.html#prompt).

@@ -237,6 +238,7 @@ For inferencing `Databrick-DBRX-instruct`, you need to create a Databrick Azure

## Changelog

* [July 3, 2024] [#489](https://github.com/ShishirPatil/gorilla/pull/489): Add new model `nvidia/nemotron-4-340b-instruct` to the leaderboard.
* [June 18, 2024] [#470](https://github.com/ShishirPatil/gorilla/pull/470): Add new model `firefunction-v2-FC` to the leaderboard.
* [June 15, 2024] [#437](https://github.com/ShishirPatil/gorilla/pull/437): Fix prompting issues for `Nexusflow-Raven-v2 (FC)`.
* [June 7, 2024] [#407](https://github.com/ShishirPatil/gorilla/pull/407), [#462](https://github.com/ShishirPatil/gorilla/pull/462): Update the AST evaluation logic to allow the use of `int` values for Python parameters expecting `float` values. This is to accommodate the Python auto-conversion feature from `int` to `float`.
@@ -367,6 +367,12 @@
"Snowflake",
"apache-2.0",
],
"nvidia/nemotron-4-340b-instruct": [
"Nemotron-4-340b-instruct (Prompt)",
"https://huggingface.co/nvidia/nemotron-4-340b-instruct",
"NVIDIA",
"nvidia-open-model-license"
]
}

INPUT_PRICE_PER_MILLION_TOKEN = {
@@ -472,6 +478,7 @@
"meetkai/functionary-small-v2.2-FC",
"meetkai/functionary-small-v2.4-FC",
"snowflake/arctic",
"nvidia/nemotron-4-340b-instruct",
]

# Price from Azure: 22.032 per hour for 8 V100, Pay As You Go Total Price
@@ -16,6 +16,7 @@
from model_handler.mistral_handler import MistralHandler
from model_handler.nexus_handler import NexusHandler
from model_handler.oss_handler import OSSHandler
from model_handler.nvidia_handler import NvidiaHandler

handler_map = {
"gorilla-openfunctions-v0": GorillaHandler,
@@ -74,4 +75,5 @@
"command-r-plus-FC-optimized": CohereHandler,
"command-r-plus-optimized": CohereHandler,
"snowflake/arctic": ArcticHandler,
"nvidia/nemotron-4-340b-instruct": NvidiaHandler,
}
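The mapping above drives a simple registry-dispatch pattern: the benchmark runner looks up the handler class by model name and instantiates it. A minimal sketch of that pattern, with stand-in classes rather than the real BFCL handlers:

```python
# Stand-in handler classes for illustration only; the real BFCL handlers
# live in model_handler/ and take additional arguments.
class NvidiaHandler:
    def __init__(self, model_name):
        self.model_name = model_name


class ArcticHandler:
    def __init__(self, model_name):
        self.model_name = model_name


# Model name -> handler class, mirroring the structure of handler_map.
handler_map = {
    "snowflake/arctic": ArcticHandler,
    "nvidia/nemotron-4-340b-instruct": NvidiaHandler,
}


def get_handler(model_name):
    """Instantiate the handler registered for a given model name."""
    return handler_map[model_name](model_name)
```

Registering a new model, as this PR does, then only requires importing its handler and adding one dictionary entry.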
37 changes: 37 additions & 0 deletions berkeley-function-call-leaderboard/model_handler/nvidia_handler.py
@@ -2,6 +2,7 @@
from openai import OpenAI
from model_handler.handler import BaseHandler
from model_handler.model_style import ModelStyle
from model_handler.utils import ast_parse
from model_handler.utils import (
augment_prompt_by_languge,
language_specific_pre_processing,
@@ -62,3 +63,39 @@ def write(self, result, file_to_open):
            "./result/" + self.model_name.replace("/", "_") + "/" + file_to_open.replace(".json", "_result.json"), "a+"
        ) as f:
            f.write(json.dumps(result) + "\n")

    def decode_ast(self, result, language="Python"):
        # Normalize the raw model output before AST parsing: strip newlines,
        # ensure the calls are wrapped in a list, and drop stray quote
        # artifacts such as "['func(...)']" left by string-formatted output.
        result = result.replace("\n", "")
        if not result.startswith("["):
            result = "[ " + result
        if not result.endswith("]"):
            result = result + " ]"
        if result.startswith("['"):
            result = result.replace("['", "[")
            result = result.replace("', '", ", ")
            result = result.replace("','", ", ")
        if result.endswith("']"):
            result = result.replace("']", "]")
        decode_output = ast_parse(result, language)
        return decode_output

    def decode_execute(self, result, language="Python"):
        # Apply the same normalization as decode_ast, then flatten the parsed
        # calls into executable strings of the form func(arg=value, ...).
        result = result.replace("\n", "")
        if not result.startswith("["):
            result = "[ " + result
        if not result.endswith("]"):
            result = result + " ]"
        if result.startswith("['"):
            result = result.replace("['", "[")
            result = result.replace("', '", ", ")
            result = result.replace("','", ", ")
        if result.endswith("']"):
            result = result.replace("']", "]")
        decode_output = ast_parse(result, language)
        execution_list = []
        for function_call in decode_output:
            for key, value in function_call.items():
                execution_list.append(
                    f"{key}({','.join([f'{k}={repr(v)}' for k, v in value.items()])})"
                )
        return execution_list
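The two decode methods share a string-normalization step and a call-formatting step. A standalone demo of both, with `ast_parse` left out (it is part of `model_handler.utils` and not shown in this diff), so only the string massaging and the f-string formatting are exercised:

```python
def normalize(result):
    """Mirror the bracket/quote cleanup the handler applies to raw output."""
    result = result.replace("\n", "")
    if not result.startswith("["):
        result = "[ " + result
    if not result.endswith("]"):
        result = result + " ]"
    if result.startswith("['"):
        result = result.replace("['", "[")
        result = result.replace("', '", ", ")
        result = result.replace("','", ", ")
    if result.endswith("']"):
        result = result.replace("']", "]")
    return result


def to_execution_list(decoded):
    """Turn parsed {func: {arg: value}} dicts into executable call strings."""
    execution_list = []
    for function_call in decoded:
        for key, value in function_call.items():
            execution_list.append(
                f"{key}({','.join([f'{k}={repr(v)}' for k, v in value.items()])})"
            )
    return execution_list


print(normalize("get_weather(city='Paris')\n"))
# → [ get_weather(city='Paris') ]
print(to_execution_list([{"get_weather": {"city": "Paris", "days": 3}}]))
# → ["get_weather(city='Paris',days=3)"]
```

Note that the cleanup is purely string-based heuristics for common formatting quirks in prompted (non-FC) output; `ast_parse` still does the actual parsing of the bracketed call list.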
