-
Notifications
You must be signed in to change notification settings - Fork 1.8k
/
evals.py
44 lines (35 loc) · 1.02 KB
/
evals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from swarm import Swarm
from agents import weather_agent
import pytest
# Module-level Swarm client; run_and_get_tool_calls below uses it to run agents.
client = Swarm()
def run_and_get_tool_calls(agent, query):
    """Run ``agent`` on a single user ``query`` and return its tool calls.

    The query is wrapped in one user-role message and passed to
    ``client.run`` with ``execute_tools=False``.

    Returns:
        The value stored under ``"tool_calls"`` in the last message of the
        response (``None`` when ``.get`` finds nothing under that key).
    """
    conversation = [{"role": "user", "content": query}]
    result = client.run(agent=agent, messages=conversation, execute_tools=False)
    last_message = result.messages[-1]
    return last_message.get("tool_calls")
@pytest.mark.parametrize(
    "query",
    [
        "What's the weather in NYC?",
        "Tell me the weather in London.",
        "Do I need an umbrella today? I'm in chicago.",
    ],
)
def test_calls_weather_when_asked(query):
    """Check that a weather question yields exactly one ``get_weather`` call.

    Args:
        query: The user message sent to ``weather_agent`` (parametrized).
    """
    tool_calls = run_and_get_tool_calls(weather_agent, query)
    # The helper returns the result of ``.get("tool_calls")``, which can be
    # None; guard first so a missing tool call fails with a readable assertion
    # rather than a TypeError from len(None).
    assert tool_calls, f"expected a tool call for {query!r}, got {tool_calls!r}"
    assert len(tool_calls) == 1, (
        f"expected exactly one tool call for {query!r}, got {len(tool_calls)}"
    )
    called_name = tool_calls[0]["function"]["name"]
    assert called_name == "get_weather", (
        f"expected get_weather for {query!r}, got {called_name!r}"
    )
@pytest.mark.parametrize(
    "query",
    [
        "Who's the president of the United States?",
        "What is the time right now?",
        "Hi!",
    ],
)
def test_does_not_call_weather_when_not_asked(query):
    """Check that a query unrelated to weather produces no tool calls.

    Args:
        query: The user message sent to ``weather_agent`` (parametrized).
    """
    # Passes when the helper returns None or an empty collection.
    assert not run_and_get_tool_calls(weather_agent, query)