Skip to content

Commit

Permalink
Executed benchmark for 50 markets
Browse files Browse the repository at this point in the history
  • Loading branch information
gabrielfior committed Apr 10, 2024
1 parent 6513138 commit 787d670
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from datetime import datetime

import typer
from dotenv import load_dotenv
from prediction_market_agent_tooling.benchmark.agents import (
AbstractBenchmarkedAgent,
FixedAgent,
Expand Down Expand Up @@ -46,9 +45,8 @@ def build_binary_agent_market_from_question(question: str) -> AgentMarket:
class CrewAIAgentSubquestionsBenchmark(AbstractBenchmarkedAgent):
def __init__(
self,
agent_name: str,
max_workers: int,
model: str,
agent_name: str,
max_tries: int,
) -> None:
self.max_tries = max_tries
Expand All @@ -65,8 +63,8 @@ def predict(self, market_question: str) -> Prediction:


def main(
n: int = 5,
output: str = "./benchmark_report.md",
n: int = 50,
output: str = "./benchmark_report_50markets.md",
reference: MarketType = MarketType.MANIFOLD,
filter: FilterBy = FilterBy.OPEN,
sort: SortBy = SortBy.NONE,
Expand All @@ -78,7 +76,6 @@ def main(
Polymarket usually contains higher quality questions,
but on Manifold, in addition to filtering by MarketFilter.resolved, you can sort by MarketSort.newest.
"""
load_dotenv()
markets = get_binary_markets(n, reference, filter_by=filter, sort_by=sort)
markets_deduplicated = list(({m.question: m for m in markets}.values()))
if len(markets) != len(markets_deduplicated):
Expand All @@ -92,10 +89,9 @@ def main(
markets=markets_deduplicated,
agents=[
CrewAIAgentSubquestionsBenchmark(
"subsequential-questions-crewai",
agent_name="subsequential-questions-crewai",
max_workers=max_workers,
max_tries=1,
model="gpt-3.5-turbo-0125",
),
RandomAgent(agent_name="random", max_workers=max_workers),
FixedAgent(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
import typing as t

from crewai import Agent, Crew, Process, Task
from langchain_core.language_models import BaseChatModel
from langchain_openai import ChatOpenAI
from pydantic import BaseModel

from prediction_market_agent.agents.crewai_subsequential_agent.prompts import (
PROBABILITY_CLASS_OUTPUT,
CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
FINAL_DECISION_PROMPT,
PROBABILITY_CLASS_OUTPUT,
PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
RESEARCH_OUTCOME_OUTPUT,
RESEARCH_OUTCOME_PROMPT,
CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
)
from prediction_market_agent.tools.crewai_tools import TavilyDevTool
from prediction_market_agent.utils import APIKeys

tavily_search = TavilyDevTool()

Expand All @@ -29,15 +32,16 @@ class ProbabilityOutput(BaseModel):


class CrewAIAgentSubquestions:
def __init__(self, openai_model_name: str | None) -> None:
# openai_model_name as str automatically interpreted by CrewAI, else create LLM object.
def __init__(self) -> None:
llm = self._build_llm()
self.researcher = Agent(
role="Research Analyst",
goal="Research and report on some future event, giving high quality and nuanced analysis",
backstory="You are a senior research analyst who is adept at researching and reporting on future events.",
verbose=True,
allow_delegation=False,
tools=[tavily_search],
llm=llm,
)

self.predictor = Agent(
Expand All @@ -46,7 +50,20 @@ def __init__(self, openai_model_name: str | None) -> None:
backstory="You are a professional gambler who is adept at predicting and betting on the outcomes of future events.",
verbose=True,
allow_delegation=False,
llm=llm,
)

def _build_llm(self, model: str = "gpt-3.5-turbo-0125") -> BaseChatModel:
    """Build the chat model handed to the CrewAI agents.

    Args:
        model: Name of the OpenAI chat model to pass to ``ChatOpenAI``.
            Defaults to the model that was previously hard-coded here, so
            existing zero-argument callers behave exactly as before.

    Returns:
        A ``ChatOpenAI`` instance configured with the OpenAI key read
        from ``APIKeys``.
    """
    keys = APIKeys()
    return ChatOpenAI(
        model=model,
        # The key is stored as a secret; unwrap it only at the point of use.
        openai_api_key=keys.openai_api_key.get_secret_value(),  # type: ignore
    )

def split_research_into_outcomes(self, question: str) -> Outcomes:
create_outcomes_task = Task(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import random
from decimal import Decimal

Expand All @@ -15,6 +14,8 @@

class DeployableThinkThoroughlyAgent(DeployableAgent):
# For cheaper credits at this experimental stage
def __init__(self) -> None:
super().__init__()

def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]:
# We simply pick 5 random markets to bet on
Expand All @@ -34,9 +35,7 @@ def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]:
def answer_binary_market(self, market: AgentMarket) -> bool:
# Ask the CrewAI sub-question agent for a decision on this market's question
# and map its "y" decision to True (any other decision to False).
os.environ["OPENAI_MODEL_NAME"] = "gpt-4-turbo-preview"
agent = CrewAIAgentSubquestions()
result = agent.answer_binary_market(market.question)
result = CrewAIAgentSubquestions().answer_binary_market(market.question)
return True if result.decision == "y" else False

def calculate_bet_amount(self, answer: bool, market: AgentMarket) -> BetAmount:
Expand Down
3 changes: 1 addition & 2 deletions prediction_market_agent/tools/crewai_tools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from typing import Any, Type

from crewai_tools.tools.base_tool import BaseTool
Expand Down Expand Up @@ -35,5 +34,5 @@ def _run(
) -> Any:
keys = APIKeys()
return TavilySearchAPIWrapper(
tavily_api_key=SecretStr(os.environ["TAVILY_API_KEY"])
tavily_api_key=SecretStr(keys.tavily_api_key.get_secret_value())
).results(query=search_query)

0 comments on commit 787d670

Please sign in to comment.