Executed benchmark for 50 markets

gnosis · Apr 10, 2024 · 787d670
1 parent 6513138
commit 787d670
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 19 deletions.
diff --git a/prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py b/prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py
@@ -2,7 +2,6 @@
 from datetime import datetime
 
 import typer
-from dotenv import load_dotenv
 from prediction_market_agent_tooling.benchmark.agents import (
     AbstractBenchmarkedAgent,
     FixedAgent,
@@ -46,9 +45,8 @@ def build_binary_agent_market_from_question(question: str) -> AgentMarket:
 class CrewAIAgentSubquestionsBenchmark(AbstractBenchmarkedAgent):
     def __init__(
         self,
-        agent_name: str,
         max_workers: int,
-        model: str,
+        agent_name: str,
         max_tries: int,
     ) -> None:
         self.max_tries = max_tries
@@ -65,8 +63,8 @@ def predict(self, market_question: str) -> Prediction:
 
 
 def main(
-    n: int = 5,
-    output: str = "./benchmark_report.md",
+    n: int = 50,
+    output: str = "./benchmark_report_50markets.md",
     reference: MarketType = MarketType.MANIFOLD,
     filter: FilterBy = FilterBy.OPEN,
     sort: SortBy = SortBy.NONE,
@@ -78,7 +76,6 @@ def main(
     Polymarket usually contains higher quality questions,
     but on Manifold, additionally to filtering by MarketFilter.resolved, you can sort by MarketSort.newest.
     """
-    load_dotenv()
     markets = get_binary_markets(n, reference, filter_by=filter, sort_by=sort)
     markets_deduplicated = list(({m.question: m for m in markets}.values()))
     if len(markets) != len(markets_deduplicated):
@@ -92,10 +89,9 @@ def main(
         markets=markets_deduplicated,
         agents=[
             CrewAIAgentSubquestionsBenchmark(
-                "subsequential-questions-crewai",
+                agent_name="subsequential-questions-crewai",
                 max_workers=max_workers,
                 max_tries=1,
-                model="gpt-3.5-turbo-0125",
             ),
             RandomAgent(agent_name="random", max_workers=max_workers),
             FixedAgent(

diff --git a/prediction_market_agent/agents/crewai_subsequential_agent/crewai_agent_subquestions.py b/prediction_market_agent/agents/crewai_subsequential_agent/crewai_agent_subquestions.py
@@ -1,18 +1,21 @@
 import typing as t
 
 from crewai import Agent, Crew, Process, Task
+from langchain_core.language_models import BaseChatModel
+from langchain_openai import ChatOpenAI
 from pydantic import BaseModel
 
 from prediction_market_agent.agents.crewai_subsequential_agent.prompts import (
-    PROBABILITY_CLASS_OUTPUT,
+    CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
+    CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
     FINAL_DECISION_PROMPT,
+    PROBABILITY_CLASS_OUTPUT,
     PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
     RESEARCH_OUTCOME_OUTPUT,
     RESEARCH_OUTCOME_PROMPT,
-    CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
-    CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
 )
 from prediction_market_agent.tools.crewai_tools import TavilyDevTool
+from prediction_market_agent.utils import APIKeys
 
 tavily_search = TavilyDevTool()
 
@@ -29,15 +32,16 @@ class ProbabilityOutput(BaseModel):
 
 
 class CrewAIAgentSubquestions:
-    def __init__(self, openai_model_name: str | None) -> None:
-        # openai_model_name as str automatically interpreted by CrewAI, else create LLM object.
+    def __init__(self) -> None:
+        llm = self._build_llm()
         self.researcher = Agent(
             role="Research Analyst",
             goal="Research and report on some future event, giving high quality and nuanced analysis",
             backstory="You are a senior research analyst who is adept at researching and reporting on future events.",
             verbose=True,
             allow_delegation=False,
             tools=[tavily_search],
+            llm=llm,
         )
 
         self.predictor = Agent(
@@ -46,7 +50,20 @@ def __init__(self, openai_model_name: str | None) -> None:
             backstory="You are a professional gambler who is adept at predicting and betting on the outcomes of future events.",
             verbose=True,
             allow_delegation=False,
+            llm=llm,
+        )
+
+    def _build_llm(self) -> BaseChatModel:
+        keys = APIKeys()
+        llm = ChatOpenAI(
+            model="gpt-3.5-turbo-0125",
+            openai_api_key=keys.openai_api_key.get_secret_value(),  # type: ignore
         )
+        # llm = OpenAI(
+        #     openai_api_key=keys.openai_api_key.get_secret_value(),  # type: ignore
+        #     model_name="gpt-4-turbo-preview",
+        # )
+        return llm
 
     def split_research_into_outcomes(self, question: str) -> Outcomes:
         create_outcomes_task = Task(

diff --git a/prediction_market_agent/agents/crewai_subsequential_agent/deploy.py b/prediction_market_agent/agents/crewai_subsequential_agent/deploy.py
@@ -1,4 +1,3 @@
-import os
 import random
 from decimal import Decimal
 
@@ -15,6 +14,8 @@
 
 class DeployableThinkThoroughlyAgent(DeployableAgent):
     # For cheaper credits at this experimental stage
+    def __init__(self) -> None:
+        super().__init__()
 
     def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]:
         # We simply pick 5 random markets to bet on
@@ -34,9 +35,7 @@ def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]:
     def answer_binary_market(self, market: AgentMarket) -> bool:
         # The answer has already been determined in `pick_markets` so we just
         # return it here.
-        os.environ["OPENAI_MODEL_NAME"] = "gpt-4-turbo-preview"
-        agent = CrewAIAgentSubquestions()
-        result = agent.answer_binary_market(market.question)
+        result = CrewAIAgentSubquestions().answer_binary_market(market.question)
         return True if result.decision == "y" else False
 
     def calculate_bet_amount(self, answer: bool, market: AgentMarket) -> BetAmount:

diff --git a/prediction_market_agent/tools/crewai_tools.py b/prediction_market_agent/tools/crewai_tools.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Type
 
 from crewai_tools.tools.base_tool import BaseTool
@@ -35,5 +34,5 @@ def _run(
     ) -> Any:
         keys = APIKeys()
         return TavilySearchAPIWrapper(
-            tavily_api_key=SecretStr(os.environ["TAVILY_API_KEY"])
+            tavily_api_key=SecretStr(keys.tavily_api_key.get_secret_value())
         ).results(query=search_query)