Merge pull request #33 from gnosis/evan/known-outcome-agent

Add an agent that targets Omen markets that are still open but have a known outcome
gnosis · Mar 20, 2024 · 5cd7cfb · 5cd7cfb
2 parents 5c9fa13 + e1e232e
commit 5cd7cfb
Show file tree

Hide file tree

Showing 15 changed files with 571 additions and 12 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/prediction_market_agent/agents/autogen_agent.py b/prediction_market_agent/agents/autogen_agent.py
@@ -7,8 +7,8 @@
 
 from prediction_market_agent import utils
 from prediction_market_agent.agents.abstract import AbstractAgent
-from prediction_market_agent.tools.google_search import GoogleSearchTool
-from prediction_market_agent.tools.web_scrape import WebScrapingTool
+from prediction_market_agent.tools.web_scrape.basic_summary import WebScrapingTool
+from prediction_market_agent.tools.web_search.google import GoogleSearchTool
 
 
 class AutoGenAgent(AbstractAgent):

diff --git a/prediction_market_agent/agents/custom_agent.py b/prediction_market_agent/agents/custom_agent.py
@@ -16,11 +16,11 @@
 )
 from prediction_market_agent.ai_models.llama_ai_models import ChatReplicateLLamaModel
 from prediction_market_agent.ai_models.openai_ai_models import ChatOpenAIModel
-from prediction_market_agent.tools.google_search import google_search
 from prediction_market_agent.tools.tool_exception_handler import tool_exception_handler
-from prediction_market_agent.tools.web_scrape_structured import (
+from prediction_market_agent.tools.web_scrape.structured_summary import (
     web_scrape_structured_and_summarized,
 )
+from prediction_market_agent.tools.web_search.google import google_search
 
 
 class CustomAgent(AbstractAgent):

diff --git a/prediction_market_agent/agents/known_outcome_agent/benchmark.py b/prediction_market_agent/agents/known_outcome_agent/benchmark.py
@@ -0,0 +1,163 @@
+import time
+import typing as t
+from datetime import timedelta
+
+from dotenv import load_dotenv
+from prediction_market_agent_tooling.benchmark.agents import AbstractBenchmarkedAgent
+from prediction_market_agent_tooling.benchmark.benchmark import Benchmarker
+from prediction_market_agent_tooling.benchmark.utils import (
+    Market,
+    MarketSource,
+    OutcomePrediction,
+    Prediction,
+)
+from prediction_market_agent_tooling.tools.utils import utcnow
+from pydantic import BaseModel
+
+from prediction_market_agent.agents.known_outcome_agent.known_outcome_agent import (
+    Result,
+    get_known_outcome,
+)
+
+
+class QuestionWithKnownOutcome(BaseModel):
+    """A benchmark question paired with the result the agent is expected to give."""
+
+    # URL associated with the question, if any.
+    url: t.Optional[str] = None
+    # The question text handed to the agent.
+    question: str
+    # The expected result for the question.
+    result: Result
+    # Optional free-text remarks about the expected result.
+    notes: t.Optional[str] = None
+
+    def to_market(self) -> Market:
+        """Wrap this question in a `Market` suitable for the benchmarker.
+
+        The market's `p_yes` is `result.to_p_yes()`, except for
+        `Result.UNKNOWN` where 0.5 is used. Volume is zero, the source is
+        `MarketSource.MANIFOLD`, and both the created and close times are the
+        same `utcnow()` value. A missing URL becomes an empty string.
+        """
+        now = utcnow()
+        if self.result == Result.UNKNOWN:
+            expected_p_yes = 0.5
+        else:
+            expected_p_yes = self.result.to_p_yes()
+        return Market(
+            url=self.url or "",
+            question=self.question,
+            source=MarketSource.MANIFOLD,
+            p_yes=expected_p_yes,
+            volume=0.0,
+            created_time=now,
+            close_time=now,
+        )
+
+
+class KnownOutcomeAgent(AbstractBenchmarkedAgent):
+    """Benchmarked agent whose predictions come from `get_known_outcome`."""
+
+    def __init__(
+        self,
+        agent_name: str,
+        max_workers: int,
+        model: str,
+        max_tries: int,
+    ) -> None:
+        """Keep the `get_known_outcome` settings, then initialise the base class.
+
+        Args:
+            agent_name: Forwarded to the base class.
+            max_workers: Forwarded to the base class.
+            model: Passed as `model` to `get_known_outcome`.
+            max_tries: Passed as `max_tries` to `get_known_outcome`.
+        """
+        self.model = model
+        self.max_tries = max_tries
+        super().__init__(agent_name=agent_name, max_workers=max_workers)
+
+    def predict(self, market_question: str) -> Prediction:
+        """Turn the known-outcome answer for a question into a `Prediction`.
+
+        An answer of `Result.UNKNOWN` yields a prediction flagged as not
+        predictable with no outcome; any other answer yields the result's
+        `p_yes` with a confidence of 1.0.
+        """
+        outcome = get_known_outcome(
+            model=self.model,
+            question=market_question,
+            max_tries=self.max_tries,
+        )
+        if outcome.result != Result.UNKNOWN:
+            known = OutcomePrediction(
+                p_yes=outcome.result.to_p_yes(),
+                confidence=1.0,
+                info_utility=None,
+            )
+            return Prediction(is_predictable=True, outcome_prediction=known)
+        return Prediction(is_predictable=False, outcome_prediction=None)
+
+
+if __name__ == "__main__":
+    # Benchmark entry point: run the known-outcome agent over a fixed list of
+    # questions, write a markdown report, and check every prediction matched.
+    load_dotenv()
+    # Most questions are phrased relative to "tomorrow" so the deadline in the
+    # question text moves with the day the benchmark is run.
+    tomorrow_str = (utcnow() + timedelta(days=1)).strftime("%d %B %Y")
+
+    # Fetch questions from existing markets, or make some up, where the
+    # outcome is known.
+    qs_with_known_outcome: list[QuestionWithKnownOutcome] = [
+        QuestionWithKnownOutcome(
+            question=f"Will 'Barbie' win an Academy Award for best original song by {tomorrow_str}?",
+            url="https://aiomen.eth.limo/#/0xceb2a4ecc217cab440acf60737a9fcfd6d3fbf4b",
+            result=Result.YES,
+            notes="Happened on 10th March 2024.",
+        ),
+        QuestionWithKnownOutcome(
+            question=f"Will the 2024 Oscars winner for Best Picture be announced by {tomorrow_str}?",
+            url="https://aiomen.eth.limo/#/0xb88e4507709148e096bcdfb861b17db7b4d54e6b",
+            result=Result.YES,
+            notes="Happened on 10th March 2024.",
+        ),
+        QuestionWithKnownOutcome(
+            question=f"Will Liverpool win against Atalanta in the Europa League quarter-finals by {tomorrow_str}?",
+            url="https://aiomen.eth.limo/#/0x1d5a462c801360b4bebbda2b9656e52801a27a3b",
+            result=Result.NO,
+            notes="The match is scheduled for 11 April 2024.",
+        ),
+        QuestionWithKnownOutcome(
+            question=f"Will Donald Trump officially become the GOP nominee for the 2024 presidential elections by {tomorrow_str}?",
+            url="https://aiomen.eth.limo/#/0x859a6b465ee1e4a73aab0f2da4428c6255da466c",
+            result=Result.YES,
+            notes="Happened on 10th March 2024.",
+        ),
+        QuestionWithKnownOutcome(
+            question=f"Will SpaceX successfully test a Starship reentry without losing contact by {tomorrow_str}?",
+            url="https://aiomen.eth.limo/#/0xcc9123af8db309e0c60c63f9e2b8b82fc86f458b",
+            result=Result.NO,
+            notes="The only scheduled test flight occured, and contact was lost during the test.",
+        ),
+        QuestionWithKnownOutcome(
+            question=f"Will Arsenal reach the Champions League semi-finals on {tomorrow_str}?",
+            url="https://aiomen.eth.limo/#/0x606efd175b245cd60282a98cef402d4f5e950f92",
+            result=Result.NO,
+            notes="They are scheduled to play the first leg of the quarter-finals on 9 April 2024.",
+        ),
+        QuestionWithKnownOutcome(
+            question=f"Will the jury deliver a verdict on James Crumbley's 'bad parenting' case on {tomorrow_str}?",
+            url="https://aiomen.eth.limo/#/0xe55171beda0d60fd45092ff8bf93d5cb566a2510",
+            result=Result.NO,
+            notes="The verdict was announced on 15th March 2024.",
+        ),
+        QuestionWithKnownOutcome(
+            question="Will Lewis Hamilton win the 2024/2025 F1 drivers champtionship?",
+            result=Result.UNKNOWN,
+            notes="Outcome is uncertain.",
+        ),
+        QuestionWithKnownOutcome(
+            question="Will the cost of grain in the Spain increase by 20% by 19 July 2024?",
+            result=Result.UNKNOWN,
+            notes="Outcome is uncertain.",
+        ),
+        QuestionWithKnownOutcome(
+            question="Will over 360 pople have died while climbing Mount Everest by 1st Jan 2028?",
+            result=Result.UNKNOWN,
+            notes="Outcome is uncertain.",
+        ),
+    ]
+
+    benchmarker = Benchmarker(
+        markets=[q.to_market() for q in qs_with_known_outcome],
+        agents=[
+            KnownOutcomeAgent(
+                agent_name="known_outcome",
+                model="gpt-4-1106-preview",
+                max_tries=3,
+                max_workers=1,
+            ),
+        ],
+    )
+    benchmarker.run_agents()
+    md = benchmarker.generate_markdown_report()
+
+    # The report name carries the current Unix timestamp so repeated runs do
+    # not overwrite each other.
+    output = f"./known_outcome_agent_benchmark_report.{int(time.time())}.md"
+    # Write as UTF-8 explicitly: the report is generated text and must not
+    # depend on the platform's default encoding.
+    with open(output, "w", encoding="utf-8") as f:
+        print(f"Writing benchmark report to: {output}")
+        f.write(md)
+
+    # Check all predictions are correct, i.e. mean-squared-error == 0
+    metrics = benchmarker.compute_metrics()
+    assert metrics["MSE for `p_yes`"][0] == 0.0
diff --git a/prediction_market_agent/agents/known_outcome_agent/deploy.py b/prediction_market_agent/agents/known_outcome_agent/deploy.py
@@ -0,0 +1,83 @@
+import getpass
+from decimal import Decimal
+
+from prediction_market_agent_tooling.config import APIKeys
+from prediction_market_agent_tooling.deploy.agent import DeployableAgent
+from prediction_market_agent_tooling.deploy.constants import OWNER_KEY
+from prediction_market_agent_tooling.gtypes import SecretStr, private_key_type
+from prediction_market_agent_tooling.markets.agent_market import AgentMarket
+from prediction_market_agent_tooling.markets.data_models import BetAmount, Currency
+from prediction_market_agent_tooling.markets.markets import MarketType
+from prediction_market_agent_tooling.tools.utils import (
+    get_current_git_commit_sha,
+    get_current_git_url,
+)
+from prediction_market_agent_tooling.tools.web3_utils import verify_address
+
+from prediction_market_agent.agents.known_outcome_agent.known_outcome_agent import (
+    Result,
+    get_known_outcome,
+)
+
+
+def market_is_saturated(market: AgentMarket) -> bool:
+    """Return True when the market's `p_yes` or `p_no` is strictly above 0.95."""
+    saturation_threshold = 0.95
+    if market.p_yes > saturation_threshold:
+        return True
+    return market.p_no > saturation_threshold
+
+
+class DeployableKnownOutcomeAgent(DeployableAgent):
+    """Deployable agent that bets on markets whose outcome `get_known_outcome` reports as known."""
+
+    # Model name passed to `get_known_outcome`.
+    model = "gpt-4-1106-preview"
+
+    def load(self) -> None:
+        """Create the empty market-id -> result mapping.
+
+        `pick_markets` fills the mapping and `answer_binary_market` reads it.
+        """
+        self.markets_with_known_outcomes: dict[str, Result] = {}
+
+    def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]:
+        """Return the markets with a known outcome, remembering each result.
+
+        Args:
+            markets: Candidate markets to inspect.
+
+        Returns:
+            The non-saturated markets for which `get_known_outcome` returned
+            an answer whose `has_known_outcome()` is true, in input order.
+        """
+        picked_markets: list[AgentMarket] = []
+        for market in markets:
+            # Assume very high probability markets are already known, and have
+            # been correctly bet on, and therefore the value of betting on them
+            # is low.
+            if market_is_saturated(market=market):
+                continue
+
+            answer = get_known_outcome(
+                model=self.model,
+                question=market.question,
+                max_tries=3,
+            )
+            if not answer.has_known_outcome():
+                continue
+
+            picked_markets.append(market)
+            self.markets_with_known_outcomes[market.id] = answer.result
+
+        return picked_markets
+
+    def answer_binary_market(self, market: AgentMarket) -> bool:
+        """Return the boolean form of the result stored for this market.
+
+        Raises:
+            KeyError: If `pick_markets` did not record a result for the market.
+        """
+        # The answer has already been determined in `pick_markets` so we just
+        # return it here.
+        return self.markets_with_known_outcomes[market.id].to_boolean()
+
+    def calculate_bet_amount(self, answer: bool, market: AgentMarket) -> BetAmount:
+        """Return a fixed bet of 0.1 xDai for xDai markets.
+
+        Raises:
+            NotImplementedError: If the market's currency is not xDai.
+        """
+        if market.currency != Currency.xDai:
+            raise NotImplementedError("This agent only supports xDai markets")
+        # Build the Decimal from a string: Decimal(0.1) would carry over the
+        # binary float's representation error instead of being exactly 0.1.
+        return BetAmount(amount=Decimal("0.1"), currency=Currency.xDai)
+
+
+if __name__ == "__main__":
+    # Deployment entry point: deploy the known-outcome agent via `deploy_gcp`.
+    agent = DeployableKnownOutcomeAgent()
+
+    # Pin the deployed code to the current git URL and commit SHA.
+    repository = f"git+{get_current_git_url()}.git@{get_current_git_commit_sha()}"
+    # Label the deployment with the local user name.
+    owner_labels = {OWNER_KEY: getpass.getuser()}
+    # NOTE(review): the "NAME:latest" values look like secret references
+    # rather than literal keys - confirm against `deploy_gcp`.
+    secret_refs = {
+        "TAVILY_API_KEY": "GNOSIS_AI_TAVILY_API_KEY:latest",
+    }
+    api_keys = APIKeys(
+        BET_FROM_ADDRESS=verify_address(
+            "0xb611A9f02B318339049264c7a66ac3401281cc3c"
+        ),
+        BET_FROM_PRIVATE_KEY=private_key_type("EVAN_OMEN_BETTER_0_PKEY:latest"),
+        OPENAI_API_KEY=SecretStr("EVAN_OPENAI_API_KEY:latest"),
+        MANIFOLD_API_KEY=None,
+    )
+
+    agent.deploy_gcp(
+        repository=repository,
+        market_type=MarketType.OMEN,
+        labels=owner_labels,
+        secrets=secret_refs,
+        memory=1024,
+        api_keys=api_keys,
+        # Standard cron syntax: minute 0 of every fourth hour.
+        cron_schedule="0 */4 * * *",
+        timeout=540,
+    )