Add two new agents into the benchmark: Autonolas with OpenAI embeddings and Autonolas with Rephrasing questions (#5)

* Evaluate question before researching it

* fix var name

* Add token probability for `yes` and `no` to the benchmark

* fix types

* Add two new agents into the benchmark: Autonolas with OpenAI embeddings and Autonolas with rephrasing questions

* fix

* fix

* fix

* fix

* remove unused vars

* rename

* Review comments

* rabbit review

* remove bad merge

* rabbit review

* fix bad conflict
kongzii authored Feb 7, 2024
1 parent ffd9575 commit 4de102b
Showing 7 changed files with 935 additions and 20 deletions.
62 changes: 46 additions & 16 deletions evo_researcher/autonolas/research.py
@@ -1,6 +1,7 @@

import os
import math
from sklearn.metrics.pairwise import cosine_similarity
from typing import Any, Dict, Generator, List, Optional, Tuple, TypedDict
from datetime import datetime, timezone
import json
@@ -9,7 +10,7 @@
from concurrent.futures import Future, ThreadPoolExecutor
from itertools import groupby
from operator import itemgetter

from enum import Enum
from bs4 import BeautifulSoup, NavigableString
from googleapiclient.discovery import build

@@ -22,8 +23,11 @@
from langchain_community.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.embeddings.openai import OpenAIEmbeddings

from dateutil import parser
from evo_researcher.functions.cache import persistent_inmemory_cache
from evo_researcher.functions.parallelism import par_map

load_dotenv()

@@ -295,6 +299,12 @@
"link",
]


class EmbeddingModel(Enum):
spacy = "spacy"
openai = "openai"


class Prediction(TypedDict):
decision: Optional[str]
decision_token_prob: Optional[float]
@@ -316,7 +326,10 @@ def search_google(query: str, api_key: str, engine: str, num: int = 3) -> List[str]:
)
.execute()
)
return [result["link"] for result in search["items"]]
try:
return [result["link"] for result in search["items"]]
except KeyError as e:
raise ValueError(f"Can not parse results: {search}") from e


def download_spacy_model(model_name: str) -> None:
@@ -627,12 +640,19 @@ def concatenate_short_sentences(sentences, len_sentence_threshold):
return modified_sentences


@persistent_inmemory_cache
def openai_embedding_cached(text: str, model: str = "text-embedding-ada-002") -> list[float]:
emb = OpenAIEmbeddings(model=model)
return emb.embed_query(text)


def extract_similarity_scores(
text: str,
query_emb,
doc_question,
event_date: str,
nlp,
date: str,
embedding_model: EmbeddingModel,
) -> List[Tuple[str, float, str]]:
"""
Extract relevant information from website text based on a given event question.
@@ -690,13 +710,18 @@ def extract_similarity_scores(
# Limit the number of sentences for performance optimization
sentences = sentences[:num_sentences_threshold]

similarities = []

# Encode sentences using spaCy model
for i, sentence in enumerate(sentences):
doc_sentence = nlp(sentence)
similarity_score = query_emb.similarity(doc_sentence)
similarities.append(similarity_score)
# Encode sentences using an embedding model
similarities = par_map(
sentences,
lambda sentence: (
doc_question.similarity(nlp(sentence)) if embedding_model == EmbeddingModel.spacy
else cosine_similarity(
[openai_embedding_cached(sentence)],
[openai_embedding_cached(doc_question.text)]
)[0][0] if embedding_model == EmbeddingModel.openai
else None
)
)

# Create tuples and store them in a list
sentence_similarity_date_tuples = [
@@ -752,9 +777,10 @@ def get_date(soup):

def extract_sentences(
html: str,
query_emb,
doc_question,
event_date: str,
nlp,
embedding_model: EmbeddingModel,
) -> List[Tuple[str, float, str]]:
"""
Extract relevant information from HTML string.
@@ -796,10 +822,11 @@
# Get List of (sentence, similarity, date) tuples
similarity_scores = extract_similarity_scores(
text=text,
query_emb=query_emb,
doc_question=doc_question,
event_date=event_date,
nlp=nlp,
date=date,
embedding_model=embedding_model,
)

if not similarity_scores:
@@ -883,6 +910,7 @@ def extract_and_sort_sentences(
urls: List[str],
event_question: str,
nlp,
embedding_model: EmbeddingModel,
) -> List[Tuple[str, float, str]]:
"""
Extract texts from a list of URLs using Spacy models.
@@ -906,9 +934,6 @@
doc_question = nlp(event_question)
event_date = extract_event_date(doc_question)

# Create embedding for event question with Spacy embedder model
query_emb = nlp(event_question)

if event_date is None:
print(
f"Could not extract precise event date from event question: {event_question}"
@@ -925,9 +950,10 @@
# Extract relevant information for the event question
extracted_sentences = extract_sentences(
html=result.text,
query_emb=query_emb,
doc_question=doc_question,
event_date=event_date,
nlp=nlp,
embedding_model=embedding_model,
)

# Delete the result object to free memory
@@ -1002,6 +1028,7 @@ def fetch_additional_information(
google_api_key: str,
google_engine: str,
nlp,
embedding_model: EmbeddingModel,
engine: str = "gpt-3.5-turbo",
temperature: float = 0.5,
max_compl_tokens: int = 500,
@@ -1072,6 +1099,7 @@ def fetch_additional_information(
urls=urls,
event_question=event_question,
nlp=nlp,
embedding_model=embedding_model,
)

# Join the sorted sentences and group them by date
@@ -1086,6 +1114,7 @@ def research(
max_tokens: int = None,
temperature: int = None,
engine: str = "gpt-3.5-turbo",
embedding_model: EmbeddingModel = EmbeddingModel.spacy,
) -> str:
prompt = f"\"{prompt}\""
max_compl_tokens = max_tokens or DEFAULT_OPENAI_SETTINGS["max_compl_tokens"]
@@ -1121,6 +1150,7 @@ def research(
max_add_words=max_add_words,
google_api_key=os.getenv("GOOGLE_SEARCH_API_KEY"),
google_engine=os.getenv("GOOGLE_SEARCH_ENGINE_ID"),
embedding_model=embedding_model,
)

# Truncate additional information to stay within the chat completion token limit of 4096
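Taken together, the research.py changes make the sentence scorer pluggable: spaCy vector similarity remains the default, while the new EmbeddingModel.openai branch compares cached OpenAI embeddings by cosine similarity, fanned out across sentences with par_map. A minimal sketch of the per-sentence scoring, assuming a loaded spaCy pipeline `nlp` and the `openai_embedding_cached` helper above (the name `score_sentence` is illustrative, not part of the commit):

from sklearn.metrics.pairwise import cosine_similarity

def score_sentence(sentence: str, doc_question, nlp, embedding_model: EmbeddingModel) -> float:
    # spaCy branch: built-in Doc-to-Doc vector similarity.
    if embedding_model == EmbeddingModel.spacy:
        return doc_question.similarity(nlp(sentence))
    # OpenAI branch: cosine similarity between cached text-embedding-ada-002 vectors.
    if embedding_model == EmbeddingModel.openai:
        return cosine_similarity(
            [openai_embedding_cached(sentence)],
            [openai_embedding_cached(doc_question.text)],
        )[0][0]
    raise ValueError(f"Unknown embedding model: {embedding_model}")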
47 changes: 45 additions & 2 deletions evo_researcher/benchmark/agents.py
@@ -4,8 +4,10 @@
import typing as t

from evo_researcher.functions.evaluate_question import evaluate_question, EvalautedQuestion
from evo_researcher.functions.rephrase_question import rephrase_question
from evo_researcher.functions.research import research as research_evo
from evo_researcher.autonolas.research import (
EmbeddingModel,
make_prediction,
Prediction as LLMCompletionPredictionDict,
research as research_autonolas,
@@ -53,7 +55,7 @@ def evaluate(self, market_question: str) -> EvalautedQuestion:

def research(self, market_question: str) -> t.Optional[str]:
raise NotImplementedError

def predict(self, market_question: str, researched: str, evaluated: EvalautedQuestion) -> Prediction:
raise NotImplementedError

@@ -70,11 +72,13 @@ def evaluate_research_predict(self, market_question: str) -> Prediction:
evaluated=eval,
)


class OlasAgent(AbstractBenchmarkedAgent):
def __init__(self, model: str, temperature: float, agent_name: str = "olas", max_workers: t.Optional[int] = None):
def __init__(self, model: str, temperature: float, agent_name: str = "olas", max_workers: t.Optional[int] = None, embedding_model: EmbeddingModel = EmbeddingModel.spacy):
super().__init__(agent_name=agent_name, max_workers=max_workers)
self.model = model
self.temperature = temperature
self.embedding_model = embedding_model

def evaluate(self, market_question: str) -> EvalautedQuestion:
return evaluate_question(question=market_question)
@@ -84,6 +88,7 @@ def research(self, market_question: str) -> t.Optional[str]:
return research_autonolas(
prompt=market_question,
engine=self.model,
embedding_model=self.embedding_model,
)
except ValueError as e:
print(f"Error in OlasAgent's research: {e}")
@@ -141,7 +146,45 @@ def predict(self, market_question: str, researched: str, evaluated: EvalautedQuestion) -> Prediction:
return Prediction(evaluation=evaluated)


class RephrasingOlasAgent(OlasAgent):
def __init__(
self,
model: str,
temperature: float,
agent_name: str = "reph-olas",
max_workers: t.Optional[int] = None,
embedding_model: EmbeddingModel = EmbeddingModel.spacy,
):
super().__init__(
model=model,
temperature=temperature,
embedding_model=embedding_model,
agent_name=agent_name,
max_workers=max_workers,
)

def research(self, market_question: str) -> t.Optional[str]:
questions = rephrase_question(question=market_question)

report_original = super().research(market_question=questions.original_question)
report_negated = super().research(market_question=questions.negated_question)
report_universal = super().research(market_question=questions.open_ended_question)

report_concat = "\n\n---\n\n".join([
f"### {r_name}\n\n{r}"
for r_name, r in [
("Research based on the question", report_original),
("Research based on the negated question", report_negated),
("Research based on the universal search query", report_universal)
]
if r is not None
])

return report_concat


AGENTS = [
OlasAgent,
RephrasingOlasAgent,
EvoAgent,
]
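A short usage sketch of the new agents, assuming the required OpenAI and Google Search credentials are present in the environment; the model name, temperature, and market question are illustrative:

from evo_researcher.autonolas.research import EmbeddingModel
from evo_researcher.benchmark.agents import OlasAgent, RephrasingOlasAgent

# Autonolas agent that scores sentences with OpenAI embeddings instead of spaCy.
olas_openai = OlasAgent(
    model="gpt-3.5-turbo",
    temperature=0.7,
    agent_name="olas-openai",
    embedding_model=EmbeddingModel.openai,
)

# Variant that researches the original, negated, and open-ended rephrasings.
olas_rephrasing = RephrasingOlasAgent(model="gpt-3.5-turbo", temperature=0.7)

prediction = olas_openai.evaluate_research_predict("Will GPT-5 be released by the end of 2024?")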
23 changes: 23 additions & 0 deletions evo_researcher/functions/parallelism.py
@@ -0,0 +1,23 @@
import os
import concurrent
from typing import Callable, TypeVar
from concurrent.futures.thread import ThreadPoolExecutor

THREADPOOL = ThreadPoolExecutor(int(os.getenv("THREADPOOL_N_THREADS", 50)))

A = TypeVar("A")
B = TypeVar("B")

def par_map(
items: list[A], func: Callable[[A], B], executor: concurrent.futures.Executor = THREADPOOL
) -> "list[B]":
"""Applies the function to each element using the specified executor. Awaits for all results.
If executor is ProcessPoolExecutor, make sure the function passed is pickable, e.g. no lambda functions
"""
futures: list[concurrent.futures._base.Future[B]] = [
executor.submit(func, item) for item in items
]
results = []
for fut in futures:
results.append(fut.result())
return results
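A minimal usage example: par_map submits every item to the shared thread pool and returns the results in input order.

from evo_researcher.functions.parallelism import par_map

squares = par_map(items=[1, 2, 3, 4], func=lambda x: x * x)
assert squares == [1, 4, 9, 16]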
54 changes: 54 additions & 0 deletions evo_researcher/functions/rephrase_question.py
@@ -0,0 +1,54 @@
import json
import tiktoken
from pydantic import BaseModel
from langchain.llms import OpenAI
from langchain_openai import ChatOpenAI
from evo_researcher.autonolas.research import clean_completion_json
from langchain.prompts import ChatPromptTemplate


QUESTION_REPHRASE_PROMPT = """We have the following question: `{question}`
Write a dictionary with following keys, don't answer the question, only rewrite it in the following ways:
```
- open_ended_question: Ask the question universally
- negated_question Ask the question in negation
```
"""


class RephrasedQuestion(BaseModel):
original_question: str
negated_question: str
open_ended_question: str


def rephrase_question(
question: str,
engine: str = "gpt-4-0125-preview"
) -> RephrasedQuestion:
"""
Rephrase the original question, by asking it in negation and universally, for example:
original_question: Is the sky blue?
negated_question: Is the sky not blue?
open_ended_question: What is the color of the sky?
"""
tokenizer = tiktoken.encoding_for_model(engine)
llm = ChatOpenAI(model=engine, temperature=0.0)

prompt = ChatPromptTemplate.from_template(template=QUESTION_REPHRASE_PROMPT)
messages = prompt.format_messages(question=question)

max_tokens = 2 * len(tokenizer.encode(question)) + 50 # Max tokens as the question two times + some buffer for formatting.
completion = llm(messages, max_tokens=max_tokens).content

try:
return RephrasedQuestion(
original_question=question,
**json.loads(clean_completion_json(completion))
)
except json.decoder.JSONDecodeError as e:
raise ValueError(f"Error in rephrase_question for `{question}`: {completion}") from e
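A usage sketch of rephrase_question; the exact rephrasings depend on the model, so the outputs shown (taken from the docstring) are illustrative:

from evo_researcher.functions.rephrase_question import rephrase_question

questions = rephrase_question("Is the sky blue?")
print(questions.original_question)    # Is the sky blue?
print(questions.negated_question)     # e.g. Is the sky not blue?
print(questions.open_ended_question)  # e.g. What is the color of the sky?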

2 changes: 1 addition & 1 deletion evo_researcher/functions/research.py
@@ -20,7 +20,7 @@ def research(
top_k_per_query: int = 8
) -> tuple[str, str]:
queries = generate_subqueries(query=goal, limit=initial_subqueries_limit, api_key=openai_key)
queries = rerank_subqueries(queries=queries, goal=goal, api_key=openai_key)[:subqueries_limit]
queries = rerank_subqueries(queries=queries, goal=goal, api_key=openai_key)[:subqueries_limit] if initial_subqueries_limit > subqueries_limit else queries

search_results_with_queries = search(queries, tavily_key, lambda result: not result["url"].startswith("https://www.youtube"))

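The one-line change above guards the reranking step: when the initial subquery limit is not larger than the final limit, reranking cannot narrow the list, so the extra LLM call is skipped. Written out as a plain conditional, assuming the surrounding names in research():

# Rerank (and truncate) only when reranking can actually narrow the list.
if initial_subqueries_limit > subqueries_limit:
    queries = rerank_subqueries(queries=queries, goal=goal, api_key=openai_key)[:subqueries_limit]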