Skip to content

Commit

Permalink
Merge pull request #183 from assafelovic/feature/permchain
Browse files Browse the repository at this point in the history
Feature/permchain
  • Loading branch information
rotemweiss57 authored Oct 3, 2023
2 parents 1d43446 + 0319921 commit be3e7a3
Show file tree
Hide file tree
Showing 16 changed files with 405 additions and 40 deletions.
2 changes: 2 additions & 0 deletions actions/web_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ def scrape_text_with_selenium(url: str) -> tuple[WebDriver, str]:
"prefs", {"download_restrictions": 3}
)
driver = webdriver.Chrome(options=options)

print(f"scraping url {url}...")
driver.get(url)

WebDriverWait(driver, 10).until(
Expand Down
2 changes: 1 addition & 1 deletion agent/llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ async def stream_response(model, messages, temperature, max_tokens, websocket):
return response


def choose_agent(task: str) -> str:
def choose_agent(task: str) -> dict:
"""Determines what agent should be used
Args:
task (str): The research question the user asked
Expand Down
12 changes: 7 additions & 5 deletions agent/prompts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime
def generate_agent_role_prompt(agent):
""" Generates the agent role prompt.
Args: agent (str): The type of the agent.
Expand Down Expand Up @@ -25,18 +26,19 @@ def generate_report_prompt(question, research_summary):
return f'"""{research_summary}""" Using the above information, answer the following'\
f' question or topic: "{question}" in a detailed report --'\
" The report should focus on the answer to the question, should be well structured, informative," \
" in depth, with facts and numbers if available, a minimum of 1,200 words and with markdown syntax and apa format. "\
"You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions." \
"Write all used source urls at the end of the report in apa format"
" in depth, with facts and numbers if available, a minimum of 1,200 words and with markdown syntax and apa format.\n "\
"You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.\n" \
f"Write all used source urls at the end of the report in apa format.\n " \
f"Assume that the current date is {datetime.now().strftime('%B %d, %Y')}"

def generate_search_queries_prompt(question):
""" Generates the search queries prompt for the given question.
Args: question (str): The question to generate the search queries prompt for
Returns: str: The search queries prompt for the given question
"""

return f'Write 4 google search queries to search online that form an objective opinion from the following: "{question}"'\
f'You must respond with a list of strings in the following format: ["query 1", "query 2", "query 3", "query 4"]'
return f'Write 3 google search queries to search online that form an objective opinion from the following: "{question}"'\
f'You must respond with a list of strings in the following format: ["query 1", "query 2", "query 3"]'


def generate_resource_report_prompt(question, research_summary):
Expand Down
51 changes: 27 additions & 24 deletions agent/research_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# libraries
import asyncio
import json
import uuid
import hashlib

from actions.web_search import web_search
from actions.web_scrape import async_browse
Expand All @@ -23,7 +23,7 @@


class ResearchAgent:
def __init__(self, question, agent, agent_role_prompt, websocket):
def __init__(self, question, agent, agent_role_prompt, websocket=None):
""" Initializes the research assistant with the given question.
Args: question (str): The question to research
Returns: None
Expand All @@ -34,10 +34,14 @@ def __init__(self, question, agent, agent_role_prompt, websocket):
self.agent_role_prompt = agent_role_prompt if agent_role_prompt else prompts.generate_agent_role_prompt(agent)
self.visited_urls = set()
self.research_summary = ""
self.directory_name = uuid.uuid4()
self.dir_path = os.path.dirname(f"./outputs/{self.directory_name}/")
self.dir_path = f"./outputs/{hashlib.sha1(question.encode()).hexdigest()}"
self.websocket = websocket

async def stream_output(self, output):
if not self.websocket:
return print(output)
await self.websocket.send_json({"type": "logs", "output": output})


async def summarize(self, text, topic):
""" Summarizes the given text for the given topic.
Expand All @@ -47,7 +51,7 @@ async def summarize(self, text, topic):
"""

messages = [create_message(text, topic)]
await self.websocket.send_json({"type": "logs", "output": f"📝 Summarizing text for query: {text}"})
await self.stream_output(f"📝 Summarizing text for query: {text}")

return create_chat_completion(
model=CFG.fast_llm_model,
Expand All @@ -63,7 +67,8 @@ async def get_new_urls(self, url_set_input):
new_urls = []
for url in url_set_input:
if url not in self.visited_urls:
await self.websocket.send_json({"type": "logs", "output": f"✅ Adding source url to research: {url}\n"})
await self.stream_output(f"✅ Adding source url to research: {url}\n")

self.visited_urls.add(url)
new_urls.append(url)

Expand Down Expand Up @@ -91,8 +96,7 @@ async def create_search_queries(self):
Returns: list[str]: The search queries for the given question
"""
result = await self.call_agent(prompts.generate_search_queries_prompt(self.question))
print(result)
await self.websocket.send_json({"type": "logs", "output": f"🧠 I will conduct my research based on the following queries: {result}..."})
await self.stream_output(f"🧠 I will conduct my research based on the following queries: {result}...")
return json.loads(result)

async def async_search(self, query):
Expand All @@ -103,8 +107,7 @@ async def async_search(self, query):
search_results = json.loads(web_search(query))
new_search_urls = self.get_new_urls([url.get("href") for url in search_results])

await self.websocket.send_json(
{"type": "logs", "output": f"🌐 Browsing the following sites for relevant information: {new_search_urls}..."})
await self.stream_output(f"🌐 Browsing the following sites for relevant information: {new_search_urls}...")

# Create a list to hold the coroutine objects
tasks = [async_browse(url, query, self.websocket) for url in await new_search_urls]
Expand All @@ -120,21 +123,20 @@ async def run_search_summary(self, query):
Returns: str: The search summary for the given query
"""

await self.websocket.send_json({"type": "logs", "output": f"🔎 Running research for '{query}'..."})
await self.stream_output(f"🔎 Running research for '{query}'...")

responses = await self.async_search(query)

result = "\n".join(responses)
os.makedirs(os.path.dirname(f"./outputs/{self.directory_name}/research-{query}.txt"), exist_ok=True)
write_to_file(f"./outputs/{self.directory_name}/research-{query}.txt", result)
os.makedirs(os.path.dirname(f"{self.dir_path}/research-{query}.txt"), exist_ok=True)
write_to_file(f"{self.dir_path}/research-{query}.txt", result)
return result

async def conduct_research(self):
""" Conducts the research for the given question.
Args: None
Returns: str: The research for the given question
"""

self.research_summary = read_txt_files(self.dir_path) if os.path.isdir(self.dir_path) else ""

if not self.research_summary:
Expand All @@ -143,8 +145,7 @@ async def conduct_research(self):
research_result = await self.run_search_summary(query)
self.research_summary += f"{research_result}\n\n"

await self.websocket.send_json(
{"type": "logs", "output": f"Total research words: {len(self.research_summary.split(' '))}"})
await self.stream_output(f"Total research words: {len(self.research_summary.split(' '))}")

return self.research_summary

Expand All @@ -156,21 +157,23 @@ async def create_concepts(self):
"""
result = self.call_agent(prompts.generate_concepts_prompt(self.question, self.research_summary))

await self.websocket.send_json({"type": "logs", "output": f"I will research based on the following concepts: {result}\n"})
await self.stream_output(f"I will research based on the following concepts: {result}\n")
return json.loads(result)

async def write_report(self, report_type, websocket):
async def write_report(self, report_type, websocket=None):
""" Writes the report for the given question.
Args: None
Returns: str: The report for the given question
"""
report_type_func = prompts.get_report_by_type(report_type)
await websocket.send_json(
{"type": "logs", "output": f"✍️ Writing {report_type} for research task: {self.question}..."})
answer = await self.call_agent(report_type_func(self.question, self.research_summary), stream=True,
websocket=websocket)
await self.stream_output(f"✍️ Writing {report_type} for research task: {self.question}...")

answer = await self.call_agent(report_type_func(self.question, self.research_summary),
stream=websocket is not None, websocket=websocket)
        # if websocket is set then we are streaming the gpt response, so we need to await the final response
final_report = await answer if websocket else answer

path = await write_md_to_pdf(report_type, self.directory_name, await answer)
path = await write_md_to_pdf(report_type, self.dir_path, final_report)

return answer, path

Expand All @@ -182,4 +185,4 @@ async def write_lessons(self):
concepts = await self.create_concepts()
for concept in concepts:
answer = await self.call_agent(prompts.generate_lesson_prompt(concept), stream=True)
write_md_to_pdf("Lesson", self.directory_name, answer)
await write_md_to_pdf("Lesson", self.dir_path, answer)
5 changes: 3 additions & 2 deletions config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ def __init__(self) -> None:
self.llm_provider = os.getenv("LLM_PROVIDER", "ChatOpenAI")
self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo-16k")
self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 2000))
self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 4000))
self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 8192))
self.summary_token_limit = int(os.getenv("SUMMARY_TOKEN_LIMIT", 700))

self.openai_api_key = os.getenv("OPENAI_API_KEY")
self.temperature = float(os.getenv("TEMPERATURE", "1"))
Expand Down
33 changes: 33 additions & 0 deletions permchain_example/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Permchain x Researcher
Sample use of Langchain's Autonomous agent framework Permchain with GPT Researcher.

## Use case
Permchain is a framework for building autonomous agents that coordinate and communicate with one another to complete complex tasks. This example uses Permchain to automate the process of finding and summarizing research reports on any given topic.

## The Agent Team
The research team is made up of 3 agents:
- Researcher agent (gpt-researcher) - This agent is in charge of finding and summarizing relevant research papers.
- Editor agent - This agent is in charge of validating the correctness of the report given a set of criteria.
- Reviser agent - This agent is in charge of revising the report until it is satisfactory.

## How it works
The research agent (gpt-researcher) is in charge of finding and summarizing relevant research papers. It does this by using the following process:
- Search for relevant research papers using a search engine
- Extract the relevant information from the research papers
- Summarize the information into a report
- Send the report to the editor agent for validation
- Send the report to the reviser agent for revision
- Repeat until the report is satisfactory

## How to run
1. Install required packages:
```bash
pip install -r requirements.txt
```
2. Run the application:
```bash
python test.py
```

## Usage
To change the research topic, edit the `query` variable in `test.py` to the desired topic.
51 changes: 51 additions & 0 deletions permchain_example/editor_actors/editor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from langchain.chat_models import ChatOpenAI
from langchain.prompts import SystemMessagePromptTemplate
from config import Config

# Global project configuration; supplies the model name used by the editor
# agent (presumably read from environment variables — see config.py).
CFG = Config()

# System prompt for the editor agent.  The agent must either accept the
# draft or send it back for revision; the acceptance criteria are listed
# explicitly so the model has a concrete checklist to apply.
EDIT_TEMPLATE = """You are an editor. \
You have been tasked with editing the following draft, which was written by a non-expert. \
Please accept the draft if it is good enough to publish, or send it for revision, along with your notes to guide the revision. \
Things you should be checking for:
- This draft MUST fully answer the original question
- This draft MUST be written in apa format
If not all of the above criteria are met, you should send appropriate revision notes.
"""


class EditorActor:
    """Editor agent: reviews a draft report and, via OpenAI function
    calling, either accepts it or returns notes guiding a revision."""

    def __init__(self):
        """Build the chat model, the editor prompt, and the two
        function-calling schemas ("revise" / "accept") offered to the model."""
        self.model = ChatOpenAI(model=CFG.smart_llm_model)
        self.prompt = SystemMessagePromptTemplate.from_template(EDIT_TEMPLATE) + "Draft:\n\n{draft}"

        # Schema for sending the draft back with revision notes.
        revise_schema = {
            "name": "revise",
            "description": "Sends the draft for revision",
            "parameters": {
                "type": "object",
                "properties": {
                    "notes": {
                        "type": "string",
                        "description": "The editor's notes to guide the revision.",
                    },
                },
            },
        }
        # Schema for accepting the draft as-is.
        accept_schema = {
            "name": "accept",
            "description": "Accepts the draft",
            "parameters": {
                "type": "object",
                "properties": {"ready": {"const": True}},
            },
        }
        self.functions = [revise_schema, accept_schema]

    @property
    def runnable(self):
        """Editor chain: prompt piped into the model with both editor
        functions bound, so the model must choose one of them."""
        bound_model = self.model.bind(functions=self.functions)
        return self.prompt | bound_model
73 changes: 73 additions & 0 deletions permchain_example/research_team.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from operator import itemgetter
from langchain.runnables.openai_functions import OpenAIFunctionsRouter

from permchain.connection_inmemory import InMemoryPubSubConnection
from permchain.pubsub import PubSub
from permchain.topic import Topic

'''
This is the research team.
It is a group of autonomous agents that work together to answer a given question
using a comprehensive research process that includes:
- Searching for relevant information across multiple sources
- Extracting relevant information
- Writing a well structured report
- Validating the report
- Revising the report
- Repeat until the report is satisfactory
'''
class ResearchTeam:
    """Wires three actors (researcher, editor, reviser) into a permchain
    PubSub loop that drafts, reviews, and revises a report until the
    editor accepts it.
    """

    def __init__(self, research_actor, editor_actor, reviser_actor):
        """Store the three collaborating actors.

        Args:
            research_actor: produces a draft from a question (must expose .run).
            editor_actor: accepts the draft or emits revision notes (exposes .runnable).
            reviser_actor: rewrites the draft from the editor's notes (exposes .runnable).
        """
        self.research_actor_instance = research_actor
        self.editor_actor_instance = editor_actor
        self.revise_actor_instance = reviser_actor

    def run(self, query):
        """Run the draft → edit → revise loop for `query` and return the
        accepted draft text.

        Args:
            query (str): the research question.
        Returns:
            The "draft" field of the first result published to Topic.OUT.
        """
        # create topics (message channels between the three chains)
        editor_inbox = Topic("editor_inbox")
        reviser_inbox = Topic("reviser_inbox")

        research_chain = (
            # Listen on the PubSub input topic for the initial question
            Topic.IN.subscribe()
            | {"draft": lambda x: self.research_actor_instance.run(x["question"])}
            # The draft always goes to the editor inbox
            | editor_inbox.publish()
        )

        editor_chain = (
            # Listen for events in the editor_inbox
            editor_inbox.subscribe()
            | self.editor_actor_instance.runnable
            # Depending on which function the model called, route the result
            | OpenAIFunctionsRouter({
                # If revise is chosen, forward notes + draft + question to the reviser
                "revise": (
                    {
                        "notes": itemgetter("notes"),
                        "draft": editor_inbox.current() | itemgetter("draft"),
                        "question": Topic.IN.current() | itemgetter("question"),
                    }
                    | reviser_inbox.publish()
                ),
                # If accepted, publish the current draft as the final output
                "accept": editor_inbox.current() | Topic.OUT.publish(),
            })
        )

        reviser_chain = (
            # Listen for events in the reviser's inbox
            reviser_inbox.subscribe()
            | self.revise_actor_instance.runnable
            # Publish the revised draft back to the editor inbox for re-review
            | editor_inbox.publish()
        )

        # The three chains run as concurrent processes over an in-memory bus;
        # invoke() blocks until something is published to Topic.OUT.
        web_researcher = PubSub(
            processes=(research_chain, editor_chain, reviser_chain),
            connection=InMemoryPubSubConnection(),
        )

        res = web_researcher.invoke({"question": query})
        print(res)
        return res[0]["draft"]
Loading

0 comments on commit be3e7a3

Please sign in to comment.