Skip to content

Commit

Permalink
Merge pull request #485 from assafelovic/feature/verbose
Browse files Browse the repository at this point in the history
Feature/verbose
  • Loading branch information
assafelovic authored May 9, 2024
2 parents d12ee5d + 4bdb971 commit 767fd90
Show file tree
Hide file tree
Showing 14 changed files with 72 additions and 47 deletions.
2 changes: 1 addition & 1 deletion gpt_researcher/context/compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,5 @@ def _pretty_print_docs(self, docs, top_n):

def get_context(self, query, max_results=5):
compressed_docs = self._get_contextual_retriever()
relevant_docs = compressed_docs.get_relevant_documents(query)
relevant_docs = compressed_docs.invoke(query)
return self._pretty_print_docs(relevant_docs, max_results)
49 changes: 30 additions & 19 deletions gpt_researcher/master/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def __init__(
role=None,
parent_query: str = "",
subtopics: list = [],
visited_urls: set = set()
visited_urls: set = set(),
verbose: bool = True,
):
"""
Initialize the GPT Researcher class.
Expand Down Expand Up @@ -52,6 +53,7 @@ def __init__(
self.source_urls = source_urls
self.memory = Memory(self.cfg.embedding_provider)
self.visited_urls = visited_urls
self.verbose = verbose

# Only relevant for DETAILED REPORTS
# --------------------------------------
Expand All @@ -66,12 +68,15 @@ async def conduct_research(self):
"""
Runs the GPT Researcher to conduct research
"""
print(f"🔎 Running research for '{self.query}'...")
if self.verbose:
await stream_output("logs", f"🔎 Starting the research task for '{self.query}'...", self.websocket)

# Generate Agent
if not (self.agent and self.role):
self.agent, self.role = await choose_agent(self.query, self.cfg, self.parent_query)
await stream_output("logs", self.agent, self.websocket)

if self.verbose:
await stream_output("logs", self.agent, self.websocket)

# If specified, the researcher will use the given urls as the context for the research.
if self.source_urls:
Expand All @@ -88,8 +93,8 @@ async def write_report(self, existing_headers: list = []):
Returns:
str: The report
"""

await stream_output("logs", f"✍️ Writing summary for research task: {self.query}...", self.websocket)
if self.verbose:
await stream_output("logs", f"✍️ Writing summary for research task: {self.query}...", self.websocket)

if self.report_type == "custom_report":
self.role = self.cfg.agent_role if self.cfg.agent_role else self.role
Expand Down Expand Up @@ -121,7 +126,8 @@ async def get_context_by_urls(self, urls):
Scrapes and compresses the context from the given urls
"""
new_search_urls = await self.get_new_urls(urls)
await stream_output("logs",
if self.verbose:
await stream_output("logs",
f"🧠 I will conduct my research based on the following urls: {new_search_urls}...",
self.websocket)
scraped_sites = scrape_urls(new_search_urls, self.cfg)
Expand All @@ -141,9 +147,10 @@ async def get_context_by_search(self, query):
if self.report_type != "subtopic_report":
sub_queries.append(query)

await stream_output("logs",
f"🧠 I will conduct my research based on the following queries: {sub_queries}...",
self.websocket)
if self.verbose:
await stream_output("logs",
f"🧠 I will conduct my research based on the following queries: {sub_queries}...",
self.websocket)

# Using asyncio.gather to process the sub_queries asynchronously
context = await asyncio.gather(*[self.process_sub_query(sub_query) for sub_query in sub_queries])
Expand All @@ -158,14 +165,15 @@ async def process_sub_query(self, sub_query: str):
Returns:
str: The context gathered from search
"""
await stream_output("logs", f"\n🔎 Running research for '{sub_query}'...", self.websocket)
if self.verbose:
await stream_output("logs", f"\n🔎 Running research for '{sub_query}'...", self.websocket)

scraped_sites = await self.scrape_sites_by_query(sub_query)
content = await self.get_similar_content_by_query(sub_query, scraped_sites)

if content:
if content and self.verbose:
await stream_output("logs", f"📃 {content}", self.websocket)
else:
elif self.verbose:
await stream_output("logs", f"🤷 No content found for '{sub_query}'...", self.websocket)
return content

Expand All @@ -178,10 +186,10 @@ async def get_new_urls(self, url_set_input):
new_urls = []
for url in url_set_input:
if url not in self.visited_urls:
await stream_output("logs", f"✅ Adding source url to research: {url}\n", self.websocket)

self.visited_urls.add(url)
new_urls.append(url)
if self.verbose:
await stream_output("logs", f"✅ Added source url to research: {url}\n", self.websocket)

return new_urls

Expand All @@ -201,13 +209,14 @@ async def scrape_sites_by_query(self, sub_query):
new_search_urls = await self.get_new_urls([url.get("href") for url in search_results])

# Scrape Urls
# await stream_output("logs", f"📝Scraping urls {new_search_urls}...\n", self.websocket)
await stream_output("logs", f"🤔 Researching for relevant information...\n", self.websocket)
if self.verbose:
await stream_output("logs", f"🤔 Researching for relevant information...\n", self.websocket)
scraped_content_results = scrape_urls(new_search_urls, self.cfg)
return scraped_content_results

async def get_similar_content_by_query(self, query, pages):
await stream_output("logs", f"📝 Getting relevant content based on query: {query}...", self.websocket)
if self.verbose:
await stream_output("logs", f"📝 Getting relevant content based on query: {query}...", self.websocket)
# Summarize Raw Data
context_compressor = ContextCompressor(
documents=pages, embeddings=self.memory.get_embeddings())
Expand All @@ -232,7 +241,8 @@ async def get_subtopics(self):
The `get_subtopics` function is returning the `subtopics` that are generated by the
`construct_subtopics` function.
"""
await stream_output("logs", f"🤔 Generating subtopics...", self.websocket)
if self.verbose:
await stream_output("logs", f"🤔 Generating subtopics...", self.websocket)

subtopics = await construct_subtopics(
task=self.query,
Expand All @@ -242,6 +252,7 @@ async def get_subtopics(self):
subtopics=self.subtopics,
)

await stream_output("logs", f"📋Subtopics: {subtopics}", self.websocket)
if self.verbose:
await stream_output("logs", f"📋Subtopics: {subtopics}", self.websocket)

return subtopics
4 changes: 3 additions & 1 deletion multi_agents/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ To change the research query and customize the report, edit the `task.json` file
- `publish_formats` - The formats to publish the report in. The reports will be written in the `output` directory.
- `follow_guidelines` - If true, the research report will follow the guidelines below. It will take longer to complete. If false, the report will be generated faster but may not follow the guidelines.
- `guidelines` - A list of guidelines that the report must follow.
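- `verbose` - If true, the application will print detailed logs to the console (e.g. intermediate research logs, the guidelines being followed, review feedback, and revision notes). If false or omitted, only high-level progress and error messages are printed.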

#### For example:
```json
Expand All @@ -84,6 +85,7 @@ To change the research query and customize the report, edit the `task.json` file
"The report MUST fully answer the original question",
"The report MUST be written in apa format",
"The report MUST be written in english"
]
],
"verbose": true
}
```
4 changes: 2 additions & 2 deletions multi_agents/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
from .reviser import ReviserAgent
from .reviewer import ReviewerAgent
from .editor import EditorAgent
from .master import MasterAgent
from .master import ChiefEditorAgent

__all__ = [
"MasterAgent",
"ChiefEditorAgent",
"ResearchAgent",
"WriterAgent",
"EditorAgent",
Expand Down
4 changes: 2 additions & 2 deletions multi_agents/agents/master.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
ResearchAgent


class MasterAgent:
class ChiefEditorAgent:
def __init__(self, task: dict):
self.task_id = int(time.time()) # Currently time based, but can be any unique identifier
self.output_dir = f"./outputs/run_{self.task_id}_{task.get('query')[0:60]}"
Expand Down Expand Up @@ -47,7 +47,7 @@ def init_research_team(self):

return workflow

async def run(self):
async def run_research_task(self):
research_team = self.init_research_team()

# compile the graph
Expand Down
17 changes: 10 additions & 7 deletions multi_agents/agents/researcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,20 @@ class ResearchAgent:
def __init__(self):
pass

async def research(self, query: str, research_report: str = "research_report", parent_query: str = ""):
async def research(self, query: str, research_report: str = "research_report", parent_query: str = "", verbose=True):
# Initialize the researcher
researcher = GPTResearcher(parent_query=parent_query, query=query, report_type=research_report, config_path=None)
researcher = GPTResearcher(query=query, report_type=research_report, parent_query=parent_query, verbose=verbose)
# Conduct research on the given query
await researcher.conduct_research()
# Write the report
report = await researcher.write_report()

return report

async def run_subtopic_research(self, title: str, subtopic: str):
async def run_subtopic_research(self, parent_query: str, subtopic: str, verbose: bool = True):
try:
report = await self.research(parent_query=title, query=subtopic, research_report="subtopic_report")
report = await self.research(parent_query=parent_query, query=subtopic,
research_report="subtopic_report", verbose=verbose)
except Exception as e:
print(f"{Fore.RED}Error in researching topic {subtopic}: {e}{Style.RESET_ALL}")
report = None
Expand All @@ -29,11 +30,13 @@ async def run_initial_research(self, research_state: dict):
task = research_state.get("task")
query = task.get("query")
print_agent_output(f"Running initial research on the following query: {query}", agent="RESEARCHER")
return {"task": task, "initial_research": await self.research(query)}
return {"task": task, "initial_research": await self.research(query=query, verbose=task.get("verbose"))}

async def run_depth_research(self, draft_state: dict):
title = draft_state.get("title")
task = draft_state.get("task")
topic = draft_state.get("topic")
parent_query = task.get("query")
verbose = task.get("verbose")
print_agent_output(f"Running in depth research on the following report topic: {topic}", agent="RESEARCHER")
research_draft = await self.run_subtopic_research(title, topic)
research_draft = await self.run_subtopic_research(parent_query, topic, verbose)
return {"draft": research_draft}
9 changes: 7 additions & 2 deletions multi_agents/agents/reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ def review_draft(self, draft_state: dict):

response = call_model(prompt, model=task.get("model"))

if task.get("verbose"):
print_agent_output(f"Review feedback is: {response}...", agent="REVIEWER")

if 'None' in response:
return None
return response
Expand All @@ -54,9 +57,11 @@ def run(self, draft_state: dict):
review = None
if to_follow_guidelines:
print_agent_output(f"Reviewing draft...", agent="REVIEWER")
print_agent_output(f"Following guidelines {guidelines}...", agent="REVIEWER")

if task.get("verbose"):
print_agent_output(f"Following guidelines {guidelines}...", agent="REVIEWER")

review = self.review_draft(draft_state)
print_agent_output(f"Review feedback is: {review}...", agent="REVIEWER")
else:
print_agent_output(f"Ignoring guidelines...", agent="REVIEWER")
return {"review": review}
6 changes: 4 additions & 2 deletions multi_agents/agents/reviser.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ def revise_draft(self, draft_state: dict):
return json.loads(response)

def run(self, draft_state: dict):
draft_title = draft_state.get("title")
print_agent_output(f"Rewriting draft based on feedback...", agent="REVISOR")
revision = self.revise_draft(draft_state)
print_agent_output(f"Revision notes: {revision.get('revision_notes')}", agent="REVISOR")

if draft_state.get("task").get("verbose"):
print_agent_output(f"Revision notes: {revision.get('revision_notes')}", agent="REVISOR")

return {"draft": revision.get("draft"),
"revision_notes": revision.get("revision_notes")}
5 changes: 2 additions & 3 deletions multi_agents/agents/utils/file_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ async def write_text_to_md(text: str, path: str) -> str:
task = uuid.uuid4().hex
file_path = f"{path}/{task}.md"
await write_to_file(file_path, text)
print(f"Report written to {file_path}")
return file_path


Expand All @@ -49,14 +50,12 @@ async def write_md_to_pdf(text: str, path: str) -> str:
file_path = f"{path}/{task}.pdf"

try:
print(text)
print("writing to pdf...")
md2pdf(file_path,
md_content=text,
# md_file_path=f"{file_path}.md",
css_file_path="./agents/utils/pdf_styles.css",
base_url=None)
print(f"Report written to {file_path}.pdf")
print(f"Report written to {file_path}")
except Exception as e:
print(f"Error in converting Markdown to PDF: {e}")
return ""
Expand Down
4 changes: 3 additions & 1 deletion multi_agents/agents/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def revise_headers(self, task: dict, headers: dict):
def run(self, research_state: dict):
print_agent_output(f"Writing final research report based on research data...", agent="WRITER")
research_layout_content = self.write_sections(research_state)
print_agent_output(research_layout_content, agent="WRITER")

if research_state.get("task").get("verbose"):
print_agent_output(research_layout_content, agent="WRITER")

headers = self.get_headers(research_state)
if research_state.get("task").get("follow_guidelines"):
Expand Down
9 changes: 5 additions & 4 deletions multi_agents/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dotenv import load_dotenv
from agents import MasterAgent
from agents import ChiefEditorAgent
import asyncio
import json
import os
Expand All @@ -14,9 +14,10 @@ async def main():
with open('task.json', 'r') as f:
task = json.load(f)

master_agent = MasterAgent(task)
research_report = await master_agent.run()
print(research_report)
chief_editor = ChiefEditorAgent(task)
research_report = await chief_editor.run_research_task()

return research_report

if __name__ == "__main__":
asyncio.run(main())
1 change: 0 additions & 1 deletion multi_agents/memory/draft.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

class DraftState(TypedDict):
task: dict
title: str
topic: str
draft: dict
review: str
Expand Down
3 changes: 2 additions & 1 deletion multi_agents/task.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@
"The report MUST be written in APA format",
"Each sub section MUST include supporting sources using hyperlinks. If none exist, erase the sub section or rewrite it to be a part of the previous section",
"The report MUST be written in spanish"
]
],
"verbose": true
}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setup(
name="gpt-researcher",
version="0.3.3",
version="0.3.5",
description="GPT Researcher is an autonomous agent designed for comprehensive online research on a variety of tasks.",
package_dir={'gpt_researcher': 'gpt_researcher'},
packages=find_packages(),
Expand Down

0 comments on commit 767fd90

Please sign in to comment.