Skip to content

Commit

Permalink
Fix/biobuddy (#88)
Browse files Browse the repository at this point in the history
  • Loading branch information
timurishmuratov7 authored Aug 15, 2024
1 parent da70017 commit d2b592e
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 28 deletions.
4 changes: 2 additions & 2 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ services:
dockerfile: build/Dockerfile
command: --host=0.0.0.0 --port=5739
biobuddy:
image: 'ghcr.io/basedlabs/biobuddy:1.2.0'
image: 'ghcr.io/basedlabs/biobuddy:1.2.1'
network_mode: host
build:
context: microservices/biobuddy
Expand Down Expand Up @@ -167,7 +167,7 @@ services:
dockerfile: build/Dockerfile
command: --host=0.0.0.0 --port=5740 --workers=1
nolabs:
image: 'ghcr.io/basedlabs/nolabs:2.1.6'
image: 'ghcr.io/basedlabs/nolabs:2.1.7'
network_mode: host
build:
context: .
Expand Down
9 changes: 8 additions & 1 deletion frontend/src/features/biobuddy/BioBuddyChat.vue
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ export default defineComponent({
});
}
const researchMatches = this.currentMessageBuffer.match(/<RESEARCH/g);
if (researchMatches) {
researchMatches.forEach((match) => {
this.currentMessageBuffer = this.currentMessageBuffer.replace(/<RESEARCH/g, '');
});
}
// Process WORKFLOW tags
const workflowMatches = this.currentMessageBuffer.match(/<WORKFLOW>([\s\S]*?)<END_WORKFLOW>/g);
if (workflowMatches) {
Expand Down Expand Up @@ -537,4 +544,4 @@ export default defineComponent({
vertical-align: top;
/* Aligns icon with text if necessary */
}
</style>
</style>
6 changes: 3 additions & 3 deletions microservices/biobuddy/biobuddy/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from biobuddy.services import send_message, send_message_async, invoke_action
from biobuddy.api_models import SendMessageToBioBuddyRequest, SendMessageToBioBuddyResponse, IsJobRunningResponse, \
SendActionCallRequest
from typing import Dict, List, Any
from typing import Dict
import json

app = FastAPI(
Expand All @@ -15,7 +15,6 @@

connection_manager = ConnectionManager()

from biobuddy.loggers import Log
from biobuddy.job_state_manager import job_state_manager

@app.post("/send-message")
Expand Down Expand Up @@ -44,6 +43,7 @@ async def invoke_function(
async def websocket_endpoint(websocket: WebSocket):
await connection_manager.connect(websocket)
try:
print("Original Websocket: ", websocket)
while True:
data = await websocket.receive_text()
request_data = json.loads(data)
Expand All @@ -55,7 +55,7 @@ async def websocket_endpoint(websocket: WebSocket):
else:
request = SendMessageToBioBuddyRequest(**request_data)
stop_tokens[request.experiment_id] = False
async for message in send_message_async(request, stop_tokens):
async for message in send_message_async(request, stop_tokens, websocket):
if stop_tokens.get(request.experiment_id):
await websocket.send_text(json.dumps({"reply_type": "final", "content": "<STOP>"}))
break
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def _get_contextual_retriever(self):
return contextual_retriever

def _pretty_print_docs(self, docs, top_n):
return f"\n".join(f"Source: {d.metadata.get('source')}\n"
f"Title: {d.metadata.get('title')}\n"
f"Content: {d.page_content}\n"
return f" \n".join(f"Source: {d.metadata.get('source')}\n "
f"Title: {d.metadata.get('title')}\n "
f"Content: {d.page_content}\n "
for i, d in enumerate(docs) if i < top_n)

def get_context(self, query, max_results=5):
Expand Down
17 changes: 9 additions & 8 deletions microservices/biobuddy/biobuddy/gpt_researcher/master/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ async def run(self):
print(f"🔎 Running research for '{self.query}'...")
# Generate Agent
self.agent, self.role = await choose_agent(self.query, self.cfg)
await asyncio.sleep(0)
await stream_output("logs", self.agent, self.websocket)

# If specified, the researcher will use the given urls as the context for the research.
Expand All @@ -51,7 +52,7 @@ async def run(self):
# Write Research Report
if self.report_type == "custom_report":
self.role = self.cfg.agent_role if self.cfg.agent_role else self.role
await stream_output("logs", f"✍️ Writing {self.report_type} for research task: {self.query}...", self.websocket)
await stream_output("logs", f"✍️ Writing {self.report_type} for research task: {self.query}...\n", self.websocket)
report = await generate_report(query=self.query, context=self.context,
agent_role_prompt=self.role, report_type=self.report_type,
websocket=self.websocket, cfg=self.cfg)
Expand All @@ -64,7 +65,7 @@ async def get_context_by_urls(self, urls):
"""
new_search_urls = await self.get_new_urls(urls)
await stream_output("logs",
f"🧠 I will conduct my research based on the following urls: {new_search_urls}...",
f"\n 🧠 I will conduct my research based on the following urls: {new_search_urls}... \n",
self.websocket)
scraped_sites = scrape_urls(new_search_urls, self.cfg)
return await self.get_similar_content_by_query(self.query, scraped_sites)
Expand All @@ -79,15 +80,15 @@ async def get_context_by_search(self, query):
# Generate Sub-Queries including original query
sub_queries = await get_sub_queries(query, self.role, self.cfg) + [query]
await stream_output("logs",
f"🧠 I will conduct my research based on the following queries: {sub_queries}...",
f"\n 🧠 I will conduct my research based on the following queries: {sub_queries}... \n",
self.websocket)

# Run Sub-Queries
for sub_query in sub_queries:
await stream_output("logs", f"\n🔎 Running research for '{sub_query}'...", self.websocket)
await stream_output("logs", f"\n 🔎 Running research for '{sub_query}'... \n", self.websocket)
scraped_sites = await self.scrape_sites_by_query(sub_query)
content = await self.get_similar_content_by_query(sub_query, scraped_sites)
await stream_output("logs", f"📃 {content}", self.websocket)
await stream_output("logs", f"\n 📃 {content} \n", self.websocket)
context.append(content)

return context
Expand All @@ -101,7 +102,7 @@ async def get_new_urls(self, url_set_input):
new_urls = []
for url in url_set_input:
if url not in self.visited_urls:
await stream_output("logs", f"✅ Adding source url to research: {url}\n", self.websocket)
await stream_output("logs", f"\n ✅ Adding source url to research: [{url}]({url}) \n", self.websocket)

self.visited_urls.add(url)
new_urls.append(url)
Expand Down Expand Up @@ -129,12 +130,12 @@ async def scrape_sites_by_query(self, sub_query):

# Scrape Urls
# await stream_output("logs", f"📝Scraping urls {new_search_urls}...\n", self.websocket)
await stream_output("logs", f"🤔Researching for relevant information...\n", self.websocket)
await stream_output("logs", f"\n 🤔Researching for relevant information... \n", self.websocket)
scraped_content_results = scrape_urls(new_search_urls, self.cfg)
return scraped_content_results

async def get_similar_content_by_query(self, query, pages):
await stream_output("logs", f"📃 Getting relevant content based on query: {query}...", self.websocket)
await stream_output("logs", f"\n 📃 Getting relevant content based on query: {query}... \n", self.websocket)
# Summarize Raw Data
context_compressor = ContextCompressor(documents=pages, embeddings=self.memory.get_embeddings())
# Run Tasks
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from .prompts import *
import json

from ...api_models import SendMessageToBioBuddyResponse


def get_retriever(retriever):
"""
Expand Down Expand Up @@ -242,7 +244,10 @@ async def stream_output(type, output, websocket=None, logging=True):
None
"""
if not websocket or logging:
print(output)
print("Static output: ", output)

if websocket:
await websocket.send_json({"type": type, "output": output})
await asyncio.sleep(0)
message = SendMessageToBioBuddyResponse(reply_type="stream", content=output)
await websocket.send_text(json.dumps(message.to_dict()))
await asyncio.sleep(0)
7 changes: 5 additions & 2 deletions microservices/biobuddy/biobuddy/gpt_researcher/run.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from websocket import WebSocket

from .master.agent import GPTResearcher


async def get_report(query: str, report_type: str = 'research_report') -> str:
async def get_report(query: str, websocket: WebSocket, report_type: str = 'research_report') -> str:
"""
Get the report for the given query and report type:
Args:
Expand All @@ -10,6 +12,7 @@ async def get_report(query: str, report_type: str = 'research_report') -> str:
Output:
report: The generated report in markdown format
"""
researcher = GPTResearcher(query, report_type, config_path="../config.json")
researcher = GPTResearcher(query, report_type, config_path="../config.json", websocket=websocket)
print("GPT researcher")
report = await researcher.run()
return report
4 changes: 3 additions & 1 deletion microservices/biobuddy/biobuddy/gpt_researcher/utils/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Optional

from ..master.prompts import auto_agent_instructions
from ...api_models import SendMessageToBioBuddyResponse


async def create_chat_completion(
Expand Down Expand Up @@ -85,7 +86,8 @@ async def stream_response(model, messages, temperature, max_tokens, llm_provider
paragraph += content
if "\n" in paragraph:
if websocket is not None:
await websocket.send_json({"type": "report", "output": paragraph})
message = SendMessageToBioBuddyResponse(reply_type="stream", content=paragraph)
await websocket.send_text(json.dumps(message.to_dict()))
else:
print(f"{Fore.GREEN}{paragraph}{Style.RESET_ALL}")
paragraph = ""
Expand Down
7 changes: 5 additions & 2 deletions microservices/biobuddy/biobuddy/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ def generate_system_prompt() -> str:


def generate_strategy_prompt(tools_description: str, query: str, available_components: str, current_workflow: str) -> str:
return (f"Given the available tools ({tools_description}), decide whether a direct reply is sufficient "
f"or if a specific plan involving these tools is necessary. "
    return (f"Given the available tools ({tools_description}), decide whether a direct reply or research is sufficient "
f"or if a specific plan involving these tools is necessary."
            f"If a user asks to do some online literature research directly, such as 'Could you find me some information about latest research on GFP' or 'What are the latest news about rhodopsins?' or 'Can you search me some information about vaccines' etc., "
            f"then reply just with one word-tag '<RESEARCH>'. Do it only if they explicitly ask you to search for information, search the Internet, or find up-to-date info."
f"Otherwise, follow these instructions:"
f"If calling function calls are not needed, provide the direct reply (without stating that it's a direct reply, just the text of the reply). "
f"If the query explicitly asks to pull specific data (such as named proteins or ligands), reply with a plan of all actions required based on the query and tools descriptions in the format (have as many actions as needed): "
f"\"<ACTION> 1. Call tool_1 in order to do X <END_ACTION> <ACTION> 2. Call tool_2 to achieve Y <END_ACTION> <ACTION> 3. Call tool_3 to do something else <END_ACTION>\". "
Expand Down
12 changes: 10 additions & 2 deletions microservices/biobuddy/biobuddy/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from langchain.callbacks import AsyncIteratorCallbackHandler
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from websocket import WebSocket

from biobuddy.api_models import SendMessageToBioBuddyResponse, SendMessageToBioBuddyRequest
from biobuddy.prompts import generate_strategy_prompt, generate_system_prompt, generate_workflow_prompt
Expand Down Expand Up @@ -83,7 +84,10 @@ def send_message(request: SendMessageToBioBuddyRequest) -> SendMessageToBioBuddy
)


async def send_message_async(request: SendMessageToBioBuddyRequest, stop_tokens: Dict[str, bool]) -> AsyncGenerator[SendMessageToBioBuddyResponse, None]:
async def send_message_async(request: SendMessageToBioBuddyRequest,
stop_tokens: Dict[str, bool],
websocket: WebSocket
) -> AsyncGenerator[SendMessageToBioBuddyResponse, None]:
system_message_content = generate_system_prompt()
history_messages = [SystemMessage(content=system_message_content)]
for msg in request.previous_messages:
Expand All @@ -92,6 +96,8 @@ async def send_message_async(request: SendMessageToBioBuddyRequest, stop_tokens:
elif msg['role'] == 'assistant':
history_messages.append(AIMessage(content=msg['content']))

# await get_report(request.message_content, websocket)

tools_description = " ".join([f"{(tool['function']['name'], tool['function']['description'])}, " for tool in request.tools])
strategy_prompt = generate_strategy_prompt(tools_description, request.message_content, str(request.available_components), str(request.current_workflow))

Expand All @@ -117,7 +123,9 @@ async def chat_with_model():
break
response_buffer += response

print(response_buffer)
if "<RESEARCH>" in response_buffer:
await get_report(request.message_content, websocket)
response_buffer = response_buffer.replace("<RESEARCH>", "")

yield SendMessageToBioBuddyResponse(reply_type="stream", content=response)

Expand Down
4 changes: 2 additions & 2 deletions microservices/biobuddy/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ openapi
openai
biopython
langchain-openai
langchain
langchain==0.1.14
websockets
asyncio==3.4.3
beautifulsoup4==4.12.2
colorama==0.4.6
duckduckgo_search==5.3.0
duckduckgo_search
tavily-python==0.3.1
arxiv==2.0.0
PyMuPDF==1.23.6

0 comments on commit d2b592e

Please sign in to comment.