Skip to content

Commit

Permalink
feat: add web search (#274)
Browse files Browse the repository at this point in the history
Co-authored-by: Guohao Li <lightaime@gmail.com>
  • Loading branch information
zhiyu-01 and lightaime authored Oct 5, 2023
1 parent 50c9c97 commit 3ba1754
Show file tree
Hide file tree
Showing 5 changed files with 295 additions and 5 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/pytest_apps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ jobs:
- name: Run pytest
env:
OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
GOOGLE_API_KEY: "${{ secrets.GOOGLE_API_KEY }}"
SEARCH_ENGINE_ID: "${{ secrets.SEARCH_ENGINE_ID }}"
run: poetry run pytest -v apps/

pytest_examples:
Expand All @@ -41,4 +43,6 @@ jobs:
- name: Run pytest
env:
OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
GOOGLE_API_KEY: "${{ secrets.GOOGLE_API_KEY }}"
SEARCH_ENGINE_ID: "${{ secrets.SEARCH_ENGINE_ID }}"
run: poetry run pytest -v examples/
6 changes: 6 additions & 0 deletions .github/workflows/pytest_package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ jobs:
- name: Run pytest
env:
OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
GOOGLE_API_KEY: "${{ secrets.GOOGLE_API_KEY }}"
SEARCH_ENGINE_ID: "${{ secrets.SEARCH_ENGINE_ID }}"
run: poetry run pytest --fast-test-mode test/

pytest_package_llm_test:
Expand All @@ -38,6 +40,8 @@ jobs:
- name: Run pytest
env:
OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
GOOGLE_API_KEY: "${{ secrets.GOOGLE_API_KEY }}"
SEARCH_ENGINE_ID: "${{ secrets.SEARCH_ENGINE_ID }}"
run: poetry run pytest --llm-test-only test/

pytest_package_very_slow_test:
Expand All @@ -51,4 +55,6 @@ jobs:
- name: Run pytest
env:
OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
GOOGLE_API_KEY: "${{ secrets.GOOGLE_API_KEY }}"
SEARCH_ENGINE_ID: "${{ secrets.SEARCH_ENGINE_ID }}"
run: poetry run pytest --very-slow-test-only test/
259 changes: 256 additions & 3 deletions camel/functions/search_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import List
import os
from typing import Any, Dict, List

from .openai_function import OpenAIFunction
import camel.agents
from camel.functions import OpenAIFunction
from camel.messages import BaseMessage
from camel.prompts import TextPrompt


def search_wiki(entity: str) -> str:
Expand Down Expand Up @@ -45,6 +49,255 @@ def search_wiki(entity: str) -> str:
return result


def search_google(query: str) -> List[Dict[str, Any]]:
r"""Use google search engine to search information for the given query.
Args:
query (string): The query to be searched.
Returns:
List[Dict[str, Any]]: A list of dictionaries where each dictionary
represents a website.
Each dictionary contains the following keys:
- 'result_id': A number in order.
- 'title': The title of the website.
- 'description': A brief description of the website.
- 'long_description': More detail of the website.
- 'url': The URL of the website.
Example:
{
'result_id': 1,
'title': 'OpenAI',
'description': 'An organization focused on ensuring that
artificial general intelligence benefits all of humanity.',
'long_description': 'OpenAI is a non-profit artificial
intelligence research company. Our goal is to advance digital
intelligence in the way that is most likely to benefit humanity
as a whole',
'url': 'https://www.openai.com'
}
title, descrption, url of a website.
"""
import requests

# https://developers.google.com/custom-search/v1/overview
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# https://cse.google.com/cse/all
SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")

# Using the first page
start_page_idx = 1
# Different language may get different result
search_language = "en"
# How many pages to return
num_result_pages = 10
# Constructing the URL
# Doc: https://developers.google.com/custom-search/v1/using_rest
url = f"https://www.googleapis.com/customsearch/v1?" \
f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q={query}&start=" \
f"{start_page_idx}&lr={search_language}&num={num_result_pages}"

responses = []
# Fetch the results given the URL
try:
# Make the get
result = requests.get(url)
data = result.json()

# Get the result items
if "items" in data:
search_items = data.get("items")

# Iterate over 10 results found
for i, search_item in enumerate(search_items, start=1):
if "og:description" in search_item["pagemap"]["metatags"][0]:
long_description = \
search_item["pagemap"]["metatags"][0]["og:description"]
else:
long_description = "N/A"
# Get the page title
title = search_item.get("title")
# Page snippet
snippet = search_item.get("snippet")

# Extract the page url
link = search_item.get("link")
response = {
"result_id": i,
"title": title,
"description": snippet,
"long_description": long_description,
"url": link
}
responses.append(response)
else:
responses.append({"error": "google search failed."})

except requests.RequestException:
responses.append({"erro": "google search failed."})

return responses


def text_extract_from_web(url: str) -> str:
r"""Get the text information from given url.
Args:
url (string): The web site you want to search.
Returns:
string: All texts extract from the web.
"""
import requests
from bs4 import BeautifulSoup

try:
# Request the target page
response_text = requests.get(url).text

# Parse the obtained page
soup = BeautifulSoup(response_text, features="html.parser")

for script in soup(["script", "style"]):
script.extract()

text = soup.get_text()
# Strip text
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines
for phrase in line.split(" "))
text = ".".join(chunk for chunk in chunks if chunk)

except requests.RequestException:
text = f"can't access {url}"

return text


# Split a text into smaller chunks of size n
def create_chunks(text: str, n: int) -> List[str]:
r"""Returns successive n-sized chunks from provided text."
Args:
text (string): The text to be split.
n (int): The max length of a single chunk.
Returns:
List[str]: A list of splited texts.
"""

chunks = []
i = 0
while i < len(text):
# Find the nearest end of sentence within a range of 0.5 * n
# and 1.5 * n tokens
j = min(i + int(1.2 * n), len(text))
while j > i + int(0.8 * n):
# Decode the tokens and check for full stop or newline
chunk = text[i:j]
if chunk.endswith(".") or chunk.endswith("\n"):
break
j -= 1
# If no end of sentence found, use n tokens as the chunk size
if j == i + int(0.8 * n):
j = min(i + n, len(text))
chunks.append(text[i:j])
i = j
return chunks


def prompt_single_step_agent(prompt: str) -> str:
"""Prompt a single-step agent to summarize texts or answer a question."""

assistant_sys_msg = BaseMessage.make_assistant_message(
role_name="Assistant",
content="You are a helpful assistant.",
)
agent = camel.agents.ChatAgent(assistant_sys_msg)
agent.reset()

user_msg = BaseMessage.make_user_message(
role_name="User",
content=prompt,
)
assistant_response = agent.step(user_msg)
if assistant_response.msgs is not None:
return assistant_response.msg.content
return ""


def summarize_text(text: str, query: str) -> str:
r"""Summarize the information from the text, base on the query if query is
given.
Args:
text (string): Text to summarise.
query (string): What information you want.
Returns:
string: Strings with information.
"""
summary_prompt = TextPrompt(
'''Gather information from this text that relative to the question, but
do not directly answer the question.\nquestion: {query}\ntext ''')
summary_prompt = summary_prompt.format(query=query)
# Max length of each chunk
max_len = 3000
results = ""
chunks = create_chunks(text, max_len)
# Summarize
for i, chunk in enumerate(chunks, start=1):
prompt = summary_prompt + str(i) + ": " + chunk
result = prompt_single_step_agent(prompt)
results += result + "\n"

# Final summarise
final_prompt = TextPrompt(
'''Here are some summarized texts which split from one text, Using the
information to answer the question: {query}.\n\nText: ''')
final_prompt = final_prompt.format(query=query)
prompt = final_prompt + results

response = prompt_single_step_agent(prompt)

return response


def search_google_and_summarize(query: str) -> str:
r"""Search webs for information. Given a query, this function will use
the google search engine to search for related information from the
internet, and then return a summarized answer.
Args:
query (string): Question you want to be answered.
Returns:
string: Summarized information from webs.
"""
# Google search will return a list of urls
responses = search_google(query)
for item in responses:
if "url" in item:
url = item.get("url")
# Extract text
text = text_extract_from_web(str(url))
# Using chatgpt summarise text
answer = summarize_text(text, query)

# Let chatgpt decide whether to continue search or not
prompt = TextPrompt(
'''Do you think the answer: {answer} can answer the query:
{query}. Use only 'yes' or 'no' to answer.''')
prompt = prompt.format(answer=answer, query=query)
reply = prompt_single_step_agent(prompt)
if "yes" in str(reply).lower():
return answer

return "Failed to find the answer from google search."


SEARCH_FUNCS: List[OpenAIFunction] = [
OpenAIFunction(func) for func in [search_wiki]
OpenAIFunction(func)
for func in [search_wiki, search_google_and_summarize]
]
1 change: 0 additions & 1 deletion camel/societies/role_playing.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,6 @@ def init_chat(self) -> Tuple[BaseMessage, List[BaseMessage]]:
content=(f"{self.user_sys_msg.content}. "
"Now start to give me instructions one by one. "
"Only reply with Instruction and Input."))

user_msg = BaseMessage.make_user_message(
role_name=self.user_sys_msg.role_name,
content=f"{self.assistant_sys_msg.content}")
Expand Down
30 changes: 29 additions & 1 deletion test/functions/test_search_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import os

import requests
import wikipedia

from camel.functions.search_functions import search_wiki
from camel.functions.search_functions import (
search_google_and_summarize,
search_wiki,
)


def test_search_wiki_normal():
Expand All @@ -38,3 +44,25 @@ def test_search_wiki_with_ambiguity():
expected_output = wikipedia.summary("New York (state)", sentences=5,
auto_suggest=False)
assert search_wiki("New York") == expected_output


def test_google_api():
# Check the google search api

# https://developers.google.com/custom-search/v1/overview
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# https://cse.google.com/cse/all
SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")

url = f"https://www.googleapis.com/customsearch/v1?" \
f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q=any"
result = requests.get(url)

assert result.status_code == 200


def test_web_search():
query = "What big things are happening in 2023?"
answer = search_google_and_summarize(query)

assert answer is not None

0 comments on commit 3ba1754

Please sign in to comment.