Skip to content

Commit

Permalink
Merge pull request #261 from reasonmethis/fix/serp-retriever
Browse files Browse the repository at this point in the history
Fix serper retriever
  • Loading branch information
assafelovic authored Nov 21, 2023
2 parents d3c62cf + b756fd2 commit 57c65ab
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 19 deletions.
10 changes: 7 additions & 3 deletions gpt_researcher/master/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,13 @@ def get_retriever(retriever):
case "searx":
from gpt_researcher.retrievers import SearxSearch
retriever = SearxSearch
case "serp":
from gpt_researcher.retrievers import SerpSearch
retriever = SerpSearch
case "serpapi":
raise NotImplementedError("SerpApiSearch is not fully implemented yet.")
from gpt_researcher.retrievers import SerpApiSearch
retriever = SerpApiSearch
case "googleSerp":
from gpt_researcher.retrievers import SerperSearch
retriever = SerperSearch
case "duckduckgo":
from gpt_researcher.retrievers import Duckduckgo
retriever = Duckduckgo
Expand Down
5 changes: 3 additions & 2 deletions gpt_researcher/retrievers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .tavily_search.tavily_search import TavilySearch
from .duckduckgo.duckduckgo import Duckduckgo
from .google.google import GoogleSearch
from .serper.serper import SerpSearch
from .serper.serper import SerperSearch
from .serpapi.serpapi import SerpApiSearch
from .searx.searx import SearxSearch

__all__ = ["TavilySearch", "Duckduckgo", "SerpSearch", "GoogleSearch", "SearxSearch"]
__all__ = ["TavilySearch", "Duckduckgo", "SerperSearch", "SerpApiSearch", "GoogleSearch", "SearxSearch"]
78 changes: 78 additions & 0 deletions gpt_researcher/retrievers/serpapi/serpapi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# SerpApi Retriever

# libraries
import os
import requests
import json


class SerpApiSearch():
    """
    SerpApi Retriever

    Runs a Google search through SerpApi (https://serpapi.com/) and converts
    the organic results into ``{"title", "href", "body"}`` dictionaries.
    """
    def __init__(self, query):
        """
        Initializes the SerpApiSearch object
        Args:
            query: the search query string
        Raises:
            NotImplementedError: always, for now (see the comment below).
        """
        # Deliberate guard: the retriever stays disabled until its request
        # path has been verified against the live SerpApi service. The
        # assignments below document the intended state once it is lifted.
        raise NotImplementedError("SerpApiSearch is not fully implemented yet.")
        self.query = query
        self.api_key = self.get_api_key()

    def get_api_key(self):
        """
        Gets the SerpApi API key
        Returns:
            The value of the SERPAPI_API_KEY environment variable.
        Raises:
            Exception: if SERPAPI_API_KEY is not set.
        """
        try:
            api_key = os.environ["SERPAPI_API_KEY"]
        except KeyError:
            # Only a missing variable is expected here; a bare ``except``
            # would also swallow KeyboardInterrupt / SystemExit.
            raise Exception("SerpApi API key not found. Please set the SERPAPI_API_KEY environment variable. "
                            "You can get a key at https://serpapi.com/")
        return api_key

    def search(self, max_results=7):
        """
        Searches the query

        Useful for general internet search queries using SerpApi.
        Args:
            max_results: maximum number of results to return (None = no limit)
        Returns:
            A list of ``{"title", "href", "body"}`` dictionaries, or None when
            the response body cannot be parsed into a JSON object.
        """
        print("Searching with query {0}...".format(self.query))

        # Perform the search. Passing the values through ``params`` lets
        # requests URL-encode them, so queries containing spaces, '&', '#'
        # or '+' no longer corrupt the request URL.
        params = {
            "engine": "google",
            "q": self.query,
            "api_key": self.api_key,
        }
        resp = requests.get("https://serpapi.com/search.json", params=params)

        # Preprocess the results
        if resp is None:
            return
        try:
            search_results = json.loads(resp.text)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return
        if not isinstance(search_results, dict):
            return

        # Error payloads carry no "organic_results" key; treat that as an
        # empty result set instead of raising KeyError.
        return self._normalize_results(search_results.get("organic_results"), max_results)

    @staticmethod
    def _normalize_results(results, max_results):
        """
        Normalizes raw organic results to the title/href/body format
        Args:
            results: iterable of SerpApi organic-result dictionaries, or None
            max_results: maximum number of entries to keep (None = no limit)
        Returns:
            A list of ``{"title", "href", "body"}`` dictionaries.
        """
        normalized = []
        for result in results or []:
            if max_results is not None and len(normalized) >= max_results:
                break
            link = result.get("link", "")
            # skip youtube results, and entries that have no link at all
            if not link or "youtube.com" in link:
                continue
            normalized.append({
                "title": result.get("title", ""),
                "href": link,
                # not every organic result carries a snippet
                "body": result.get("snippet", ""),
            })
        return normalized
36 changes: 22 additions & 14 deletions gpt_researcher/retrievers/serper/serper.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,34 @@
# Tavily API Retriever
# Google Serper Retriever

# libraries
import os
import requests
import json
from tavily import TavilyClient


class SerpSearch():
class SerperSearch():
"""
Tavily API Retriever
Google Serper Retriever
"""
def __init__(self, query):
"""
Initializes the TavilySearch object
Initializes the SerperSearch object
Args:
query:
"""
self.query = query
self.api_key = self.get_api_key()
self.client = TavilyClient(self.api_key)

def get_api_key(self):
"""
Gets the Tavily API key
Gets the Serper API key
Returns:
"""
# Get the API key
try:
api_key = os.environ["SERP_API_KEY"]
api_key = os.environ["SERPER_API_KEY"]
except:
raise Exception("Serp API key not found. Please set the SERP_API_KEY environment variable. "
raise Exception("Serper API key not found. Please set the SERPER_API_KEY environment variable. "
"You can get a key at https://serper.dev/")
return api_key

Expand All @@ -43,9 +40,20 @@ def search(self, max_results=7):
"""
print("Searching with query {0}...".format(self.query))
"""Useful for general internet search queries using the Serp API."""
url = "https://serpapi.com/search.json?engine=google&q=" + self.query + "&api_key=" + self.api_key
resp = requests.request("GET", url)


# Search the query (see https://serper.dev/playground for the format)
url = "https://google.serper.dev/search"

headers = {
'X-API-KEY': self.api_key,
'Content-Type': 'application/json'
}
data = json.dumps({"q": self.query})

resp = requests.request("POST", url, headers=headers, data=data)

# Preprocess the results
if resp is None:
return
try:
Expand All @@ -55,10 +63,10 @@ def search(self, max_results=7):
if search_results is None:
return

results = search_results["organic_results"]
results = search_results["organic"]
search_results = []

# Normalizing results to match the format of the other search APIs
# Normalize the results to match the format of the other search APIs
for result in results:
# skip youtube results
if "youtube.com" in result["link"]:
Expand Down

0 comments on commit 57c65ab

Please sign in to comment.