-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #687 from 0x11c11e/feature/add-semantic-scholar
Feature/add semantic scholar
- Loading branch information
Showing
5 changed files
with
98 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,38 @@ | ||
# .env.example | ||
|
||
# OpenAI API key for accessing OpenAI's GPT models | ||
OPENAI_API_KEY= | ||
|
||
# API key for accessing Tavily's services | ||
TAVILY_API_KEY= | ||
|
||
# API key for accessing LangChain's services | ||
LANGCHAIN_API_KEY= | ||
|
||
# Path to the directory where documents are stored | ||
DOC_PATH=./my-docs | ||
|
||
# The name of the embedding model to use for Ollama | ||
OLLAMA_EMBEDDING_MODEL= | ||
|
||
# The Ollama endpoint to use | ||
OLLAMA_BASE_URL= | ||
|
||
# Choose one of the available retrievers by uncommenting the desired retriever: | ||
# RETRIEVER=arxiv | ||
# RETRIEVER=bing | ||
# RETRIEVER=custom | ||
# RETRIEVER=duckduckgo | ||
# RETRIEVER=exa | ||
# RETRIEVER=google | ||
# RETRIEVER=searx | ||
# RETRIEVER=semantic_scholar | ||
# RETRIEVER=serpapi | ||
# RETRIEVER=serper | ||
# RETRIEVER=tavily | ||
|
||
# Example setting for retriever | ||
RETRIEVER=tavily | ||
|
||
# Maximum number of search results to return per query | ||
MAX_SEARCH_RESULTS_PER_QUERY=5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
59 changes: 59 additions & 0 deletions
59
gpt_researcher/retrievers/semantic_scholar/semantic_scholar.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from typing import Dict, List | ||
|
||
import requests | ||
|
||
|
||
class SemanticScholarSearch:
    """
    Semantic Scholar API Retriever

    Sends a query to the Semantic Scholar Graph API paper-search endpoint and
    returns the open-access papers from the response as dictionaries with
    ``title``, ``href`` and ``body`` keys.
    """

    BASE_URL = "https://api.semanticscholar.org/graph/v1/paper/search"
    VALID_SORT_CRITERIA = ["relevance", "citationCount", "publicationDate"]
    # Seconds to wait for the API; without a timeout requests.get can block
    # indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self, query: str, sort: str = "relevance"):
        """
        Initialize the SemanticScholarSearch class with a query and sort criterion.

        The sort criterion is matched case-insensitively and stored in the
        canonical spelling listed in ``VALID_SORT_CRITERIA`` (for example
        ``"citationcount"`` is stored as ``"citationCount"``).

        :param query: Search query string
        :param sort: Sort criterion ('relevance', 'citationCount', 'publicationDate')
        :raises ValueError: If ``sort`` is not one of the valid criteria
        """
        self.query = query

        # Map lower-cased spellings back to the canonical ones so the value
        # sent to the API is exactly one of the validated names.
        canonical_by_lower = {name.lower(): name for name in self.VALID_SORT_CRITERIA}
        canonical = (
            canonical_by_lower.get(sort.lower()) if isinstance(sort, str) else None
        )
        if canonical is None:
            # Explicit raise instead of assert: asserts are removed under -O.
            raise ValueError(
                f"Invalid sort criterion {sort!r}; "
                f"expected one of {self.VALID_SORT_CRITERIA}"
            )
        self.sort = canonical

    def search(self, max_results: int = 20) -> List[Dict[str, str]]:
        """
        Perform the search on Semantic Scholar and return results.

        Only papers flagged as open access that carry an open-access PDF entry
        are returned. Any request failure, HTTP error status, or undecodable
        response body is reported on stdout and yields an empty list.

        :param max_results: Maximum number of results to retrieve
        :return: List of dictionaries containing title, href, and body of each paper
        """
        params = {
            "query": self.query,
            "limit": max_results,
            "fields": "title,abstract,url,venue,year,authors,isOpenAccess,openAccessPdf",
            "sort": self.sort,
        }

        try:
            response = requests.get(
                self.BASE_URL, params=params, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            # Decoding stays inside the try: a non-JSON body raises ValueError.
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"An error occurred while accessing Semantic Scholar API: {e}")
            return []

        # "data" may be missing or null; treat both as "no results".
        papers = payload.get("data") if isinstance(payload, dict) else None
        return self._format_results(papers or [])

    @staticmethod
    def _format_results(papers) -> List[Dict[str, str]]:
        """
        Convert raw API paper records into title/href/body dictionaries.

        Records that are not open access, or that have no open-access PDF
        entry, are skipped. Missing, null, or empty fields fall back to
        placeholder strings.

        :param papers: Iterable of paper dictionaries from the API response
        :return: List of dictionaries containing title, href, and body of each paper
        """
        formatted = []
        for paper in papers:
            pdf_info = paper.get("openAccessPdf")
            if not (paper.get("isOpenAccess") and pdf_info):
                continue
            formatted.append(
                {
                    # `or` rather than a .get() default: the API can return the
                    # key with a null value, which .get() would pass through.
                    "title": paper.get("title") or "No Title",
                    "href": pdf_info.get("url") or "No URL",
                    "body": paper.get("abstract") or "Abstract not available",
                }
            )
        return formatted