-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
bf895a3
commit 232208a
Showing
6 changed files
with
69 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
GOOGLE_API_KEY = | ||
GOOGLE_CSE_ID = |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,31 @@ | ||
from googlesearch import search | ||
from googleapiclient.discovery import build | ||
from dotenv import load_dotenv | ||
import os | ||
|
||
def get_google_search_urls(query: str, num_results: int = 100) -> list[str]: | ||
def google_search(search_term, api_key, cse_id, **kwargs):
    """
    Runs a single query against the Google Custom Search JSON API.

    Args:
        search_term: The query string, sent as the ``q`` parameter.
        api_key: Google API developer key used to build the service client.
        cse_id: Custom Search Engine id, sent as the ``cx`` parameter.
        **kwargs: Extra parameters forwarded unchanged to ``cse().list``
            (for example ``start`` or ``num``).

    Returns:
        list: The ``items`` entries of the response, or an empty list when
        the response carries no ``items`` key.
    """
    service = build("customsearch", "v1", developerKey=api_key)
    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
    # A response for a query with no hits has no 'items' key at all, so
    # indexing with res['items'] would raise KeyError instead of reporting
    # "no results".
    return res.get('items', [])
|
||
def search_google_urls(query: str, num_results: int = 100) -> list[str]:
    """
    Retrieves a list of Google search result URLs for the given query using the Google API.

    Results are requested page by page, starting at the first result, until
    ``num_results`` links have been collected or the API has nothing more to
    return.

    Args:
        query (str): The search query.
        num_results (int, optional): The maximum number of search results to
            retrieve. Defaults to 100. Values above 100 are capped, and values
            of zero or less yield an empty list without calling the API.

    Returns:
        list[str]: A list of URLs representing the search results, at most
        ``num_results`` long.

    Raises:
        Exception: Errors raised by ``google_search`` while executing a
            request (other than the missing-``items`` ``KeyError`` handled
            below) propagate to the caller.
    """
    if num_results <= 0:
        return []

    load_dotenv()
    api_key = os.getenv("GOOGLE_API_KEY")
    cse_id = os.getenv("GOOGLE_CSE_ID")

    page_size = 10   # the Custom Search API returns at most 10 items per request
    max_total = 100  # the API does not serve results beyond the 100th
    wanted = min(num_results, max_total)

    links: list[str] = []
    while len(links) < wanted:
        count = min(page_size, wanted - len(links))
        try:
            # 'start' is 1-based: the next page begins right after what we hold.
            items = google_search(
                query, api_key, cse_id, start=len(links) + 1, num=count
            )
        except KeyError:
            # google_search indexes the response with ['items']; a page with
            # no results has no such key. Treat that as the end of results.
            break
        if not items:
            break
        links.extend(item['link'] for item in items)
        if len(items) < count:
            # Short page: the result set is exhausted.
            break
    return links[:wanted]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
locust | ||
regex | ||
googlesearch-python | ||
google-api-python-client | ||
python-dotenv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters