-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
issue #6: Refactoring + Bing Search + Bing Filtered Search
- Loading branch information
1 parent
508151b
commit 478660d
Showing
4 changed files
with
151 additions
and
111 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,39 @@ | ||
from azure.cognitiveservices.search.websearch import WebSearchClient | ||
from msrest.authentication import CognitiveServicesCredentials | ||
import time | ||
import statistics | ||
class BingSearch(): | ||
""" | ||
A class for performing web searches using the Bing Search API. | ||
""" | ||
|
||
import os | ||
from pprint import pprint | ||
import requests | ||
from dotenv import load_dotenv | ||
import os | ||
def __init__(self, endpoint, subscription_key): | ||
self.endpoint = endpoint | ||
self.subscription_key = subscription_key | ||
self.client = WebSearchClient(endpoint=self.endpoint, credentials=CognitiveServicesCredentials(self.subscription_key)) | ||
self.client.config.base_url = '{Endpoint}/v7.0' # Temporary workaround: override the client's base URL so requests target the v7.0 path. Tracking issue: https://github.com/Azure/azure-sdk-for-python/issues/34917 | ||
|
||
def search_bing_urls(query: str, num_results: int = 100) -> list[str]: | ||
load_dotenv() | ||
urls = [] | ||
endpoint = os.getenv("BING_ENDPOINT") + "/v7.0/search" | ||
subscription_key = os.getenv("BING_SEARCH_KEY") | ||
mkt = 'en-US' | ||
params = { 'q': query, 'mkt': mkt, 'count': 50 } | ||
headers = { 'Ocp-Apim-Subscription-Key': subscription_key } | ||
# Call the API | ||
try: | ||
response = requests.get(endpoint, headers=headers, params=params) | ||
response.raise_for_status() | ||
def search_urls(self, query: str, num_results: int = 100) -> tuple[list[str], float]: | ||
""" | ||
Search for URLs using the Bing Search API. | ||
print("\nHeaders:\n") | ||
print(response.headers) | ||
Args: | ||
query (str): The search query. | ||
num_results (int, optional): The number of results to retrieve. Defaults to 100. | ||
print("\nJSON Response:\n") | ||
pprint(response.json()) | ||
|
||
except Exception as ex: | ||
raise ex | ||
Returns: | ||
tuple[list[str], float]: A tuple containing the list of URLs (at most num_results) and the mean elapsed time per API request, in milliseconds. | ||
""" | ||
urls = [] | ||
elapsed_time = [] | ||
offset = 0 | ||
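# The API returns at most 50 results per query, and Bing Search may return fewer than 50 web results, so keep paging with offset until enough URLs are collected | ||
# NOTE(review): if a response has no web pages, neither urls nor offset changes and this loop never ends - consider breaking out in that case | ||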
while len(urls) < num_results: | ||
start_time = time.time() | ||
web_data = self.client.web.search(query=query, market="en-ca", count=50, response_filter=["Webpages"], offset=offset) | ||
elapsed_time.append(time.time() - start_time) | ||
if hasattr(web_data, 'web_pages') and web_data.web_pages is not None: | ||
urls.extend([item.url for item in web_data.web_pages.value]) | ||
offset += len([item.url for item in web_data.web_pages.value]) | ||
urls = urls[:num_results] | ||
return urls, statistics.mean(elapsed_time) * 1000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
locust | ||
regex | ||
python-dotenv | ||
azure-cognitiveservices-search-websearch | ||
msrest |