From 508151b24187517c1f924bbf89df103cec4a43fa Mon Sep 17 00:00:00 2001
From: Ibrahim Kabir
Date: Mon, 25 Mar 2024 12:28:09 -0400
Subject: [PATCH] issue #6: Bing Search works

---
 .env.template                                 |  4 +-
 finesse/accuracy_functions.py                 |  4 +-
 finesse/bing_search.py                        | 45 +++++++++++++++++++
 finesse/google_search.py                      | 31 -------------
 requirements.txt                              |  1 -
 ...t_google_search.py => test_bing_search.py} |  8 ++--
 6 files changed, 53 insertions(+), 40 deletions(-)
 create mode 100644 finesse/bing_search.py
 delete mode 100644 finesse/google_search.py
 rename tests/{test_google_search.py => test_bing_search.py} (57%)

diff --git a/.env.template b/.env.template
index 1fc0389..3c3d4c2 100644
--- a/.env.template
+++ b/.env.template
@@ -1,2 +1,2 @@
-GOOGLE_API_KEY =
-GOOGLE_CSE_ID =
+BING_SEARCH_KEY =
+BING_ENDPOINT =
diff --git a/finesse/accuracy_functions.py b/finesse/accuracy_functions.py
index 341422d..9d95b2e 100644
--- a/finesse/accuracy_functions.py
+++ b/finesse/accuracy_functions.py
@@ -4,7 +4,7 @@ import os
 
 from collections import namedtuple
 import regex as re
-from finesse.google_search import search_google_urls
+from finesse.bing_search import search_bing_urls
 
 OUTPUT_FOLDER = "./finesse/output"
 AccuracyResult = namedtuple("AccuracyResult", ["position", "total_pages", "score"])
@@ -138,7 +138,7 @@ def update_dict_google_data(test_data: dict):
         question = value.get("question")
         expected_url = value.get("expected_page").get("url")
         top = value.get("top")
-        google_response_url = search_google_urls(question, top)
+        google_response_url = search_bing_urls(question, top)
         google_accuracy_result = calculate_accuracy(google_response_url, expected_url)
         value["google_accuracy"] = google_accuracy_result.score
         count += 1
diff --git a/finesse/bing_search.py b/finesse/bing_search.py
new file mode 100644
--- /dev/null
+++ b/finesse/bing_search.py
@@ -0,0 +1,45 @@
+import os
+
+import requests
+from dotenv import load_dotenv
+
+
+def search_bing_urls(query: str, num_results: int = 100) -> list[str]:
+    """
+    Retrieves a list of Bing search result URLs for the given query using the Bing Web Search API.
+
+    Args:
+        query (str): The search query.
+        num_results (int, optional): The maximum number of search results to retrieve. Defaults to 100.
+
+    Returns:
+        list[str]: Up to num_results result URLs, in the order the API returned them.
+
+    Raises:
+        ValueError: If BING_ENDPOINT or BING_SEARCH_KEY is not set.
+        requests.HTTPError: If the API responds with an error status (e.g. 429 Too Many Requests).
+    """
+    load_dotenv()
+    endpoint = os.getenv("BING_ENDPOINT")
+    subscription_key = os.getenv("BING_SEARCH_KEY")
+    if not endpoint or not subscription_key:
+        raise ValueError("BING_ENDPOINT and BING_SEARCH_KEY must be set")
+    search_url = endpoint.rstrip("/") + "/v7.0/search"
+    headers = {"Ocp-Apim-Subscription-Key": subscription_key}
+    urls = []
+    # The API caps "count" at 50 per request, so page through with "offset".
+    while len(urls) < num_results:
+        params = {
+            "q": query,
+            "mkt": "en-US",
+            "count": min(50, num_results - len(urls)),
+            "offset": len(urls),
+        }
+        # A timeout keeps a stalled connection from hanging the caller forever.
+        response = requests.get(search_url, headers=headers, params=params, timeout=30)
+        response.raise_for_status()
+        pages = response.json().get("webPages", {}).get("value", [])
+        if not pages:
+            break  # the API has no further results for this query
+        urls.extend(page["url"] for page in pages)
+    return urls[:num_results]
diff --git a/finesse/google_search.py b/finesse/google_search.py
deleted file mode 100644
index 05271bf..0000000
--- a/finesse/google_search.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from googleapiclient.discovery import build
-from dotenv import load_dotenv
-import os
-
-def google_search(search_term, api_key, cse_id, **kwargs):
-    service = build("customsearch", "v1", developerKey=api_key)
-    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
-    return res['items']
-
-def search_google_urls(query: str, num_results: int = 100) -> list[str]:
-    """
-    Retrieves a list of Google search result URLs for the given query using the Google API.
-
-    Args:
-        query (str): The search query.
-        num_results (int, optional): The number of search results to retrieve. Defaults to 100.
-
-    Returns:
-        list[str]: A list of URLs representing the search results.
-
-    Raises:
-        Exception: If the request limit is exceeded (error 429 Too Many Requests).
-    """
-    load_dotenv()
-    links = []
-    api_key = os.getenv("GOOGLE_API_KEY")
-    cse_id = os.getenv("GOOGLE_CSE_ID")
-    results = google_search(query, api_key, cse_id, start=11)
-    for item in results:
-        links.append(item['link'])
-    return links
diff --git a/requirements.txt b/requirements.txt
index 8acc2a0..71f973f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
 locust
 regex
-google-api-python-client
 python-dotenv
diff --git a/tests/test_google_search.py b/tests/test_bing_search.py
similarity index 57%
rename from tests/test_google_search.py
rename to tests/test_bing_search.py
index 0285042..a5cc355 100644
--- a/tests/test_google_search.py
+++ b/tests/test_bing_search.py
@@ -1,11 +1,11 @@
 import unittest
-from finesse.google_search import search_google_urls
+from finesse.bing_search import search_bing_urls
 
-class TestGoogleSearch(unittest.TestCase):
-    def test_get_google_search_urls(self):
+class TestBingSearch(unittest.TestCase):
+    def test_get_bing_search_urls(self):
         query = "Canada Food Inspection Agency"
         num_results = 100
-        urls = search_google_urls(query, num_results)
+        urls = search_bing_urls(query, num_results)
         self.assertEqual(len(urls), num_results)
         self.assertTrue(all(url.startswith("http") for url in urls))
 