issue #6: Refactoring + Bing Search + Bing Filtered Search

ibrahim-kabir committed Mar 26, 2024
1 parent 508151b commit 478660d
Showing 4 changed files with 151 additions and 111 deletions.
170 changes: 98 additions & 72 deletions finesse/accuracy_functions.py
@@ -4,7 +4,8 @@
 import os
 from collections import namedtuple
 import regex as re
-from finesse.bing_search import search_bing_urls
+from finesse.bing_search import BingSearch
+from dotenv import load_dotenv

 OUTPUT_FOLDER = "./finesse/output"
 AccuracyResult = namedtuple("AccuracyResult", ["position", "total_pages", "score"])
@@ -17,12 +18,15 @@ def calculate_accuracy(responses_url: list[str], expected_url: str) -> AccuracyResult:

     for idx, response_url in enumerate(responses_url):
         if response_url.startswith("https://inspection.canada.ca"):
-            response_number = int(re.findall(r'/(\d+)/', response_url)[0])
-            if response_number == expected_number:
-                position = idx
-                score = 1 - (position / total_pages)
-                score= round(score, 2)
-                break
+            try:
+                response_number = int(re.findall(r'/(\d+)/', response_url)[0])
+                if response_number == expected_number:
+                    position = idx
+                    score = 1 - (position / total_pages)
+                    score = round(score, 2)
+                    break
+            except IndexError:
+                pass

     return AccuracyResult(position, total_pages, score)
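
To make the scoring rule concrete, a small hedged sketch (it assumes, from the surrounding code, that expected_number is parsed from the numeric path segment of expected_url and that total_pages is the number of candidate URLs; all URLs here are invented):

    urls = [
        "https://inspection.canada.ca/a/1111/eng",
        "https://inspection.canada.ca/b/2222/eng",
        "https://inspection.canada.ca/c/3333/eng",
        "https://inspection.canada.ca/d/4444/eng",
    ]
    # The expected page sits at 0-based index 1 of 4 candidates,
    # so score = 1 - (1 / 4) = 0.75.
    result = calculate_accuracy(urls, "https://inspection.canada.ca/b/2222/eng")
    print(result)  # AccuracyResult(position=1, total_pages=4, score=0.75)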

@@ -35,23 +39,35 @@ def save_to_markdown(test_data: dict, engine: str):
     with open(output_file, "w") as md_file:
         md_file.write(f"# Test on the {engine} search engine: {date_string}\n\n")
         md_file.write("## Test data table\n\n")
-        md_file.write("| 📄 File | 💬 Question | 📏 Accuracy Score | 🌐 Google Score |⌛ Time |\n")
-        md_file.write("|--------------------|-------------------------------------------------------------------------------------------------------------------------|----------------|----------|\n")
+        md_file.write("| 📄 File | 💬 Question | 🔎 Finesse Accuracy Score | 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score | ⌛ Finesse Time | ⌛ Bing Time | ⌛ Filtered Bing Time |\n")
+        md_file.write("|---|---|---|---|---|---|---|---|\n")
         for key, value in test_data.items():
-            md_file.write(f"| {key} | [{value.get('question')}]({value.get('expected_page').get('url')}) | {value.get('accuracy')*100}% | {value.get('google_accuracy')*100}% | {int(value.get('time'))}ms |\n")
+            md_file.write(f"| {key} | [{value.get('question')}]({value.get('expected_page').get('url')}) | {int(value.get('accuracy')*100)}% | {int(value.get('bing_accuracy')*100)}% | {int(value.get('bing_filtered_accuracy')*100)}% | {int(value.get('time'))}ms | {int(value.get('bing_time'))}ms | {int(value.get('bing_filtered_time'))}ms |\n")
         md_file.write("\n")
         md_file.write(f"Tested on {len(test_data)} files.\n\n")

-        time_stats, accuracy_stats, google_stats = calculate_statistical_summary(test_data)
+        time_stats, accuracy_stats, bing_accuracy_stats, bing_time_stats, bing_filtered_accuracy_stats, bing_filtered_time_stats = calculate_statistical_summary(test_data)
         md_file.write("## Statistical summary\n\n")
-        md_file.write("| Statistic | ⌛ Time | 📏 Accuracy score | 🌐 Google Score |\n")
-        md_file.write("|-----------------------|------------|---------|\n")
-        md_file.write(f"|Mean| {int(time_stats.get('Mean'))}ms | {int(accuracy_stats.get('Mean')*100)}% | {int(google_stats.get('Mean')*100)}% |\n")
-        md_file.write(f"|Median| {int(time_stats.get('Median'))}ms | {int(accuracy_stats.get('Median')*100)}% | {int(google_stats.get('Median')*100)}% |\n")
-        md_file.write(f"|Standard Deviation| {int(time_stats.get('Standard Deviation'))}ms | {int(accuracy_stats.get('Standard Deviation')*100)}% | {int(google_stats.get('Standard Deviation')*100)}% |\n")
-        md_file.write(f"|Maximum| {int(time_stats.get('Maximum'))}ms | {int(accuracy_stats.get('Maximum')*100)}% | {int(google_stats.get('Maximum')*100)}% |\n")
-        md_file.write(f"|Minimum| {int(time_stats.get('Minimum'))}ms | {int(accuracy_stats.get('Minimum')*100)}% | {int(google_stats.get('Minimum')*100)}% |\n")
-        md_file.write(f"\nThere are a total of {len([result.get('accuracy') for result in test_data.values() if result.get('accuracy') == 0])} null scores\n")
+        md_file.write("| Statistic\\Engine | 🔎 Finesse Accuracy Score | 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score | ⌛ Finesse Time | ⌛ Bing Time | ⌛ Filtered Bing Time |\n")
+        md_file.write("|---|---|---|---|---|---|---|\n")
+        for stat in ["Mean", "Median", "Standard Deviation", "Maximum", "Minimum"]:
+            md_file.write(f"|{stat}| {accuracy_stats.get(stat)}% | {bing_accuracy_stats.get(stat)}% | {bing_filtered_accuracy_stats.get(stat)}% | {time_stats.get(stat)}ms | {bing_time_stats.get(stat)}ms | {bing_filtered_time_stats.get(stat)}ms |\n")
+
+        md_file.write("\n## Count of null and top scores\n\n")
+        md_file.write("| Score\\Engine | 🔎 Finesse Accuracy Score | 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score |\n")
+        md_file.write("|---|---|---|---|\n")
+        finesse_null, finesse_top = count_null_top_scores({key: value.get("accuracy") for key, value in test_data.items()})
+        bing_null, bing_top = count_null_top_scores({key: value.get("bing_accuracy") for key, value in test_data.items()})
+        bing_filtered_null, bing_filtered_top = count_null_top_scores({key: value.get("bing_filtered_accuracy") for key, value in test_data.items()})
+
+        md_file.write(f"| Null (0%) | {finesse_null} | {bing_null} | {bing_filtered_null} |\n")
+        md_file.write(f"| Top (100%) | {finesse_top} | {bing_top} | {bing_filtered_top} |\n")
+
+def count_null_top_scores(accuracy_scores: dict):
+    null_scores = len([score for score in accuracy_scores.values() if score == 0])
+    top_scores = len([score for score in accuracy_scores.values() if score == 1])
+
+    return null_scores, top_scores
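
A quick sketch of the new helper on invented scores; only exact 0 and exact 1 are counted:

    scores = {"qna_a.json": 0, "qna_b.json": 1.0, "qna_c.json": 0.42}
    nulls, tops = count_null_top_scores(scores)
    print(nulls, tops)  # 1 1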

 def save_to_csv(test_data: dict, engine: str):
     if not os.path.exists(OUTPUT_FOLDER):
@@ -71,75 +87,85 @@ def save_to_csv(test_data: dict, engine: str):
         ])
         writer.writerow([])

-        time_stats, accuracy_stats = calculate_statistical_summary(test_data)
+        # calculate_statistical_summary now returns six dicts; only the first two are used here
+        time_stats, accuracy_stats, *_ = calculate_statistical_summary(test_data)
         writer.writerow(["Statistic", "Time", "Accuracy Score"])
         writer.writerow(["Mean", f"{int(time_stats.get('Mean'))}", f"{int(accuracy_stats.get('Mean'))}"])
         writer.writerow(["Median", f"{int(time_stats.get('Median'))}", f"{int(accuracy_stats.get('Median'))}"])
         writer.writerow(["Standard Deviation", f"{int(time_stats.get('Standard Deviation'))}", f"{int(accuracy_stats.get('Standard Deviation'))}"])
         writer.writerow(["Maximum", f"{int(time_stats.get('Maximum'))}", f"{int(accuracy_stats.get('Maximum'))}"])
         writer.writerow(["Minimum", f"{int(time_stats.get('Minimum'))}", f"{int(accuracy_stats.get('Minimum'))}"])

-def log_data(test_data: dict):
-    for key, value in test_data.items():
-        print("File:", key)
-        print("Question:", value.get("question"))
-        print("Expected URL:", value.get("expected_page").get("url"))
-        print(f'Accuracy Score: {int(value.get("accuracy")*100)}%')
-        print(f'Time: {int(value.get("time"))}ms')
-        print()
-    time_stats, accuracy_stats = calculate_statistical_summary(test_data)
-    print("---")
-    print(f"Tested on {len(test_data)} files.")
-    print("Time statistical summary:", end="\n ")
-    for key, value in time_stats.items():
-        print(f"{key}:{int(value)},", end=' ')
-    print("\nAccuracy statistical summary:", end="\n ")
-    for key, value in accuracy_stats.items():
-        print(f"{key}:{int(value*100)}%,", end=' ')
-    print("\n---")


-def calculate_statistical_summary(test_data: dict) -> tuple[dict, dict]:
+def calculate_statistical_summary(test_data: dict) -> tuple[dict, dict, dict, dict, dict, dict]:
+    def calculate_stats(data: list) -> dict:
+        stats = {
+            "Mean": statistics.mean(data),
+            "Median": statistics.median(data),
+            "Standard Deviation": statistics.stdev(data),
+            "Maximum": max(data),
+            "Minimum": min(data),
+        }
+        return stats
+
+    def round_values(stats: dict) -> dict:
+        return {key: int(round(value, 3)) for key, value in stats.items()}
+
+    def convert_to_percentage(stats: dict) -> dict:
+        return {key: int(round(value * 100, 2)) for key, value in stats.items()}
+
     times = [result.get("time") for result in test_data.values()]
     accuracies = [result.get("accuracy") for result in test_data.values()]
-    google = [result.get("google_accuracy") for result in test_data.values()]
-    time_stats = {
-        "Mean": round(statistics.mean(times), 3),
-        "Median": round(statistics.median(times), 3),
-        "Standard Deviation": round(statistics.stdev(times), 3),
-        "Maximum": round(max(times), 3),
-        "Minimum": round(min(times), 3),
-    }
-    accuracy_stats = {
-        "Mean": round(statistics.mean(accuracies), 2),
-        "Median": round(statistics.median(accuracies), 2),
-        "Standard Deviation": round(statistics.stdev(accuracies), 2),
-        "Maximum": round(max(accuracies), 2),
-        "Minimum": round(min(accuracies), 2),
-    }
-    google_stats = {
-        "Mean": round(statistics.mean(google), 2),
-        "Median": round(statistics.median(google), 2),
-        "Standard Deviation": round(statistics.stdev(google), 2),
-        "Maximum": round(max(google), 2),
-        "Minimum": round(min(google), 2),
-    }
-    return time_stats, accuracy_stats, google_stats
-
-def update_dict_google_data(test_data: dict):
+    bing_accuracies = [result.get("bing_accuracy") for result in test_data.values()]
+    bing_times = [result.get("bing_time") for result in test_data.values()]
+    bing_filtered_accuracies = [result.get("bing_filtered_accuracy") for result in test_data.values()]
+    bing_filtered_times = [result.get("bing_filtered_time") for result in test_data.values()]
+
+    time_stats = calculate_stats(times)
+    accuracy_stats = calculate_stats(accuracies)
+    bing_accuracy_stats = calculate_stats(bing_accuracies)
+    bing_times_stats = calculate_stats(bing_times)
+    bing_filtered_accuracy_stats = calculate_stats(bing_filtered_accuracies)
+    bing_filtered_times_stats = calculate_stats(bing_filtered_times)
+
+    time_stats = round_values(time_stats)
+    bing_times_stats = round_values(bing_times_stats)
+    bing_filtered_times_stats = round_values(bing_filtered_times_stats)
+    bing_accuracy_stats = convert_to_percentage(bing_accuracy_stats)
+    accuracy_stats = convert_to_percentage(accuracy_stats)
+    bing_filtered_accuracy_stats = convert_to_percentage(bing_filtered_accuracy_stats)
+
+    return time_stats, accuracy_stats, bing_accuracy_stats, bing_times_stats, bing_filtered_accuracy_stats, bing_filtered_times_stats
+
+def update_dict_bing_data(test_data: dict):
     """
-    Updates the given test_data dictionary with the Google accuracy results.
+    Updates the given test_data dictionary with the Bing accuracy results.
     Args:
        test_data (dict): The dictionary containing the test data.
     """
-    count = 0
+    load_dotenv()
+    endpoint = os.getenv("BING_ENDPOINT")
+    subscription_key = os.getenv("BING_SEARCH_KEY")
+    search_engine = BingSearch(endpoint, subscription_key)
+    count = 1
     for key, value in test_data.items():
         question = value.get("question")
         expected_url = value.get("expected_page").get("url")
         top = value.get("top")
-        google_response_url = search_bing_urls(question, top)
-        google_accuracy_result = calculate_accuracy(google_response_url, expected_url)
-        value["google_accuracy"] = google_accuracy_result.score
-        print(f"{count} file is done")
+        response_url, time_elapsed = search_engine.search_urls(question, top)
+        accuracy_result = calculate_accuracy(response_url, expected_url)
+        value["bing_accuracy"] = accuracy_result.score
+        value["bing_time"] = time_elapsed
+        print(f"{count} files are done")
         count += 1
+
+    count = 1
+    for key, value in test_data.items():
+        question = f"site:inspection.canada.ca {value.get('question')}"
+        expected_url = value.get("expected_page").get("url")
+        top = value.get("top")
+        response_url, time_elapsed = search_engine.search_urls(question, top)
+        accuracy_result = calculate_accuracy(response_url, expected_url)
+        value["bing_filtered_accuracy"] = accuracy_result.score
+        value["bing_filtered_time"] = time_elapsed
+        print(f"{count} files are done")
+        count += 1
59 changes: 35 additions & 24 deletions finesse/bing_search.py
@@ -1,28 +1,39 @@
-import os
-from pprint import pprint
-import requests
-from dotenv import load_dotenv
+from azure.cognitiveservices.search.websearch import WebSearchClient
+from msrest.authentication import CognitiveServicesCredentials
+import time
+import statistics
+
+class BingSearch():
+    """
+    A class for performing web searches using the Bing Search API.
+    """
+
+    def __init__(self, endpoint, subscription_key):
+        self.endpoint = endpoint
+        self.subscription_key = subscription_key
+        self.client = WebSearchClient(endpoint=self.endpoint, credentials=CognitiveServicesCredentials(self.subscription_key))
+        # Temporary workaround for a base-url bug; see https://github.com/Azure/azure-sdk-for-python/issues/34917
+        self.client.config.base_url = '{Endpoint}/v7.0'

-def search_bing_urls(query: str, num_results: int = 100) -> list[str]:
-    load_dotenv()
-    urls = []
-    endpoint = os.getenv("BING_ENDPOINT") + "/v7.0/search"
-    subscription_key = os.getenv("BING_SEARCH_KEY")
-    mkt = 'en-US'
-    params = { 'q': query, 'mkt': mkt, 'count': 50 }
-    headers = { 'Ocp-Apim-Subscription-Key': subscription_key }
-    # Call the API
-    try:
-        response = requests.get(endpoint, headers=headers, params=params)
-        response.raise_for_status()
-        print("\nHeaders:\n")
-        print(response.headers)
-        print("\nJSON Response:\n")
-        pprint(response.json())
-    except Exception as ex:
-        raise ex
+    def search_urls(self, query: str, num_results: int = 100) -> tuple[list[str], float]:
+        """
+        Search for URLs using the Bing Search API.
+
+        Args:
+            query (str): The search query.
+            num_results (int, optional): The number of results to retrieve. Defaults to 100.
+
+        Returns:
+            tuple[list[str], float]: A tuple containing a list of URLs and the average elapsed time per request, in milliseconds.
+        """
+        urls = []
+        elapsed_time = []
+        offset = 0
+        # Bing caps each request at 50 results and may return fewer than 50 web results
+        while len(urls) < num_results:
+            start_time = time.time()
+            web_data = self.client.web.search(query=query, market="en-ca", count=50, response_filter=["Webpages"], offset=offset)
+            elapsed_time.append(time.time() - start_time)
+            if hasattr(web_data, 'web_pages') and web_data.web_pages is not None and web_data.web_pages.value:
+                urls.extend([item.url for item in web_data.web_pages.value])
+                offset += len(web_data.web_pages.value)
+            else:
+                # Stop when Bing returns no further results, to avoid looping forever
+                break
+        urls = urls[:num_results]
+        return urls, statistics.mean(elapsed_time) * 1000
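
Putting the pieces together, a minimal usage sketch of the new class (the query is invented; the credentials come from the same environment variables the accuracy functions read):

    import os
    from dotenv import load_dotenv
    from finesse.bing_search import BingSearch

    load_dotenv()
    engine = BingSearch(os.getenv("BING_ENDPOINT"), os.getenv("BING_SEARCH_KEY"))
    urls, avg_ms = engine.search_urls("seed labelling requirements", num_results=100)
    filtered_urls, _ = engine.search_urls("site:inspection.canada.ca seed labelling requirements")
    print(len(urls), f"{avg_ms:.0f}ms")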
31 changes: 16 additions & 15 deletions finesse/finesse_test.py
@@ -2,9 +2,10 @@
 from jsonreader import JSONReader
 import os
 import json
-from accuracy_functions import save_to_markdown, save_to_csv, log_data, calculate_accuracy, update_dict_google_data
+from accuracy_functions import save_to_markdown, save_to_csv, calculate_accuracy, update_dict_bing_data
 from host import is_host_up

+global_test_data = dict()
 class NoTestDataError(Exception):
     """Raised when all requests have failed and there is no test data"""
@@ -58,12 +59,12 @@ def search_accuracy(self):
             for page in response_pages:
                 response_url.append(page.get("url"))
             accuracy_result = calculate_accuracy(response_url, expected_url)
-            time_taken = round(response.elapsed.microseconds/1000, 3)
+            time_taken = round(response.elapsed.total_seconds()*1000, 3)

             expected_page = json_data.copy()
             del expected_page['question']
             del expected_page['answer']
-            self.qna_results[file_name] = {
+            global_test_data[file_name] = {
                 "question": question,
                 "expected_page": expected_page,
                 "response_pages": response_pages,
@@ -76,26 +77,26 @@ def search_accuracy(self):

     def on_start(self):
         self.qna_reader = JSONReader(self.path)
-        self.qna_results = dict()

     def on_stop(self):
-        if not self.qna_results:
+        if not global_test_data:
             raise NoTestDataError

-        print("Search accuracy test completed")
-        print("Starting google search test")
-
-        update_dict_google_data(self.qna_results)
-        log_data(self.qna_results)
-        if self.format == "md":
-            save_to_markdown(self.qna_results, self.engine)
-        elif self.format == "csv":
-            save_to_csv(self.qna_results, self.engine)

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.path = self.environment.parsed_options.path
         self.engine = self.environment.parsed_options.engine
         self.format = self.environment.parsed_options.format
         self.once = self.environment.parsed_options.once
         self.top = self.environment.parsed_options.top

+@events.quitting.add_listener
+def quitting(environment, **_kwargs):
+    print("Search accuracy test completed")
+    print("Starting bing search test")
+
+    update_dict_bing_data(global_test_data)
+    if environment.parsed_options.format == "md":
+        save_to_markdown(global_test_data, environment.parsed_options.engine)
+    elif environment.parsed_options.format == "csv":
+        save_to_csv(global_test_data, environment.parsed_options.engine)
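
The quitting listener fires once the locust run ends, so the Bing comparison only starts after the finesse accuracy pass has finished. A plausible invocation, assuming the repository registers custom locust options matching the parsed_options lookups above (the flag names and values are an assumption), might look like:

    locust -f finesse/finesse_test.py --headless --path ./qna --engine ai-lab --format md --once --top 100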
2 changes: 2 additions & 0 deletions requirements.txt
@@ -1,3 +1,5 @@
 locust
 regex
 python-dotenv
+azure-cognitiveservices-search-websearch
+msrest
