issue #6: Refactored + Fix markdown issue on links + Add parsing script to the repo + Review csv function + Sort files by number
ibrahim-kabir committed Mar 27, 2024
1 parent 478660d commit 43698da
Showing 10 changed files with 254 additions and 45 deletions.
7 changes: 5 additions & 2 deletions .gitignore
@@ -43,5 +43,8 @@ flask_session/
# Ignore local QnA json files
QnA

# Ignore output of api-test
output
# Ignore output of api-test and from the scripts
output/

# Ignore input of the scripts
input/
57 changes: 52 additions & 5 deletions finesse/FINESSE_USAGE.md
@@ -1,8 +1,8 @@
# How to use the Finesse Locust script

This tool simplifies the process of comparing different search engines and
assessing their accuracy. It's designed to be straightforward, making it easy
to understand and use.
assessing their accuracy. It's designed to be straightforward, making it easy to
understand and use.

## How it Works

@@ -16,8 +16,8 @@ to understand and use.
- `static`: Static search engine
- `llamaindex`: LlamaIndex search engine
- `--path [directory path]`: Point to the directory with files structured
- `--host [API URL]`: Point to the finesse-backend URL
with JSON files with the following properties:
- `--host [API URL]`: Point to the finesse-backend URL with JSON files with
the following properties:
- `score`: The score of the page.
- `crawl_id`: The unique identifier associated with the crawl table.
- `chunk_id`: The unique identifier of the chunk.
@@ -43,7 +43,8 @@ to understand and use.
- **Round trip time**
- Measure round trip time of each request
- **Summary statistical value**
- Measure the average, median, standard deviation, minimum and maximal accuracy scores and round trip time
- Measure the average, median, standard deviation, minimum and maximal
accuracy scores and round trip time
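
As a rough, illustrative sketch (not the project's actual code), these summary
statistics can be computed with Python's standard `statistics` module:

```python
# Rough sketch of the summary statistics described above; the sample
# round-trip times are made-up values for illustration only.
import statistics

round_trip_times_ms = [350, 420, 390, 510, 280]

summary = {
    "Mean": statistics.mean(round_trip_times_ms),
    "Median": statistics.median(round_trip_times_ms),
    "Standard Deviation": statistics.stdev(round_trip_times_ms),
    "Maximum": max(round_trip_times_ms),
    "Minimum": min(round_trip_times_ms),
}
print(summary)
```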

## Diagram

@@ -100,3 +101,49 @@ Accuracy statistical summary:

This example shows the CLI output of the tool, analyzing search results from
Azure Search and providing an accuracy score for Finesse.

## Scripts

### XLSX Converter to JSON 📄

This script converts data from an Excel file (.xlsx) into JSON format. It is
used for questions created by non-developers, since an Excel file is more
readable than a JSON file.

### Usage

1. **Input Excel File**: Place the Excel file containing the data in the
specified input folder (`--input-folder`). By default, the input folder is
set to `'finesse/scripts/input/'`.

2. **Output Folder**: Specify the folder where the resulting JSON files will be
saved using the `--output-folder` argument. By default, the output folder is
set to `'finesse/scripts/output/'`.

3. **Input File Name**: Provide the name of the input Excel file using the
   `--file-name` argument.

4. **Worksheet Name**: Specify the name of the worksheet containing the data
using the `--sheet-name` argument. By default, it is set to `'To fill'`.

### Example Command

```bash
python finesse/scripts/xlsx_converter_json.py --input-folder finesse/scripts/input/ --output-folder finesse/scripts/output/ --file-name Finesse_questions_for_testing.xlsx --sheet-name "To fill"
```

Replace `Finesse_questions_for_testing.xlsx` with the actual name of your input
Excel file and `"To fill"` with the name of the worksheet containing the data.

### Output

The script generates an individual JSON file for each row of data and saves it
in the specified output folder. Each JSON file contains the following fields:

- `question`: The question extracted from the Excel file.
- `answer`: The answer extracted from the Excel file.
- `title`: The title(s) extracted from specified columns in the Excel file.
- `url`: The URL(s) extracted from specified columns in the Excel file.

Upon completion, the script prints "Conversion terminée !" (Conversion
completed!) to indicate that the conversion process is finished.
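
The converter itself lives at `finesse/scripts/xlsx_converter_json.py`. As a
hedged sketch of the approach described above (the column names, the output
file naming, and the use of pandas are illustrative assumptions, not the real
script), it might look roughly like this:

```python
# Hedged sketch of an XLSX-to-JSON converter along the lines described above.
# The column names ("Question", "Answer", "Title", "URL"), the output file
# naming, and the use of pandas are assumptions for illustration; the real
# script in finesse/scripts/xlsx_converter_json.py may differ.
import argparse
import json
import os

import pandas as pd


def convert(input_folder: str, output_folder: str, file_name: str, sheet_name: str) -> None:
    df = pd.read_excel(os.path.join(input_folder, file_name), sheet_name=sheet_name)
    os.makedirs(output_folder, exist_ok=True)
    for idx, row in df.iterrows():
        data = {
            "question": row.get("Question"),
            "answer": row.get("Answer"),
            "title": row.get("Title"),
            "url": row.get("URL"),
        }
        # One JSON file per worksheet row
        output_path = os.path.join(output_folder, f"question_{idx + 1}.json")
        with open(output_path, "w", encoding="utf-8") as json_file:
            json.dump(data, json_file, ensure_ascii=False, indent=4)
    print("Conversion terminée !")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert an Excel worksheet into per-row JSON files.")
    parser.add_argument("--input-folder", default="finesse/scripts/input/")
    parser.add_argument("--output-folder", default="finesse/scripts/output/")
    parser.add_argument("--file-name", required=True)
    parser.add_argument("--sheet-name", default="To fill")
    args = parser.parse_args()
    convert(args.input_folder, args.output_folder, args.file_name, args.sheet_name)
```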
110 changes: 88 additions & 22 deletions finesse/accuracy_functions.py
@@ -10,17 +10,37 @@
OUTPUT_FOLDER = "./finesse/output"
AccuracyResult = namedtuple("AccuracyResult", ["position", "total_pages", "score"])

def calculate_accuracy(responses_url: list[str], expected_url: str) -> AccuracyResult:
def calculate_accuracy(responses_url: list[str], expected_url: list | str) -> AccuracyResult:
"""
Calculates the accuracy of the responses by comparing the URLs of the responses with the expected URL.
Args:
responses_url (list[str]): A list of URLs representing the responses.
expected_url (list[str] | str): The expected URL or a list of expected URLs.
Returns:
AccuracyResult: An object containing the position, total pages, and score of the accuracy calculation.
"""
position: int = 0
total_pages: int = len(responses_url)
score: float = 0.0
expected_number = int(re.findall(r'/(\d+)/', expected_url)[0])
expected_number = []

PATTERN = r'/(\d+)/'
if isinstance(expected_url, list):
for url in expected_url:
if url.startswith("https://inspection.canada.ca"):
number = int(re.findall(PATTERN, url)[0])
expected_number.append(number)
elif isinstance(expected_url, str) and expected_url.startswith("https://inspection.canada.ca"):
number = int(re.findall(PATTERN, expected_url)[0])
expected_number.append(number)

for idx, response_url in enumerate(responses_url):
if response_url.startswith("https://inspection.canada.ca"):
try:
response_number = int(re.findall(r'/(\d+)/', response_url)[0])
if response_number == expected_number:
response_number = int(re.findall(PATTERN, response_url)[0])
if response_number in expected_number:
position = idx
score = 1 - (position / total_pages)
score= round(score, 2)
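
For context, a hedged usage sketch of the updated function follows; the URLs
and the import path are illustrative assumptions rather than code from the
repository:

```python
# Hedged usage sketch of the updated calculate_accuracy; the URLs and the
# import path are illustrative assumptions, not code from the repository.
from finesse.accuracy_functions import calculate_accuracy

responses = [
    "https://inspection.canada.ca/some-topic/eng/1111111111111/2222222222222",
    "https://inspection.canada.ca/other-topic/eng/3333333333333/4444444444444",
]
# expected_url may now be a list of acceptable URLs instead of a single string
expected = ["https://inspection.canada.ca/other-topic/eng/3333333333333/4444444444444"]

result = calculate_accuracy(responses, expected)
# The match is at index 1 of 2 responses, so the score is 1 - (1 / 2) = 0.5
print(result.position, result.total_pages, result.score)  # 1 2 0.5
```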
@@ -42,7 +62,15 @@ def save_to_markdown(test_data: dict, engine: str):
md_file.write("| 📄 File | 💬 Question| 🔎 Finesse Accuracy Score | 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score |⌛ Finesse Time | ⌛ Bing Time | ⌛ Filtered Bing Time |\n")
md_file.write("|---|---|---|---|---|---|---|---|\n")
for key, value in test_data.items():
md_file.write(f"| {key} | [{value.get('question')}]({value.get('expected_page').get('url')}) | {int(value.get('accuracy')*100)}% | {int(value.get('bing_accuracy')*100)}% |{int(value.get('bing_filtered_accuracy')*100)}% |{int(value.get('time'))}ms | {int(value.get('bing_time'))}ms | {int(value.get('bing_filtered_time'))}ms |\n")
question = ""
if isinstance(value.get("expected_page").get("url"), list):
question = f"{value.get('question')} "
for index, url in enumerate(value.get("expected_page").get("url")):
question += f"\| [Link{index+1}]({url}) "
question += "\|"
else:
question = f"[{value.get('question')}]({value.get('expected_page').get('url')})"
md_file.write(f"| {key} | {question} | {int(value.get('accuracy')*100)}% | {int(value.get('bing_accuracy')*100)}% |{int(value.get('bing_filtered_accuracy')*100)}% |{int(value.get('time'))}ms | {int(value.get('bing_time'))}ms | {int(value.get('bing_filtered_time'))}ms |\n")
md_file.write("\n")
md_file.write(f"Tested on {len(test_data)} files.\n\n")

@@ -64,6 +92,15 @@ def save_to_markdown(test_data: dict, engine: str):
md_file.write(f"| Top (100%)| {finesse_top} | {bing_top} |{bing_filtered_top} |\n")

def count_null_top_scores(accuracy_scores: dict):
"""
Counts the number of null scores and top scores in the given accuracy_scores dictionary.
Args:
accuracy_scores (dict): A dictionary containing accuracy scores.
Returns:
tuple: A tuple containing the count of null scores and top scores, respectively.
"""
null_scores = len([score for score in accuracy_scores.values() if score == 0])
top_scores = len([score for score in accuracy_scores.values() if score == 1])

@@ -77,25 +114,52 @@ def save_to_csv(test_data: dict, engine: str):
output_file = os.path.join(OUTPUT_FOLDER, file_name)
with open(output_file, "w", newline="") as csv_file:
writer = csv.writer(csv_file)
writer.writerow(["File", "Question", "Accuracy Score", "Time"])
writer.writerow(["File", "Question", "Finesse Accuracy Score", "Bing Accuracy Score", "Filtered Bing Accuracy Score", "Finesse Time", "Bing Time", "Filtered Bing Time"])
for key, value in test_data.items():
question = ""
if isinstance(value.get("expected_page").get("url"), list):
question = f"{value.get('question')} "
for index, url in enumerate(value.get("expected_page").get("url")):
question += f"[{index+1}]({url}) "
else:
question = f"[{value.get('question')}]({value.get('expected_page').get('url')})"
writer.writerow([
key,
value.get("question"),
f"{value.get('accuracy')}",
f"{int(value.get('time'))}"
question,
f"{int(value.get('accuracy')*100)}%",
f"{int(value.get('bing_accuracy')*100)}%",
f"{int(value.get('bing_filtered_accuracy')*100)}%",
f"{int(value.get('time'))}ms",
f"{int(value.get('bing_time'))}ms",
f"{int(value.get('bing_filtered_time'))}ms"
])
writer.writerow([])

time_stats, accuracy_stats, bing_stats = calculate_statistical_summary(test_data)
writer.writerow(["Statistic", "Time", "Accuracy Score"])
writer.writerow(["Mean", f"{int(time_stats.get('Mean'))}", f"{int(accuracy_stats.get('Mean'))}"])
writer.writerow(["Median", f"{int(time_stats.get('Median'))}", f"{int(accuracy_stats.get('Median'))}"])
writer.writerow(["Standard Deviation", f"{int(time_stats.get('Standard Deviation'))}", f"{int(accuracy_stats.get('Standard Deviation'))}"])
writer.writerow(["Maximum", f"{int(time_stats.get('Maximum'))}", f"{int(accuracy_stats.get('Maximum'))}"])
writer.writerow(["Minimum", f"{int(time_stats.get('Minimum'))}", f"{int(accuracy_stats.get('Minimum'))}"])
time_stats, accuracy_stats, bing_accuracy_stats, bing_time_stats, bing_filtered_accuracy_stats, bing_filtered_time_stats = calculate_statistical_summary(test_data)
writer.writerow(["Statistic", "Finesse Accuracy Score", "Bing Accuracy Score", "Filtered Bing Accuracy Score", "Finesse Time", "Bing Time", "Filtered Bing Time"])
writer.writerow(["Mean", f"{accuracy_stats.get('Mean')}%", f"{bing_accuracy_stats.get('Mean')}%", f"{bing_filtered_accuracy_stats.get('Mean')}%", f"{time_stats.get('Mean')}ms", f"{bing_time_stats.get('Mean')}ms", f"{bing_filtered_time_stats.get('Mean')}ms"])
writer.writerow(["Median", f"{accuracy_stats.get('Median')}%", f"{bing_accuracy_stats.get('Median')}%", f"{bing_filtered_accuracy_stats.get('Median')}%", f"{time_stats.get('Median')}ms", f"{bing_time_stats.get('Median')}ms", f"{bing_filtered_time_stats.get('Median')}ms"])
writer.writerow(["Standard Deviation", f"{accuracy_stats.get('Standard Deviation')}%", f"{bing_accuracy_stats.get('Standard Deviation')}%", f"{bing_filtered_accuracy_stats.get('Standard Deviation')}%", f"{time_stats.get('Standard Deviation')}ms", f"{bing_time_stats.get('Standard Deviation')}ms", f"{bing_filtered_time_stats.get('Standard Deviation')}ms"])
writer.writerow(["Maximum", f"{accuracy_stats.get('Maximum')}%", f"{bing_accuracy_stats.get('Maximum')}%", f"{bing_filtered_accuracy_stats.get('Maximum')}%", f"{time_stats.get('Maximum')}ms", f"{bing_time_stats.get('Maximum')}ms", f"{bing_filtered_time_stats.get('Maximum')}ms"])
writer.writerow(["Minimum", f"{accuracy_stats.get('Minimum')}%", f"{bing_accuracy_stats.get('Minimum')}%", f"{bing_filtered_accuracy_stats.get('Minimum')}%", f"{time_stats.get('Minimum')}ms", f"{bing_time_stats.get('Minimum')}ms", f"{bing_filtered_time_stats.get('Minimum')}ms"])

def calculate_statistical_summary(test_data: dict) -> tuple[dict, dict, dict, dict, dict, dict]:
"""
Calculate the statistical summary of the test data.
Args:
test_data (dict): A dictionary containing the test data.
Returns:
tuple[dict, dict, dict, dict, dict, dict]: A tuple containing the statistical summary for different metrics.
The tuple contains the following dictionaries:
- time_stats: Statistical summary for the 'time' metric.
- accuracy_stats: Statistical summary for the 'accuracy' metric.
- bing_accuracy_stats: Statistical summary for the 'bing_accuracy' metric.
- bing_times_stats: Statistical summary for the 'bing_times' metric.
- bing_filtered_accuracy_stats: Statistical summary for the 'bing_filtered_accuracy' metric.
- bing_filtered_times_stats: Statistical summary for the 'bing_filtered_times' metric.
"""
def calculate_stats(data: list) -> dict:
stats = {
"Mean": statistics.mean(data),
@@ -142,30 +206,32 @@ def update_dict_bing_data(test_data: dict):
Args:
test_data (dict): The dictionary containing the test data.
"""
copy_data = test_data.copy()
load_dotenv()
endpoint = os.getenv("BING_ENDPOINT")
subscription_key = os.getenv("BING_SEARCH_KEY")
search_engine = BingSearch(endpoint, subscription_key)
count = 1
for key, value in test_data.items():
for key, value in copy_data.items():
question = value.get("question")
expected_url = value.get("expected_page").get("url")
top = value.get("top")
response_url, time_elapsed = search_engine.search_urls(question, top)
accuracy_result = calculate_accuracy(response_url, expected_url)
value["bing_accuracy"] = accuracy_result.score
value["bing_time"] = time_elapsed
test_data[key]["bing_accuracy"] = accuracy_result.score
test_data[key]["bing_time"] = time_elapsed
print(f"{count} files are done")
count += 1

print("Second Bing Search Test")
count = 1
for key, value in test_data.items():
for key, value in copy_data.items():
question = f"site:inspection.canada.ca {value.get('question')}"
expected_url = value.get("expected_page").get("url")
top = value.get("top")
response_url, time_elapsed = search_engine.search_urls(question, top)
accuracy_result = calculate_accuracy(response_url, expected_url)
value["bing_filtered_accuracy"] = accuracy_result.score
value["bing_filtered_time"] = time_elapsed
test_data[key]["bing_filtered_accuracy"] = accuracy_result.score
test_data[key]["bing_filtered_time"] = time_elapsed
print(f"{count} files are done")
count += 1
5 changes: 4 additions & 1 deletion finesse/bing_search.py
@@ -34,6 +34,9 @@ def search_urls(self, query: str, num_results: int = 100) -> tuple[list[str], fl
elapsed_time.append(time.time() - start_time)
if hasattr(web_data, 'web_pages') and web_data.web_pages is not None:
urls.extend([item.url for item in web_data.web_pages.value])
offset += len([item.url for item in web_data.web_pages.value])
try:
offset += len([item.url for item in web_data.web_pages.value])
except AttributeError:
break
urls = urls[:num_results]
return urls, statistics.mean(elapsed_time) * 1000
23 changes: 14 additions & 9 deletions finesse/finesse_test.py
@@ -6,6 +6,7 @@
from host import is_host_up

global_test_data = dict()
settings = dict()
class NoTestDataError(Exception):
"""Raised when all requests have failed and there is no test data"""

@@ -60,7 +61,6 @@ def search_accuracy(self):
response_url.append(page.get("url"))
accuracy_result = calculate_accuracy(response_url, expected_url)
time_taken = round(response.elapsed.total_seconds()*1000,3)

expected_page = json_data.copy()
del expected_page['question']
del expected_page['answer']
@@ -72,7 +72,7 @@
"total_pages": accuracy_result.total_pages,
"accuracy": accuracy_result.score,
"time": time_taken,
"top": self.top
"top": self.top,
}

def on_start(self):
@@ -89,14 +89,19 @@ def __init__(self, *args, **kwargs):
self.format = self.environment.parsed_options.format
self.once = self.environment.parsed_options.once
self.top = self.environment.parsed_options.top
settings["engine"] = self.engine
settings["format"] = self.format
settings["once"] = self.once
settings["top"] = self.top
settings["path"] = self.path


@events.quitting.add_listener
def quitting(environment, **_kwargs):
@events.quit.add_listener
def quit(**_kwargs):
print("Search accuracy test completed")
print("Starting bing search test")

update_dict_bing_data(global_test_data)
if environment.parsed_options.format == "md":
save_to_markdown(global_test_data, environment.parsed_options.engine)
elif environment.parsed_options.format == "csv":
save_to_csv(global_test_data, environment.parsed_options.engine)
if settings.get("format") == "md":
save_to_markdown(global_test_data, "azure")
elif settings.get("format") == "csv":
save_to_csv(global_test_data, settings.get("engine"))
4 changes: 2 additions & 2 deletions finesse/jsonreader.py
@@ -1,13 +1,13 @@
import json
from typing import Iterator
import os

from natsort import natsorted
class JSONReader(Iterator):
"Read test data from JSON files using an iterator"

def __init__(self, directory):
self.directory = directory
self.file_list = sorted([f for f in os.listdir(directory) if f.endswith('.json')])
self.file_list = natsorted([f for f in os.listdir(directory) if f.endswith('.json')])
if not self.file_list:
raise FileNotFoundError(f"No JSON files found in the directory '{directory}'")
self.current_file_index = 0
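
The switch from `sorted` to `natsorted` implements the "Sort files by number"
part of this commit. A quick illustration of the difference, using hypothetical
file names:

```python
# Why natsorted instead of sorted: lexicographic sorting puts "10" before "2".
# File names below are hypothetical.
from natsort import natsorted

files = ["question_10.json", "question_2.json", "question_1.json"]

print(sorted(files))     # ['question_1.json', 'question_10.json', 'question_2.json']
print(natsorted(files))  # ['question_1.json', 'question_2.json', 'question_10.json']
```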
