Skip to content

Commit

Permalink
Merge pull request #8 from ai-cfia/7-update-finesse-benchmarking-script-to-skip-specific-files
Browse files Browse the repository at this point in the history

Skip test files when necessary
  • Loading branch information
ibrahim-kabir authored Apr 2, 2024
2 parents 0a55151 + c04a437 commit 72ba3fb
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 19 deletions.
2 changes: 1 addition & 1 deletion finesse/FINESSE_USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ sequenceDiagram
## Example Command

```cmd
$locust -f finesse/finesse_test.py --engine azure --path finesse/QnA/good_question --host https://finesse-guidance.ninebasetwo.xyz/api --once
$locust -f finesse/finesse_test.py --engine azure --path finesse/QnA/sorted-2024-02-22/ --host https://finesse.inspection.alpha.canada.ca/api --once
Searching with Azure Search...
File: qna_2023-12-08_36.json
Expand Down
35 changes: 18 additions & 17 deletions finesse/accuracy_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def calculate_accuracy(responses_url: list[str], expected_url: str) -> AccuracyR
if response_number == expected_number:
position = idx
score = 1 - (position / total_pages)
score= round(score, 2)
score = round(score, 2)
break

return AccuracyResult(position, total_pages, score)
Expand All @@ -36,19 +36,20 @@ def save_to_markdown(test_data: dict, engine: str):
md_file.write("| 📄 File | 💬 Question | 📏 Accuracy Score | ⌛ Time |\n")
md_file.write("|--------------------|-------------------------------------------------------------------------------------------------------------------------|----------------|----------|\n")
for key, value in test_data.items():
md_file.write(f"| {key} | [{value.get('question')}]({value.get('expected_page').get('url')})' | {value.get('accuracy')*100:.1f}% | {value.get('time')}ms |\n")
md_file.write(f"| {key} | [{value.get('question')}]({value.get('expected_page').get('url')}) | {value.get('accuracy')*100:.0f}% | {int(value.get('time'))}ms |\n")
md_file.write("\n")
md_file.write(f"Tested on {len(test_data)} files.\n\n")

time_stats, accuracy_stats = calculate_statistical_summary(test_data)
md_file.write("## Statistical summary\n\n")
md_file.write("| Statistic | Time | Accuracy score|\n")
md_file.write("|-----------------------|------------|---------|\n")
md_file.write(f"|Mean| {time_stats.get('Mean')}ms | {accuracy_stats.get('Mean')*100}% |\n")
md_file.write(f"|Median| {time_stats.get('Median')}ms | {accuracy_stats.get('Median')*100}% |\n")
md_file.write(f"|Standard Deviation| {time_stats.get('Standard Deviation')}ms | {accuracy_stats.get('Standard Deviation')*100}% |\n")
md_file.write(f"|Maximum| {time_stats.get('Maximum')}ms | {accuracy_stats.get('Maximum')*100}% |\n")
md_file.write(f"|Minimum| {time_stats.get('Minimum')}ms | {accuracy_stats.get('Minimum')*100}% |\n")
md_file.write(f"|Mean| {int(time_stats.get('Mean'))}ms | {int(accuracy_stats.get('Mean')*100)}% |\n")
md_file.write(f"|Median| {int(time_stats.get('Median'))}ms | {int(accuracy_stats.get('Median')*100)}% |\n")
md_file.write(f"|Standard Deviation| {int(time_stats.get('Standard Deviation'))}ms | {int(accuracy_stats.get('Standard Deviation')*100)}% |\n")
md_file.write(f"|Maximum| {int(time_stats.get('Maximum'))}ms | {int(accuracy_stats.get('Maximum')*100)}% |\n")
md_file.write(f"|Minimum| {int(time_stats.get('Minimum'))}ms | {int(accuracy_stats.get('Minimum')*100)}% |\n")
md_file.write(f"\nThere are a total of {len([result.get('accuracy') for result in test_data.values() if result.get('accuracy') == 0])} null scores\n")

def save_to_csv(test_data: dict, engine: str):
if not os.path.exists(OUTPUT_FOLDER):
Expand All @@ -64,35 +65,35 @@ def save_to_csv(test_data: dict, engine: str):
key,
value.get("question"),
f"{value.get('accuracy')}",
f"{value.get('time')}"
f"{int(value.get('time'))}"
])
writer.writerow([])

time_stats, accuracy_stats = calculate_statistical_summary(test_data)
writer.writerow(["Statistic", "Time", "Accuracy Score"])
writer.writerow(["Mean", f"{time_stats.get('Mean')}", f"{accuracy_stats.get('Mean')}"])
writer.writerow(["Median", f"{time_stats.get('Median')}", f"{accuracy_stats.get('Median')}"])
writer.writerow(["Standard Deviation", f"{time_stats.get('Standard Deviation')}", f"{accuracy_stats.get('Standard Deviation')}"])
writer.writerow(["Maximum", f"{time_stats.get('Maximum')}", f"{accuracy_stats.get('Maximum')}"])
writer.writerow(["Minimum", f"{time_stats.get('Minimum')}", f"{accuracy_stats.get('Minimum')}"])
writer.writerow(["Mean", f"{int(time_stats.get('Mean'))}", f"{int(accuracy_stats.get('Mean'))}"])
writer.writerow(["Median", f"{int(time_stats.get('Median'))}", f"{int(accuracy_stats.get('Median'))}"])
writer.writerow(["Standard Deviation", f"{int(time_stats.get('Standard Deviation'))}", f"{int(accuracy_stats.get('Standard Deviation'))}"])
writer.writerow(["Maximum", f"{int(time_stats.get('Maximum'))}", f"{int(accuracy_stats.get('Maximum'))}"])
writer.writerow(["Minimum", f"{int(time_stats.get('Minimum'))}", f"{int(accuracy_stats.get('Minimum'))}"])

def log_data(test_data: dict):
for key, value in test_data.items():
print("File:", key)
print("Question:", value.get("question"))
print("Expected URL:", value.get("expected_page").get("url"))
print(f'Accuracy Score: {value.get("accuracy")*100}%')
print(f'Time: {value.get("time")}ms')
print(f'Accuracy Score: {int(value.get("accuracy")*100)}%')
print(f'Time: {int(value.get("time"))}ms')
print()
time_stats, accuracy_stats = calculate_statistical_summary(test_data)
print("---")
print(f"Tested on {len(test_data)} files.")
print("Time statistical summary:", end="\n ")
for key,value in time_stats.items():
print(f"{key}:{value},", end=' ')
print(f"{key}:{int(value)},", end=' ')
print("\nAccuracy statistical summary:", end="\n ")
for key,value in accuracy_stats.items():
print(f"{key}:{value*100}%,", end=' ')
print(f"{key}:{int(value*100)}%,", end=' ')
print("\n---")


Expand Down
4 changes: 4 additions & 0 deletions finesse/finesse_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ class FinesseUser(HttpUser):
def search_accuracy(self):
try:
json_data = next(self.qna_reader)
while json_data.get("skip") is True:
json_data = next(self.qna_reader)
except StopIteration:
if not self.once:
# Reset variables
self.on_start()
json_data = next(self.qna_reader)
while json_data.get("skip") is True:
json_data = next(self.qna_reader)
print("Restarting the running test")
else:
print("Stopping the running test")
Expand Down
2 changes: 1 addition & 1 deletion finesse/jsonreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class JSONReader(Iterator):

def __init__(self, directory):
self.directory = directory
self.file_list = [f for f in os.listdir(directory) if f.endswith('.json')]
self.file_list = sorted([f for f in os.listdir(directory) if f.endswith('.json')])
if not self.file_list:
raise FileNotFoundError(f"No JSON files found in the directory '{directory}'")
self.current_file_index = 0
Expand Down

0 comments on commit 72ba3fb

Please sign in to comment.