From 7a0ca7834852b588d5d41c3aaad7868fd5535841 Mon Sep 17 00:00:00 2001 From: Iamhexi Date: Fri, 11 Oct 2024 11:28:04 +0200 Subject: [PATCH] fix(backend): hide implementation details in adding material endpoint --- knowledge_verificator/answer_chooser.py | 95 ++++++++++++++----------- knowledge_verificator/backend.py | 5 +- 2 files changed, 54 insertions(+), 46 deletions(-) diff --git a/knowledge_verificator/answer_chooser.py b/knowledge_verificator/answer_chooser.py index 7850762..bf74b6e 100644 --- a/knowledge_verificator/answer_chooser.py +++ b/knowledge_verificator/answer_chooser.py @@ -1,6 +1,6 @@ """Module with AnswerChooser, which finds a best candidate for an answer in a paragraph.""" -from functools import cache +from copy import copy import random import nltk # type: ignore[import-untyped] from nltk.corpus import wordnet # type: ignore[import-untyped] @@ -13,6 +13,7 @@ class AnswerChooser: """ def __init__(self) -> None: + self._cache: dict[str, str | None] = {} dependencies = ('wordnet', 'stopwords', 'punkt') for dependency in tqdm( dependencies, @@ -90,48 +91,58 @@ def find_part_of_speech(self, word: str) -> str: case _: return 'n/a' + def choose_answer( + self, paragraph: str, use_cached: bool = True + ) -> str | None: + """ + Choose a good candidate for an answer from a paragraph. -@cache -def choose_answer(ac_module: AnswerChooser, paragraph: str) -> str | None: - """ - Choose a good candidate for an answer from a paragraph. - - Choose a good candidate from `paragraph` based on the following algorithm: - 1. Remove stop words. - 2. If any word with undetermined part of speech (PoS) is present, - a random word with undetermined PoS is chosen. - 3. Otherwise, a random noun is chosen. - - Args: - paragraph (str): Source paragraph to choose candidate from. + Choose a good candidate from `paragraph` based on the following algorithm: + 1. Remove stop words. + 2. 
If any word with undetermined part of speech (PoS) is present, + a random word with undetermined PoS is chosen. + 3. Otherwise, a random noun is chosen. + This operation may be costly so its results are cached. A custom caching + mechanism is used because `functools` `@cache` and `@lru_cache` should + not be applied to methods, only to functions. - Returns: - str | None: Either chosen word or `None` if there are no good candidates. - """ + Args: + paragraph (str): Source paragraph to choose candidate from. + use_cached (bool): Use a cached result if available. - paragraph = ac_module.remove_stopwords(paragraph) - - words = paragraph.split(' ') - words = [ac_module.sanitize(word) for word in words] - tagged_words = [ - (sanitized_word, ac_module.find_part_of_speech(sanitized_word)) - for sanitized_word in words - if sanitized_word - ] - - if not words: - return None - - unknown_words_present = any( - part_of_speech == 'n/a' for _, part_of_speech in tagged_words - ) - if unknown_words_present: - return random.choice(words) - - return random.choice( - [ - word - for word, part_of_speech in tagged_words - if part_of_speech == 'noun' + Returns: + str | None: Either chosen word or `None` if there are no good candidates. 
+ """ + if paragraph in self._cache and use_cached: + return self._cache[paragraph] + + entered_paragraph = copy(paragraph) + paragraph = self.remove_stopwords(paragraph) + + words = paragraph.split(' ') + words = [self.sanitize(word) for word in words] + tagged_words = [ + (sanitized_word, self.find_part_of_speech(sanitized_word)) + for sanitized_word in words + if sanitized_word ] - ) + + if not words: + return None + + unknown_words_present = any( + part_of_speech == 'n/a' for _, part_of_speech in tagged_words + ) + if unknown_words_present: + return random.choice(words) + + output = random.choice( + [ + word + for word, part_of_speech in tagged_words + if part_of_speech == 'noun' + ] + ) + + self._cache[entered_paragraph] = output + return output diff --git a/knowledge_verificator/backend.py b/knowledge_verificator/backend.py index b6a487e..bf0baac 100644 --- a/knowledge_verificator/backend.py +++ b/knowledge_verificator/backend.py @@ -102,12 +102,9 @@ def add_material(material: Material, response: Response) -> dict: message = '' try: material_db.add_material(material=material) - except ValueError as e: + except (ValueError, FileExistsError) as e: message = str(e) response.status_code = 400 - except FileExistsError as e: - message = str(e) - response.status_code = 403 if response.status_code != 200: return format_response(message=message)