Implement the basic backend with API #14

Merged · 15 commits · Oct 14, 2024
6 changes: 6 additions & 0 deletions config.yaml
@@ -0,0 +1,6 @@
mode: BACKEND
logging_level: DEBUG
production_mode: false
learning_materials: ./learning_assets
experiment_implementation: ./tests/model
experiment_results: ./tests/model/results
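
The new `config.yaml` provides the settings that the other modules read through `knowledge_verificator.io_handler.config` (see the imports in the files below). As a rough illustration only (the actual loader lives in `io_handler` and is not part of this diff), a file like this could be parsed with PyYAML roughly as follows; the `Config` dataclass and `load_config` helper are hypothetical names:

```python
# Hypothetical sketch of loading config.yaml with PyYAML; the real loader in
# io_handler may differ. The field names mirror the keys shown in config.yaml.
from dataclasses import dataclass
from pathlib import Path

import yaml  # PyYAML


@dataclass
class Config:
    mode: str
    logging_level: str
    production_mode: bool
    learning_materials: Path
    experiment_implementation: Path
    experiment_results: Path


def load_config(path: str = 'config.yaml') -> Config:
    """Read the YAML file and map its keys onto the Config dataclass."""
    with open(path, encoding='utf-8') as handle:
        raw = yaml.safe_load(handle)
    return Config(
        mode=raw['mode'],
        logging_level=raw['logging_level'],
        production_mode=raw['production_mode'],
        learning_materials=Path(raw['learning_materials']),
        experiment_implementation=Path(raw['experiment_implementation']),
        experiment_results=Path(raw['experiment_results']),
    )
```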
47 changes: 31 additions & 16 deletions knowledge_verificator/answer_chooser.py
@@ -1,5 +1,6 @@
"""Module with AnswerChooser, which finds a best candidate for an answer in a paragraph."""

from copy import copy
import random
import nltk # type: ignore[import-untyped]
from nltk.corpus import wordnet # type: ignore[import-untyped]
@@ -12,6 +13,7 @@ class AnswerChooser:
"""

def __init__(self) -> None:
self._cache: dict[str, list] = {}
dependencies = ('wordnet', 'stopwords', 'punkt')
for dependency in tqdm(
dependencies,
@@ -40,12 +42,12 @@ def remove_stopwords(self, text: str) -> str:
cleaned_text = ' '.join(filtered_words)
return cleaned_text

def santize(self, word: str) -> str:
def sanitize(self, word: str) -> str:
"""
Convert to lowercase and remove any punctuation mark.

Args:
word (str): Word to santize.
word (str): Word to sanitize.

Returns:
str: Sanitized word.
@@ -67,7 +69,7 @@ def find_part_of_speech(self, word: str) -> str:
Returns:
str: Part of speech of the supplied word.
"""
word = self.santize(word=word)
word = self.sanitize(word=word)
synsets = wordnet.synsets(word)

# If the word is not found, return 'n/a'
@@ -89,30 +91,42 @@ def find_part_of_speech(self, word: str) -> str:
case _:
return 'n/a'

def choose_answer(self, paragraph: str) -> str | None:
def choose_answer(
self, paragraph: str, use_cached: bool = True
) -> str | None:
"""
Choose a good candidate for an answer from a paragraph.

Choose a good candidate from `paragraph` based on the following algorithm:
1. Remove stop words.
2. If any unknown word is present, a random unknown word is chosen.
2. If any word with undetermined part of speech (PoS) is present,
a random word with undetermined PoS is chosen.
3. Otherwise, a random noun is chosen.
This operation may be costly, so its results are cached. A custom caching
mechanism is used because `functools.cache` and `functools.lru_cache` should
not be applied to methods, only to plain functions.

Args:
paragraph (str): Source paragraph to choose candidate from.
use_cached (bool): Use cached results if available. Defaults to True.

Returns:
str | None: Either chosen word or `None` if there are no good candidates.
"""
if paragraph in self._cache and use_cached:
if len(self._cache[paragraph]) == 0:
return None
return random.choice(self._cache[paragraph])

entered_paragraph = copy(paragraph)
paragraph = self.remove_stopwords(paragraph)

words = paragraph.split(' ')
words = [self.santize(word) for word in words]
words = [self.sanitize(word) for word in words]
tagged_words = [
(santized_word, self.find_part_of_speech(santized_word))
for santized_word in words
if santized_word
(sanitized_word, self.find_part_of_speech(sanitized_word))
for sanitized_word in words
if sanitized_word
]

if not words:
@@ -124,10 +138,11 @@ def choose_answer(self, paragraph: str) -> str | None:
if unknown_words_present:
return random.choice(words)

return random.choice(
[
word
for word, part_of_speech in tagged_words
if part_of_speech == 'noun'
]
)
available_outputs = [
word
for word, part_of_speech in tagged_words
if part_of_speech == 'noun'
]

self._cache[entered_paragraph] = available_outputs
return random.choice(available_outputs)
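
A short usage sketch of the updated `AnswerChooser` (the paragraph text is made up for illustration): repeated calls with the same paragraph are served from the internal cache, while `use_cached=False` forces the candidates to be recomputed.

```python
from knowledge_verificator.answer_chooser import AnswerChooser

chooser = AnswerChooser()
paragraph = (
    'Photosynthesis is the process by which green plants convert '
    'light energy into chemical energy stored in glucose.'
)

# First call removes stop words, tags parts of speech and caches the candidates.
first = chooser.choose_answer(paragraph)
# Second call reuses the cached candidate list for the same paragraph.
second = chooser.choose_answer(paragraph)
# Bypass the cache and recompute the candidates from scratch.
fresh = chooser.choose_answer(paragraph, use_cached=False)

print(first, second, fresh)  # e.g. nouns chosen at random from the paragraph
```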
137 changes: 137 additions & 0 deletions knowledge_verificator/backend.py
@@ -0,0 +1,137 @@
"""Module with the backend defining available endpoints."""

from typing import Any, Union

from fastapi import FastAPI, Response

from knowledge_verificator.materials import Material, MaterialDatabase
from knowledge_verificator.io_handler import config

endpoints = FastAPI()
material_db = MaterialDatabase(materials_dir=config.learning_materials)


def format_response(data: Any = '', message: str = '') -> dict:
"""
Format a response to a request to a defined JSON format.

The format is as follows:
```json
{
'data': <data>,
'message': <message>
}
```
Args:
data (Any, optional): Requested data. Defaults to ''.
message (str, optional): Description of a result. Especially useful
when something went wrong. Defaults to ''.

Returns:
dict: Dict with keys `data` and `message`. Data contains crucial
information about a requested operation. Message is used to
convey additional information such as a failure description.
"""
return {
'data': data,
'message': message,
}


@endpoints.get('/materials')
def get_materials(
response: Response, criteria: Union[str, None] = None
) -> dict:
"""
Get all learning materials matching criteria.

Args:
response (Response): Instance of response, provided automatically.
criteria (Union[str, None], optional): Criteria, which materials have
to match to be retrieved. Defaults to None.

Returns:
dict: Requested materials with corresponding IDs.
"""
if criteria is not None:
message = 'Applying criteria is not implemented yet.'
response.status_code = 501
return format_response(message=message)
response.status_code = 200
return format_response(data=material_db.materials)


@endpoints.get('/materials/{material_id}')
def get_material(material_id: str, response: Response):
"""
Get a specific learning material.

Args:
material_id (str): ID of a material to retrieve.
response (Response): Instance of response, provided automatically.

Returns:
dict: Under `data` key, there are `material_id` and `material` keys.
"""
try:
material = material_db[material_id]
except KeyError:
message = f'Material with id = {material_id} was not found.'
response.status_code = 404
return format_response(message=message)

data = {'material_id': material_id, 'material': material}
response.status_code = 200
return format_response(data=data)


@endpoints.post('/materials')
def add_material(material: Material, response: Response) -> dict:
"""
Endpoint to add a learning material to a database.

Args:
material (Material): Learning material to be added.
response (Response): Response to a request. Automatically passed.

Returns:
dict: Under 'data' key, there is `material_id` key containing ID
of the newly added material.
"""
response.status_code = 200
message = ''
try:
material_db.add_material(material=material)
except (ValueError, FileExistsError) as e:
message = str(e)
response.status_code = 400

if response.status_code != 200:
return format_response(message=message)

data = {'material_id': material.id}
return format_response(data=data)


@endpoints.delete('/materials/{material_id}')
def delete_material(material_id: str, response: Response) -> dict:
"""
Endpoint to delete a learning material.

Args:
material_id (str): ID of the material to be removed.
response (Response): Response to a request. Automatically passed.

Returns:
dict: Under the `data` key, there is the ID of the removed material.
"""
try:
material_db.delete_material(material=material_id)
except KeyError as e:
message = str(e)
response.status_code = 400
return format_response(message=message)

response.status_code = 200
return format_response(data=str(material_id))
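
A minimal sketch of exercising the new endpoints with FastAPI's `TestClient` (not part of this PR); it assumes the `learning_materials` directory from `config.yaml` exists so that the module-level `MaterialDatabase` can be constructed on import:

```python
# Minimal smoke test of the new endpoints using FastAPI's TestClient.
from fastapi.testclient import TestClient

from knowledge_verificator.backend import endpoints

client = TestClient(endpoints)

# List all learning materials: every response follows the
# {'data': ..., 'message': ...} shape produced by format_response().
response = client.get('/materials')
print(response.status_code)       # 200
print(response.json()['data'])    # the stored materials (possibly empty)

# Asking for a non-existent material yields a 404 with an explanatory message.
response = client.get('/materials/does-not-exist')
print(response.status_code)       # 404
print(response.json()['message']) # e.g. 'Material with id = does-not-exist was not found.'
```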
141 changes: 141 additions & 0 deletions knowledge_verificator/command_line.py
@@ -0,0 +1,141 @@
"""Module with an interactive command-line interface."""

from rich.text import Text

from knowledge_verificator.io_handler import logger, console, config
from knowledge_verificator.answer_chooser import AnswerChooser
from knowledge_verificator.materials import MaterialDatabase
from knowledge_verificator.nli import NaturalLanguageInference, Relation
from knowledge_verificator.qg import QuestionGeneration
from knowledge_verificator.utils.menu import choose_from_menu


def display_feedback(relation: Relation, chosen_answer: str) -> None:
"""
Display feedback to a terminal.

Args:
relation (Relation): Relation between a reference answer and the
answer provided by a user. They may be consistent, contradictory,
or independent claims.
chosen_answer (str): An answer provided by a user.
"""
match relation:
case Relation.ENTAILMENT:
feedback = 'correct'
style = 'green'
case Relation.CONTRADICTION:
feedback = f'wrong. Correct answer is {chosen_answer}'
style = 'red'
case Relation.NEUTRAL:
feedback = 'not directly associated with the posed question'
style = 'yellow'

feedback_text = Text(f'Your answer is {feedback}.', style=style)
console.print(feedback_text)


def run_cli_mode():
"""
Run an interactive command-line interface.

Raises:
ValueError:
"""
qg_module = QuestionGeneration()
ac_module = AnswerChooser()
nli_module = NaturalLanguageInference()

while True:
options = ['knowledge database', 'my own paragraph']
user_choice = choose_from_menu(
menu_elements=options, plural_name='options'
)

match user_choice:
case 'knowledge database':
try:
material_db = MaterialDatabase(config.learning_materials)
except FileNotFoundError:
console.print(
f'There is no database in `{config.learning_materials}`. '
'Try using your own materials.'
)
continue

if not material_db.materials:
console.print(
'The knowledge database exists but is empty. '
'Try using your own materials.'
)
continue

material = choose_from_menu(
material_db.materials,
plural_name='materials',
attribute_to_show='title',
)

if material is None:
continue

available_paragraphs: list[str] = [
_paragraph
for _paragraph in material.paragraphs
if ac_module.choose_answer(_paragraph) is not None
]

paragraph = choose_from_menu(available_paragraphs, 'paragraphs')

if paragraph is None:
continue

paragraph = str(paragraph)
console.print('Learn this paragraph: ')
console.print(paragraph)
console.print()
input('Press ENTER when ready.')

case 'my own paragraph':
console.print('Enter a paragraph you would like to learn: ')
paragraph = input().strip()

case _:
console.print('Unrecognised option, try again!')
continue

logger.debug('Loaded the following paragraph:\n %s', paragraph)

chosen_answer = ac_module.choose_answer(paragraph=paragraph)
if not chosen_answer:
logger.error(
'The supplied paragraph is either too short or too general. '
'Please try providing a longer or more specific paragraph.'
)
continue

console.clear()

logger.debug(
'The `%s` has been chosen as the answer, based on which question '
'will be generated.',
chosen_answer,
)

question_with_context = qg_module.generate(
answer=chosen_answer, context=paragraph
)
question = question_with_context['question']
logger.debug(
'Question Generation module has supplied the question: %s', question
)

console.print(
f'\nAnswer the question with a full sentence. {question} \nYour answer: '
)
user_answer = input().strip()
relation = nli_module.infer_relation(
premise=paragraph, hypothesis=user_answer
)

display_feedback(relation=relation, chosen_answer=chosen_answer)
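
For reference, `display_feedback` can also be called in isolation; a small sketch (the answer string is made up, and importing the module pulls in the rest of the project's dependencies):

```python
from knowledge_verificator.command_line import display_feedback
from knowledge_verificator.nli import Relation

# Prints "Your answer is correct." in green.
display_feedback(relation=Relation.ENTAILMENT, chosen_answer='chlorophyll')

# Prints "Your answer is wrong. Correct answer is chlorophyll." in red.
display_feedback(relation=Relation.CONTRADICTION, chosen_answer='chlorophyll')
```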