diff --git a/data/programming/python.txt b/data/programming/python.txt
new file mode 100644
index 0000000..49c526a
--- /dev/null
+++ b/data/programming/python.txt
@@ -0,0 +1,9 @@
+Programming in Python
+---
+python, programming language
+---
+Python programming language is used for system scripting, back-end development for the web and Machine Learning applications.
+
+Python has a beginner-friendly syntax but is tough to master. It was created by a Dutch programmer in 90'.
+
+Python is known for its simple `Hello, world!`, which has exactly one line `print('Hello, world!')`.
diff --git a/knowledge_verificator/main.py b/knowledge_verificator/main.py
index d5b9f6e..0892f67 100755
--- a/knowledge_verificator/main.py
+++ b/knowledge_verificator/main.py
@@ -4,58 +4,119 @@
 
 from knowledge_verificator.io_handler import logger, console
 from knowledge_verificator.answer_chooser import AnswerChooser
+from knowledge_verificator.materials import MaterialDatabase
 from knowledge_verificator.nli import NaturalLanguageInference, Relation
 from knowledge_verificator.qg import QuestionGeneration
+from knowledge_verificator.utils.menu import choose_from_menu
 
 
 if __name__ == '__main__':
-    chooser = AnswerChooser()
     qg_module = QuestionGeneration()
+    chooser = AnswerChooser()
+
+    while True:
+        console.print('Where you want to learn from?')
+        console.print('[1] knowledge database')
+        console.print('[2] my own paragraph')
+        user_choice = input('Your choice: ')
+        console.print()
+
+        match user_choice:
+            case '1':
+                DB_PATH = './data'
+                try:
+                    material_db = MaterialDatabase(DB_PATH)
+                except FileNotFoundError:
+                    console.print(
+                        f'In the `{DB_PATH}` there is no database. '
+                        'Try using your own materials.'
+                    )
+                    continue
+
+                if not material_db.materials:
+                    console.print(
+                        'The knowledge database exists but is empty. '
+                        'Try using your own materials.'
+                    )
+                    continue
+
+                material = choose_from_menu(
+                    material_db.materials,
+                    plural_name='materials',
+                    attribute_to_show='title',
+                )
+
+                if material is None:
+                    continue
+
+                # Keep the raw return value: wrapping it in `str()` would
+                # turn `None` into 'None' and defeat the check below.
+                PARAGRAPH = choose_from_menu(
+                    material.paragraphs, 'paragraphs'
+                )
+
+                if PARAGRAPH is None:
+                    continue
+
+                console.print('Learn this paragraph: ')
+                console.print(PARAGRAPH)
+                console.print()
+                input('Press ENTER when ready.')
+
+            case '2':
+                console.print('Enter a paragraph you would like to learn: ')
+                PARAGRAPH = input().strip()
 
-    console.print('Enter a paragraph you would like to learn: ')
-    paragraph = input().strip()
-    logger.debug('Loaded the following paragraph:\n %s', paragraph)
+            case _:
+                console.print('Unrecognised option, try again!')
+                # Ask again; otherwise PARAGRAPH would be unset or stale.
+                continue
 
-    chosen_answer = chooser.choose_answer(paragraph=paragraph)
-    if not chosen_answer:
-        raise ValueError(
-            'The supplied paragaph is either too short or too general. '
-            'Please, try providing a longer or more specific paragraph.'
+        logger.debug('Loaded the following paragraph:\n %s', PARAGRAPH)
+
+        chosen_answer = chooser.choose_answer(paragraph=PARAGRAPH)
+        if not chosen_answer:
+            raise ValueError(
+                'The supplied paragaph is either too short or too general. '
+                'Please, try providing a longer or more specific paragraph.'
+            )
+
+        console.clear()
+
+        logger.debug(
+            'The `%s` has been chosen as the answer, based on which question '
+            'will be generated.',
+            chosen_answer,
+        )
+
+        question_with_context = qg_module.generate(
+            answer=chosen_answer, context=PARAGRAPH
+        )
+        question = question_with_context['question']
+        logger.debug(
+            'Question Generation module has supplied the question: %s', question
         )
 
-    logger.debug(
-        'The `%s` has been chosen as the answer, based on which the question will be generated.',
-        chosen_answer,
-    )
-
-    question_with_context = qg_module.generate(
-        answer=chosen_answer, context=paragraph
-    )
-    question = question_with_context['question']
-    logger.debug(
-        'Question Generation module has supplied the question: %s', question
-    )
-
-    console.print(
-        f'\nAnswer the question with full sentence. {question} \nYour answer: '
-    )
-    user_answer = input().strip()
-
-    nli_module = NaturalLanguageInference()
-    relation = nli_module.infer_relation(
-        premise=paragraph, hypothesis=user_answer
-    )
-
-    match relation:
-        case Relation.ENTAILMENT:
-            FEEDBACK = 'correct'
-            STYLE = 'green'
-        case Relation.CONTRADICTION:
-            FEEDBACK = f'wrong. Correct answer is {chosen_answer}'
-            STYLE = 'red'
-        case Relation.NEUTRAL:
-            FEEDBACK = 'not directly associated with the posed question'
-            STYLE = 'yellow'
-
-    feedback_text = Text(f'Your answer is {FEEDBACK}.', style=STYLE)
-    console.print(feedback_text)
+        console.print(
+            f'\nAnswer the question with full sentence. {question} \nYour answer: '
+        )
+        user_answer = input().strip()
+
+        nli_module = NaturalLanguageInference()
+        relation = nli_module.infer_relation(
+            premise=PARAGRAPH, hypothesis=user_answer
+        )
+
+        match relation:
+            case Relation.ENTAILMENT:
+                FEEDBACK = 'correct'
+                STYLE = 'green'
+            case Relation.CONTRADICTION:
+                FEEDBACK = f'wrong. Correct answer is {chosen_answer}'
+                STYLE = 'red'
+            case Relation.NEUTRAL:
+                FEEDBACK = 'not directly associated with the posed question'
+                STYLE = 'yellow'
+
+        feedback_text = Text(f'Your answer is {FEEDBACK}.', style=STYLE)
+        console.print(feedback_text)
diff --git a/knowledge_verificator/materials.py b/knowledge_verificator/materials.py
new file mode 100644
index 0000000..6bdf274
--- /dev/null
+++ b/knowledge_verificator/materials.py
@@ -0,0 +1,152 @@
+"""Module with tools for managing learning material."""
+
+from dataclasses import dataclass
+import os
+from pathlib import Path
+
+from knowledge_verificator.utils.filesystem import in_directory
+
+
+@dataclass
+class Material:
+    """
+    Data class representing a learning material loaded from a database.
+    """
+
+    path: Path
+    title: str
+    paragraphs: list[str]
+    tags: list[str]
+
+
+class MaterialDatabase:
+    """Class managing a database with learning materials."""
+
+    def __init__(self, materials_dir: Path | str) -> None:
+        """
+        Load all learning materials from `materials_dir` directory
+        into an internal storage.
+
+        Args:
+            materials_dir (Path | str): Path to directory with learning materials.
+
+        Raises:
+            FileNotFoundError: Raised if supplied path to a directory does not exist.
+        """
+        if isinstance(materials_dir, str):
+            materials_dir = Path(materials_dir)
+
+        self.materials_dir = materials_dir.resolve()
+        if not self.materials_dir.exists():
+            raise FileNotFoundError(
+                f'There is no directory under `{self.materials_dir}`.'
+            )
+
+        self.materials: list[Material] = []
+        for directory_path, _, filenames in self.materials_dir.walk():
+            for filename in filenames:
+                path = Path(directory_path).joinpath(filename)
+                material = self.load_material(path)
+                self.materials.append(material)
+
+    def load_material(self, path: Path) -> Material:
+        """
+        Load a learning material from a file.
+
+        The file is expected to contain a title line, a separator line,
+        a line of comma-separated tags, another separator line and then
+        paragraphs separated by blank lines.
+
+        Args:
+            path (Path): Path to a learning material.
+
+        Returns:
+            Material: Learning material loaded from the file.
+        """
+        resolved_path = path.resolve()
+        with open(resolved_path, 'rt', encoding='utf-8') as fd:
+            title = fd.readline().rstrip()
+            fd.readline()  # Skip the separator below the title.
+            tags_line = fd.readline()
+            fd.readline()  # Skip the separator below the tags.
+            content = fd.read()
+
+        # Drop empty entries so that a blank tags line or an empty body
+        # does not produce `['']`.
+        tags = [tag.strip() for tag in tags_line.split(',') if tag.strip()]
+        paragraphs = [
+            paragraph.strip()
+            for paragraph in content.split('\n\n')
+            if paragraph.strip()
+        ]
+
+        return Material(
+            path=resolved_path,
+            title=title,
+            paragraphs=paragraphs,
+            tags=tags,
+        )
+
+    def add_material(self, material: Material) -> None:
+        """
+        Add a learning material to a database and create its
+        representation in a file.
+
+        Args:
+            material (Material): Initialised learning material without
+                existing file representation.
+
+        Raises:
+            ValueError: Raised if title of a learning material is empty.
+            FileExistsError: Raised if learning material in a supplied
+                path already exists.
+            ValueError: Raised if a supplied path is outside the
+                directory for learning materials.
+        """
+        if not material.title:
+            raise ValueError('Title of a learning material cannot be empty.')
+        if material.path.exists():
+            raise FileExistsError(
+                'A file in the provided path already exists. '
+                'Choose a different filename.'
+            )
+        if not in_directory(file=material.path, directory=self.materials_dir):
+            raise ValueError(
+                f'A file {os.path.basename(material.path)}'
+                f' has to be in {self.materials_dir}'
+            )
+        self._create_file_with_material(material=material)
+        self.materials.append(material)
+
+    def _format_file_content(self, material: Material) -> str:
+        """
+        Serialise a learning material into the text format that
+        `load_material` reads.
+
+        Args:
+            material (Material): Learning material to serialise.
+
+        Returns:
+            str: Title, tags and paragraphs separated with `---` lines.
+        """
+        output = ''
+        # Format a title.
+        output += material.title
+        output += '\n---\n'
+
+        # Format tags. The trailing separator is required because
+        # `load_material` skips one line between tags and content.
+        tags_line = ', '.join(material.tags)
+        output += tags_line + '\n---\n'
+
+        # Format content.
+        content_lines = '\n\n'.join(material.paragraphs)
+        output += content_lines + '\n\n'
+
+        return output
+
+    def _create_file_with_material(self, material: Material) -> None:
+        """Write a learning material to the file under `material.path`."""
+        with open(material.path, 'wt', encoding='utf-8') as fd:
+            file_content = self._format_file_content(material=material)
+            fd.write(file_content)
diff --git a/knowledge_verificator/utils/filesystem.py b/knowledge_verificator/utils/filesystem.py
new file mode 100644
index 0000000..17eee76
--- /dev/null
+++ b/knowledge_verificator/utils/filesystem.py
@@ -0,0 +1,27 @@
+"""Module with filesystem utility functions."""
+
+from pathlib import Path
+
+
+def in_directory(file: Path, directory: Path) -> bool:
+    """
+    Determine if a file is located in the supplied directory
+    or one of its subdirectories.
+
+    Neither path has to exist. The directory itself is not considered
+    to be located in the directory.
+
+    Args:
+        file (Path): Path to a file.
+        directory (Path): Path to a directory.
+
+    Returns:
+        bool: Present in a directory or subdirectories (True) or not (False).
+    """
+    resolved_file = file.resolve()
+    resolved_directory = directory.resolve()
+    if resolved_file == resolved_directory:
+        return False
+    # Compare whole path components: a plain substring test would accept
+    # `/data-old/file` as being inside `/data`.
+    return resolved_file.is_relative_to(resolved_directory)
diff --git a/knowledge_verificator/utils/menu.py b/knowledge_verificator/utils/menu.py
new file mode 100644
index 0000000..05d6f6f
--- /dev/null
+++ b/knowledge_verificator/utils/menu.py
@@ -0,0 +1,55 @@
+"""Module with utilities supporting creation of CLI menus."""
+
+from typing import Any
+from knowledge_verificator.io_handler import console
+from knowledge_verificator.utils.string import clip_text
+
+
+def choose_from_menu(
+    menu_elements: list[Any],
+    plural_name: str,
+    attribute_to_show: str = '',
+    max_line_width: int = 40,
+) -> Any | None:
+    """
+    Prompt a user to choose an element from a list via terminal.
+
+    Args:
+        menu_elements (list[Any]): List of elements to choose from.
+            Elements should be convertible to `str` (implement `__str__` method).
+        plural_name (str): Plural name of the elements. For example: options, paragraphs or names.
+        attribute_to_show (str): Attribute, which should be shown. If empty, print an entire object.
+        max_line_width (int): Maximum line width in number of columns. By default: 40.
+
+    Returns:
+        Any | None: Element of a list or None if a user provided incorrect value via a terminal.
+    """
+    console.print(f'Available {plural_name}:')
+    for number, element in enumerate(menu_elements, start=1):
+        if attribute_to_show:
+            option_name = getattr(element, attribute_to_show)
+        else:
+            option_name = element
+        # `clip_text` slices its argument, so convert non-string elements.
+        option_text = clip_text(str(option_name), max_line_width)
+        console.print(f'[{number}] {option_text}')
+    material_choice = input('Your choice: ')
+    console.print()
+
+    incorrect_choice_warning = (
+        'This is incorrect choice. Next time, provide a number '
+        'next to a element from the list of available ones.'
+    )
+    # `str.isnumeric` accepts characters such as `²` that `int` rejects,
+    # so let `int` itself decide whether the input is a number.
+    try:
+        chosen_index = int(material_choice) - 1
+    except ValueError:
+        console.print(incorrect_choice_warning)
+        return None
+
+    if chosen_index < 0 or chosen_index >= len(menu_elements):
+        console.print(incorrect_choice_warning)
+        return None
+
+    return menu_elements[chosen_index]
diff --git a/knowledge_verificator/utils/string.py b/knowledge_verificator/utils/string.py
new file mode 100644
index 0000000..04d07b2
--- /dev/null
+++ b/knowledge_verificator/utils/string.py
@@ -0,0 +1,29 @@
+"""Module with string-related utility function."""
+
+
+def clip_text(text: str, max_length: int) -> str:
+    """
+    Clip `text` if its length exceeds `max_length`.
+
+    If the text was clipped, it has three dots `...` at the end. Otherwise,
+    the text is returned unchanged.
+
+    Args:
+        text (str): Text to clip.
+        max_length (int): Maximum allowed length. It will not be exceeded.
+
+    Returns:
+        str: Clipped text.
+
+    Raises:
+        ValueError: Raised if `max_length` is lower than 4.
+    """
+    if max_length < 4:
+        raise ValueError(
+            'Minimal reasonable value of `max_length` is 4 (one character '
+            f'and three dots). Supplied value of {max_length}.'
+        )
+    text_length = len(text)
+    if text_length > max_length:
+        return text[: max_length - 3] + '...'
+    return text
diff --git a/tests/test_filesystem_utils.py b/tests/test_filesystem_utils.py
new file mode 100644
index 0000000..38892c9
--- /dev/null
+++ b/tests/test_filesystem_utils.py
@@ -0,0 +1,31 @@
+"""Module with tests for filesystem utils."""
+
+from pathlib import Path
+import pytest
+
+from knowledge_verificator.utils.filesystem import in_directory
+
+
+@pytest.mark.parametrize(
+    'directory, file, exists_there',
+    (
+        ('knowledge_verificator', 'knowledge_verificator/main.py', True),
+        ('knowledge_verificator', 'tests/test_filesystem_utils.py', False),
+        (
+            'knowledge_verificator',
+            'knowledge_verificator/utils/filesystem.py',
+            True,
+        ),
+        ('knowledge_verificator', 'knowledge_verificator', False),
+        ('knowledge_verificator', 'knowledge_verificator/missing.py', True),
+        ('knowledge_verificator', 'knowledge_verificator_old/main.py', False),
+    ),
+)
+def test_in_directory(file: str, directory: str, exists_there: bool):
+    """
+    Test if a function determining if a file is located inside a directory
+    or one of its subdirectories works properly.
+    """
+    assert (
+        in_directory(file=Path(file), directory=Path(directory)) == exists_there
+    )
diff --git a/tests/test_qg.py b/tests/test_qg.py
index 29898cc..ea9a45a 100644
--- a/tests/test_qg.py
+++ b/tests/test_qg.py
@@ -8,7 +8,10 @@
 
 @pytest.fixture
 def qg():
-    """Provide non-deterministically initialized instance of the `QuestionGeneration` class."""
+    """
+    Provide non-deterministically initialized instance of
+    the `QuestionGeneration` class.
+    """
     set_seed(0)
     question_generation = QuestionGeneration()
     return question_generation
@@ -36,3 +39,5 @@ def test_basic_question_generation(
     }
 
     assert output == expected
+
+    # https://huggingface.co/sentence-transformers/all-distilroberta-v1