Implement the basic backend with API #14

Merged · 15 commits · Oct 14, 2024
6 changes: 6 additions & 0 deletions config.yaml
@@ -0,0 +1,6 @@
mode: BACKEND
logging_level: DEBUG
production_mode: false
learning_materials: ./learning_assets
experiment_implementation: ./tests/model
experiment_results: ./tests/model/results
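
The new `config.yaml` provides the settings that the other modules read through `knowledge_verificator.io_handler.config` (see the imports in the files below). As a rough illustration only (the actual loader lives in `io_handler` and is not part of this diff), a file like this could be parsed with PyYAML roughly as follows; the `Config` dataclass and `load_config` helper are hypothetical names:

```python
# Hypothetical sketch of loading config.yaml with PyYAML; the real loader in
# io_handler may differ. The field names mirror the keys shown in config.yaml.
from dataclasses import dataclass
from pathlib import Path

import yaml  # PyYAML


@dataclass
class Config:
    mode: str
    logging_level: str
    production_mode: bool
    learning_materials: Path
    experiment_implementation: Path
    experiment_results: Path


def load_config(path: str = 'config.yaml') -> Config:
    """Read the YAML file and map its keys onto the Config dataclass."""
    with open(path, encoding='utf-8') as handle:
        raw = yaml.safe_load(handle)
    return Config(
        mode=raw['mode'],
        logging_level=raw['logging_level'],
        production_mode=raw['production_mode'],
        learning_materials=Path(raw['learning_materials']),
        experiment_implementation=Path(raw['experiment_implementation']),
        experiment_results=Path(raw['experiment_results']),
    )
```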
47 changes: 31 additions & 16 deletions knowledge_verificator/answer_chooser.py
@@ -1,5 +1,6 @@
"""Module with AnswerChooser, which finds a best candidate for an answer in a paragraph."""

from copy import copy
import random
import nltk # type: ignore[import-untyped]
from nltk.corpus import wordnet # type: ignore[import-untyped]
@@ -12,6 +13,7 @@ class AnswerChooser:
"""

def __init__(self) -> None:
self._cache: dict[str, list] = {}
dependencies = ('wordnet', 'stopwords', 'punkt')
for dependency in tqdm(
dependencies,
@@ -40,12 +42,12 @@ def remove_stopwords(self, text: str) -> str:
cleaned_text = ' '.join(filtered_words)
return cleaned_text

def santize(self, word: str) -> str:
def sanitize(self, word: str) -> str:
"""
Convert to lowercase and remove any punctuation mark.

Args:
word (str): Word to santize.
word (str): Word to sanitize.

Returns:
str: Sanitized word.
@@ -67,7 +69,7 @@ def find_part_of_speech(self, word: str) -> str:
Returns:
str: Part of speech of the supplied word.
"""
word = self.santize(word=word)
word = self.sanitize(word=word)
synsets = wordnet.synsets(word)

# If the word is not found, return 'n/a'
@@ -89,30 +91,42 @@ def find_part_of_speech(self, word: str) -> str:
case _:
return 'n/a'

def choose_answer(self, paragraph: str) -> str | None:
def choose_answer(
self, paragraph: str, use_cached: bool = True
) -> str | None:
"""
Choose a good candidate for an answer from a paragraph.

Choose a good candidate from `paragraph` based on the following algorithm:
1. Remove stop words.
2. If any unknown word is present, a random unknown word is chosen.
2. If any word with undetermined part of speech (PoS) is present,
a random word with undetermined PoS is chosen.
3. Otherwise, a random noun is chosen.
This operation may be costly, so its results are cached. A custom caching
mechanism is used because `functools.cache` and `functools.lru_cache` should
not be applied to methods, only to plain functions.

Args:
paragraph (str): Source paragraph to choose candidate from.
use_cached (bool): Use cached results if available. Defaults to True.

Returns:
str | None: Either chosen word or `None` if there are no good candidates.
"""
if paragraph in self._cache and use_cached:
if len(self._cache[paragraph]) == 0:
return None
return random.choice(self._cache[paragraph])

entered_paragraph = copy(paragraph)
paragraph = self.remove_stopwords(paragraph)

words = paragraph.split(' ')
words = [self.santize(word) for word in words]
words = [self.sanitize(word) for word in words]
tagged_words = [
(santized_word, self.find_part_of_speech(santized_word))
for santized_word in words
if santized_word
(sanitized_word, self.find_part_of_speech(sanitized_word))
for sanitized_word in words
if sanitized_word
]

if not words:
@@ -124,10 +138,11 @@ def choose_answer(self, paragraph: str) -> str | None:
if unknown_words_present:
return random.choice(words)

return random.choice(
[
word
for word, part_of_speech in tagged_words
if part_of_speech == 'noun'
]
)
available_outputs = [
word
for word, part_of_speech in tagged_words
if part_of_speech == 'noun'
]

self._cache[entered_paragraph] = available_outputs
return random.choice(available_outputs)
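
A short usage sketch of the updated `AnswerChooser` (the paragraph text is made up for illustration): repeated calls with the same paragraph are served from the internal cache, while `use_cached=False` forces the candidates to be recomputed.

```python
from knowledge_verificator.answer_chooser import AnswerChooser

chooser = AnswerChooser()
paragraph = (
    'Photosynthesis is the process by which green plants convert '
    'light energy into chemical energy stored in glucose.'
)

# First call removes stop words, tags parts of speech and caches the candidates.
first = chooser.choose_answer(paragraph)
# Second call reuses the cached candidate list for the same paragraph.
second = chooser.choose_answer(paragraph)
# Bypass the cache and recompute the candidates from scratch.
fresh = chooser.choose_answer(paragraph, use_cached=False)

print(first, second, fresh)  # e.g. nouns chosen at random from the paragraph
```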
137 changes: 137 additions & 0 deletions knowledge_verificator/backend.py
@@ -0,0 +1,137 @@
"""Module with the backend defining available endpoints."""

from typing import Any, Union

from fastapi import FastAPI, Response

from knowledge_verificator.materials import Material, MaterialDatabase
from knowledge_verificator.io_handler import config

endpoints = FastAPI()
material_db = MaterialDatabase(materials_dir=config.learning_materials)


def format_response(data: Any = '', message: str = '') -> dict:
"""
Format a response to a request to a defined JSON format.

The format is as follows:
```json
{
'data': <data>,
'message': <message>
}
```
Args:
data (Any, optional): Requested data. Defaults to ''.
message (str, optional): Description of a result. Especially useful
when something went wrong. Defaults to ''.

Returns:
dict: Dict with keys `data` and `message`. Data contains crucial
information about a requested operation. Message is used to
convey additional information such as a failure description.
"""
return {
'data': data,
'message': message,
}


@endpoints.get('/materials')
def get_materials(
response: Response, criteria: Union[str, None] = None
) -> dict:
"""
Get all learning materials matching criteria.

Args:
response (Response): Instance of response, provided automatically.
criteria (Union[str, None], optional): Criteria, which materials have
to match to be retrieved. Defaults to None.

Returns:
dict: Requested materials with corresponding IDs.
"""
if criteria is not None:
message = 'Applying criteria is not implemented yet.'
response.status_code = 501
return format_response(message=message)
response.status_code = 200
return format_response(data=material_db.materials)


@endpoints.get('/materials/{material_id}')
def get_material(material_id: str, response: Response):
"""
Get a specific learning material.

Args:
material_id (str): ID of a material to retrieve.
response (Response): Instance of response, provided automatically.

Returns:
dict: Under `data` key, there are `material_id` and `material` keys.
"""
try:
material = material_db[material_id]
except KeyError:
message = f'Material with id = {material_id} was not found.'
response.status_code = 404
return format_response(message=message)

data = {'material_id': material_id, 'material': material}
response.status_code = 200
return format_response(data=data)


@endpoints.post('/materials')
def add_material(material: Material, response: Response) -> dict:
"""
Endpoint to add a learning material to a database.

Args:
material (Material): Learning material to be added.
response (Response): Response to a request. Automatically passed.

Returns:
dict: Under 'data' key, there is `material_id` key containing ID
of the newly added material.
"""
response.status_code = 200
message = ''
try:
material_db.add_material(material=material)
except (ValueError, FileExistsError) as e:
message = str(e)
response.status_code = 400

if response.status_code != 200:
return format_response(message=message)

data = {'material_id': material.id}
return format_response(data=data)


@endpoints.delete('/materials/{material_id}')
def delete_material(material_id: str, response: Response) -> dict:
"""
Endpoint to delete a learning material.

Args:
material_id (str): ID of the material to be removed.
response (Response): Response to a request. Automatically passed.

Returns:
dict: Under the `data` key, there is the ID of the removed material.
"""
try:
material_db.delete_material(material=material_id)
except KeyError as e:
message = str(e)
response.status_code = 400
return format_response(message=message)

response.status_code = 200
return format_response(data=str(material_id))
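
A minimal sketch of exercising the new endpoints with FastAPI's `TestClient` (not part of this PR); it assumes the `learning_materials` directory from `config.yaml` exists so that the module-level `MaterialDatabase` can be constructed on import:

```python
# Minimal smoke test of the new endpoints using FastAPI's TestClient.
from fastapi.testclient import TestClient

from knowledge_verificator.backend import endpoints

client = TestClient(endpoints)

# List all learning materials: every response follows the
# {'data': ..., 'message': ...} shape produced by format_response().
response = client.get('/materials')
print(response.status_code)       # 200
print(response.json()['data'])    # the stored materials (possibly empty)

# Asking for a non-existent material yields a 404 with an explanatory message.
response = client.get('/materials/does-not-exist')
print(response.status_code)       # 404
print(response.json()['message']) # e.g. 'Material with id = does-not-exist was not found.'
```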
141 changes: 141 additions & 0 deletions knowledge_verificator/command_line.py
@@ -0,0 +1,141 @@
"""Module with an interactive command-line interface."""

from rich.text import Text

from knowledge_verificator.io_handler import logger, console, config
from knowledge_verificator.answer_chooser import AnswerChooser
from knowledge_verificator.materials import MaterialDatabase
from knowledge_verificator.nli import NaturalLanguageInference, Relation
from knowledge_verificator.qg import QuestionGeneration
from knowledge_verificator.utils.menu import choose_from_menu


def display_feedback(relation: Relation, chosen_answer: str) -> None:
"""
Display feedback to a terminal.

Args:
relation (Relation): Relation between a reference answer and the
answer provided by a user. They may be consistent, contradictory,
or independent claims.
chosen_answer (str): An answer provided by a user.
"""
match relation:
case Relation.ENTAILMENT:
feedback = 'correct'
style = 'green'
case Relation.CONTRADICTION:
feedback = f'wrong. Correct answer is {chosen_answer}'
style = 'red'
case Relation.NEUTRAL:
feedback = 'not directly associated with the posed question'
style = 'yellow'

feedback_text = Text(f'Your answer is {feedback}.', style=style)
console.print(feedback_text)


def run_cli_mode():
"""
Run an interactive command-line interface.

Raises:
ValueError:
"""
qg_module = QuestionGeneration()
ac_module = AnswerChooser()
nli_module = NaturalLanguageInference()

while True:
options = ['knowledge database', 'my own paragraph']
user_choice = choose_from_menu(
menu_elements=options, plural_name='options'
)

match user_choice:
case 'knowledge database':
try:
material_db = MaterialDatabase(config.learning_materials)
except FileNotFoundError:
console.print(
f'There is no database in `{config.learning_materials}`. '
'Try using your own materials.'
)
continue

if not material_db.materials:
console.print(
'The knowledge database exists but is empty. '
'Try using your own materials.'
)
continue

material = choose_from_menu(
material_db.materials,
plural_name='materials',
attribute_to_show='title',
)

if material is None:
continue

available_paragraphs: list[str] = [
_paragraph
for _paragraph in material.paragraphs
if ac_module.choose_answer(_paragraph) is not None
]

paragraph = choose_from_menu(available_paragraphs, 'paragraphs')

if paragraph is None:
continue

paragraph = str(paragraph)
console.print('Learn this paragraph: ')
console.print(paragraph)
console.print()
input('Press ENTER when ready.')

case 'my own paragraph':
console.print('Enter a paragraph you would like to learn: ')
paragraph = input().strip()

case _:
console.print('Unrecognised option, try again!')
continue

logger.debug('Loaded the following paragraph:\n %s', paragraph)

chosen_answer = ac_module.choose_answer(paragraph=paragraph)
if not chosen_answer:
logger.error(
'The supplied paragraph is either too short or too general. '
'Please try providing a longer or more specific paragraph.'
)
continue

console.clear()

logger.debug(
'The `%s` has been chosen as the answer, based on which question '
'will be generated.',
chosen_answer,
)

question_with_context = qg_module.generate(
answer=chosen_answer, context=paragraph
)
question = question_with_context['question']
logger.debug(
'Question Generation module has supplied the question: %s', question
)

console.print(
f'\nAnswer the question with a full sentence. {question} \nYour answer: '
)
user_answer = input().strip()
relation = nli_module.infer_relation(
premise=paragraph, hypothesis=user_answer
)

display_feedback(relation=relation, chosen_answer=chosen_answer)
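
For reference, `display_feedback` can also be called in isolation; a small sketch (the answer string is made up, and importing the module pulls in the rest of the project's dependencies):

```python
from knowledge_verificator.command_line import display_feedback
from knowledge_verificator.nli import Relation

# Prints "Your answer is correct." in green.
display_feedback(relation=Relation.ENTAILMENT, chosen_answer='chlorophyll')

# Prints "Your answer is wrong. Correct answer is chlorophyll." in red.
display_feedback(relation=Relation.CONTRADICTION, chosen_answer='chlorophyll')
```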