Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement the basic backend with API #14

Merged
merged 15 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions knowledge_verificator/answer_chooser.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ def remove_stopwords(self, text: str) -> str:
cleaned_text = ' '.join(filtered_words)
return cleaned_text

def santize(self, word: str) -> str:
def sanitize(self, word: str) -> str:
"""
Convert to lowercase and remove any punctuation mark.

Args:
word (str): Word to santize.
word (str): Word to sanitize.

Returns:
str: Sanitized word.
Expand All @@ -67,7 +67,7 @@ def find_part_of_speech(self, word: str) -> str:
Returns:
str: Part of speech of the supplied word.
"""
word = self.santize(word=word)
word = self.sanitize(word=word)
synsets = wordnet.synsets(word)

# If the word is not found, return 'n/a'
Expand Down Expand Up @@ -95,7 +95,8 @@ def choose_answer(self, paragraph: str) -> str | None:

Choose a good candidate from `paragraph` based on the following algorithm:
1. Remove stop words.
2. If any unknown word is present, a random unknown word is chosen.
2. If any word with undetermined part of speech (PoS) is present,
a random word with undetermined PoS is chosen.
3. Otherwise, a random noun is chosen.

Args:
Expand All @@ -108,11 +109,11 @@ def choose_answer(self, paragraph: str) -> str | None:
paragraph = self.remove_stopwords(paragraph)

words = paragraph.split(' ')
words = [self.santize(word) for word in words]
words = [self.sanitize(word) for word in words]
tagged_words = [
(santized_word, self.find_part_of_speech(santized_word))
for santized_word in words
if santized_word
(sanitized_word, self.find_part_of_speech(sanitized_word))
for sanitized_word in words
if sanitized_word
]

if not words:
Expand Down
127 changes: 104 additions & 23 deletions knowledge_verificator/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,128 @@

from typing import Any, Union

from fastapi import FastAPI
from fastapi import FastAPI, Response

from knowledge_verificator.materials import Material, MaterialDatabase
from knowledge_verificator.io_handler import config

endpoints = FastAPI()
material_db = MaterialDatabase(materials_dir=config.learning_materials)


def format_response(data: Any = '', message: str = '') -> dict:
    """
    Format a response to a request to a defined JSON format.

    The format looks in the following way:
    ```json
    {
        "data": <data>,
        "message": <message>
    }
    ```

    Args:
        data (Any, optional): Requested data. Defaults to ''.
        message (str, optional): Description of a result. Especially useful
            when something went wrong. Defaults to ''.

    Returns:
        dict: Dictionary with exactly two keys, `data` and `message`,
            holding the supplied values unchanged.
    """
    return {
        'data': data,
        'message': message,
    }


@endpoints.get('/materials')
def get_materials(
    response: Response, criteria: Union[str, None] = None
) -> dict:
    """
    Get all learning materials matching criteria.

    Args:
        response (Response): Instance of response, provided automatically.
        criteria (Union[str, None], optional): Criteria, which materials have
            to match to be retrieved. Filtering is not implemented yet, so
            any value other than None is rejected. Defaults to None.

    Returns:
        dict: Under `data` key, there are all materials stored in
            the database. If `criteria` is supplied, `data` is empty,
            `message` explains why and the status code is 501.
    """
    if criteria is not None:
        # Report the missing feature as "501 Not Implemented" with
        # a readable message rather than letting an exception escape
        # and surface as an opaque internal server error.
        response.status_code = 501
        return format_response(
            message='Applying criteria is not implemented yet.'
        )

    response.status_code = 200
    return format_response(data=material_db.materials)


@endpoints.get('/materials/{material_id}')
def get_material(material_id: str, response: Response):
    """
    Get a specific learning material.

    Args:
        material_id (str): ID of a material to retrieve.
        response (Response): Instance of response, provided automatically.

    Returns:
        dict: Under `data` key, there are `material_id` and `material` keys.
            If no material has the supplied ID, `data` is empty,
            `message` describes the problem and the status code is 404.
    """
    try:
        material = material_db[material_id]
    except KeyError:
        # The database signals an unknown ID with KeyError.
        response.status_code = 404
        return format_response(
            message=f'Material with id = {material_id} was not found.'
        )

    response.status_code = 200
    return format_response(
        data={'material_id': material_id, 'material': material}
    )


@endpoints.post('/materials')
def add_material(material: Material, response: Response) -> dict:
    """
    Endpoint to add a learning material to a database.

    Args:
        material (Material): Learning material to be added.
        response (Response): Response to a request. Automatically passed.

    Returns:
        dict: Under 'data' key, there is `material_id` key containing ID
            of the newly added material. On failure `data` is empty and
            `message` carries the text of the raised error; the status
            code is 400 for ValueError and 403 for FileExistsError.
    """
    try:
        material_db.add_material(material=material)
    except ValueError as error:
        response.status_code = 400
        return format_response(message=str(error))
    except FileExistsError as error:
        response.status_code = 403
        return format_response(message=str(error))

    response.status_code = 200
    return format_response(data={'material_id': material.id})


@endpoints.delete('/materials/{material_id}')
def delete_material(material_id: str, response: Response) -> dict:
    """
    Endpoint to delete a learning material.

    Args:
        material_id (str): ID of the material to be removed.
        response (Response): Response to a request. Automatically passed.

    Returns:
        dict: Under `data` key, there is ID of the removed material
            (as a string). If no material has the supplied ID, `data`
            is empty, `message` describes the problem and the status
            code is 404.
    """
    try:
        material_db.delete_material(material=material_id)
    except KeyError:
        # An unknown ID is a client error; answer the same way as
        # the single-material lookup endpoint does instead of letting
        # the exception turn into an internal server error.
        response.status_code = 404
        return format_response(
            message=f'Material with id = {material_id} was not found.'
        )

    response.status_code = 200
    return format_response(data=str(material_id))
Iamhexi marked this conversation as resolved.
Show resolved Hide resolved
14 changes: 10 additions & 4 deletions knowledge_verificator/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def run_cli_mode():
ValueError:
"""
qg_module = QuestionGeneration()
chooser = AnswerChooser()
ac_module = AnswerChooser()
nli_module = NaturalLanguageInference()

while True:
Expand Down Expand Up @@ -79,11 +79,17 @@ def run_cli_mode():
if material is None:
continue

available_paragraphs: list[str] = [
_paragraph
for _paragraph in material.paragraphs
if ac_module.choose_answer(_paragraph) is not None
]

Iamhexi marked this conversation as resolved.
Show resolved Hide resolved
paragraph = str(
choose_from_menu(material.paragraphs, 'paragraphs')
choose_from_menu(available_paragraphs, 'paragraphs')
)

if paragraph is None:
if paragraph == 'None':
Iamhexi marked this conversation as resolved.
Show resolved Hide resolved
continue

console.print('Learn this paragraph: ')
Expand All @@ -100,7 +106,7 @@ def run_cli_mode():

logger.debug('Loaded the following paragraph:\n %s', paragraph)

chosen_answer = chooser.choose_answer(paragraph=paragraph)
chosen_answer = ac_module.choose_answer(paragraph=paragraph)
if not chosen_answer:
logger.error(
Iamhexi marked this conversation as resolved.
Show resolved Hide resolved
'The supplied paragraph is either too short or too general. '
Expand Down
5 changes: 3 additions & 2 deletions knowledge_verificator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
MODE,
'knowledge_verificator/backend.py',
]
process = subprocess.Popen(
with subprocess.Popen(
args=args,
)
):
pass
47 changes: 45 additions & 2 deletions knowledge_verificator/materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from dataclasses import dataclass
import os
from pathlib import Path
import uuid

from knowledge_verificator.utils.filesystem import in_directory

Expand All @@ -17,6 +18,7 @@ class Material:
title: str
paragraphs: list[str]
tags: list[str]
id: str = str(uuid.uuid4())


class MaterialDatabase:
Expand Down Expand Up @@ -49,6 +51,14 @@ def __init__(self, materials_dir: Path | str) -> None:
material = self.load_material(path)
self.materials.append(material)

def __getitem__(self, material_id: str) -> Material:
    """
    Return the first stored material whose `id` equals `material_id`.

    Args:
        material_id (str): ID of the material to look up.

    Raises:
        KeyError: Raised if no stored material has the supplied ID.

    Returns:
        Material: The matching material.
    """
    found = next(
        (item for item in self.materials if item.id == material_id),
        None,
    )
    if found is None:
        raise KeyError(
            f'No material with id = {material_id} in the materials database.'
        )
    return found

def load_material(self, path: Path) -> Material:
"""
Load a learning material from a file.
Expand Down Expand Up @@ -76,6 +86,32 @@ def load_material(self, path: Path) -> Material:
tags=tags,
)

def delete_material(self, material: Material | str) -> None:
    """
    Remove the first material matching the provided material or `id`.

    As `id` is intended to be a universally unique identifier, at most
    one stored item is expected to match it.

    Args:
        material (Material | str): Either the material to remove or
            the `id` of the material to remove.

    Raises:
        KeyError: Raised if `material` is an `id` and no stored
            material has it.
        ValueError: Raised if `material` is a material instance that
            is not stored in the database.
    """
    target = material
    if isinstance(material, str):
        matching_materials = [
            _material
            for _material in self.materials
            if _material.id == material
        ]
        if not matching_materials:
            raise KeyError(f'There are no materials with id = {material}.')
        target = matching_materials[0]

    # `list.remove` drops the first equal element and raises ValueError
    # when there is none, which is what index lookup + `del` did.
    self.materials.remove(target)

def add_material(self, material: Material) -> None:
"""
Add a learning material to a database, also material's its
Expand All @@ -89,8 +125,9 @@ def add_material(self, material: Material) -> None:
ValueError: Raised if title of a learning material is empty.
FileExistsError: Raised if learning material in a supplied
path already exists.
ValueError: Raised if a supplied path path is outside the
directory for learning materials.
ValueError: Raised if a supplied path is outside the
Iamhexi marked this conversation as resolved.
Show resolved Hide resolved
directory for learning materials. Prevents path
traversal.
"""
if not material.title:
raise ValueError('Title of a learning material cannot be empty.')
Expand All @@ -104,6 +141,12 @@ def add_material(self, material: Material) -> None:
f'A file {os.path.basename(material.path)}'
f' has to be in {self.materials_dir}'
)

if self.materials.count(material) > 0:
Iamhexi marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
f'The provided material already exists. Material: {material}.'
)

self._create_file_with_material(material=material)
self.materials.append(material)

Expand Down
Loading