diff --git a/poetry.lock b/poetry.lock index 3fa6578..6e86f87 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1943,4 +1943,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "8f44260f1b165512710f0c30616816925349e30148ac0934f2942b46fce2f9e0" +content-hash = "858e7227721f623f56a82f0bb47a9f5a2512eff08a5ff0b826ea164d6d7b7617" diff --git a/pyproject.toml b/pyproject.toml index e0869d6..be53a49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ nltk = "^3.9.1" rich = "^13.8.1" tqdm = "^4.66.5" sentence-transformers = "^3.1.1" +pyyaml = "^6.0.2" [tool.poetry.group.test] diff --git a/tests/model/qg_experiment.py b/tests/model/qg_experiment.py index 05f98cc..71a7aa4 100644 --- a/tests/model/qg_experiment.py +++ b/tests/model/qg_experiment.py @@ -1,5 +1,7 @@ """Module with performance experiments of Question Generation module.""" +from pathlib import Path +import yaml # type: ignore[import-untyped] import numpy as np from sentence_transformers import SentenceTransformer @@ -15,18 +17,11 @@ def measure_qg_performance_with_cosine_similarity() -> Result: """ model = SentenceTransformer('sentence-transformers/all-distilroberta-v1') - test_data = [ - { - 'question': 'What color is the sky during the day?', - 'context': 'During the day, the sky appears blue.', - 'answer': 'blue', - }, - { - 'question': 'What is the function of the frontend in software development?', - 'context': 'In software development, the terms frontend and backend refer to the distinct roles of the user interface (frontend) and the data management layer (backend) of an application. 
In a client-server architecture, the client typically represents the frontend, while the server represents the backend, even if some presentation tasks are handled by the server.', - 'answer': 'presentation layer', - }, - ] + test_data = None + with open( + Path('tests/model/qg_test_data.yaml'), 'rt', encoding='utf-8' + ) as fd: + test_data = yaml.safe_load(fd) qg = QuestionGeneration() metric = Metric.COSINE_SIMILARITY @@ -34,9 +29,10 @@ def measure_qg_performance_with_cosine_similarity() -> Result: data_points: np.ndarray = np.zeros(shape=(len(test_data), 1)) for i, test_item in enumerate(test_data): - suggested_answer = test_item['answer'] - context = test_item['context'] - reference_question = test_item['question'] + item = test_item['item'] + suggested_answer = item['answer'] + context = item['context'] + reference_question = item['question'] generated_question = qg.generate( answer=suggested_answer, context=context diff --git a/tests/model/qg_test_data.yaml b/tests/model/qg_test_data.yaml new file mode 100644 index 0000000..f54bfa2 --- /dev/null +++ b/tests/model/qg_test_data.yaml @@ -0,0 +1,19 @@ +- item: + context: In software development, the terms frontend and backend refer to the distinct roles of the user interface (frontend) and the data management layer (backend) of an application. In a client-server architecture, the client typically represents the frontend, while the server represents the backend, even if some presentation tasks are handled by the server. + question: What is the function of the frontend in software development? + answer: presentation layer + +- item: + context: During the day, the sky appears blue. + question: What color is the sky during the day? + answer: blue + +- item: + context: GNU Recutils is a set of tools and libraries to access human-editable, plain text databases called recfiles. The data is stored as a sequence of records, each record containing an arbitrary number of named fields. 
+ question: What is an advantage of using GNU Recutils? + answer: There is free software to manipulate recfiles and a text editor is enough to edit it. + +- item: + context: To calculate a word embedding vector for the provided sentence, you would typically use a pre-trained word embedding model such as Word2Vec, GloVe, or FastText. These models convert words into numerical vectors based on their semantic meanings and contexts. + question: What is used to convert a sentence to a word embedding vector? + answer: Pre-trained word embedding model, e.g. Word2Vec, GloVe, FastText. diff --git a/tests/model/runner.py b/tests/model/runner.py index 8de6804..423ae0f 100644 --- a/tests/model/runner.py +++ b/tests/model/runner.py @@ -14,6 +14,7 @@ from tqdm import tqdm from knowledge_verificator.utils.filesystem import create_text_file +from knowledge_verificator.io_handler import console class Metric(Enum): @@ -75,6 +76,9 @@ def _collect_experiments(self) -> list[Callable]: if not os.path.isfile(file_path): continue + if not file.endswith('.py'): + continue + experiment_functions.extend( self._collect_functions_from_file(file_path=file_path) ) @@ -106,9 +110,13 @@ def _collect_functions_from_file(self, file_path: Path) -> list[Callable]: spec.loader.exec_module(module) - # Get all functions in the module. + # Keep only experiment functions: those whose names start with 'measure_'. return [ - func for _, func in inspect.getmembers(module, inspect.isfunction) + func + for func_name, func in inspect.getmembers( + module, inspect.isfunction + ) + if func_name.startswith('measure_') ] def run(self) -> None: @@ -123,6 +131,7 @@ def run(self) -> None: unit='experiment', iterable=self._collect_experiments(), ): + console.print(f'Running {experiment.__name__}...') result = experiment() results.append(result)