feat(model, pyproject): develop reading test data from yaml

Iamhexi · Oct 6, 2024 · a6b153e · a6b153e
1 parent 36dcd3d
commit a6b153e
Show file tree

Hide file tree

Showing 5 changed files with 43 additions and 18 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -15,6 +15,7 @@ nltk = "^3.9.1"
 rich = "^13.8.1"
 tqdm = "^4.66.5"
 sentence-transformers = "^3.1.1"
+pyyaml = "^6.0.2"
 
 [tool.poetry.group.test]
 

diff --git a/tests/model/qg_experiment.py b/tests/model/qg_experiment.py
@@ -1,5 +1,7 @@
 """Module with performance experiments of Question Generation module."""
 
+from pathlib import Path
+import yaml  # type: ignore[import-untyped]
 import numpy as np
 from sentence_transformers import SentenceTransformer
 
@@ -15,28 +17,22 @@ def measure_qg_performance_with_cosine_similarity() -> Result:
     """
     model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')
 
-    test_data = [
-        {
-            'question': 'What color is the sky during the day?',
-            'context': 'During the day, the sky appears blue.',
-            'answer': 'blue',
-        },
-        {
-            'question': 'What is the function of the frontend in software development?',
-            'context': 'In software development, the terms frontend and backend refer to the distinct roles of the user interface (frontend) and the data management layer (backend) of an application. In a client-server architecture, the client typically represents the frontend, while the server represents the backend, even if some presentation tasks are handled by the server.',
-            'answer': 'presentation layer',
-        },
-    ]
+    test_data = None
+    with open(
+        Path('tests/model/qg_test_data.yaml'), 'rt', encoding='utf-8'
+    ) as fd:
+        test_data = yaml.safe_load(fd)
 
     qg = QuestionGeneration()
     metric = Metric.COSINE_SIMILARITY
     model_name = qg.trained_model_path.split('/')[1]
     data_points: np.ndarray = np.zeros(shape=(len(test_data), 1))
 
     for i, test_item in enumerate(test_data):
-        suggested_answer = test_item['answer']
-        context = test_item['context']
-        reference_question = test_item['question']
+        item = test_item['item']
+        suggested_answer = item['answer']
+        context = item['context']
+        reference_question = item['question']
 
         generated_question = qg.generate(
             answer=suggested_answer, context=context

diff --git a/tests/model/qg_test_data.yaml b/tests/model/qg_test_data.yaml
@@ -0,0 +1,19 @@
+- item:
+    context: In software development, the terms frontend and backend refer to the distinct roles of the user interface (frontend) and the data management layer (backend) of an application. In a client-server architecture, the client typically represents the frontend, while the server represents the backend, even if some presentation tasks are handled by the server.
+    question: What is the function of the frontend in software development?
+    answer: presentation layer
+
+- item:
+    context: During the day, the sky appears blue.
+    question: What color is the sky during the day?
+    answer: blue
+
+- item:
+    context: GNU Recutils is a set of tools and libraries to access human-editable, plain text databases called recfiles. The data is stored as a sequence of records, each record containing an arbitrary number of named fields.
+    question: What is an advantage of using GNU Recutils?
+    answer: There is free software to manipulate recfiles and a text editor is enough to edit it.
+
+- item:
+    context: To calculate a word embedding vector for the provided sentence, you would typically use a pre-trained word embedding model such as Word2Vec, GloVe, or FastText. These models convert words into numerical vectors based on their semantic meanings and contexts.
+    question: What is used to convert a sentence to a word embedding vector?
+    answer: Pre-trained word embedding model, e.g. Word2Vec, GloVe, FastText.
diff --git a/tests/model/runner.py b/tests/model/runner.py
@@ -14,6 +14,7 @@
 from tqdm import tqdm
 
 from knowledge_verificator.utils.filesystem import create_text_file
+from knowledge_verificator.io_handler import console
 
 
 class Metric(Enum):
@@ -75,6 +76,9 @@ def _collect_experiments(self) -> list[Callable]:
             if not os.path.isfile(file_path):
                 continue
 
+            if not file.endswith('.py'):
+                continue
+
             experiment_functions.extend(
                 self._collect_functions_from_file(file_path=file_path)
             )
@@ -106,9 +110,13 @@ def _collect_functions_from_file(self, file_path: Path) -> list[Callable]:
 
         spec.loader.exec_module(module)
 
-        # Get all functions in the module.
+        # Keep only the experiment functions, i.e. those whose names start with `measure_`.
         return [
-            func for _, func in inspect.getmembers(module, inspect.isfunction)
+            func
+            for func_name, func in inspect.getmembers(
+                module, inspect.isfunction
+            )
+            if func_name.startswith('measure_')
         ]
 
     def run(self) -> None:
@@ -123,6 +131,7 @@ def run(self) -> None:
             unit='experiment',
             iterable=self._collect_experiments(),
         ):
+            console.print(f'Running {experiment.__name__}...')
             result = experiment()
             results.append(result)