From c2b13932e22345f95620400af6f2919f707ba024 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Thu, 25 Jul 2024 17:20:28 +0530 Subject: [PATCH 01/27] Refactor NERSample.is_pass() to handle cases where either aligned span has entity "O" --- langtest/utils/custom_types/sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py index 64011f826..ce2c8205d 100644 --- a/langtest/utils/custom_types/sample.py +++ b/langtest/utils/custom_types/sample.py @@ -295,7 +295,7 @@ def get_aligned_span_pairs( def is_pass(self) -> bool: """Checks if the sample passes based on the maximum score.""" return all( - [a == b for (a, b) in self.get_aligned_span_pairs() if a and a.entity != "O"] + [a == b for (a, b) in self.get_aligned_span_pairs() if (a and a.entity != "O") or (b and b.entity != "O")] ) From ea48e1f325b402d8e3c446730d55cc65946a8564 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Thu, 25 Jul 2024 17:24:37 +0530 Subject: [PATCH 02/27] format issues --- langtest/utils/custom_types/sample.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py index ce2c8205d..d67f2386d 100644 --- a/langtest/utils/custom_types/sample.py +++ b/langtest/utils/custom_types/sample.py @@ -295,7 +295,11 @@ def get_aligned_span_pairs( def is_pass(self) -> bool: """Checks if the sample passes based on the maximum score.""" return all( - [a == b for (a, b) in self.get_aligned_span_pairs() if (a and a.entity != "O") or (b and b.entity != "O")] + [ + a == b + for (a, b) in self.get_aligned_span_pairs() + if (a and a.entity != "O") or (b and b.entity != "O") + ] ) From 52b81f2aea32be8e13db11bd10ea847ec0e55887 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 5 Aug 2024 16:13:33 +0530 Subject: [PATCH 03/27] resolved: recovering the transformation object. --- langtest/datahandler/datasource.py | 6 ++- langtest/langtest.py | 23 +++++---- langtest/utils/custom_types/helpers.py | 68 +++++++++----------------- langtest/utils/custom_types/sample.py | 9 ++++ 4 files changed, 47 insertions(+), 59 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 51e343ea3..e37dc5043 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -467,7 +467,7 @@ def load_data(self) -> List[NERSample]: List[NERSample]: List of formatted sentences from the dataset. """ data = [] - with open(self._file_path) as f: + with open(self._file_path, encoding="utf-8") as f: content = f.read() docs_strings = re.findall(r"-DOCSTART- \S+ \S+ O", content.strip()) docs = [ @@ -930,6 +930,10 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]: return samples for i in data.to_dict(orient="records"): + if self.task == "ner" and isinstance(i["transformations"], str): + import json + + i["transformations"] = eval(i["transformations"]) sample = self.task.get_sample_class(**i) samples.append(sample) diff --git a/langtest/langtest.py b/langtest/langtest.py index efaea4db1..028f37f60 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -23,7 +23,7 @@ from .transform.utils import RepresentationOperation from langtest.utils.benchmark_utils import Leaderboard, Summary from langtest.utils.lib_manager import try_import_lib -from langtest.utils.custom_types.helpers import TestResultManager, get_transformations +from langtest.utils.custom_types.helpers import TestResultManager from langtest.utils.checkpoints import divide_into_batches, CheckpointManager from langtest.prompts import PromptManager from .errors import Warnings, Errors @@ -826,7 +826,7 @@ def augment( return self - def testcases(self) -> pd.DataFrame: + def testcases(self, additional_cols=False) -> pd.DataFrame: """Testcases after .generate() is called Returns: @@ -869,6 +869,8 @@ def testcases(self) -> pd.DataFrame: "expected_result", ] + if additional_cols: + column_order.extend(["transformations"]) if isinstance(self._testcases, dict) and not self.is_multi_dataset: testcases_df = [] for k, v in self._testcases.items(): @@ -1077,7 +1079,9 @@ def load( harness._generated_results = generated_results return harness - def edit_testcases(self, output_path: str, **kwargs): + def edit_testcases( + self, output_path: str = "./edit_testcases.csv", return_dataframe=False, **kwargs + ): """Testcases are exported to a csv file to be edited. The edited file can be imported back to the harness @@ -1085,8 +1089,10 @@ def edit_testcases(self, output_path: str, **kwargs): Args: output_path (str): path to save the testcases to """ - temp_df = self.testcases() + temp_df = self.testcases(additional_col=True) temp_df = temp_df[temp_df["category"].isin(["robustness", "bias"])] + if return_dataframe: + return temp_df temp_df.to_csv(output_path, index=False) def import_edited_testcases(self, input_path: str, **kwargs): @@ -1138,10 +1144,6 @@ def import_edited_testcases(self, input_path: str, **kwargs): # merge the testcases with the imported ones to the temp_testcases for name, list_samples in imported_testcases.items(): # check the task and apply transformations - if self.task == "ner": - list_samples = [ - get_transformations(sample) for sample in list_samples - ] if name not in temp_testcases: temp_testcases[name] = list_samples temp_testcases[name].extend(list_samples) @@ -1168,10 +1170,7 @@ def import_edited_testcases(self, input_path: str, **kwargs): self._testcases = DataFactory( {"data_source": input_path}, task=self.task, is_import=True ).load() - if self.task == "ner": - self._testcases = [ - get_transformations(sample) for sample in self._testcases - ] + self._testcases.extend(temp_testcases) return self diff --git a/langtest/utils/custom_types/helpers.py b/langtest/utils/custom_types/helpers.py index e8ef77910..fa9e43f61 100644 --- a/langtest/utils/custom_types/helpers.py +++ b/langtest/utils/custom_types/helpers.py @@ -186,6 +186,10 @@ def __repr__(self): """""" return f"" + def __add__(self, other: "Span") -> "Span": + """""" + return Span(start=self.start, end=other.end, word=f"{self.word} {other.word}") + class Transformation(BaseModel): """ @@ -197,6 +201,24 @@ class Transformation(BaseModel): new_span: Span ignore: bool = False + def from_dict(self, data: dict): + """""" + self.original_span = Span(**data["original_span"]) + self.new_span = Span(**data["new_span"]) + self.ignore = data.get("ignore", False) + + def to_dict(self): + """""" + import json + + return json.dumps( + { + "original_span": self.original_span.dict(), + "new_span": self.new_span.dict(), + "ignore": self.ignore, + } + ) + class SimplePromptTemplate: """Simple prompt template for formatting messages with variables.""" @@ -753,49 +775,3 @@ def clear_instance(self): def clear_data(self): self._data = [] - - -def get_transformations(sample) -> List[Transformation]: - """Detects the changes between two texts and returns the transformations.""" - from langtest.utils.custom_types.helpers import Span, Transformation - - original_text = sample.original - new_text = sample.test_case - transformations = [] - i, j = 0, 0 - len_orig = len(original_text) - len_new = len(new_text) - - while i < len_orig and j < len_new: - if original_text[i] != new_text[j]: - start_i = i - start_j = j - - while ( - i < len_orig - and j < len_new - and original_text[i] != " " - and new_text[j] != " " - ): - i += 1 - j += 1 - - while i < len_orig and original_text[i] != " ": - i += 1 - while j < len_new and new_text[j] != " ": - j += 1 - - original_word = original_text[start_i:i] - new_word = new_text[start_j:j] - - original_span = Span(start=start_i, end=i, word=original_word) - new_span = Span(start=start_j, end=j, word=new_word) - transformations.append( - Transformation(original_span=original_span, new_span=new_span) - ) - else: - i += 1 - j += 1 - - sample.transformations = transformations - return sample diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py index d67f2386d..8477fb9bb 100644 --- a/langtest/utils/custom_types/sample.py +++ b/langtest/utils/custom_types/sample.py @@ -75,6 +75,15 @@ def to_dict(self) -> Dict[str, Any]: } ) + if self.transformations: + result.update( + { + "transformations": [ + transformation.dict() for transformation in self.transformations + ] + } + ) + return result @validator("transformations") From 1c0112e8b699a20b5d27ffcfd2f0997634b864c6 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 5 Aug 2024 16:17:56 +0530 Subject: [PATCH 04/27] removed the unused imports --- langtest/datahandler/datasource.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index e37dc5043..b18d6f959 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -931,8 +931,7 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]: for i in data.to_dict(orient="records"): if self.task == "ner" and isinstance(i["transformations"], str): - import json - + i["transformations"] = eval(i["transformations"]) sample = self.task.get_sample_class(**i) samples.append(sample) From 3229bd21f64f19339c8def59d2dd425c1ba2de3b Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 5 Aug 2024 16:39:58 +0530 Subject: [PATCH 05/27] chore: Recover transformation object and apply to NER task test cases --- langtest/datahandler/datasource.py | 1 - 1 file changed, 1 deletion(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index b18d6f959..f3d4d2abb 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -931,7 +931,6 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]: for i in data.to_dict(orient="records"): if self.task == "ner" and isinstance(i["transformations"], str): - i["transformations"] = eval(i["transformations"]) sample = self.task.get_sample_class(**i) samples.append(sample) From e42f2821b6913cf906b540195e208dcdb1e389db Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 5 Aug 2024 18:44:59 +0530 Subject: [PATCH 06/27] resolved: unknown args --- langtest/langtest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/langtest.py b/langtest/langtest.py index 028f37f60..d7a1f15cd 100644 --- a/langtest/langtest.py +++ b/langtest/langtest.py @@ -1089,7 +1089,7 @@ def edit_testcases( Args: output_path (str): path to save the testcases to """ - temp_df = self.testcases(additional_col=True) + temp_df = self.testcases(additional_cols=True) temp_df = temp_df[temp_df["category"].isin(["robustness", "bias"])] if return_dataframe: return temp_df From f195d1fec813c3d635dc3979b2c8b1f6c38449d1 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 5 Aug 2024 20:01:57 +0530 Subject: [PATCH 07/27] chore: Refactor CSVDataset to handle missing or invalid transformations --- langtest/datahandler/datasource.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index f3d4d2abb..5313fddc6 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -930,8 +930,15 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]: return samples for i in data.to_dict(orient="records"): - if self.task == "ner" and isinstance(i["transformations"], str): - i["transformations"] = eval(i["transformations"]) + temp = i["transformations"] + if temp == "-" or len(temp) < 3: + temp = None + i.pop("transformations") + + if self.task == "ner" and isinstance(temp, str): + import ast + + i["transformations"] = ast.literal_eval(temp) sample = self.task.get_sample_class(**i) samples.append(sample) From 82f8b871e81fffd807566251dbef156404e28aff Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 13 Aug 2024 18:35:28 +0530 Subject: [PATCH 08/27] fixed: consistent issues while generated templates in templatic augmentation. --- langtest/augmentation/base.py | 33 ++++++----- poetry.lock | 101 +++++++++++++++++++++++++++++----- pyproject.toml | 2 +- 3 files changed, 106 insertions(+), 30 deletions(-) diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index 5b1036c4b..8b640fc1c 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -338,6 +338,12 @@ def __init__( if generate_templates: if try_import_lib("openai"): import openai + from pydantic import BaseModel, Field + + client = openai.OpenAI() + + class Templates(BaseModel): + templates: List[str] given_template = self.__templates[:] for template in given_template: @@ -346,30 +352,29 @@ def __init__( Template: "{template}" - Expected Python List Output: - ['Template 1', 'Template 2', 'Template 3', ...] # Replace with actual generated templates """ - response = openai.Completion.create( - engine="gpt-3.5-turbo-instruct", - prompt=prompt, + response = client.beta.chat.completions.parse( + model="gpt-4o-mini", + messages=[{"role": "system", "content": "Action: Generate templates"}, {"role": "user", "content": prompt}], max_tokens=500, temperature=0, + response_format=Templates, ) - generated_response = response.choices[0].text.strip() + generated_response = response.choices[0].message.parsed # Process the generated response if generated_response: - # Assuming the response format is a Python-like list in a string - templates_list = generated_response.strip("[]").split('",') - templates_list = [ - template.strip().strip('"') - for template in templates_list - if template.strip() - ] + # # Assuming the response format is a Python-like list in a string + # templates_list = generated_response.strip("[]").split('",') + # templates_list = [ + # template.strip().strip('"') + # for template in templates_list + # if template.strip() + # ] # Extend the existing templates list - self.__templates.extend(templates_list) + self.__templates.extend(generated_response.templates) else: print("No response or unexpected format.") diff --git a/poetry.lock b/poetry.lock index eece46db7..eb8658718 100644 --- a/poetry.lock +++ b/poetry.lock @@ -216,7 +216,7 @@ dev = ["black", "coverage", "isort", "pre-commit", "pyenchant", "pylint"] name = "anyio" version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"}, @@ -923,7 +923,7 @@ files = [ name = "distro" version = "1.9.0" description = "Distro - an OS platform information API" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, @@ -1483,7 +1483,7 @@ tornado = ["tornado (>=0.2)"] name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, @@ -1494,7 +1494,7 @@ files = [ name = "httpcore" version = "1.0.4" description = "A minimal low-level HTTP client." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "httpcore-1.0.4-py3-none-any.whl", hash = "sha256:ac418c1db41bade2ad53ae2f3834a3a0f5ae76b56cf5aa497d2d033384fc7d73"}, @@ -1515,7 +1515,7 @@ trio = ["trio (>=0.22.0,<0.25.0)"] name = "httpx" version = "0.27.0" description = "The next generation HTTP client." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, @@ -1743,6 +1743,76 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jiter" +version = "0.5.0" +description = "Fast iterable JSON parser." +optional = false +python-versions = ">=3.8" +files = [ + {file = "jiter-0.5.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b599f4e89b3def9a94091e6ee52e1d7ad7bc33e238ebb9c4c63f211d74822c3f"}, + {file = "jiter-0.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a063f71c4b06225543dddadbe09d203dc0c95ba352d8b85f1221173480a71d5"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acc0d5b8b3dd12e91dd184b87273f864b363dfabc90ef29a1092d269f18c7e28"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c22541f0b672f4d741382a97c65609332a783501551445ab2df137ada01e019e"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63314832e302cc10d8dfbda0333a384bf4bcfce80d65fe99b0f3c0da8945a91a"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a25fbd8a5a58061e433d6fae6d5298777c0814a8bcefa1e5ecfff20c594bd749"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:503b2c27d87dfff5ab717a8200fbbcf4714516c9d85558048b1fc14d2de7d8dc"}, + {file = "jiter-0.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d1f3d27cce923713933a844872d213d244e09b53ec99b7a7fdf73d543529d6d"}, + {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c95980207b3998f2c3b3098f357994d3fd7661121f30669ca7cb945f09510a87"}, + {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:afa66939d834b0ce063f57d9895e8036ffc41c4bd90e4a99631e5f261d9b518e"}, + {file = "jiter-0.5.0-cp310-none-win32.whl", hash = "sha256:f16ca8f10e62f25fd81d5310e852df6649af17824146ca74647a018424ddeccf"}, + {file = "jiter-0.5.0-cp310-none-win_amd64.whl", hash = "sha256:b2950e4798e82dd9176935ef6a55cf6a448b5c71515a556da3f6b811a7844f1e"}, + {file = "jiter-0.5.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d4c8e1ed0ef31ad29cae5ea16b9e41529eb50a7fba70600008e9f8de6376d553"}, + {file = "jiter-0.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c6f16e21276074a12d8421692515b3fd6d2ea9c94fd0734c39a12960a20e85f3"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5280e68e7740c8c128d3ae5ab63335ce6d1fb6603d3b809637b11713487af9e6"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:583c57fc30cc1fec360e66323aadd7fc3edeec01289bfafc35d3b9dcb29495e4"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26351cc14507bdf466b5f99aba3df3143a59da75799bf64a53a3ad3155ecded9"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4829df14d656b3fb87e50ae8b48253a8851c707da9f30d45aacab2aa2ba2d614"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a42a4bdcf7307b86cb863b2fb9bb55029b422d8f86276a50487982d99eed7c6e"}, + {file = "jiter-0.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04d461ad0aebf696f8da13c99bc1b3e06f66ecf6cfd56254cc402f6385231c06"}, + {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6375923c5f19888c9226582a124b77b622f8fd0018b843c45eeb19d9701c403"}, + {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cec323a853c24fd0472517113768c92ae0be8f8c384ef4441d3632da8baa646"}, + {file = "jiter-0.5.0-cp311-none-win32.whl", hash = "sha256:aa1db0967130b5cab63dfe4d6ff547c88b2a394c3410db64744d491df7f069bb"}, + {file = "jiter-0.5.0-cp311-none-win_amd64.whl", hash = "sha256:aa9d2b85b2ed7dc7697597dcfaac66e63c1b3028652f751c81c65a9f220899ae"}, + {file = "jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9f664e7351604f91dcdd557603c57fc0d551bc65cc0a732fdacbf73ad335049a"}, + {file = "jiter-0.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:044f2f1148b5248ad2c8c3afb43430dccf676c5a5834d2f5089a4e6c5bbd64df"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:702e3520384c88b6e270c55c772d4bd6d7b150608dcc94dea87ceba1b6391248"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:528d742dcde73fad9d63e8242c036ab4a84389a56e04efd854062b660f559544"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8cf80e5fe6ab582c82f0c3331df27a7e1565e2dcf06265afd5173d809cdbf9ba"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:44dfc9ddfb9b51a5626568ef4e55ada462b7328996294fe4d36de02fce42721f"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c451f7922992751a936b96c5f5b9bb9312243d9b754c34b33d0cb72c84669f4e"}, + {file = "jiter-0.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:308fce789a2f093dca1ff91ac391f11a9f99c35369117ad5a5c6c4903e1b3e3a"}, + {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7f5ad4a7c6b0d90776fdefa294f662e8a86871e601309643de30bf94bb93a64e"}, + {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ea189db75f8eca08807d02ae27929e890c7d47599ce3d0a6a5d41f2419ecf338"}, + {file = "jiter-0.5.0-cp312-none-win32.whl", hash = "sha256:e3bbe3910c724b877846186c25fe3c802e105a2c1fc2b57d6688b9f8772026e4"}, + {file = "jiter-0.5.0-cp312-none-win_amd64.whl", hash = "sha256:a586832f70c3f1481732919215f36d41c59ca080fa27a65cf23d9490e75b2ef5"}, + {file = "jiter-0.5.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:f04bc2fc50dc77be9d10f73fcc4e39346402ffe21726ff41028f36e179b587e6"}, + {file = "jiter-0.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6f433a4169ad22fcb550b11179bb2b4fd405de9b982601914ef448390b2954f3"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad4a6398c85d3a20067e6c69890ca01f68659da94d74c800298581724e426c7e"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6baa88334e7af3f4d7a5c66c3a63808e5efbc3698a1c57626541ddd22f8e4fbf"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ece0a115c05efca597c6d938f88c9357c843f8c245dbbb53361a1c01afd7148"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:335942557162ad372cc367ffaf93217117401bf930483b4b3ebdb1223dbddfa7"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:649b0ee97a6e6da174bffcb3c8c051a5935d7d4f2f52ea1583b5b3e7822fbf14"}, + {file = "jiter-0.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f4be354c5de82157886ca7f5925dbda369b77344b4b4adf2723079715f823989"}, + {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5206144578831a6de278a38896864ded4ed96af66e1e63ec5dd7f4a1fce38a3a"}, + {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8120c60f8121ac3d6f072b97ef0e71770cc72b3c23084c72c4189428b1b1d3b6"}, + {file = "jiter-0.5.0-cp38-none-win32.whl", hash = "sha256:6f1223f88b6d76b519cb033a4d3687ca157c272ec5d6015c322fc5b3074d8a5e"}, + {file = "jiter-0.5.0-cp38-none-win_amd64.whl", hash = "sha256:c59614b225d9f434ea8fc0d0bec51ef5fa8c83679afedc0433905994fb36d631"}, + {file = "jiter-0.5.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0af3838cfb7e6afee3f00dc66fa24695199e20ba87df26e942820345b0afc566"}, + {file = "jiter-0.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:550b11d669600dbc342364fd4adbe987f14d0bbedaf06feb1b983383dcc4b961"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:489875bf1a0ffb3cb38a727b01e6673f0f2e395b2aad3c9387f94187cb214bbf"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b250ca2594f5599ca82ba7e68785a669b352156260c5362ea1b4e04a0f3e2389"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ea18e01f785c6667ca15407cd6dabbe029d77474d53595a189bdc813347218e"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:462a52be85b53cd9bffd94e2d788a09984274fe6cebb893d6287e1c296d50653"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92cc68b48d50fa472c79c93965e19bd48f40f207cb557a8346daa020d6ba973b"}, + {file = "jiter-0.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1c834133e59a8521bc87ebcad773608c6fa6ab5c7a022df24a45030826cf10bc"}, + {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab3a71ff31cf2d45cb216dc37af522d335211f3a972d2fe14ea99073de6cb104"}, + {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cccd3af9c48ac500c95e1bcbc498020c87e1781ff0345dd371462d67b76643eb"}, + {file = "jiter-0.5.0-cp39-none-win32.whl", hash = "sha256:368084d8d5c4fc40ff7c3cc513c4f73e02c85f6009217922d0823a48ee7adf61"}, + {file = "jiter-0.5.0-cp39-none-win_amd64.whl", hash = "sha256:ce03f7b4129eb72f1687fa11300fbf677b02990618428934662406d2a76742a1"}, + {file = "jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a"}, +] + [[package]] name = "jmespath" version = "0.10.0" @@ -2683,23 +2753,24 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] [[package]] name = "openai" -version = "1.13.3" +version = "1.40.6" description = "The official Python library for the openai API" -optional = true +optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.13.3-py3-none-any.whl", hash = "sha256:5769b62abd02f350a8dd1a3a242d8972c947860654466171d60fb0972ae0a41c"}, - {file = "openai-1.13.3.tar.gz", hash = "sha256:ff6c6b3bc7327e715e4b3592a923a5a1c7519ff5dd764a83d69f633d49e77a7b"}, + {file = "openai-1.40.6-py3-none-any.whl", hash = "sha256:b36372124a779381a420a34dd96f762baa748b6bdfaf83a6b9f2745f72ccc1c5"}, + {file = "openai-1.40.6.tar.gz", hash = "sha256:2239232bcb7f4bd4ce8e02544b5769618582411cf399816d96686d1b6c1e5c8d"}, ] [package.dependencies] anyio = ">=3.5.0,<5" distro = ">=1.7.0,<2" httpx = ">=0.23.0,<1" +jiter = ">=0.4.0,<1" pydantic = ">=1.9.0,<3" sniffio = "*" tqdm = ">4" -typing-extensions = ">=4.7,<5" +typing-extensions = ">=4.11,<5" [package.extras] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] @@ -4023,7 +4094,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, @@ -4842,13 +4913,13 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6. [[package]] name = "typing-extensions" -version = "4.10.0" +version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, - {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] [[package]] @@ -5309,4 +5380,4 @@ transformers = ["accelerate", "datasets", "torch", "transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "15c15ff226c5ab0a96736e03993662dfe2052a5442b52778e3bffd9e176c0cb5" +content-hash = "477b1347105836e413565aa36f398e96038fba0de7daa8f6123e7d03e5fe4907" diff --git a/pyproject.toml b/pyproject.toml index 34943255b..44caae217 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ transformers = "^4.38.2" huggingface_hub = { version = ">0.16.0", optional = true} spacy = { version = ">=3.0.0", optional = true } nest-asyncio = "^1.5.0" -openai = {version = "^1.13.3", optional = true} +openai = "^1.40.6" jsonlines = "^3.1.0" torch = { version = "^2.0.0", optional = true } pandas = "^2.0.3" From a47156db059f8f5a86a71be12b511a1e56367344 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 13 Aug 2024 19:18:22 +0530 Subject: [PATCH 09/27] resolved: lint and format issues. --- langtest/augmentation/base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index 8b640fc1c..1fa0bb226 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -338,7 +338,7 @@ def __init__( if generate_templates: if try_import_lib("openai"): import openai - from pydantic import BaseModel, Field + from pydantic import BaseModel client = openai.OpenAI() @@ -356,7 +356,10 @@ class Templates(BaseModel): response = client.beta.chat.completions.parse( model="gpt-4o-mini", - messages=[{"role": "system", "content": "Action: Generate templates"}, {"role": "user", "content": prompt}], + messages=[ + {"role": "system", "content": "Action: Generate templates"}, + {"role": "user", "content": prompt}, + ], max_tokens=500, temperature=0, response_format=Templates, From 92cd12cf3cc0dc048e0f8f1e84304507cfaf81c6 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 14 Aug 2024 19:18:46 +0530 Subject: [PATCH 10/27] fixed: transformed and add export types are supported in DataAugumenter --- langtest/augmentation/augmenter.py | 144 +++++++++++++++++++++-------- langtest/datahandler/datasource.py | 27 +++++- 2 files changed, 127 insertions(+), 44 deletions(-) diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py index 96b5125e1..d98b6cf86 100644 --- a/langtest/augmentation/augmenter.py +++ b/langtest/augmentation/augmenter.py @@ -1,10 +1,13 @@ +from collections import defaultdict import random import yaml +import pandas as pd from typing import Any, Dict, Iterable, Union from langtest.datahandler.datasource import DataFactory from langtest.transform import TestFactory from langtest.tasks.task import TaskManager +from langtest.utils.custom_types.sample import Sample class DataAugmenter: @@ -40,14 +43,12 @@ def __init__(self, task: Union[str, TaskManager], config: Union[str, dict]) -> N self.__testfactory.is_augment = True # parameters - self.__max_proportion = self.__tests.get("defaults", {}).get( - "max_proportion", 0.6 - ) + self.__max_data_limit = self.__tests.get("parameters", {}).get("max_limit", 0.5) # self.__ntests = len(v for k, v in self.__tests.items()) - 1 self.__type = self.__config.get("parameters", {}).get("type", "proportion") self.__style = self.__config.get("parameters", {}).get("style", "extend") - self.__df_config = self.__config_df() + self.__df_config = self.__initialize_config_df() def load_config(self, config: str) -> dict: """ @@ -61,45 +62,67 @@ def augment(self, data: Union[str, Iterable]) -> str: Augment the content. """ # load the data - if isinstance(data, dict): + if isinstance(data, dict) and not isinstance(self.__datafactory, DataFactory): self.__datafactory = self.__datafactory(file_path=data, task=self.__task) + data = self.__datafactory.load() + # generate the augmented data + test_cases = self.__testfactory.transform(self.__task, data, self.__tests) + # check the style of augmentation to be applied. Default is extend - if self.__style == "extend": - self.extend(data) + if self.__style == "extend" or self.__style == "add": + self.extend(data, test_cases) elif self.__style == "inplace": - self.inplace(data) - elif self.__style == "new": - self.new_data(data) + self.inplace(data, test_cases) + elif self.__style == "new" or self.__style == "transformed": + self.new_data(data, test_cases) else: raise ValueError("Invalid style") return self - def extend(self, data: Iterable) -> "DataAugmenter": + def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter": """ Extend the content. """ # calculate the number of rows to be added - n = len(data) + test_cases = defaultdict(list) + for sample in testcases: + if sample.test_type in test_cases: + test_cases[sample.test_type].append(sample) + else: + test_cases[sample.test_type] = [sample] - data_cut = random.sample(data, int(n * self.__max_proportion)) + final_data = [] - test_cases: list = self.__testfactory.transform( - self.__task, data_cut, self.__tests - ) + for _, tests in self.__tests.items(): + for test_name, _ in tests.items(): + size = self.allocated_size(test_name) - self.__augmented_data = [*data, *test_cases] if isinstance(data, list) else data + if size == 0: + continue + + temp_test_cases = test_cases.get(test_name, []) + if temp_test_cases: + # select random rows based on the size + temp_test_cases = ( + random.choices(temp_test_cases, k=size) + if size < len(temp_test_cases) + else temp_test_cases + ) + final_data.extend(temp_test_cases) + + self.__augmented_data = [*data, *final_data] if isinstance(data, list) else data return self - def inplace(self, data: Iterable) -> "DataAugmenter": + def inplace(self, data: Iterable, testcases: Iterable) -> "DataAugmenter": """ Inplace augmentation. """ # calculate the number of rows to be added - size = int(len(data) * self.__max_proportion) + size = int(len(data) * self.allocated_size()) # create a dictionary with index as key and data as value data_dict = self.prepare_hash_map(data) @@ -117,28 +140,59 @@ def inplace(self, data: Iterable) -> "DataAugmenter": return self - def new_data(self, data: Iterable) -> "DataAugmenter": + def new_data(self, data: Iterable, testcases: Iterable) -> "DataAugmenter": """ Create new data. """ # calculate the number of rows to be added - size = int(len(data) * self.__max_proportion) + test_cases = defaultdict(list) + for sample in testcases: + if sample.test_type in test_cases: + test_cases[sample.test_type].append(sample) + else: + test_cases[sample.test_type] = [sample] - data_cut = random.sample(data, size) + final_data = [] + for _, tests in self.__tests.items(): + for test_name, _ in tests.items(): + size = self.allocated_size(test_name) - test_cases = self.__testfactory.transform(self.__task, data_cut, self.__tests) + if size == 0: + continue - self.__augmented_data = test_cases + temp_test_cases = test_cases.get(test_name, []) + if temp_test_cases: + # select random rows based on the size + temp_test_cases = ( + random.choices(temp_test_cases, k=size) + if size < len(temp_test_cases) + else temp_test_cases + ) + final_data.extend(temp_test_cases) + + self.__augmented_data = final_data return self - def size(self, category: str, test_name: str) -> int: - return ( - self.__max_proportion - * self.__tests.get(category, {}).get(test_name, {}).get("max_proportion", 0.6) - ) / self.__df_config.shape[0] + def allocated_size(self, test_name: str) -> int: + """allocation size of the test to be augmented""" - def prepare_hash_map(self, data: Union[str, Iterable]) -> Dict[str, Any]: + try: + max_data_limit = ( + len(self.__datafactory) + * self.__max_data_limit + * self.__df_config.loc[test_name, "avg_proportion"] + ) + + return int( + max_data_limit * self.__df_config.loc[test_name, "normalized_proportion"] + ) + except AttributeError: + raise ValueError( + "Dataset is not loaded. please load the data using the `DataAugmenter.augment(data={'data_source': '..'})` method" + ) + + def prepare_hash_map(self, data: Union[Iterable[Sample], Sample]) -> Dict[str, Any]: hashmap = {index: sample for index, sample in enumerate(data)} return hashmap @@ -157,28 +211,38 @@ def __ror__(self, other: Iterable): results = self.augment(other) return results - def __config_df(self): + def __initialize_config_df(self) -> pd.DataFrame: """ Configure the data frame. """ - import pandas as pd - df = pd.DataFrame(columns=["category", "test_name", "proportion"]) # read the configuration + temp_data = [] for category, tests in self.__tests.items(): if category not in ["robustness", "bias"]: continue for test_name, test in tests.items(): - proportion = test.get("max_proportion", 0.6) - temp = pd.DataFrame( + proportion = test.get("max_proportion", 0.2) + temp_data.append( { - "category": [category], - "test_name": [test_name], - "proportion": [proportion], - }, + "category": category, + "test_name": test_name, + "proportion": proportion, + } ) - df = pd.concat([df, temp], ignore_index=True) + df = pd.concat([df, pd.DataFrame(temp_data)], ignore_index=True) + + # normalize the proportion and round it to 2 decimal places + df["normalized_proportion"] = df["proportion"] / df["proportion"].sum() + df["normalized_proportion"] = df["normalized_proportion"].apply( + lambda x: round(x, 2) + ) + + df["avg_proportion"] = df["proportion"].mean(numeric_only=True).round(2) + + # set the index as test_name + df.set_index("test_name", inplace=True) return df diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 51e343ea3..217a06f17 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -104,6 +104,7 @@ class BaseDataset(ABC): """ data_sources = defaultdict() + dataset_size = None @abstractmethod def load_raw_data(self): @@ -153,6 +154,12 @@ def __init_subclass__(cls, **kwargs): else: cls.data_sources[dataset_cls] = cls + def __len__(self): + """Returns the size of the dataset""" + if self.dataset_size is None: + self.dataset_size = len(self.load_data()) + return self.dataset_size + class DataFactory: """Data factory for creating Dataset objects. @@ -178,6 +185,7 @@ def __init__(self, file_path: dict, task: TaskManager, **kwargs) -> None: raise ValueError(Errors.E025) self._custom_label = file_path.copy() self._file_path = file_path.get("data_source") + self._size = None self.datasets_with_jsonl_extension = [] for dataset_name, dataset_info in datasets_info.items(): @@ -250,7 +258,10 @@ def load(self) -> List[Sample]: self.init_cls = self.data_sources[self.file_ext.replace(".", "")]( self._file_path, task=self.task, **self.kwargs ) - return self.init_cls.load_data() + + loaded_data = self.init_cls.load_data() + self._size = len(loaded_data) + return loaded_data def export(self, data: List[Sample], output_path: str) -> None: """Exports the data to the corresponding format and saves it to 'output_path'. @@ -399,6 +410,12 @@ def _load_dataset(cls, custom_label: dict) -> str: extension = dataset_info.get("extension", "jsonl") return script_dir[:-7] + "/" + dataset_name + "/" + split + extension + def __len__(self): + """dataset size""" + if self._size is None: + self._size = len(self.load()) + return self._size + class ConllDataset(BaseDataset): """Class to handle Conll files. Subclass of BaseDataset.""" @@ -522,7 +539,7 @@ def load_data(self) -> List[NERSample]: expected_results=NEROutput(predictions=ner_labels), ) ) - + self.dataset_size = len(data) return data def export_data(self, data: List[NERSample], output_path: str): @@ -812,6 +829,7 @@ def load_data(self) -> List[Sample]: logging.warning(Warnings.W005(idx=idx, row_data=row_data, e=e)) continue + self.dataset_size = len(data) return data def export_data(self, data: List[Sample], output_path: str): @@ -1025,7 +1043,7 @@ def load_data(self, *args, **kwargs) -> List[Sample]: item, dataset_name=dataset_name, *args, **kwargs ) data.append(sample) - + self.dataset_size = len(data) return data def __load_jsonl(self, file: str, dataset_name: str, data, *args, **kwargs): @@ -1215,7 +1233,7 @@ def load_data(self) -> List[Sample]: **column_names, ) data.append(sample) - + self.dataset_size = len(data) return data def export_data(self, data: List[Sample], output_path: str): @@ -1296,6 +1314,7 @@ def load_data(self) -> List[Sample]: method_name = f"load_{self.dataset_name.replace('-', '_')}" if hasattr(self, method_name): samples = getattr(self, method_name)() + self.dataset_size = len(samples) return samples else: raise ValueError(Errors.E030(dataset_name=self.dataset_name)) From 712d4d6bf115569bf78a07111d065be58f3ed7e9 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 14 Aug 2024 19:39:05 +0530 Subject: [PATCH 11/27] fixed: inplace method in DataAugmenter with proper proportion. --- langtest/augmentation/augmenter.py | 61 +++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py index d98b6cf86..ca9a5b202 100644 --- a/langtest/augmentation/augmenter.py +++ b/langtest/augmentation/augmenter.py @@ -3,7 +3,7 @@ import yaml import pandas as pd -from typing import Any, Dict, Iterable, Union +from typing import Any, Dict, Iterable, List, Union from langtest.datahandler.datasource import DataFactory from langtest.transform import TestFactory from langtest.tasks.task import TaskManager @@ -117,24 +117,47 @@ def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter" return self - def inplace(self, data: Iterable, testcases: Iterable) -> "DataAugmenter": + def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter": """ Inplace augmentation. """ - # calculate the number of rows to be added - size = int(len(data) * self.allocated_size()) - # create a dictionary with index as key and data as value + data_indices = self.prepare_hash_map(data, inverted=True) data_dict = self.prepare_hash_map(data) - # select random rows based on the size with its index - selected = random.sample(data_dict.keys(), int(size)) + test_cases = defaultdict(list) + for sample in testcases: + if sample.test_type in test_cases: + test_cases[sample.test_type].append(sample) + else: + test_cases[sample.test_type] = [sample] + + final_data: List[Sample] = [] + for _, tests in self.__tests.items(): + for test_name, _ in tests.items(): + size = self.allocated_size(test_name) + print(size) + if size == 0: + continue - for idx in selected: - test_cases = self.__testfactory.transform( - self.__task, [data_dict[idx]], self.__tests + temp_test_cases = test_cases.get(test_name, []) + if temp_test_cases: + # select random rows based on the size + temp_test_cases = ( + random.choices(temp_test_cases, k=size) + if size < len(temp_test_cases) + else temp_test_cases + ) + final_data.extend(temp_test_cases) + + for sample in final_data: + key = ( + sample.original_question + if hasattr(sample, "original_question") + else sample.original ) - data_dict[idx] = test_cases[0] if test_cases else data_dict[idx] + index = data_indices[key] + data_dict[index] = sample self.__augmented_data = data_dict.values() @@ -192,8 +215,20 @@ def allocated_size(self, test_name: str) -> int: "Dataset is not loaded. please load the data using the `DataAugmenter.augment(data={'data_source': '..'})` method" ) - def prepare_hash_map(self, data: Union[Iterable[Sample], Sample]) -> Dict[str, Any]: - hashmap = {index: sample for index, sample in enumerate(data)} + def prepare_hash_map( + self, data: Union[Iterable[Sample], Sample], inverted=False + ) -> Dict[str, Any]: + if inverted: + hashmap = {} + for index, sample in enumerate(data): + key = ( + sample.original_question + if hasattr(sample, "original_question") + else sample.original + ) + hashmap[key] = index + else: + hashmap = {index: sample for index, sample in enumerate(data)} return hashmap From fd2333dfc178d3fbd568a056a46b736e3a0446f2 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 14 Aug 2024 19:46:52 +0530 Subject: [PATCH 12/27] update doc strings and remove the print statements. --- langtest/augmentation/augmenter.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py index ca9a5b202..2cd118aab 100644 --- a/langtest/augmentation/augmenter.py +++ b/langtest/augmentation/augmenter.py @@ -86,7 +86,7 @@ def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter" """ Extend the content. """ - # calculate the number of rows to be added + # arrange the test cases based on the test_type in a dictionary test_cases = defaultdict(list) for sample in testcases: if sample.test_type in test_cases: @@ -95,7 +95,7 @@ def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter" test_cases[sample.test_type] = [sample] final_data = [] - + # pick the test cases based on the allocated size of the test_type for _, tests in self.__tests.items(): for test_name, _ in tests.items(): size = self.allocated_size(test_name) @@ -113,6 +113,7 @@ def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter" ) final_data.extend(temp_test_cases) + # append the augmented data to the original data self.__augmented_data = [*data, *final_data] if isinstance(data, list) else data return self @@ -121,10 +122,11 @@ def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter """ Inplace augmentation. """ - + # indices of the data and the data itself data_indices = self.prepare_hash_map(data, inverted=True) data_dict = self.prepare_hash_map(data) + # arrange the test cases based on the test type in a dictionary test_cases = defaultdict(list) for sample in testcases: if sample.test_type in test_cases: @@ -132,11 +134,12 @@ def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter else: test_cases[sample.test_type] = [sample] + # pick the test cases based on the allocated size of the test_type final_data: List[Sample] = [] for _, tests in self.__tests.items(): for test_name, _ in tests.items(): size = self.allocated_size(test_name) - print(size) + if size == 0: continue @@ -150,6 +153,7 @@ def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter ) final_data.extend(temp_test_cases) + # replace the original data with the augmented data in extact position. for sample in final_data: key = ( sample.original_question @@ -163,11 +167,11 @@ def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter return self - def new_data(self, data: Iterable, testcases: Iterable) -> "DataAugmenter": + def new_data(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter": """ Create new data. """ - # calculate the number of rows to be added + # arrange the test cases based on the test type in a dictionary test_cases = defaultdict(list) for sample in testcases: if sample.test_type in test_cases: @@ -176,6 +180,8 @@ def new_data(self, data: Iterable, testcases: Iterable) -> "DataAugmenter": test_cases[sample.test_type] = [sample] final_data = [] + + # pick the test cases based on the allocated size of the test_type for _, tests in self.__tests.items(): for test_name, _ in tests.items(): size = self.allocated_size(test_name) @@ -193,6 +199,7 @@ def new_data(self, data: Iterable, testcases: Iterable) -> "DataAugmenter": ) final_data.extend(temp_test_cases) + # replace the original data with the augmented data self.__augmented_data = final_data return self From 526ae6f666774453a82d21d621c315865d8df5aa Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Thu, 15 Aug 2024 12:32:37 +0530 Subject: [PATCH 13/27] chore: generate additional templates in TemplaticAugment as user choice to number of extra templates to be needed --- langtest/augmentation/base.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index 1fa0bb226..c8488ab95 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -323,6 +323,7 @@ def __init__( task: TaskManager, generate_templates=False, show_templates=False, + num_extra_templates=10, ) -> None: """This constructor for the TemplaticAugment class. @@ -347,7 +348,7 @@ class Templates(BaseModel): given_template = self.__templates[:] for template in given_template: - prompt = f"""Based on the template provided, create 10 new and unique templates that are variations on this theme. Present these as a Python list, with each template as a quoted string. The list should contain only the templates without any additional text or explanation. + prompt = f"""Based on the template provided, create {num_extra_templates} new and unique templates that are variations on this theme. Present these as a Python list, with each template as a quoted string. The list should contain only the templates without any additional text or explanation. Template: "{template}" @@ -357,7 +358,10 @@ class Templates(BaseModel): response = client.beta.chat.completions.parse( model="gpt-4o-mini", messages=[ - {"role": "system", "content": "Action: Generate templates"}, + { + "role": "system", + "content": f"Action: Generate templates upto {num_extra_templates}", + }, {"role": "user", "content": prompt}, ], max_tokens=500, @@ -368,16 +372,9 @@ class Templates(BaseModel): generated_response = response.choices[0].message.parsed # Process the generated response if generated_response: - # # Assuming the response format is a Python-like list in a string - # templates_list = generated_response.strip("[]").split('",') - # templates_list = [ - # template.strip().strip('"') - # for template in templates_list - # if template.strip() - # ] - + # Extend the existing templates list - self.__templates.extend(generated_response.templates) + self.__templates.extend(generated_response.templates[:num_extra_templates]) else: print("No response or unexpected format.") From 8185a90a8eb96984235fbee13bcfa5ad949ddef9 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Thu, 15 Aug 2024 15:15:37 +0530 Subject: [PATCH 14/27] chore: remove quotes in generated template and self check the num_extra_templates. --- langtest/augmentation/base.py | 111 ++++++++++++++++++++++------------ langtest/errors.py | 1 + 2 files changed, 75 insertions(+), 37 deletions(-) diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index c8488ab95..a1d01dfa6 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -337,49 +337,25 @@ def __init__( self.__task = task if generate_templates: - if try_import_lib("openai"): - import openai - from pydantic import BaseModel - - client = openai.OpenAI() - - class Templates(BaseModel): - templates: List[str] - + try: given_template = self.__templates[:] for template in given_template: - prompt = f"""Based on the template provided, create {num_extra_templates} new and unique templates that are variations on this theme. Present these as a Python list, with each template as a quoted string. The list should contain only the templates without any additional text or explanation. - - Template: - "{template}" - - """ - - response = client.beta.chat.completions.parse( - model="gpt-4o-mini", - messages=[ - { - "role": "system", - "content": f"Action: Generate templates upto {num_extra_templates}", - }, - {"role": "user", "content": prompt}, - ], - max_tokens=500, - temperature=0, - response_format=Templates, + generated_templates: List[str] = self.__generate_templates( + template, num_extra_templates ) - generated_response = response.choices[0].message.parsed - # Process the generated response - if generated_response: - + while len(generated_templates) < num_extra_templates: + temp_templates = self.__generate_templates( + template, num_extra_templates + ) + generated_templates.extend(temp_templates) + + if generated_templates: # Extend the existing templates list - self.__templates.extend(generated_response.templates[:num_extra_templates]) - else: - print("No response or unexpected format.") - else: - raise RuntimeError(Errors.E084) + self.__templates.extend(generated_templates[:num_extra_templates]) + except Exception as e: + raise Errors.E095(e) if show_templates: [print(template) for template in self.__templates] @@ -619,3 +595,64 @@ def add_spaces_around_punctuation(text: str): text = re.sub(r"\s+", " ", text).strip() return text + + def __generate_templates(self, template, num_extra_templates) -> List[str]: + if try_import_lib("openai"): + import openai + from pydantic import BaseModel, validator + + client = openai.OpenAI() + + class Templates(BaseModel): + templates: List[str] + + def __post_init__(self): + self.templates = [i.strip('"') for i in self.templates] + + @validator("templates", each_item=True) + def check_templates(cls, v: str): + if not v: + raise ValueError("No templates generated.") + return v.strip('"') + + def remove_invalid_templates(self, original_template): + # extract variable names using regex + regexs = r"{([^{}]*)}" + original_vars = re.findall(regexs, original_template) + original_vars = set([var.strip() for var in original_vars]) + + # remove invalid templates + valid_templates = [] + for template in self.templates: + template_vars: List[str] = re.findall(regexs, template) + template_vars = set([var.strip() for var in template_vars]) + if template_vars == original_vars: + valid_templates.append(template) + self.templates = valid_templates + + prompt = ( + f"Based on the provided template, create {num_extra_templates} new and unique templates that are " + "variations on this theme. Present these as a list, with each template as a quoted string. The list should " + "contain only the templates, without any additional text or explanation. Ensure that the structure of " + "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n" + "Template:\n" + f"{template}\n" + ) + response = client.beta.chat.completions.parse( + model="gpt-4o-mini", + messages=[ + { + "role": "system", + "content": f"Action: Generate up to {num_extra_templates} templates and ensure that the structure of the variables within the templates remains unchanged and don't add any extra variables.", + }, + {"role": "user", "content": prompt}, + ], + max_tokens=500, + temperature=0, + response_format=Templates, + ) + + generated_response = response.choices[0].message.parsed + generated_response.remove_invalid_templates(template) + + return generated_response.templates[:num_extra_templates] diff --git a/langtest/errors.py b/langtest/errors.py index 6e7359858..4dfc38ce6 100644 --- a/langtest/errors.py +++ b/langtest/errors.py @@ -274,6 +274,7 @@ class Errors(metaclass=ErrorsWithCodes): E093 = ("Category cannot be None. Please provide a valid category.") E094 = ("Unsupported category: '{category}'. Supported categories: {supported_category}") E095 = ("Failed to make API request: {e}") + E096 = ("Failed to generate the templates in Augmentation: {e}") class ColumnNameError(Exception): From 6734f706a6a68db10856d0e4a49e0fb48d6dca0a Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Fri, 16 Aug 2024 11:39:56 +0530 Subject: [PATCH 15/27] chore: Fix error message in Augmentation when generating templates --- langtest/augmentation/base.py | 4 ++-- langtest/errors.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index a1d01dfa6..116f30be6 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -355,7 +355,7 @@ def __init__( self.__templates.extend(generated_templates[:num_extra_templates]) except Exception as e: - raise Errors.E095(e) + raise Errors.E095(msg=e) if show_templates: [print(template) for template in self.__templates] @@ -609,7 +609,7 @@ class Templates(BaseModel): def __post_init__(self): self.templates = [i.strip('"') for i in self.templates] - @validator("templates", each_item=True) + @validator("templates", each_item=True, allow_reuse=True) def check_templates(cls, v: str): if not v: raise ValueError("No templates generated.") diff --git a/langtest/errors.py b/langtest/errors.py index 4dfc38ce6..d3d7d1bba 100644 --- a/langtest/errors.py +++ b/langtest/errors.py @@ -274,7 +274,7 @@ class Errors(metaclass=ErrorsWithCodes): E093 = ("Category cannot be None. Please provide a valid category.") E094 = ("Unsupported category: '{category}'. Supported categories: {supported_category}") E095 = ("Failed to make API request: {e}") - E096 = ("Failed to generate the templates in Augmentation: {e}") + E096 = ("Failed to generate the templates in Augmentation: {msg}") class ColumnNameError(Exception): From 55d17e1d9bf319314812193c2d541104d35f9e0f Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Fri, 16 Aug 2024 14:30:50 +0530 Subject: [PATCH 16/27] chore: Refactor DataAugmenter to improve template generation and proportion handling --- langtest/augmentation/augmenter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py index 2cd118aab..7af5a30d4 100644 --- a/langtest/augmentation/augmenter.py +++ b/langtest/augmentation/augmenter.py @@ -26,7 +26,7 @@ def __init__(self, task: Union[str, TaskManager], config: Union[str, dict]) -> N if isinstance(config, str): self.__config = self.load_config(config) - self.__tests: dict = self.__config.get("tests", []) + self.__tests: Dict[str, Dict[str, dict]] = self.__config.get("tests", []) if isinstance(task, str): if task in ["ner", "text-classification", "question-answering"]: task = TaskManager(task) @@ -276,6 +276,9 @@ def __initialize_config_df(self) -> pd.DataFrame: ) df = pd.concat([df, pd.DataFrame(temp_data)], ignore_index=True) + # Convert 'proportion' column to float + df["proportion"] = pd.to_numeric(df["proportion"], errors="coerce") + # normalize the proportion and round it to 2 decimal places df["normalized_proportion"] = df["proportion"] / df["proportion"].sum() df["normalized_proportion"] = df["normalized_proportion"].apply( From b7f68c1e161f0da644f0f0b4343650490a82def5 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Fri, 16 Aug 2024 14:33:06 +0530 Subject: [PATCH 17/27] Refactor DataAugmenter to improve proportion handling --- langtest/augmentation/augmenter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py index 7af5a30d4..f587adc27 100644 --- a/langtest/augmentation/augmenter.py +++ b/langtest/augmentation/augmenter.py @@ -66,6 +66,8 @@ def augment(self, data: Union[str, Iterable]) -> str: self.__datafactory = self.__datafactory(file_path=data, task=self.__task) data = self.__datafactory.load() + elif isinstance(self.__datafactory, DataFactory): + data = self.__datafactory.load() # generate the augmented data test_cases = self.__testfactory.transform(self.__task, data, self.__tests) From 24be4be5db0182b9eab2ceca47efefa51a14e414 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 26 Aug 2024 20:30:14 +0530 Subject: [PATCH 18/27] Refactor TemplaticAugment to support multiple AI providers for template generation --- langtest/augmentation/base.py | 29 +++++++++++++++++++++++------ langtest/augmentation/types.py | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 langtest/augmentation/types.py diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index 116f30be6..167529bb2 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -10,6 +10,7 @@ import pandas as pd import yaml +from langtest.augmentation.types import AzureOpenAIConfig, OpenAIConfig from langtest.datahandler.datasource import DataFactory from langtest.transform import TestFactory from langtest.transform.utils import create_terminology @@ -324,6 +325,7 @@ def __init__( generate_templates=False, show_templates=False, num_extra_templates=10, + model_config: Union[OpenAIConfig, AzureOpenAIConfig] = None, ) -> None: """This constructor for the TemplaticAugment class. @@ -341,12 +343,14 @@ def __init__( given_template = self.__templates[:] for template in given_template: generated_templates: List[str] = self.__generate_templates( - template, num_extra_templates + template, num_extra_templates, model_config ) while len(generated_templates) < num_extra_templates: temp_templates = self.__generate_templates( - template, num_extra_templates + template, + num_extra_templates, + model_config, ) generated_templates.extend(temp_templates) @@ -354,8 +358,8 @@ def __init__( # Extend the existing templates list self.__templates.extend(generated_templates[:num_extra_templates]) - except Exception as e: - raise Errors.E095(msg=e) + except Exception as e_msg: + raise Errors.E095(e=e_msg) if show_templates: [print(template) for template in self.__templates] @@ -596,12 +600,25 @@ def add_spaces_around_punctuation(text: str): return text - def __generate_templates(self, template, num_extra_templates) -> List[str]: + def __generate_templates( + self, + template: str, + num_extra_templates: int, + model_config: Union[OpenAIConfig, AzureOpenAIConfig] = None, + ) -> List[str]: if try_import_lib("openai"): import openai from pydantic import BaseModel, validator - client = openai.OpenAI() + if model_config and model_config.get("provider") == "openai": + client = openai.OpenAI() + elif model_config and model_config.get("provider") == "azure": + params = model_config + del params["provider"] + + client = openai.AzureOpenAI(**params) + else: + client = openai.OpenAI() class Templates(BaseModel): templates: List[str] diff --git a/langtest/augmentation/types.py b/langtest/augmentation/types.py new file mode 100644 index 000000000..44d6386ad --- /dev/null +++ b/langtest/augmentation/types.py @@ -0,0 +1,23 @@ +from typing import TypedDict, Union +import os + + +class OpenAIConfig(TypedDict): + api_key: str = os.environ.get("OPENAI_API_KEY") + base_url: Union[str, None] = None + organization: Union[str, None] = (None,) + project: Union[str, None] = (None,) + provider: str = "openai" + + +class AzureOpenAIConfig(TypedDict): + from openai.lib.azure import AzureADTokenProvider + + azure_endpoint: str + api_version: str + api_key: str + provider: str + azure_deployment: Union[str, None] = None + azure_ad_token: Union[str, None] = (None,) + azure_ad_token_provider: Union[AzureADTokenProvider, None] = (None,) + organization: Union[str, None] = (None,) From 4d866f282a387de5e77e5f7572d27bacf39ea7da Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 27 Aug 2024 21:47:59 +0530 Subject: [PATCH 19/27] Integrated Azure OpenAI and OpenAI services for automated template generation. --- langtest/augmentation/base.py | 77 ++++------------- langtest/augmentation/types.py | 23 ----- langtest/augmentation/utils.py | 148 +++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 84 deletions(-) delete mode 100644 langtest/augmentation/types.py create mode 100644 langtest/augmentation/utils.py diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index 167529bb2..c81589b94 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -10,7 +10,7 @@ import pandas as pd import yaml -from langtest.augmentation.types import AzureOpenAIConfig, OpenAIConfig +from langtest.augmentation.utils import AzureOpenAIConfig, OpenAIConfig from langtest.datahandler.datasource import DataFactory from langtest.transform import TestFactory from langtest.transform.utils import create_terminology @@ -607,69 +607,24 @@ def __generate_templates( model_config: Union[OpenAIConfig, AzureOpenAIConfig] = None, ) -> List[str]: if try_import_lib("openai"): - import openai - from pydantic import BaseModel, validator + from langtest.augmentation.utils import ( + generate_templates_azoi, + generate_templates_openai, + ) if model_config and model_config.get("provider") == "openai": - client = openai.OpenAI() - elif model_config and model_config.get("provider") == "azure": params = model_config - del params["provider"] + if "provider" in params: + del params["provider"] - client = openai.AzureOpenAI(**params) - else: - client = openai.OpenAI() - - class Templates(BaseModel): - templates: List[str] - - def __post_init__(self): - self.templates = [i.strip('"') for i in self.templates] - - @validator("templates", each_item=True, allow_reuse=True) - def check_templates(cls, v: str): - if not v: - raise ValueError("No templates generated.") - return v.strip('"') - - def remove_invalid_templates(self, original_template): - # extract variable names using regex - regexs = r"{([^{}]*)}" - original_vars = re.findall(regexs, original_template) - original_vars = set([var.strip() for var in original_vars]) - - # remove invalid templates - valid_templates = [] - for template in self.templates: - template_vars: List[str] = re.findall(regexs, template) - template_vars = set([var.strip() for var in template_vars]) - if template_vars == original_vars: - valid_templates.append(template) - self.templates = valid_templates - - prompt = ( - f"Based on the provided template, create {num_extra_templates} new and unique templates that are " - "variations on this theme. Present these as a list, with each template as a quoted string. The list should " - "contain only the templates, without any additional text or explanation. Ensure that the structure of " - "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n" - "Template:\n" - f"{template}\n" - ) - response = client.beta.chat.completions.parse( - model="gpt-4o-mini", - messages=[ - { - "role": "system", - "content": f"Action: Generate up to {num_extra_templates} templates and ensure that the structure of the variables within the templates remains unchanged and don't add any extra variables.", - }, - {"role": "user", "content": prompt}, - ], - max_tokens=500, - temperature=0, - response_format=Templates, - ) + return generate_templates_openai(template, num_extra_templates, params) - generated_response = response.choices[0].message.parsed - generated_response.remove_invalid_templates(template) + elif model_config and model_config.get("provider") == "azure": + params = model_config + if "provider" in params: + del params["provider"] - return generated_response.templates[:num_extra_templates] + return generate_templates_azoi(template, num_extra_templates, params) + + else: + return generate_templates_openai(template, num_extra_templates) diff --git a/langtest/augmentation/types.py b/langtest/augmentation/types.py deleted file mode 100644 index 44d6386ad..000000000 --- a/langtest/augmentation/types.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import TypedDict, Union -import os - - -class OpenAIConfig(TypedDict): - api_key: str = os.environ.get("OPENAI_API_KEY") - base_url: Union[str, None] = None - organization: Union[str, None] = (None,) - project: Union[str, None] = (None,) - provider: str = "openai" - - -class AzureOpenAIConfig(TypedDict): - from openai.lib.azure import AzureADTokenProvider - - azure_endpoint: str - api_version: str - api_key: str - provider: str - azure_deployment: Union[str, None] = None - azure_ad_token: Union[str, None] = (None,) - azure_ad_token_provider: Union[AzureADTokenProvider, None] = (None,) - organization: Union[str, None] = (None,) diff --git a/langtest/augmentation/utils.py b/langtest/augmentation/utils.py new file mode 100644 index 000000000..b4275f1ac --- /dev/null +++ b/langtest/augmentation/utils.py @@ -0,0 +1,148 @@ +import re +from typing import List, TypedDict, Union +import os + +from pydantic import BaseModel, validator + + +class OpenAIConfig(TypedDict): + api_key: str = os.environ.get("OPENAI_API_KEY") + base_url: Union[str, None] = None + organization: Union[str, None] = (None,) + project: Union[str, None] = (None,) + provider: str = "openai" + + +class AzureOpenAIConfig(TypedDict): + from openai.lib.azure import AzureADTokenProvider + + azure_endpoint: str + api_version: str + api_key: str + provider: str + azure_deployment: Union[str, None] = None + azure_ad_token: Union[str, None] = (None,) + azure_ad_token_provider: Union[AzureADTokenProvider, None] = (None,) + organization: Union[str, None] = (None,) + + +class Templates(BaseModel): + templates: List[str] + + def __post_init__(self): + self.templates = [i.strip('"') for i in self.templates] + + @validator("templates", each_item=True, allow_reuse=True) + def check_templates(cls, v: str): + if not v: + raise ValueError("No templates generated.") + return v.strip('"') + + def remove_invalid_templates(self, original_template): + # extract variable names using regex + regexs = r"{([^{}]*)}" + original_vars = re.findall(regexs, original_template) + original_vars = set([var.strip() for var in original_vars]) + + # remove invalid templates + valid_templates = [] + for template in self.templates: + template_vars: List[str] = re.findall(regexs, template) + template_vars = set([var.strip() for var in template_vars]) + if template_vars == original_vars: + valid_templates.append(template) + self.templates = valid_templates + + +def generate_templates_azoi( + template: str, num_extra_templates: int, model_config: AzureOpenAIConfig +): + """Generate new templates based on the provided template using Azure OpenAI API.""" + import openai + + client = openai.AzureOpenAI(**model_config) + + prompt = ( + "Based on the provided template, create {num_extra_templates} new and unique templates that are " + "variations on this theme. Present these as a list, with each template as a quoted string. The list should " + "contain only the templates, without any additional text or explanation. Ensure that the structure of " + "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n" + "Template:\n" + "{template}\n" + ) + + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + { + "role": "system", + "content": f"Generate new templates based on the provided template.\n\n Output Schema: {Templates.schema()}\n", + }, + { + "role": "user", + "content": prompt.format( + template="The {ORG} company is located in {LOC}", + num_extra_templates=2, + ), + }, + { + "role": "assistant", + "content": '["The {ORG} corporation is based out of {LOC}",\n "The {ORG} organization operates in {LOC}"]', + }, + { + "role": "user", + "content": prompt.format( + template=template, num_extra_templates=num_extra_templates + ), + }, + ], + temperature=0, + ) + + import json + + try: + clean_response = response.choices[0].message.content.replace("'", '"') + gen_templates = Templates(templates=json.loads(clean_response)) + gen_templates.remove_invalid_templates(template) + + return gen_templates.templates[:num_extra_templates] + + except json.JSONDecodeError as e: + raise ValueError(f"Error decoding response: {e}") + + +def generate_templates_openai( + template: str, num_extra_templates: int, model_config: OpenAIConfig = OpenAIConfig() +): + """Generate new templates based on the provided template using OpenAI API.""" + import openai + + client = openai.OpenAI(**model_config) + + prompt = ( + f"Based on the provided template, create {num_extra_templates} new and unique templates that are " + "variations on this theme. Present these as a list, with each template as a quoted string. The list should " + "contain only the templates, without any additional text or explanation. Ensure that the structure of " + "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n" + "Template:\n" + f"{template}\n" + ) + response = client.beta.chat.completions.parse( + model="gpt-4o-mini", + messages=[ + { + "role": "system", + "content": f"Action: Generate up to {num_extra_templates} templates and ensure that the structure of the variables within the templates remains unchanged and don't add any extra variables.", + }, + {"role": "user", "content": prompt}, + ], + max_tokens=500, + temperature=0, + response_format=Templates, + ) + + generated_response = response.choices[0].message.parsed + generated_response.remove_invalid_templates(template) + + return generated_response.templates[:num_extra_templates] From d04d5006a3fae36f6af76e4628def2536bbac500 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 27 Aug 2024 21:49:47 +0530 Subject: [PATCH 20/27] added comment for "azoi means Azue OpenAI" --- langtest/augmentation/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index c81589b94..3fd368dc4 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -608,7 +608,7 @@ def __generate_templates( ) -> List[str]: if try_import_lib("openai"): from langtest.augmentation.utils import ( - generate_templates_azoi, + generate_templates_azoi, # azoi means Azure OpenAI generate_templates_openai, ) From 29d136e6d7bd58645f9ddd5a4f7efefd105f300d Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 28 Aug 2024 18:27:14 +0530 Subject: [PATCH 21/27] updated the model_config handling. --- langtest/augmentation/base.py | 10 ++-------- langtest/augmentation/utils.py | 25 +++++++++++++++++++++---- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index 3fd368dc4..ca6718133 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -612,18 +612,12 @@ def __generate_templates( generate_templates_openai, ) - if model_config and model_config.get("provider") == "openai": - params = model_config - if "provider" in params: - del params["provider"] + params = model_config.copy() if model_config else {} + if model_config and model_config.get("provider") == "openai": return generate_templates_openai(template, num_extra_templates, params) elif model_config and model_config.get("provider") == "azure": - params = model_config - if "provider" in params: - del params["provider"] - return generate_templates_azoi(template, num_extra_templates, params) else: diff --git a/langtest/augmentation/utils.py b/langtest/augmentation/utils.py index b4275f1ac..2f6400734 100644 --- a/langtest/augmentation/utils.py +++ b/langtest/augmentation/utils.py @@ -3,6 +3,7 @@ import os from pydantic import BaseModel, validator +from langtest.logger import logger class OpenAIConfig(TypedDict): @@ -31,6 +32,7 @@ class Templates(BaseModel): def __post_init__(self): self.templates = [i.strip('"') for i in self.templates] + logger.info(f"Generated templates: {self.templates}") @validator("templates", each_item=True, allow_reuse=True) def check_templates(cls, v: str): @@ -51,7 +53,14 @@ def remove_invalid_templates(self, original_template): template_vars = set([var.strip() for var in template_vars]) if template_vars == original_vars: valid_templates.append(template) + logger.info(f"Valid template: {template}") + else: + logger.warning( + f"Invalid Variables in template: {template} - {template_vars}" + ) + self.templates = valid_templates + logger.info(f"Valid templates: {self.templates}") def generate_templates_azoi( @@ -60,6 +69,9 @@ def generate_templates_azoi( """Generate new templates based on the provided template using Azure OpenAI API.""" import openai + if "provider" in model_config: + del model_config["provider"] + client = openai.AzureOpenAI(**model_config) prompt = ( @@ -76,7 +88,7 @@ def generate_templates_azoi( messages=[ { "role": "system", - "content": f"Generate new templates based on the provided template.\n\n Output Schema: {Templates.schema()}\n", + "content": f"Generate up to {num_extra_templates} templates based on the provided template.\n\n JSON Output Schema: {Templates.schema()}\n", }, { "role": "user", @@ -96,7 +108,8 @@ def generate_templates_azoi( ), }, ], - temperature=0, + temperature=0.1, + max_tokens=1000, ) import json @@ -109,6 +122,7 @@ def generate_templates_azoi( return gen_templates.templates[:num_extra_templates] except json.JSONDecodeError as e: + logger.error(f"Error decoding response: {e}") raise ValueError(f"Error decoding response: {e}") @@ -118,6 +132,9 @@ def generate_templates_openai( """Generate new templates based on the provided template using OpenAI API.""" import openai + if "provider" in model_config: + del model_config["provider"] + client = openai.OpenAI(**model_config) prompt = ( @@ -137,8 +154,8 @@ def generate_templates_openai( }, {"role": "user", "content": prompt}, ], - max_tokens=500, - temperature=0, + max_tokens=100, + temperature=0.1, response_format=Templates, ) From cccb562d19f32673e6ec2c693dc0ed556375e10f Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Thu, 29 Aug 2024 20:51:01 +0530 Subject: [PATCH 22/27] changed: logging to logger from langtest --- langtest/datahandler/datasource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 217a06f17..c6ae54638 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -1,6 +1,5 @@ import csv import importlib -import logging import os import random import re @@ -11,6 +10,7 @@ import jsonlines import pandas as pd from langtest.tasks.task import TaskManager +from langtest.logger import logger as logging from .format import Formatter from langtest.utils.custom_types import ( From 85d7e7030010f46dffc39cff9416d5212423e545 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 2 Sep 2024 18:07:29 +0530 Subject: [PATCH 23/27] added: doc lines --- langtest/augmentation/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/langtest/augmentation/utils.py b/langtest/augmentation/utils.py index 2f6400734..a13a8d2e2 100644 --- a/langtest/augmentation/utils.py +++ b/langtest/augmentation/utils.py @@ -7,6 +7,8 @@ class OpenAIConfig(TypedDict): + """OpenAI Configuration for API Key and Provider.""" + api_key: str = os.environ.get("OPENAI_API_KEY") base_url: Union[str, None] = None organization: Union[str, None] = (None,) @@ -15,6 +17,8 @@ class OpenAIConfig(TypedDict): class AzureOpenAIConfig(TypedDict): + """Azure OpenAI Configuration for API Key and Provider.""" + from openai.lib.azure import AzureADTokenProvider azure_endpoint: str @@ -28,19 +32,24 @@ class AzureOpenAIConfig(TypedDict): class Templates(BaseModel): + """Model to validate generated templates.""" + templates: List[str] def __post_init__(self): + """Post init method to remove quotes from templates.""" self.templates = [i.strip('"') for i in self.templates] logger.info(f"Generated templates: {self.templates}") @validator("templates", each_item=True, allow_reuse=True) def check_templates(cls, v: str): + """Validator to check if templates are generated.""" if not v: raise ValueError("No templates generated.") return v.strip('"') def remove_invalid_templates(self, original_template): + """Remove invalid templates based on the original template.""" # extract variable names using regex regexs = r"{([^{}]*)}" original_vars = re.findall(regexs, original_template) From df7776e3408022cb3f01732172d55ad5650bd38d Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 3 Sep 2024 11:37:20 +0530 Subject: [PATCH 24/27] implemented: text-classification support for multi-label classification. --- langtest/datahandler/datasource.py | 2 ++ langtest/modelhandler/jsl_modelhandler.py | 38 +++++++++++++++++++---- langtest/tasks/task.py | 18 +++++++++-- langtest/utils/custom_types/output.py | 22 +++++++++---- 4 files changed, 66 insertions(+), 14 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 868a4152a..90e3abcb8 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -957,6 +957,8 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]: import ast i["transformations"] = ast.literal_eval(temp) + else: + i["transformations"] = None sample = self.task.get_sample_class(**i) samples.append(sample) diff --git a/langtest/modelhandler/jsl_modelhandler.py b/langtest/modelhandler/jsl_modelhandler.py index f13b18d32..9bb67fcc2 100644 --- a/langtest/modelhandler/jsl_modelhandler.py +++ b/langtest/modelhandler/jsl_modelhandler.py @@ -42,6 +42,7 @@ XlmRoBertaForSequenceClassification, XlnetForSequenceClassification, MarianTransformer, + MultiClassifierDLModel, ) from sparknlp.base import LightPipeline from sparknlp.pretrained import PretrainedPipeline @@ -63,6 +64,7 @@ SUPPORTED_SPARKNLP_CLASSIFERS.extend( [ + MultiClassifierDLModel, ClassifierDLModel, SentimentDLModel, AlbertForSequenceClassification, @@ -409,6 +411,7 @@ def __init__( super().__init__(model) _classifier = None + self.multi_label_classifier = False for annotator in self.model.stages: if self.is_classifier(annotator): _classifier = annotator @@ -417,6 +420,10 @@ def __init__( if _classifier is None: raise ValueError(Errors.E040(var="classifier")) + if isinstance(_classifier, MultiClassifierDLModel): + self.multi_label_classifier = True + self.threshold = _classifier.getThreshold() + self.output_col = _classifier.getOutputCol() self.classes = _classifier.getClasses() self.model = LightPipeline(self.model) @@ -442,13 +449,32 @@ def predict( Returns: SequenceClassificationOutput: Classification output from SparkNLP LightPipeline. """ - prediction_metadata = self.model.fullAnnotate(text)[0][self.output_col][ - 0 - ].metadata - prediction = [{"label": x, "score": y} for x, y in prediction_metadata.items()] + prediction_metadata = self.model.fullAnnotate(text)[0][self.output_col] + + if self.multi_label_classifier: + multi_label = True + if len(prediction_metadata) > 0: + prediction_metadata = prediction_metadata[0].metadata + + prediction = [ + {"label": x, "score": y} for x, y in prediction_metadata.items() + ] + # filter based on the threshold value with score greater than threshold + prediction = [x for x in prediction if float(x["score"]) > self.threshold] + + return SequenceClassificationOutput( + text=text, + predictions=prediction, + multi_label=multi_label, + ) + else: + return SequenceClassificationOutput( + text=text, predictions=[], multi_label=multi_label + ) - if not return_all_scores: - prediction = [max(prediction, key=lambda x: x["score"])] + else: + if not return_all_scores: + prediction = [max(prediction, key=lambda x: x["score"])] return SequenceClassificationOutput(text=text, predictions=prediction) diff --git a/langtest/tasks/task.py b/langtest/tasks/task.py index 035725bb8..93af99114 100644 --- a/langtest/tasks/task.py +++ b/langtest/tasks/task.py @@ -1,3 +1,4 @@ +import ast import re from abc import ABC, abstractmethod from typing import Union @@ -267,17 +268,28 @@ def create_sample( row_data: dict, feature_column="text", target_column: Union[samples.SequenceLabel, str] = "label", + multi_label: bool = False, + *args, + **kwargs, ) -> samples.SequenceClassificationSample: """Create a sample.""" keys = list(row_data.keys()) # auto-detect the default column names from the row_data column_mapper = cls.column_mapping(keys, [feature_column, target_column]) + # is multi-label classification + # if "multi_label" in kwargs: + # multi_label = kwargs.get("multi_label", False) + # kwargs.pop("multi_label") + labels = row_data.get(column_mapper[target_column]) if isinstance(labels, samples.SequenceLabel): labels = [labels] - elif isinstance(labels, list): + elif isinstance(labels, list) or isinstance(labels, str): + labels = ast.literal_eval(labels) + if not isinstance(labels, list): + labels = [labels] labels = [ samples.SequenceLabel(label=label, score=1.0) if isinstance(label, str) @@ -289,7 +301,9 @@ def create_sample( return samples.SequenceClassificationSample( original=row_data[column_mapper[feature_column]], - expected_results=samples.SequenceClassificationOutput(predictions=labels), + expected_results=samples.SequenceClassificationOutput( + predictions=labels, multi_label=multi_label + ), ) diff --git a/langtest/utils/custom_types/output.py b/langtest/utils/custom_types/output.py index bcd1e4cf0..6961e4b0f 100644 --- a/langtest/utils/custom_types/output.py +++ b/langtest/utils/custom_types/output.py @@ -8,6 +8,7 @@ class SequenceClassificationOutput(BaseModel): """Output model for text classification tasks.""" predictions: List[SequenceLabel] + multi_label: bool = False def to_str_list(self) -> str: """Convert the output into list of strings. @@ -15,18 +16,27 @@ def to_str_list(self) -> str: Returns: List[str]: predictions in form of a list of strings. """ - return ",".join([x.label for x in self.predictions]) + return ", ".join([x.label for x in self.predictions]) - def __str__(self): + def __str__(self) -> str: """String representation""" labels = {elt.label: elt.score for elt in self.predictions} return f"SequenceClassificationOutput(predictions={labels})" - def __eq__(self, other): + def __eq__(self, other: "SequenceClassificationOutput") -> bool: """Equality comparison method.""" - top_class = max(self.predictions, key=lambda x: x.score).label - other_top_class = max(other.predictions, key=lambda x: x.score).label - return top_class == other_top_class + + if self.multi_label: + # get all labels + self_labels = {elt.label for elt in self.predictions} + other_labels = {elt.label for elt in other.predictions} + return set(self_labels) == set(other_labels) + elif len(self.predictions) == 0 and len(other.predictions) == 0: + return True + else: + top_class = max(self.predictions, key=lambda x: x.score).label + other_top_class = max(other.predictions, key=lambda x: x.score).label + return top_class == other_top_class class MinScoreOutput(BaseModel): From 2da96b76e9d52c6e75c6c5ed2b2fe41cd9735774 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 3 Sep 2024 12:17:33 +0530 Subject: [PATCH 25/27] Refactor SequenceClassificationOutputFormatter to handle multi-label predictions --- langtest/datahandler/format.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/langtest/datahandler/format.py b/langtest/datahandler/format.py index 0755108f0..621fe34e0 100644 --- a/langtest/datahandler/format.py +++ b/langtest/datahandler/format.py @@ -108,9 +108,18 @@ def to_csv(sample: SequenceClassificationSample) -> Tuple[str, str]: Tuple[str, str]: Row formatted as a list of strings. """ - if sample.test_case: - return [sample.test_case, sample.expected_results.predictions[0].label] - return [sample.original, sample.expected_results.predictions[0].label] + predictions = sample.expected_results.predictions + multi_label = sample.expected_results.multi_label + + if multi_label: + return [ + sample.test_case or sample.original, + [elt.label for elt in predictions] if predictions else [], + ] + else: + if sample.test_case: + return [sample.test_case, sample.expected_results.predictions[0].label] + return [sample.original, sample.expected_results.predictions[0].label] class NEROutputFormatter(BaseFormatter): From 16fee46de37e09d003b170aa85c902428ccd5902 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 3 Sep 2024 12:35:54 +0530 Subject: [PATCH 26/27] Refactor CSVDataset to remove unnecessary transformation field --- langtest/datahandler/datasource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 90e3abcb8..42648338d 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -958,7 +958,7 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]: i["transformations"] = ast.literal_eval(temp) else: - i["transformations"] = None + i.pop("transformations") sample = self.task.get_sample_class(**i) samples.append(sample) From 258a0f7ddeb9b05ca5a99c8a196febbc1dabcbd1 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 3 Sep 2024 14:19:05 +0530 Subject: [PATCH 27/27] fixed: Unbound Error and Key Error. --- langtest/datahandler/datasource.py | 2 +- langtest/modelhandler/jsl_modelhandler.py | 38 ++++++++++------------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 42648338d..1d89303ae 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -957,7 +957,7 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]: import ast i["transformations"] = ast.literal_eval(temp) - else: + elif "transformations" in i: i.pop("transformations") sample = self.task.get_sample_class(**i) samples.append(sample) diff --git a/langtest/modelhandler/jsl_modelhandler.py b/langtest/modelhandler/jsl_modelhandler.py index 9bb67fcc2..0b703d637 100644 --- a/langtest/modelhandler/jsl_modelhandler.py +++ b/langtest/modelhandler/jsl_modelhandler.py @@ -450,31 +450,25 @@ def predict( SequenceClassificationOutput: Classification output from SparkNLP LightPipeline. """ prediction_metadata = self.model.fullAnnotate(text)[0][self.output_col] + prediction = [] + + if len(prediction_metadata) > 0: + prediction_metadata = prediction_metadata[0].metadata + prediction = [ + {"label": x, "score": y} for x, y in prediction_metadata.items() + ] if self.multi_label_classifier: - multi_label = True - if len(prediction_metadata) > 0: - prediction_metadata = prediction_metadata[0].metadata - - prediction = [ - {"label": x, "score": y} for x, y in prediction_metadata.items() - ] - # filter based on the threshold value with score greater than threshold - prediction = [x for x in prediction if float(x["score"]) > self.threshold] - - return SequenceClassificationOutput( - text=text, - predictions=prediction, - multi_label=multi_label, - ) - else: - return SequenceClassificationOutput( - text=text, predictions=[], multi_label=multi_label - ) + prediction = [x for x in prediction if float(x["score"]) > self.threshold] - else: - if not return_all_scores: - prediction = [max(prediction, key=lambda x: x["score"])] + return SequenceClassificationOutput( + text=text, + predictions=prediction, + multi_label=self.multi_label_classifier, + ) + + if not return_all_scores: + prediction = [max(prediction, key=lambda x: x["score"])] return SequenceClassificationOutput(text=text, predictions=prediction)