From c2b13932e22345f95620400af6f2919f707ba024 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Thu, 25 Jul 2024 17:20:28 +0530
Subject: [PATCH 01/27] Refactor NERSample.is_pass() to handle cases where
 either aligned span has entity "O"

---
 langtest/utils/custom_types/sample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py
index 64011f826..ce2c8205d 100644
--- a/langtest/utils/custom_types/sample.py
+++ b/langtest/utils/custom_types/sample.py
@@ -295,7 +295,7 @@ def get_aligned_span_pairs(
     def is_pass(self) -> bool:
         """Checks if the sample passes based on the maximum score."""
         return all(
-            [a == b for (a, b) in self.get_aligned_span_pairs() if a and a.entity != "O"]
+            [a == b for (a, b) in self.get_aligned_span_pairs() if (a and a.entity != "O") or (b and b.entity != "O")]
         )
 
 

From ea48e1f325b402d8e3c446730d55cc65946a8564 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Thu, 25 Jul 2024 17:24:37 +0530
Subject: [PATCH 02/27] format issues

---
 langtest/utils/custom_types/sample.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py
index ce2c8205d..d67f2386d 100644
--- a/langtest/utils/custom_types/sample.py
+++ b/langtest/utils/custom_types/sample.py
@@ -295,7 +295,11 @@ def get_aligned_span_pairs(
     def is_pass(self) -> bool:
         """Checks if the sample passes based on the maximum score."""
         return all(
-            [a == b for (a, b) in self.get_aligned_span_pairs() if (a and a.entity != "O") or (b and b.entity != "O")]
+            [
+                a == b
+                for (a, b) in self.get_aligned_span_pairs()
+                if (a and a.entity != "O") or (b and b.entity != "O")
+            ]
         )
 
 

From 52b81f2aea32be8e13db11bd10ea847ec0e55887 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Mon, 5 Aug 2024 16:13:33 +0530
Subject: [PATCH 03/27] resolved: recovering the transformation object.

---
 langtest/datahandler/datasource.py     |  6 ++-
 langtest/langtest.py                   | 23 +++++----
 langtest/utils/custom_types/helpers.py | 68 +++++++++-----------------
 langtest/utils/custom_types/sample.py  |  9 ++++
 4 files changed, 47 insertions(+), 59 deletions(-)

diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py
index 51e343ea3..e37dc5043 100644
--- a/langtest/datahandler/datasource.py
+++ b/langtest/datahandler/datasource.py
@@ -467,7 +467,7 @@ def load_data(self) -> List[NERSample]:
             List[NERSample]: List of formatted sentences from the dataset.
         """
         data = []
-        with open(self._file_path) as f:
+        with open(self._file_path, encoding="utf-8") as f:
             content = f.read()
             docs_strings = re.findall(r"-DOCSTART- \S+ \S+ O", content.strip())
             docs = [
@@ -930,6 +930,10 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]:
             return samples
 
         for i in data.to_dict(orient="records"):
+            if self.task == "ner" and isinstance(i["transformations"], str):
+                import json
+
+                i["transformations"] = eval(i["transformations"])
             sample = self.task.get_sample_class(**i)
             samples.append(sample)
 
diff --git a/langtest/langtest.py b/langtest/langtest.py
index efaea4db1..028f37f60 100644
--- a/langtest/langtest.py
+++ b/langtest/langtest.py
@@ -23,7 +23,7 @@
 from .transform.utils import RepresentationOperation
 from langtest.utils.benchmark_utils import Leaderboard, Summary
 from langtest.utils.lib_manager import try_import_lib
-from langtest.utils.custom_types.helpers import TestResultManager, get_transformations
+from langtest.utils.custom_types.helpers import TestResultManager
 from langtest.utils.checkpoints import divide_into_batches, CheckpointManager
 from langtest.prompts import PromptManager
 from .errors import Warnings, Errors
@@ -826,7 +826,7 @@ def augment(
 
         return self
 
-    def testcases(self) -> pd.DataFrame:
+    def testcases(self, additional_cols=False) -> pd.DataFrame:
         """Testcases after .generate() is called
 
         Returns:
@@ -869,6 +869,8 @@ def testcases(self) -> pd.DataFrame:
             "expected_result",
         ]
 
+        if additional_cols:
+            column_order.extend(["transformations"])
         if isinstance(self._testcases, dict) and not self.is_multi_dataset:
             testcases_df = []
             for k, v in self._testcases.items():
@@ -1077,7 +1079,9 @@ def load(
             harness._generated_results = generated_results
         return harness
 
-    def edit_testcases(self, output_path: str, **kwargs):
+    def edit_testcases(
+        self, output_path: str = "./edit_testcases.csv", return_dataframe=False, **kwargs
+    ):
         """Testcases are exported to a csv file to be edited.
 
         The edited file can be imported back to the harness
@@ -1085,8 +1089,10 @@ def edit_testcases(self, output_path: str, **kwargs):
         Args:
             output_path (str): path to save the testcases to
         """
-        temp_df = self.testcases()
+        temp_df = self.testcases(additional_col=True)
         temp_df = temp_df[temp_df["category"].isin(["robustness", "bias"])]
+        if return_dataframe:
+            return temp_df
         temp_df.to_csv(output_path, index=False)
 
     def import_edited_testcases(self, input_path: str, **kwargs):
@@ -1138,10 +1144,6 @@ def import_edited_testcases(self, input_path: str, **kwargs):
             # merge the testcases with the imported ones to the temp_testcases
             for name, list_samples in imported_testcases.items():
                 # check the task and apply transformations
-                if self.task == "ner":
-                    list_samples = [
-                        get_transformations(sample) for sample in list_samples
-                    ]
                 if name not in temp_testcases:
                     temp_testcases[name] = list_samples
                 temp_testcases[name].extend(list_samples)
@@ -1168,10 +1170,7 @@ def import_edited_testcases(self, input_path: str, **kwargs):
             self._testcases = DataFactory(
                 {"data_source": input_path}, task=self.task, is_import=True
             ).load()
-            if self.task == "ner":
-                self._testcases = [
-                    get_transformations(sample) for sample in self._testcases
-                ]
+
             self._testcases.extend(temp_testcases)
 
         return self
diff --git a/langtest/utils/custom_types/helpers.py b/langtest/utils/custom_types/helpers.py
index e8ef77910..fa9e43f61 100644
--- a/langtest/utils/custom_types/helpers.py
+++ b/langtest/utils/custom_types/helpers.py
@@ -186,6 +186,10 @@ def __repr__(self):
         """"""
         return f"<Span(start={self.start}, end={self.end}, word='{self.word}')>"
 
+    def __add__(self, other: "Span") -> "Span":
+        """"""
+        return Span(start=self.start, end=other.end, word=f"{self.word} {other.word}")
+
 
 class Transformation(BaseModel):
     """
@@ -197,6 +201,24 @@ class Transformation(BaseModel):
     new_span: Span
     ignore: bool = False
 
+    def from_dict(self, data: dict):
+        """"""
+        self.original_span = Span(**data["original_span"])
+        self.new_span = Span(**data["new_span"])
+        self.ignore = data.get("ignore", False)
+
+    def to_dict(self):
+        """"""
+        import json
+
+        return json.dumps(
+            {
+                "original_span": self.original_span.dict(),
+                "new_span": self.new_span.dict(),
+                "ignore": self.ignore,
+            }
+        )
+
 
 class SimplePromptTemplate:
     """Simple prompt template for formatting messages with variables."""
@@ -753,49 +775,3 @@ def clear_instance(self):
 
     def clear_data(self):
         self._data = []
-
-
-def get_transformations(sample) -> List[Transformation]:
-    """Detects the changes between two texts and returns the transformations."""
-    from langtest.utils.custom_types.helpers import Span, Transformation
-
-    original_text = sample.original
-    new_text = sample.test_case
-    transformations = []
-    i, j = 0, 0
-    len_orig = len(original_text)
-    len_new = len(new_text)
-
-    while i < len_orig and j < len_new:
-        if original_text[i] != new_text[j]:
-            start_i = i
-            start_j = j
-
-            while (
-                i < len_orig
-                and j < len_new
-                and original_text[i] != " "
-                and new_text[j] != " "
-            ):
-                i += 1
-                j += 1
-
-            while i < len_orig and original_text[i] != " ":
-                i += 1
-            while j < len_new and new_text[j] != " ":
-                j += 1
-
-            original_word = original_text[start_i:i]
-            new_word = new_text[start_j:j]
-
-            original_span = Span(start=start_i, end=i, word=original_word)
-            new_span = Span(start=start_j, end=j, word=new_word)
-            transformations.append(
-                Transformation(original_span=original_span, new_span=new_span)
-            )
-        else:
-            i += 1
-            j += 1
-
-    sample.transformations = transformations
-    return sample
diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py
index d67f2386d..8477fb9bb 100644
--- a/langtest/utils/custom_types/sample.py
+++ b/langtest/utils/custom_types/sample.py
@@ -75,6 +75,15 @@ def to_dict(self) -> Dict[str, Any]:
                 }
             )
 
+        if self.transformations:
+            result.update(
+                {
+                    "transformations": [
+                        transformation.dict() for transformation in self.transformations
+                    ]
+                }
+            )
+
         return result
 
     @validator("transformations")

From 1c0112e8b699a20b5d27ffcfd2f0997634b864c6 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Mon, 5 Aug 2024 16:17:56 +0530
Subject: [PATCH 04/27] removed the unused imports

---
 langtest/datahandler/datasource.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py
index e37dc5043..b18d6f959 100644
--- a/langtest/datahandler/datasource.py
+++ b/langtest/datahandler/datasource.py
@@ -931,8 +931,7 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]:
 
         for i in data.to_dict(orient="records"):
             if self.task == "ner" and isinstance(i["transformations"], str):
-                import json
-
+                
                 i["transformations"] = eval(i["transformations"])
             sample = self.task.get_sample_class(**i)
             samples.append(sample)

From 3229bd21f64f19339c8def59d2dd425c1ba2de3b Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Mon, 5 Aug 2024 16:39:58 +0530
Subject: [PATCH 05/27] chore: Recover transformation object and apply to NER
 task test cases

---
 langtest/datahandler/datasource.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py
index b18d6f959..f3d4d2abb 100644
--- a/langtest/datahandler/datasource.py
+++ b/langtest/datahandler/datasource.py
@@ -931,7 +931,6 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]:
 
         for i in data.to_dict(orient="records"):
             if self.task == "ner" and isinstance(i["transformations"], str):
-                
                 i["transformations"] = eval(i["transformations"])
             sample = self.task.get_sample_class(**i)
             samples.append(sample)

From e42f2821b6913cf906b540195e208dcdb1e389db Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Mon, 5 Aug 2024 18:44:59 +0530
Subject: [PATCH 06/27] resolved: unknown args

---
 langtest/langtest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langtest/langtest.py b/langtest/langtest.py
index 028f37f60..d7a1f15cd 100644
--- a/langtest/langtest.py
+++ b/langtest/langtest.py
@@ -1089,7 +1089,7 @@ def edit_testcases(
         Args:
             output_path (str): path to save the testcases to
         """
-        temp_df = self.testcases(additional_col=True)
+        temp_df = self.testcases(additional_cols=True)
         temp_df = temp_df[temp_df["category"].isin(["robustness", "bias"])]
         if return_dataframe:
             return temp_df

From f195d1fec813c3d635dc3979b2c8b1f6c38449d1 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Mon, 5 Aug 2024 20:01:57 +0530
Subject: [PATCH 07/27] chore: Refactor CSVDataset to handle missing or invalid
 transformations

---
 langtest/datahandler/datasource.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py
index f3d4d2abb..5313fddc6 100644
--- a/langtest/datahandler/datasource.py
+++ b/langtest/datahandler/datasource.py
@@ -930,8 +930,15 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]:
             return samples
 
         for i in data.to_dict(orient="records"):
-            if self.task == "ner" and isinstance(i["transformations"], str):
-                i["transformations"] = eval(i["transformations"])
+            temp = i["transformations"]
+            if temp == "-" or len(temp) < 3:
+                temp = None
+                i.pop("transformations")
+
+            if self.task == "ner" and isinstance(temp, str):
+                import ast
+
+                i["transformations"] = ast.literal_eval(temp)
             sample = self.task.get_sample_class(**i)
             samples.append(sample)
 

From 82f8b871e81fffd807566251dbef156404e28aff Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Tue, 13 Aug 2024 18:35:28 +0530
Subject: [PATCH 08/27] fixed: consistency issues while generating templates in
 templatic augmentation.

---
 langtest/augmentation/base.py |  33 ++++++-----
 poetry.lock                   | 101 +++++++++++++++++++++++++++++-----
 pyproject.toml                |   2 +-
 3 files changed, 106 insertions(+), 30 deletions(-)

diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
index 5b1036c4b..8b640fc1c 100644
--- a/langtest/augmentation/base.py
+++ b/langtest/augmentation/base.py
@@ -338,6 +338,12 @@ def __init__(
         if generate_templates:
             if try_import_lib("openai"):
                 import openai
+                from pydantic import BaseModel, Field
+
+                client = openai.OpenAI()
+
+                class Templates(BaseModel):
+                    templates: List[str]
 
                 given_template = self.__templates[:]
                 for template in given_template:
@@ -346,30 +352,29 @@ def __init__(
                         Template:
                         "{template}"
 
-                        Expected Python List Output:
-                        ['Template 1', 'Template 2', 'Template 3', ...]  # Replace with actual generated templates
                         """
 
-                    response = openai.Completion.create(
-                        engine="gpt-3.5-turbo-instruct",
-                        prompt=prompt,
+                    response = client.beta.chat.completions.parse(
+                        model="gpt-4o-mini",
+                        messages=[{"role": "system", "content": "Action: Generate templates"}, {"role": "user", "content": prompt}],
                         max_tokens=500,
                         temperature=0,
+                        response_format=Templates,
                     )
 
-                    generated_response = response.choices[0].text.strip()
+                    generated_response = response.choices[0].message.parsed
                     # Process the generated response
                     if generated_response:
-                        # Assuming the response format is a Python-like list in a string
-                        templates_list = generated_response.strip("[]").split('",')
-                        templates_list = [
-                            template.strip().strip('"')
-                            for template in templates_list
-                            if template.strip()
-                        ]
+                        # # Assuming the response format is a Python-like list in a string
+                        # templates_list = generated_response.strip("[]").split('",')
+                        # templates_list = [
+                        #     template.strip().strip('"')
+                        #     for template in templates_list
+                        #     if template.strip()
+                        # ]
 
                         # Extend the existing templates list
-                        self.__templates.extend(templates_list)
+                        self.__templates.extend(generated_response.templates)
                     else:
                         print("No response or unexpected format.")
 
diff --git a/poetry.lock b/poetry.lock
index eece46db7..eb8658718 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -216,7 +216,7 @@ dev = ["black", "coverage", "isort", "pre-commit", "pyenchant", "pylint"]
 name = "anyio"
 version = "3.7.1"
 description = "High level compatibility layer for multiple asynchronous event loop implementations"
-optional = true
+optional = false
 python-versions = ">=3.7"
 files = [
     {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"},
@@ -923,7 +923,7 @@ files = [
 name = "distro"
 version = "1.9.0"
 description = "Distro - an OS platform information API"
-optional = true
+optional = false
 python-versions = ">=3.6"
 files = [
     {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
@@ -1483,7 +1483,7 @@ tornado = ["tornado (>=0.2)"]
 name = "h11"
 version = "0.14.0"
 description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
-optional = true
+optional = false
 python-versions = ">=3.7"
 files = [
     {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
@@ -1494,7 +1494,7 @@ files = [
 name = "httpcore"
 version = "1.0.4"
 description = "A minimal low-level HTTP client."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "httpcore-1.0.4-py3-none-any.whl", hash = "sha256:ac418c1db41bade2ad53ae2f3834a3a0f5ae76b56cf5aa497d2d033384fc7d73"},
@@ -1515,7 +1515,7 @@ trio = ["trio (>=0.22.0,<0.25.0)"]
 name = "httpx"
 version = "0.27.0"
 description = "The next generation HTTP client."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"},
@@ -1743,6 +1743,76 @@ MarkupSafe = ">=2.0"
 [package.extras]
 i18n = ["Babel (>=2.7)"]
 
+[[package]]
+name = "jiter"
+version = "0.5.0"
+description = "Fast iterable JSON parser."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "jiter-0.5.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b599f4e89b3def9a94091e6ee52e1d7ad7bc33e238ebb9c4c63f211d74822c3f"},
+    {file = "jiter-0.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a063f71c4b06225543dddadbe09d203dc0c95ba352d8b85f1221173480a71d5"},
+    {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acc0d5b8b3dd12e91dd184b87273f864b363dfabc90ef29a1092d269f18c7e28"},
+    {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c22541f0b672f4d741382a97c65609332a783501551445ab2df137ada01e019e"},
+    {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63314832e302cc10d8dfbda0333a384bf4bcfce80d65fe99b0f3c0da8945a91a"},
+    {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a25fbd8a5a58061e433d6fae6d5298777c0814a8bcefa1e5ecfff20c594bd749"},
+    {file = "jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:503b2c27d87dfff5ab717a8200fbbcf4714516c9d85558048b1fc14d2de7d8dc"},
+    {file = "jiter-0.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d1f3d27cce923713933a844872d213d244e09b53ec99b7a7fdf73d543529d6d"},
+    {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c95980207b3998f2c3b3098f357994d3fd7661121f30669ca7cb945f09510a87"},
+    {file = "jiter-0.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:afa66939d834b0ce063f57d9895e8036ffc41c4bd90e4a99631e5f261d9b518e"},
+    {file = "jiter-0.5.0-cp310-none-win32.whl", hash = "sha256:f16ca8f10e62f25fd81d5310e852df6649af17824146ca74647a018424ddeccf"},
+    {file = "jiter-0.5.0-cp310-none-win_amd64.whl", hash = "sha256:b2950e4798e82dd9176935ef6a55cf6a448b5c71515a556da3f6b811a7844f1e"},
+    {file = "jiter-0.5.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d4c8e1ed0ef31ad29cae5ea16b9e41529eb50a7fba70600008e9f8de6376d553"},
+    {file = "jiter-0.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c6f16e21276074a12d8421692515b3fd6d2ea9c94fd0734c39a12960a20e85f3"},
+    {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5280e68e7740c8c128d3ae5ab63335ce6d1fb6603d3b809637b11713487af9e6"},
+    {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:583c57fc30cc1fec360e66323aadd7fc3edeec01289bfafc35d3b9dcb29495e4"},
+    {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26351cc14507bdf466b5f99aba3df3143a59da75799bf64a53a3ad3155ecded9"},
+    {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4829df14d656b3fb87e50ae8b48253a8851c707da9f30d45aacab2aa2ba2d614"},
+    {file = "jiter-0.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a42a4bdcf7307b86cb863b2fb9bb55029b422d8f86276a50487982d99eed7c6e"},
+    {file = "jiter-0.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04d461ad0aebf696f8da13c99bc1b3e06f66ecf6cfd56254cc402f6385231c06"},
+    {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6375923c5f19888c9226582a124b77b622f8fd0018b843c45eeb19d9701c403"},
+    {file = "jiter-0.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cec323a853c24fd0472517113768c92ae0be8f8c384ef4441d3632da8baa646"},
+    {file = "jiter-0.5.0-cp311-none-win32.whl", hash = "sha256:aa1db0967130b5cab63dfe4d6ff547c88b2a394c3410db64744d491df7f069bb"},
+    {file = "jiter-0.5.0-cp311-none-win_amd64.whl", hash = "sha256:aa9d2b85b2ed7dc7697597dcfaac66e63c1b3028652f751c81c65a9f220899ae"},
+    {file = "jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9f664e7351604f91dcdd557603c57fc0d551bc65cc0a732fdacbf73ad335049a"},
+    {file = "jiter-0.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:044f2f1148b5248ad2c8c3afb43430dccf676c5a5834d2f5089a4e6c5bbd64df"},
+    {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:702e3520384c88b6e270c55c772d4bd6d7b150608dcc94dea87ceba1b6391248"},
+    {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:528d742dcde73fad9d63e8242c036ab4a84389a56e04efd854062b660f559544"},
+    {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8cf80e5fe6ab582c82f0c3331df27a7e1565e2dcf06265afd5173d809cdbf9ba"},
+    {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:44dfc9ddfb9b51a5626568ef4e55ada462b7328996294fe4d36de02fce42721f"},
+    {file = "jiter-0.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c451f7922992751a936b96c5f5b9bb9312243d9b754c34b33d0cb72c84669f4e"},
+    {file = "jiter-0.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:308fce789a2f093dca1ff91ac391f11a9f99c35369117ad5a5c6c4903e1b3e3a"},
+    {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7f5ad4a7c6b0d90776fdefa294f662e8a86871e601309643de30bf94bb93a64e"},
+    {file = "jiter-0.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ea189db75f8eca08807d02ae27929e890c7d47599ce3d0a6a5d41f2419ecf338"},
+    {file = "jiter-0.5.0-cp312-none-win32.whl", hash = "sha256:e3bbe3910c724b877846186c25fe3c802e105a2c1fc2b57d6688b9f8772026e4"},
+    {file = "jiter-0.5.0-cp312-none-win_amd64.whl", hash = "sha256:a586832f70c3f1481732919215f36d41c59ca080fa27a65cf23d9490e75b2ef5"},
+    {file = "jiter-0.5.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:f04bc2fc50dc77be9d10f73fcc4e39346402ffe21726ff41028f36e179b587e6"},
+    {file = "jiter-0.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6f433a4169ad22fcb550b11179bb2b4fd405de9b982601914ef448390b2954f3"},
+    {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad4a6398c85d3a20067e6c69890ca01f68659da94d74c800298581724e426c7e"},
+    {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6baa88334e7af3f4d7a5c66c3a63808e5efbc3698a1c57626541ddd22f8e4fbf"},
+    {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ece0a115c05efca597c6d938f88c9357c843f8c245dbbb53361a1c01afd7148"},
+    {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:335942557162ad372cc367ffaf93217117401bf930483b4b3ebdb1223dbddfa7"},
+    {file = "jiter-0.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:649b0ee97a6e6da174bffcb3c8c051a5935d7d4f2f52ea1583b5b3e7822fbf14"},
+    {file = "jiter-0.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f4be354c5de82157886ca7f5925dbda369b77344b4b4adf2723079715f823989"},
+    {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5206144578831a6de278a38896864ded4ed96af66e1e63ec5dd7f4a1fce38a3a"},
+    {file = "jiter-0.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8120c60f8121ac3d6f072b97ef0e71770cc72b3c23084c72c4189428b1b1d3b6"},
+    {file = "jiter-0.5.0-cp38-none-win32.whl", hash = "sha256:6f1223f88b6d76b519cb033a4d3687ca157c272ec5d6015c322fc5b3074d8a5e"},
+    {file = "jiter-0.5.0-cp38-none-win_amd64.whl", hash = "sha256:c59614b225d9f434ea8fc0d0bec51ef5fa8c83679afedc0433905994fb36d631"},
+    {file = "jiter-0.5.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0af3838cfb7e6afee3f00dc66fa24695199e20ba87df26e942820345b0afc566"},
+    {file = "jiter-0.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:550b11d669600dbc342364fd4adbe987f14d0bbedaf06feb1b983383dcc4b961"},
+    {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:489875bf1a0ffb3cb38a727b01e6673f0f2e395b2aad3c9387f94187cb214bbf"},
+    {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b250ca2594f5599ca82ba7e68785a669b352156260c5362ea1b4e04a0f3e2389"},
+    {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ea18e01f785c6667ca15407cd6dabbe029d77474d53595a189bdc813347218e"},
+    {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:462a52be85b53cd9bffd94e2d788a09984274fe6cebb893d6287e1c296d50653"},
+    {file = "jiter-0.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92cc68b48d50fa472c79c93965e19bd48f40f207cb557a8346daa020d6ba973b"},
+    {file = "jiter-0.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1c834133e59a8521bc87ebcad773608c6fa6ab5c7a022df24a45030826cf10bc"},
+    {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab3a71ff31cf2d45cb216dc37af522d335211f3a972d2fe14ea99073de6cb104"},
+    {file = "jiter-0.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cccd3af9c48ac500c95e1bcbc498020c87e1781ff0345dd371462d67b76643eb"},
+    {file = "jiter-0.5.0-cp39-none-win32.whl", hash = "sha256:368084d8d5c4fc40ff7c3cc513c4f73e02c85f6009217922d0823a48ee7adf61"},
+    {file = "jiter-0.5.0-cp39-none-win_amd64.whl", hash = "sha256:ce03f7b4129eb72f1687fa11300fbf677b02990618428934662406d2a76742a1"},
+    {file = "jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a"},
+]
+
 [[package]]
 name = "jmespath"
 version = "0.10.0"
@@ -2683,23 +2753,24 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
 
 [[package]]
 name = "openai"
-version = "1.13.3"
+version = "1.40.6"
 description = "The official Python library for the openai API"
-optional = true
+optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.13.3-py3-none-any.whl", hash = "sha256:5769b62abd02f350a8dd1a3a242d8972c947860654466171d60fb0972ae0a41c"},
-    {file = "openai-1.13.3.tar.gz", hash = "sha256:ff6c6b3bc7327e715e4b3592a923a5a1c7519ff5dd764a83d69f633d49e77a7b"},
+    {file = "openai-1.40.6-py3-none-any.whl", hash = "sha256:b36372124a779381a420a34dd96f762baa748b6bdfaf83a6b9f2745f72ccc1c5"},
+    {file = "openai-1.40.6.tar.gz", hash = "sha256:2239232bcb7f4bd4ce8e02544b5769618582411cf399816d96686d1b6c1e5c8d"},
 ]
 
 [package.dependencies]
 anyio = ">=3.5.0,<5"
 distro = ">=1.7.0,<2"
 httpx = ">=0.23.0,<1"
+jiter = ">=0.4.0,<1"
 pydantic = ">=1.9.0,<3"
 sniffio = "*"
 tqdm = ">4"
-typing-extensions = ">=4.7,<5"
+typing-extensions = ">=4.11,<5"
 
 [package.extras]
 datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
@@ -4023,7 +4094,7 @@ files = [
 name = "sniffio"
 version = "1.3.0"
 description = "Sniff out which async library your code is running under"
-optional = true
+optional = false
 python-versions = ">=3.7"
 files = [
     {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"},
@@ -4842,13 +4913,13 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.
 
 [[package]]
 name = "typing-extensions"
-version = "4.10.0"
+version = "4.12.2"
 description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"},
-    {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"},
+    {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
+    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
 ]
 
 [[package]]
@@ -5309,4 +5380,4 @@ transformers = ["accelerate", "datasets", "torch", "transformers"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0"
-content-hash = "15c15ff226c5ab0a96736e03993662dfe2052a5442b52778e3bffd9e176c0cb5"
+content-hash = "477b1347105836e413565aa36f398e96038fba0de7daa8f6123e7d03e5fe4907"
diff --git a/pyproject.toml b/pyproject.toml
index 34943255b..44caae217 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,7 +59,7 @@ transformers = "^4.38.2"
 huggingface_hub = { version = ">0.16.0", optional = true}
 spacy = { version = ">=3.0.0", optional = true }
 nest-asyncio = "^1.5.0"
-openai = {version = "^1.13.3", optional = true}
+openai = "^1.40.6"
 jsonlines = "^3.1.0"
 torch = { version = "^2.0.0", optional = true }
 pandas = "^2.0.3"

From a47156db059f8f5a86a71be12b511a1e56367344 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Tue, 13 Aug 2024 19:18:22 +0530
Subject: [PATCH 09/27] resolved: lint and format issues.

---
 langtest/augmentation/base.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
index 8b640fc1c..1fa0bb226 100644
--- a/langtest/augmentation/base.py
+++ b/langtest/augmentation/base.py
@@ -338,7 +338,7 @@ def __init__(
         if generate_templates:
             if try_import_lib("openai"):
                 import openai
-                from pydantic import BaseModel, Field
+                from pydantic import BaseModel
 
                 client = openai.OpenAI()
 
@@ -356,7 +356,10 @@ class Templates(BaseModel):
 
                     response = client.beta.chat.completions.parse(
                         model="gpt-4o-mini",
-                        messages=[{"role": "system", "content": "Action: Generate templates"}, {"role": "user", "content": prompt}],
+                        messages=[
+                            {"role": "system", "content": "Action: Generate templates"},
+                            {"role": "user", "content": prompt},
+                        ],
                         max_tokens=500,
                         temperature=0,
                         response_format=Templates,

From 92cd12cf3cc0dc048e0f8f1e84304507cfaf81c6 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Wed, 14 Aug 2024 19:18:46 +0530
Subject: [PATCH 10/27] fixed: transformed and add export types are supported
 in DataAugmenter

---
 langtest/augmentation/augmenter.py | 144 +++++++++++++++++++++--------
 langtest/datahandler/datasource.py |  27 +++++-
 2 files changed, 127 insertions(+), 44 deletions(-)

diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py
index 96b5125e1..d98b6cf86 100644
--- a/langtest/augmentation/augmenter.py
+++ b/langtest/augmentation/augmenter.py
@@ -1,10 +1,13 @@
+from collections import defaultdict
 import random
 import yaml
+import pandas as pd
 
 from typing import Any, Dict, Iterable, Union
 from langtest.datahandler.datasource import DataFactory
 from langtest.transform import TestFactory
 from langtest.tasks.task import TaskManager
+from langtest.utils.custom_types.sample import Sample
 
 
 class DataAugmenter:
@@ -40,14 +43,12 @@ def __init__(self, task: Union[str, TaskManager], config: Union[str, dict]) -> N
         self.__testfactory.is_augment = True
 
         # parameters
-        self.__max_proportion = self.__tests.get("defaults", {}).get(
-            "max_proportion", 0.6
-        )
+        self.__max_data_limit = self.__tests.get("parameters", {}).get("max_limit", 0.5)
         # self.__ntests = len(v for k, v in self.__tests.items()) - 1
         self.__type = self.__config.get("parameters", {}).get("type", "proportion")
         self.__style = self.__config.get("parameters", {}).get("style", "extend")
 
-        self.__df_config = self.__config_df()
+        self.__df_config = self.__initialize_config_df()
 
     def load_config(self, config: str) -> dict:
         """
@@ -61,45 +62,67 @@ def augment(self, data: Union[str, Iterable]) -> str:
         Augment the content.
         """
         # load the data
-        if isinstance(data, dict):
+        if isinstance(data, dict) and not isinstance(self.__datafactory, DataFactory):
             self.__datafactory = self.__datafactory(file_path=data, task=self.__task)
+
             data = self.__datafactory.load()
 
+        # generate the augmented data
+        test_cases = self.__testfactory.transform(self.__task, data, self.__tests)
+
         # check the style of augmentation to be applied. Default is extend
-        if self.__style == "extend":
-            self.extend(data)
+        if self.__style == "extend" or self.__style == "add":
+            self.extend(data, test_cases)
         elif self.__style == "inplace":
-            self.inplace(data)
-        elif self.__style == "new":
-            self.new_data(data)
+            self.inplace(data, test_cases)
+        elif self.__style == "new" or self.__style == "transformed":
+            self.new_data(data, test_cases)
         else:
             raise ValueError("Invalid style")
 
         return self
 
-    def extend(self, data: Iterable) -> "DataAugmenter":
+    def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter":
         """
         Extend the content.
         """
         # calculate the number of rows to be added
-        n = len(data)
+        test_cases = defaultdict(list)
+        for sample in testcases:
+            if sample.test_type in test_cases:
+                test_cases[sample.test_type].append(sample)
+            else:
+                test_cases[sample.test_type] = [sample]
 
-        data_cut = random.sample(data, int(n * self.__max_proportion))
+        final_data = []
 
-        test_cases: list = self.__testfactory.transform(
-            self.__task, data_cut, self.__tests
-        )
+        for _, tests in self.__tests.items():
+            for test_name, _ in tests.items():
+                size = self.allocated_size(test_name)
 
-        self.__augmented_data = [*data, *test_cases] if isinstance(data, list) else data
+                if size == 0:
+                    continue
+
+                temp_test_cases = test_cases.get(test_name, [])
+                if temp_test_cases:
+                    # select random rows based on the size
+                    temp_test_cases = (
+                        random.choices(temp_test_cases, k=size)
+                        if size < len(temp_test_cases)
+                        else temp_test_cases
+                    )
+                    final_data.extend(temp_test_cases)
+
+        self.__augmented_data = [*data, *final_data] if isinstance(data, list) else data
 
         return self
 
-    def inplace(self, data: Iterable) -> "DataAugmenter":
+    def inplace(self, data: Iterable, testcases: Iterable) -> "DataAugmenter":
         """
         Inplace augmentation.
         """
         # calculate the number of rows to be added
-        size = int(len(data) * self.__max_proportion)
+        size = int(len(data) * self.allocated_size())
 
         # create a dictionary with index as key and data as value
         data_dict = self.prepare_hash_map(data)
@@ -117,28 +140,59 @@ def inplace(self, data: Iterable) -> "DataAugmenter":
 
         return self
 
-    def new_data(self, data: Iterable) -> "DataAugmenter":
+    def new_data(self, data: Iterable, testcases: Iterable) -> "DataAugmenter":
         """
         Create new data.
         """
         # calculate the number of rows to be added
-        size = int(len(data) * self.__max_proportion)
+        test_cases = defaultdict(list)
+        for sample in testcases:
+            if sample.test_type in test_cases:
+                test_cases[sample.test_type].append(sample)
+            else:
+                test_cases[sample.test_type] = [sample]
 
-        data_cut = random.sample(data, size)
+        final_data = []
+        for _, tests in self.__tests.items():
+            for test_name, _ in tests.items():
+                size = self.allocated_size(test_name)
 
-        test_cases = self.__testfactory.transform(self.__task, data_cut, self.__tests)
+                if size == 0:
+                    continue
 
-        self.__augmented_data = test_cases
+                temp_test_cases = test_cases.get(test_name, [])
+                if temp_test_cases:
+                    # select random rows based on the size
+                    temp_test_cases = (
+                        random.choices(temp_test_cases, k=size)
+                        if size < len(temp_test_cases)
+                        else temp_test_cases
+                    )
+                    final_data.extend(temp_test_cases)
+
+        self.__augmented_data = final_data
 
         return self
 
-    def size(self, category: str, test_name: str) -> int:
-        return (
-            self.__max_proportion
-            * self.__tests.get(category, {}).get(test_name, {}).get("max_proportion", 0.6)
-        ) / self.__df_config.shape[0]
+    def allocated_size(self, test_name: str) -> int:
+        """allocation size of the test to be augmented"""
 
-    def prepare_hash_map(self, data: Union[str, Iterable]) -> Dict[str, Any]:
+        try:
+            max_data_limit = (
+                len(self.__datafactory)
+                * self.__max_data_limit
+                * self.__df_config.loc[test_name, "avg_proportion"]
+            )
+
+            return int(
+                max_data_limit * self.__df_config.loc[test_name, "normalized_proportion"]
+            )
+        except AttributeError:
+            raise ValueError(
+                "Dataset is not loaded. please load the data using the `DataAugmenter.augment(data={'data_source': '..'})` method"
+            )
+
+    def prepare_hash_map(self, data: Union[Iterable[Sample], Sample]) -> Dict[str, Any]:
         hashmap = {index: sample for index, sample in enumerate(data)}
 
         return hashmap
@@ -157,28 +211,38 @@ def __ror__(self, other: Iterable):
         results = self.augment(other)
         return results
 
-    def __config_df(self):
+    def __initialize_config_df(self) -> pd.DataFrame:
         """
         Configure the data frame.
         """
 
-        import pandas as pd
-
         df = pd.DataFrame(columns=["category", "test_name", "proportion"])
 
         # read the configuration
+        temp_data = []
         for category, tests in self.__tests.items():
             if category not in ["robustness", "bias"]:
                 continue
             for test_name, test in tests.items():
-                proportion = test.get("max_proportion", 0.6)
-                temp = pd.DataFrame(
+                proportion = test.get("max_proportion", 0.2)
+                temp_data.append(
                     {
-                        "category": [category],
-                        "test_name": [test_name],
-                        "proportion": [proportion],
-                    },
+                        "category": category,
+                        "test_name": test_name,
+                        "proportion": proportion,
+                    }
                 )
-                df = pd.concat([df, temp], ignore_index=True)
+        df = pd.concat([df, pd.DataFrame(temp_data)], ignore_index=True)
+
+        # normalize the proportion and round it to 2 decimal places
+        df["normalized_proportion"] = df["proportion"] / df["proportion"].sum()
+        df["normalized_proportion"] = df["normalized_proportion"].apply(
+            lambda x: round(x, 2)
+        )
+
+        df["avg_proportion"] = df["proportion"].mean(numeric_only=True).round(2)
+
+        # set the index as test_name
+        df.set_index("test_name", inplace=True)
 
         return df
diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py
index 51e343ea3..217a06f17 100644
--- a/langtest/datahandler/datasource.py
+++ b/langtest/datahandler/datasource.py
@@ -104,6 +104,7 @@ class BaseDataset(ABC):
     """
 
     data_sources = defaultdict()
+    dataset_size = None
 
     @abstractmethod
     def load_raw_data(self):
@@ -153,6 +154,12 @@ def __init_subclass__(cls, **kwargs):
         else:
             cls.data_sources[dataset_cls] = cls
 
+    def __len__(self):
+        """Returns the size of the dataset"""
+        if self.dataset_size is None:
+            self.dataset_size = len(self.load_data())
+        return self.dataset_size
+
 
 class DataFactory:
     """Data factory for creating Dataset objects.
@@ -178,6 +185,7 @@ def __init__(self, file_path: dict, task: TaskManager, **kwargs) -> None:
             raise ValueError(Errors.E025)
         self._custom_label = file_path.copy()
         self._file_path = file_path.get("data_source")
+        self._size = None
 
         self.datasets_with_jsonl_extension = []
         for dataset_name, dataset_info in datasets_info.items():
@@ -250,7 +258,10 @@ def load(self) -> List[Sample]:
             self.init_cls = self.data_sources[self.file_ext.replace(".", "")](
                 self._file_path, task=self.task, **self.kwargs
             )
-        return self.init_cls.load_data()
+
+        loaded_data = self.init_cls.load_data()
+        self._size = len(loaded_data)
+        return loaded_data
 
     def export(self, data: List[Sample], output_path: str) -> None:
         """Exports the data to the corresponding format and saves it to 'output_path'.
@@ -399,6 +410,12 @@ def _load_dataset(cls, custom_label: dict) -> str:
             extension = dataset_info.get("extension", "jsonl")
             return script_dir[:-7] + "/" + dataset_name + "/" + split + extension
 
+    def __len__(self):
+        """dataset size"""
+        if self._size is None:
+            self._size = len(self.load())
+        return self._size
+
 
 class ConllDataset(BaseDataset):
     """Class to handle Conll files. Subclass of BaseDataset."""
@@ -522,7 +539,7 @@ def load_data(self) -> List[NERSample]:
                             expected_results=NEROutput(predictions=ner_labels),
                         )
                     )
-
+        self.dataset_size = len(data)
         return data
 
     def export_data(self, data: List[NERSample], output_path: str):
@@ -812,6 +829,7 @@ def load_data(self) -> List[Sample]:
                 logging.warning(Warnings.W005(idx=idx, row_data=row_data, e=e))
                 continue
 
+        self.dataset_size = len(data)
         return data
 
     def export_data(self, data: List[Sample], output_path: str):
@@ -1025,7 +1043,7 @@ def load_data(self, *args, **kwargs) -> List[Sample]:
                     item, dataset_name=dataset_name, *args, **kwargs
                 )
                 data.append(sample)
-
+        self.dataset_size = len(data)
         return data
 
     def __load_jsonl(self, file: str, dataset_name: str, data, *args, **kwargs):
@@ -1215,7 +1233,7 @@ def load_data(self) -> List[Sample]:
                 **column_names,
             )
             data.append(sample)
-
+        self.dataset_size = len(data)
         return data
 
     def export_data(self, data: List[Sample], output_path: str):
@@ -1296,6 +1314,7 @@ def load_data(self) -> List[Sample]:
         method_name = f"load_{self.dataset_name.replace('-', '_')}"
         if hasattr(self, method_name):
             samples = getattr(self, method_name)()
+            self.dataset_size = len(samples)
             return samples
         else:
             raise ValueError(Errors.E030(dataset_name=self.dataset_name))

From 712d4d6bf115569bf78a07111d065be58f3ed7e9 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Wed, 14 Aug 2024 19:39:05 +0530
Subject: [PATCH 11/27] fixed: inplace method in DataAugmenter with proper
 proportion.

---
 langtest/augmentation/augmenter.py | 61 +++++++++++++++++++++++-------
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py
index d98b6cf86..ca9a5b202 100644
--- a/langtest/augmentation/augmenter.py
+++ b/langtest/augmentation/augmenter.py
@@ -3,7 +3,7 @@
 import yaml
 import pandas as pd
 
-from typing import Any, Dict, Iterable, Union
+from typing import Any, Dict, Iterable, List, Union
 from langtest.datahandler.datasource import DataFactory
 from langtest.transform import TestFactory
 from langtest.tasks.task import TaskManager
@@ -117,24 +117,47 @@ def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter"
 
         return self
 
-    def inplace(self, data: Iterable, testcases: Iterable) -> "DataAugmenter":
+    def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter":
         """
         Inplace augmentation.
         """
-        # calculate the number of rows to be added
-        size = int(len(data) * self.allocated_size())
 
-        # create a dictionary with index as key and data as value
+        data_indices = self.prepare_hash_map(data, inverted=True)
         data_dict = self.prepare_hash_map(data)
 
-        # select random rows based on the size with its index
-        selected = random.sample(data_dict.keys(), int(size))
+        test_cases = defaultdict(list)
+        for sample in testcases:
+            if sample.test_type in test_cases:
+                test_cases[sample.test_type].append(sample)
+            else:
+                test_cases[sample.test_type] = [sample]
+
+        final_data: List[Sample] = []
+        for _, tests in self.__tests.items():
+            for test_name, _ in tests.items():
+                size = self.allocated_size(test_name)
+                print(size)
+                if size == 0:
+                    continue
 
-        for idx in selected:
-            test_cases = self.__testfactory.transform(
-                self.__task, [data_dict[idx]], self.__tests
+                temp_test_cases = test_cases.get(test_name, [])
+                if temp_test_cases:
+                    # select random rows based on the size
+                    temp_test_cases = (
+                        random.choices(temp_test_cases, k=size)
+                        if size < len(temp_test_cases)
+                        else temp_test_cases
+                    )
+                    final_data.extend(temp_test_cases)
+
+        for sample in final_data:
+            key = (
+                sample.original_question
+                if hasattr(sample, "original_question")
+                else sample.original
             )
-            data_dict[idx] = test_cases[0] if test_cases else data_dict[idx]
+            index = data_indices[key]
+            data_dict[index] = sample
 
         self.__augmented_data = data_dict.values()
 
@@ -192,8 +215,20 @@ def allocated_size(self, test_name: str) -> int:
                 "Dataset is not loaded. please load the data using the `DataAugmenter.augment(data={'data_source': '..'})` method"
             )
 
-    def prepare_hash_map(self, data: Union[Iterable[Sample], Sample]) -> Dict[str, Any]:
-        hashmap = {index: sample for index, sample in enumerate(data)}
+    def prepare_hash_map(
+        self, data: Union[Iterable[Sample], Sample], inverted=False
+    ) -> Dict[str, Any]:
+        if inverted:
+            hashmap = {}
+            for index, sample in enumerate(data):
+                key = (
+                    sample.original_question
+                    if hasattr(sample, "original_question")
+                    else sample.original
+                )
+                hashmap[key] = index
+        else:
+            hashmap = {index: sample for index, sample in enumerate(data)}
 
         return hashmap
 

From fd2333dfc178d3fbd568a056a46b736e3a0446f2 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Wed, 14 Aug 2024 19:46:52 +0530
Subject: [PATCH 12/27] update doc strings and remove the print statements.

---
 langtest/augmentation/augmenter.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py
index ca9a5b202..2cd118aab 100644
--- a/langtest/augmentation/augmenter.py
+++ b/langtest/augmentation/augmenter.py
@@ -86,7 +86,7 @@ def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter"
         """
         Extend the content.
         """
-        # calculate the number of rows to be added
+        # arrange the test cases based on the test_type in a dictionary
         test_cases = defaultdict(list)
         for sample in testcases:
             if sample.test_type in test_cases:
@@ -95,7 +95,7 @@ def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter"
                 test_cases[sample.test_type] = [sample]
 
         final_data = []
-
+        # pick the test cases based on the allocated size of the test_type
         for _, tests in self.__tests.items():
             for test_name, _ in tests.items():
                 size = self.allocated_size(test_name)
@@ -113,6 +113,7 @@ def extend(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter"
                     )
                     final_data.extend(temp_test_cases)
 
+        # append the augmented data to the original data
         self.__augmented_data = [*data, *final_data] if isinstance(data, list) else data
 
         return self
@@ -121,10 +122,11 @@ def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter
         """
         Inplace augmentation.
         """
-
+        # indices of the data and the data itself
         data_indices = self.prepare_hash_map(data, inverted=True)
         data_dict = self.prepare_hash_map(data)
 
+        # arrange the test cases based on the test type in a dictionary
         test_cases = defaultdict(list)
         for sample in testcases:
             if sample.test_type in test_cases:
@@ -132,11 +134,12 @@ def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter
             else:
                 test_cases[sample.test_type] = [sample]
 
+        # pick the test cases based on the allocated size of the test_type
         final_data: List[Sample] = []
         for _, tests in self.__tests.items():
             for test_name, _ in tests.items():
                 size = self.allocated_size(test_name)
-                print(size)
+
                 if size == 0:
                     continue
 
@@ -150,6 +153,7 @@ def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter
                     )
                     final_data.extend(temp_test_cases)
 
+        # replace the original data with the augmented data in extact position.
         for sample in final_data:
             key = (
                 sample.original_question
@@ -163,11 +167,11 @@ def inplace(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter
 
         return self
 
-    def new_data(self, data: Iterable, testcases: Iterable) -> "DataAugmenter":
+    def new_data(self, data: Iterable, testcases: Iterable[Sample]) -> "DataAugmenter":
         """
         Create new data.
         """
-        # calculate the number of rows to be added
+        # arrange the test cases based on the test type in a dictionary
         test_cases = defaultdict(list)
         for sample in testcases:
             if sample.test_type in test_cases:
@@ -176,6 +180,8 @@ def new_data(self, data: Iterable, testcases: Iterable) -> "DataAugmenter":
                 test_cases[sample.test_type] = [sample]
 
         final_data = []
+
+        # pick the test cases based on the allocated size of the test_type
         for _, tests in self.__tests.items():
             for test_name, _ in tests.items():
                 size = self.allocated_size(test_name)
@@ -193,6 +199,7 @@ def new_data(self, data: Iterable, testcases: Iterable) -> "DataAugmenter":
                     )
                     final_data.extend(temp_test_cases)
 
+        # replace the original data with the augmented data
         self.__augmented_data = final_data
 
         return self

From 526ae6f666774453a82d21d621c315865d8df5aa Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Thu, 15 Aug 2024 12:32:37 +0530
Subject: [PATCH 13/27] chore: generate additional templates in
 TemplaticAugment, with the number of extra templates chosen by the user

---
 langtest/augmentation/base.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
index 1fa0bb226..c8488ab95 100644
--- a/langtest/augmentation/base.py
+++ b/langtest/augmentation/base.py
@@ -323,6 +323,7 @@ def __init__(
         task: TaskManager,
         generate_templates=False,
         show_templates=False,
+        num_extra_templates=10,
     ) -> None:
         """This constructor for the TemplaticAugment class.
 
@@ -347,7 +348,7 @@ class Templates(BaseModel):
 
                 given_template = self.__templates[:]
                 for template in given_template:
-                    prompt = f"""Based on the template provided, create 10 new and unique templates that are variations on this theme. Present these as a Python list, with each template as a quoted string. The list should contain only the templates without any additional text or explanation.
+                    prompt = f"""Based on the template provided, create {num_extra_templates} new and unique templates that are variations on this theme. Present these as a Python list, with each template as a quoted string. The list should contain only the templates without any additional text or explanation.
 
                         Template:
                         "{template}"
@@ -357,7 +358,10 @@ class Templates(BaseModel):
                     response = client.beta.chat.completions.parse(
                         model="gpt-4o-mini",
                         messages=[
-                            {"role": "system", "content": "Action: Generate templates"},
+                            {
+                                "role": "system",
+                                "content": f"Action: Generate templates upto {num_extra_templates}",
+                            },
                             {"role": "user", "content": prompt},
                         ],
                         max_tokens=500,
@@ -368,16 +372,9 @@ class Templates(BaseModel):
                     generated_response = response.choices[0].message.parsed
                     # Process the generated response
                     if generated_response:
-                        # # Assuming the response format is a Python-like list in a string
-                        # templates_list = generated_response.strip("[]").split('",')
-                        # templates_list = [
-                        #     template.strip().strip('"')
-                        #     for template in templates_list
-                        #     if template.strip()
-                        # ]
-
+                        
                         # Extend the existing templates list
-                        self.__templates.extend(generated_response.templates)
+                        self.__templates.extend(generated_response.templates[:num_extra_templates])
                     else:
                         print("No response or unexpected format.")
 

From 8185a90a8eb96984235fbee13bcfa5ad949ddef9 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Thu, 15 Aug 2024 15:15:37 +0530
Subject: [PATCH 14/27] chore: remove quotes in generated template and self
 check the num_extra_templates.

---
 langtest/augmentation/base.py | 111 ++++++++++++++++++++++------------
 langtest/errors.py            |   1 +
 2 files changed, 75 insertions(+), 37 deletions(-)

diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
index c8488ab95..a1d01dfa6 100644
--- a/langtest/augmentation/base.py
+++ b/langtest/augmentation/base.py
@@ -337,49 +337,25 @@ def __init__(
         self.__task = task
 
         if generate_templates:
-            if try_import_lib("openai"):
-                import openai
-                from pydantic import BaseModel
-
-                client = openai.OpenAI()
-
-                class Templates(BaseModel):
-                    templates: List[str]
-
+            try:
                 given_template = self.__templates[:]
                 for template in given_template:
-                    prompt = f"""Based on the template provided, create {num_extra_templates} new and unique templates that are variations on this theme. Present these as a Python list, with each template as a quoted string. The list should contain only the templates without any additional text or explanation.
-
-                        Template:
-                        "{template}"
-
-                        """
-
-                    response = client.beta.chat.completions.parse(
-                        model="gpt-4o-mini",
-                        messages=[
-                            {
-                                "role": "system",
-                                "content": f"Action: Generate templates upto {num_extra_templates}",
-                            },
-                            {"role": "user", "content": prompt},
-                        ],
-                        max_tokens=500,
-                        temperature=0,
-                        response_format=Templates,
+                    generated_templates: List[str] = self.__generate_templates(
+                        template, num_extra_templates
                     )
 
-                    generated_response = response.choices[0].message.parsed
-                    # Process the generated response
-                    if generated_response:
-                        
+                    while len(generated_templates) < num_extra_templates:
+                        temp_templates = self.__generate_templates(
+                            template, num_extra_templates
+                        )
+                        generated_templates.extend(temp_templates)
+
+                    if generated_templates:
                         # Extend the existing templates list
-                        self.__templates.extend(generated_response.templates[:num_extra_templates])
-                    else:
-                        print("No response or unexpected format.")
 
-            else:
-                raise RuntimeError(Errors.E084)
+                        self.__templates.extend(generated_templates[:num_extra_templates])
+            except Exception as e:
+                raise Errors.E095(e)
 
         if show_templates:
             [print(template) for template in self.__templates]
@@ -619,3 +595,64 @@ def add_spaces_around_punctuation(text: str):
         text = re.sub(r"\s+", " ", text).strip()
 
         return text
+
+    def __generate_templates(self, template, num_extra_templates) -> List[str]:
+        if try_import_lib("openai"):
+            import openai
+            from pydantic import BaseModel, validator
+
+            client = openai.OpenAI()
+
+            class Templates(BaseModel):
+                templates: List[str]
+
+                def __post_init__(self):
+                    self.templates = [i.strip('"') for i in self.templates]
+
+                @validator("templates", each_item=True)
+                def check_templates(cls, v: str):
+                    if not v:
+                        raise ValueError("No templates generated.")
+                    return v.strip('"')
+
+                def remove_invalid_templates(self, original_template):
+                    # extract variable names using regex
+                    regexs = r"{([^{}]*)}"
+                    original_vars = re.findall(regexs, original_template)
+                    original_vars = set([var.strip() for var in original_vars])
+
+                    # remove invalid templates
+                    valid_templates = []
+                    for template in self.templates:
+                        template_vars: List[str] = re.findall(regexs, template)
+                        template_vars = set([var.strip() for var in template_vars])
+                        if template_vars == original_vars:
+                            valid_templates.append(template)
+                    self.templates = valid_templates
+
+            prompt = (
+                f"Based on the provided template, create {num_extra_templates} new and unique templates that are "
+                "variations on this theme. Present these as a list, with each template as a quoted string. The list should "
+                "contain only the templates, without any additional text or explanation. Ensure that the structure of "
+                "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n"
+                "Template:\n"
+                f"{template}\n"
+            )
+            response = client.beta.chat.completions.parse(
+                model="gpt-4o-mini",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"Action: Generate up to {num_extra_templates} templates and ensure that the structure of the variables within the templates remains unchanged and don't add any extra variables.",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                max_tokens=500,
+                temperature=0,
+                response_format=Templates,
+            )
+
+            generated_response = response.choices[0].message.parsed
+            generated_response.remove_invalid_templates(template)
+
+            return generated_response.templates[:num_extra_templates]
diff --git a/langtest/errors.py b/langtest/errors.py
index 6e7359858..4dfc38ce6 100644
--- a/langtest/errors.py
+++ b/langtest/errors.py
@@ -274,6 +274,7 @@ class Errors(metaclass=ErrorsWithCodes):
     E093 = ("Category cannot be None. Please provide a valid category.")
     E094 = ("Unsupported category: '{category}'. Supported categories: {supported_category}")
     E095 = ("Failed to make API request: {e}")
+    E096 = ("Failed to generate the templates in Augmentation: {e}")
 
 
 class ColumnNameError(Exception):

From 6734f706a6a68db10856d0e4a49e0fb48d6dca0a Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Fri, 16 Aug 2024 11:39:56 +0530
Subject: [PATCH 15/27] chore: Fix error message in Augmentation when
 generating templates

---
 langtest/augmentation/base.py | 4 ++--
 langtest/errors.py            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
index a1d01dfa6..116f30be6 100644
--- a/langtest/augmentation/base.py
+++ b/langtest/augmentation/base.py
@@ -355,7 +355,7 @@ def __init__(
 
                         self.__templates.extend(generated_templates[:num_extra_templates])
             except Exception as e:
-                raise Errors.E095(e)
+                raise Errors.E095(msg=e)
 
         if show_templates:
             [print(template) for template in self.__templates]
@@ -609,7 +609,7 @@ class Templates(BaseModel):
                 def __post_init__(self):
                     self.templates = [i.strip('"') for i in self.templates]
 
-                @validator("templates", each_item=True)
+                @validator("templates", each_item=True, allow_reuse=True)
                 def check_templates(cls, v: str):
                     if not v:
                         raise ValueError("No templates generated.")
diff --git a/langtest/errors.py b/langtest/errors.py
index 4dfc38ce6..d3d7d1bba 100644
--- a/langtest/errors.py
+++ b/langtest/errors.py
@@ -274,7 +274,7 @@ class Errors(metaclass=ErrorsWithCodes):
     E093 = ("Category cannot be None. Please provide a valid category.")
     E094 = ("Unsupported category: '{category}'. Supported categories: {supported_category}")
     E095 = ("Failed to make API request: {e}")
-    E096 = ("Failed to generate the templates in Augmentation: {e}")
+    E096 = ("Failed to generate the templates in Augmentation: {msg}")
 
 
 class ColumnNameError(Exception):

From 55d17e1d9bf319314812193c2d541104d35f9e0f Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Fri, 16 Aug 2024 14:30:50 +0530
Subject: [PATCH 16/27] chore: Refactor DataAugmenter to improve template
 generation and proportion handling

---
 langtest/augmentation/augmenter.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py
index 2cd118aab..7af5a30d4 100644
--- a/langtest/augmentation/augmenter.py
+++ b/langtest/augmentation/augmenter.py
@@ -26,7 +26,7 @@ def __init__(self, task: Union[str, TaskManager], config: Union[str, dict]) -> N
         if isinstance(config, str):
             self.__config = self.load_config(config)
 
-        self.__tests: dict = self.__config.get("tests", [])
+        self.__tests: Dict[str, Dict[str, dict]] = self.__config.get("tests", [])
         if isinstance(task, str):
             if task in ["ner", "text-classification", "question-answering"]:
                 task = TaskManager(task)
@@ -276,6 +276,9 @@ def __initialize_config_df(self) -> pd.DataFrame:
                 )
         df = pd.concat([df, pd.DataFrame(temp_data)], ignore_index=True)
 
+        # Convert 'proportion' column to float
+        df["proportion"] = pd.to_numeric(df["proportion"], errors="coerce")
+
         # normalize the proportion and round it to 2 decimal places
         df["normalized_proportion"] = df["proportion"] / df["proportion"].sum()
         df["normalized_proportion"] = df["normalized_proportion"].apply(

From b7f68c1e161f0da644f0f0b4343650490a82def5 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Fri, 16 Aug 2024 14:33:06 +0530
Subject: [PATCH 17/27] Refactor DataAugmenter to improve proportion handling

---
 langtest/augmentation/augmenter.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/langtest/augmentation/augmenter.py b/langtest/augmentation/augmenter.py
index 7af5a30d4..f587adc27 100644
--- a/langtest/augmentation/augmenter.py
+++ b/langtest/augmentation/augmenter.py
@@ -66,6 +66,8 @@ def augment(self, data: Union[str, Iterable]) -> str:
             self.__datafactory = self.__datafactory(file_path=data, task=self.__task)
 
             data = self.__datafactory.load()
+        elif isinstance(self.__datafactory, DataFactory):
+            data = self.__datafactory.load()
 
         # generate the augmented data
         test_cases = self.__testfactory.transform(self.__task, data, self.__tests)

From 24be4be5db0182b9eab2ceca47efefa51a14e414 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Mon, 26 Aug 2024 20:30:14 +0530
Subject: [PATCH 18/27] Refactor TemplaticAugment to support multiple AI
 providers for template generation

---
 langtest/augmentation/base.py  | 29 +++++++++++++++++++++++------
 langtest/augmentation/types.py | 23 +++++++++++++++++++++++
 2 files changed, 46 insertions(+), 6 deletions(-)
 create mode 100644 langtest/augmentation/types.py

diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
index 116f30be6..167529bb2 100644
--- a/langtest/augmentation/base.py
+++ b/langtest/augmentation/base.py
@@ -10,6 +10,7 @@
 import pandas as pd
 import yaml
 
+from langtest.augmentation.types import AzureOpenAIConfig, OpenAIConfig
 from langtest.datahandler.datasource import DataFactory
 from langtest.transform import TestFactory
 from langtest.transform.utils import create_terminology
@@ -324,6 +325,7 @@ def __init__(
         generate_templates=False,
         show_templates=False,
         num_extra_templates=10,
+        model_config: Union[OpenAIConfig, AzureOpenAIConfig] = None,
     ) -> None:
         """This constructor for the TemplaticAugment class.
 
@@ -341,12 +343,14 @@ def __init__(
                 given_template = self.__templates[:]
                 for template in given_template:
                     generated_templates: List[str] = self.__generate_templates(
-                        template, num_extra_templates
+                        template, num_extra_templates, model_config
                     )
 
                     while len(generated_templates) < num_extra_templates:
                         temp_templates = self.__generate_templates(
-                            template, num_extra_templates
+                            template,
+                            num_extra_templates,
+                            model_config,
                         )
                         generated_templates.extend(temp_templates)
 
@@ -354,8 +358,8 @@ def __init__(
                         # Extend the existing templates list
 
                         self.__templates.extend(generated_templates[:num_extra_templates])
-            except Exception as e:
-                raise Errors.E095(msg=e)
+            except Exception as e_msg:
+                raise Errors.E095(e=e_msg)
 
         if show_templates:
             [print(template) for template in self.__templates]
@@ -596,12 +600,25 @@ def add_spaces_around_punctuation(text: str):
 
         return text
 
-    def __generate_templates(self, template, num_extra_templates) -> List[str]:
+    def __generate_templates(
+        self,
+        template: str,
+        num_extra_templates: int,
+        model_config: Union[OpenAIConfig, AzureOpenAIConfig] = None,
+    ) -> List[str]:
         if try_import_lib("openai"):
             import openai
             from pydantic import BaseModel, validator
 
-            client = openai.OpenAI()
+            if model_config and model_config.get("provider") == "openai":
+                client = openai.OpenAI()
+            elif model_config and model_config.get("provider") == "azure":
+                params = model_config
+                del params["provider"]
+
+                client = openai.AzureOpenAI(**params)
+            else:
+                client = openai.OpenAI()
 
             class Templates(BaseModel):
                 templates: List[str]
diff --git a/langtest/augmentation/types.py b/langtest/augmentation/types.py
new file mode 100644
index 000000000..44d6386ad
--- /dev/null
+++ b/langtest/augmentation/types.py
@@ -0,0 +1,23 @@
+from typing import TypedDict, Union
+import os
+
+
+class OpenAIConfig(TypedDict):
+    api_key: str = os.environ.get("OPENAI_API_KEY")
+    base_url: Union[str, None] = None
+    organization: Union[str, None] = (None,)
+    project: Union[str, None] = (None,)
+    provider: str = "openai"
+
+
+class AzureOpenAIConfig(TypedDict):
+    from openai.lib.azure import AzureADTokenProvider
+
+    azure_endpoint: str
+    api_version: str
+    api_key: str
+    provider: str
+    azure_deployment: Union[str, None] = None
+    azure_ad_token: Union[str, None] = (None,)
+    azure_ad_token_provider: Union[AzureADTokenProvider, None] = (None,)
+    organization: Union[str, None] = (None,)

From 4d866f282a387de5e77e5f7572d27bacf39ea7da Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Tue, 27 Aug 2024 21:47:59 +0530
Subject: [PATCH 19/27] Integrated Azure OpenAI and OpenAI services for
 automated template generation.

---
 langtest/augmentation/base.py  |  77 ++++-------------
 langtest/augmentation/types.py |  23 -----
 langtest/augmentation/utils.py | 148 +++++++++++++++++++++++++++++++++
 3 files changed, 164 insertions(+), 84 deletions(-)
 delete mode 100644 langtest/augmentation/types.py
 create mode 100644 langtest/augmentation/utils.py

diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
index 167529bb2..c81589b94 100644
--- a/langtest/augmentation/base.py
+++ b/langtest/augmentation/base.py
@@ -10,7 +10,7 @@
 import pandas as pd
 import yaml
 
-from langtest.augmentation.types import AzureOpenAIConfig, OpenAIConfig
+from langtest.augmentation.utils import AzureOpenAIConfig, OpenAIConfig
 from langtest.datahandler.datasource import DataFactory
 from langtest.transform import TestFactory
 from langtest.transform.utils import create_terminology
@@ -607,69 +607,24 @@ def __generate_templates(
         model_config: Union[OpenAIConfig, AzureOpenAIConfig] = None,
     ) -> List[str]:
         if try_import_lib("openai"):
-            import openai
-            from pydantic import BaseModel, validator
+            from langtest.augmentation.utils import (
+                generate_templates_azoi,
+                generate_templates_openai,
+            )
 
             if model_config and model_config.get("provider") == "openai":
-                client = openai.OpenAI()
-            elif model_config and model_config.get("provider") == "azure":
                 params = model_config
-                del params["provider"]
+                if "provider" in params:
+                    del params["provider"]
 
-                client = openai.AzureOpenAI(**params)
-            else:
-                client = openai.OpenAI()
-
-            class Templates(BaseModel):
-                templates: List[str]
-
-                def __post_init__(self):
-                    self.templates = [i.strip('"') for i in self.templates]
-
-                @validator("templates", each_item=True, allow_reuse=True)
-                def check_templates(cls, v: str):
-                    if not v:
-                        raise ValueError("No templates generated.")
-                    return v.strip('"')
-
-                def remove_invalid_templates(self, original_template):
-                    # extract variable names using regex
-                    regexs = r"{([^{}]*)}"
-                    original_vars = re.findall(regexs, original_template)
-                    original_vars = set([var.strip() for var in original_vars])
-
-                    # remove invalid templates
-                    valid_templates = []
-                    for template in self.templates:
-                        template_vars: List[str] = re.findall(regexs, template)
-                        template_vars = set([var.strip() for var in template_vars])
-                        if template_vars == original_vars:
-                            valid_templates.append(template)
-                    self.templates = valid_templates
-
-            prompt = (
-                f"Based on the provided template, create {num_extra_templates} new and unique templates that are "
-                "variations on this theme. Present these as a list, with each template as a quoted string. The list should "
-                "contain only the templates, without any additional text or explanation. Ensure that the structure of "
-                "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n"
-                "Template:\n"
-                f"{template}\n"
-            )
-            response = client.beta.chat.completions.parse(
-                model="gpt-4o-mini",
-                messages=[
-                    {
-                        "role": "system",
-                        "content": f"Action: Generate up to {num_extra_templates} templates and ensure that the structure of the variables within the templates remains unchanged and don't add any extra variables.",
-                    },
-                    {"role": "user", "content": prompt},
-                ],
-                max_tokens=500,
-                temperature=0,
-                response_format=Templates,
-            )
+                return generate_templates_openai(template, num_extra_templates, params)
 
-            generated_response = response.choices[0].message.parsed
-            generated_response.remove_invalid_templates(template)
+            elif model_config and model_config.get("provider") == "azure":
+                params = model_config
+                if "provider" in params:
+                    del params["provider"]
 
-            return generated_response.templates[:num_extra_templates]
+                return generate_templates_azoi(template, num_extra_templates, params)
+
+            else:
+                return generate_templates_openai(template, num_extra_templates)
diff --git a/langtest/augmentation/types.py b/langtest/augmentation/types.py
deleted file mode 100644
index 44d6386ad..000000000
--- a/langtest/augmentation/types.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from typing import TypedDict, Union
-import os
-
-
-class OpenAIConfig(TypedDict):
-    api_key: str = os.environ.get("OPENAI_API_KEY")
-    base_url: Union[str, None] = None
-    organization: Union[str, None] = (None,)
-    project: Union[str, None] = (None,)
-    provider: str = "openai"
-
-
-class AzureOpenAIConfig(TypedDict):
-    from openai.lib.azure import AzureADTokenProvider
-
-    azure_endpoint: str
-    api_version: str
-    api_key: str
-    provider: str
-    azure_deployment: Union[str, None] = None
-    azure_ad_token: Union[str, None] = (None,)
-    azure_ad_token_provider: Union[AzureADTokenProvider, None] = (None,)
-    organization: Union[str, None] = (None,)
diff --git a/langtest/augmentation/utils.py b/langtest/augmentation/utils.py
new file mode 100644
index 000000000..b4275f1ac
--- /dev/null
+++ b/langtest/augmentation/utils.py
@@ -0,0 +1,148 @@
+import re
+from typing import List, TypedDict, Union
+import os
+
+from pydantic import BaseModel, validator
+
+
+class OpenAIConfig(TypedDict):
+    api_key: str = os.environ.get("OPENAI_API_KEY")
+    base_url: Union[str, None] = None
+    organization: Union[str, None] = (None,)
+    project: Union[str, None] = (None,)
+    provider: str = "openai"
+
+
+class AzureOpenAIConfig(TypedDict):
+    from openai.lib.azure import AzureADTokenProvider
+
+    azure_endpoint: str
+    api_version: str
+    api_key: str
+    provider: str
+    azure_deployment: Union[str, None] = None
+    azure_ad_token: Union[str, None] = (None,)
+    azure_ad_token_provider: Union[AzureADTokenProvider, None] = (None,)
+    organization: Union[str, None] = (None,)
+
+
+class Templates(BaseModel):
+    templates: List[str]
+
+    def __post_init__(self):
+        self.templates = [i.strip('"') for i in self.templates]
+
+    @validator("templates", each_item=True, allow_reuse=True)
+    def check_templates(cls, v: str):
+        if not v:
+            raise ValueError("No templates generated.")
+        return v.strip('"')
+
+    def remove_invalid_templates(self, original_template):
+        # extract variable names using regex
+        regexs = r"{([^{}]*)}"
+        original_vars = re.findall(regexs, original_template)
+        original_vars = set([var.strip() for var in original_vars])
+
+        # remove invalid templates
+        valid_templates = []
+        for template in self.templates:
+            template_vars: List[str] = re.findall(regexs, template)
+            template_vars = set([var.strip() for var in template_vars])
+            if template_vars == original_vars:
+                valid_templates.append(template)
+        self.templates = valid_templates
+
+
+def generate_templates_azoi(
+    template: str, num_extra_templates: int, model_config: AzureOpenAIConfig
+):
+    """Generate new templates based on the provided template using Azure OpenAI API."""
+    import openai
+
+    client = openai.AzureOpenAI(**model_config)
+
+    prompt = (
+        "Based on the provided template, create {num_extra_templates} new and unique templates that are "
+        "variations on this theme. Present these as a list, with each template as a quoted string. The list should "
+        "contain only the templates, without any additional text or explanation. Ensure that the structure of "
+        "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n"
+        "Template:\n"
+        "{template}\n"
+    )
+
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {
+                "role": "system",
+                "content": f"Generate new templates based on the provided template.\n\n Output Schema: {Templates.schema()}\n",
+            },
+            {
+                "role": "user",
+                "content": prompt.format(
+                    template="The {ORG} company is located in {LOC}",
+                    num_extra_templates=2,
+                ),
+            },
+            {
+                "role": "assistant",
+                "content": '["The {ORG} corporation is based out of {LOC}",\n "The {ORG} organization operates in {LOC}"]',
+            },
+            {
+                "role": "user",
+                "content": prompt.format(
+                    template=template, num_extra_templates=num_extra_templates
+                ),
+            },
+        ],
+        temperature=0,
+    )
+
+    import json
+
+    try:
+        clean_response = response.choices[0].message.content.replace("'", '"')
+        gen_templates = Templates(templates=json.loads(clean_response))
+        gen_templates.remove_invalid_templates(template)
+
+        return gen_templates.templates[:num_extra_templates]
+
+    except json.JSONDecodeError as e:
+        raise ValueError(f"Error decoding response: {e}")
+
+
+def generate_templates_openai(
+    template: str, num_extra_templates: int, model_config: OpenAIConfig = OpenAIConfig()
+):
+    """Generate new templates based on the provided template using OpenAI API."""
+    import openai
+
+    client = openai.OpenAI(**model_config)
+
+    prompt = (
+        f"Based on the provided template, create {num_extra_templates} new and unique templates that are "
+        "variations on this theme. Present these as a list, with each template as a quoted string. The list should "
+        "contain only the templates, without any additional text or explanation. Ensure that the structure of "
+        "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n"
+        "Template:\n"
+        f"{template}\n"
+    )
+    response = client.beta.chat.completions.parse(
+        model="gpt-4o-mini",
+        messages=[
+            {
+                "role": "system",
+                "content": f"Action: Generate up to {num_extra_templates} templates and ensure that the structure of the variables within the templates remains unchanged and don't add any extra variables.",
+            },
+            {"role": "user", "content": prompt},
+        ],
+        max_tokens=500,
+        temperature=0,
+        response_format=Templates,
+    )
+
+    generated_response = response.choices[0].message.parsed
+    generated_response.remove_invalid_templates(template)
+
+    return generated_response.templates[:num_extra_templates]

From d04d5006a3fae36f6af76e4628def2536bbac500 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Tue, 27 Aug 2024 21:49:47 +0530
Subject: [PATCH 20/27] added comment for "azoi means Azure OpenAI"

---
 langtest/augmentation/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
index c81589b94..3fd368dc4 100644
--- a/langtest/augmentation/base.py
+++ b/langtest/augmentation/base.py
@@ -608,7 +608,7 @@ def __generate_templates(
     ) -> List[str]:
         if try_import_lib("openai"):
             from langtest.augmentation.utils import (
-                generate_templates_azoi,
+                generate_templates_azoi,  # azoi means Azure OpenAI
                 generate_templates_openai,
             )
 

From 29d136e6d7bd58645f9ddd5a4f7efefd105f300d Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Wed, 28 Aug 2024 18:27:14 +0530
Subject: [PATCH 21/27] updated the model_config handling.

---
 langtest/augmentation/base.py  | 10 ++--------
 langtest/augmentation/utils.py | 25 +++++++++++++++++++++----
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
index 3fd368dc4..ca6718133 100644
--- a/langtest/augmentation/base.py
+++ b/langtest/augmentation/base.py
@@ -612,18 +612,12 @@ def __generate_templates(
                 generate_templates_openai,
             )
 
-            if model_config and model_config.get("provider") == "openai":
-                params = model_config
-                if "provider" in params:
-                    del params["provider"]
+            params = model_config.copy() if model_config else {}
 
+            if model_config and model_config.get("provider") == "openai":
                 return generate_templates_openai(template, num_extra_templates, params)
 
             elif model_config and model_config.get("provider") == "azure":
-                params = model_config
-                if "provider" in params:
-                    del params["provider"]
-
                 return generate_templates_azoi(template, num_extra_templates, params)
 
             else:
diff --git a/langtest/augmentation/utils.py b/langtest/augmentation/utils.py
index b4275f1ac..2f6400734 100644
--- a/langtest/augmentation/utils.py
+++ b/langtest/augmentation/utils.py
@@ -3,6 +3,7 @@
 import os
 
 from pydantic import BaseModel, validator
+from langtest.logger import logger
 
 
 class OpenAIConfig(TypedDict):
@@ -31,6 +32,7 @@ class Templates(BaseModel):
 
     def __post_init__(self):
         self.templates = [i.strip('"') for i in self.templates]
+        logger.info(f"Generated templates: {self.templates}")
 
     @validator("templates", each_item=True, allow_reuse=True)
     def check_templates(cls, v: str):
@@ -51,7 +53,14 @@ def remove_invalid_templates(self, original_template):
             template_vars = set([var.strip() for var in template_vars])
             if template_vars == original_vars:
                 valid_templates.append(template)
+                logger.info(f"Valid template: {template}")
+            else:
+                logger.warning(
+                    f"Invalid Variables in template: {template} - {template_vars}"
+                )
+
         self.templates = valid_templates
+        logger.info(f"Valid templates: {self.templates}")
 
 
 def generate_templates_azoi(
@@ -60,6 +69,9 @@ def generate_templates_azoi(
     """Generate new templates based on the provided template using Azure OpenAI API."""
     import openai
 
+    if "provider" in model_config:
+        del model_config["provider"]
+
     client = openai.AzureOpenAI(**model_config)
 
     prompt = (
@@ -76,7 +88,7 @@ def generate_templates_azoi(
         messages=[
             {
                 "role": "system",
-                "content": f"Generate new templates based on the provided template.\n\n Output Schema: {Templates.schema()}\n",
+                "content": f"Generate up to {num_extra_templates} templates based on the provided template.\n\n JSON Output Schema: {Templates.schema()}\n",
             },
             {
                 "role": "user",
@@ -96,7 +108,8 @@ def generate_templates_azoi(
                 ),
             },
         ],
-        temperature=0,
+        temperature=0.1,
+        max_tokens=1000,
     )
 
     import json
@@ -109,6 +122,7 @@ def generate_templates_azoi(
         return gen_templates.templates[:num_extra_templates]
 
     except json.JSONDecodeError as e:
+        logger.error(f"Error decoding response: {e}")
         raise ValueError(f"Error decoding response: {e}")
 
 
@@ -118,6 +132,9 @@ def generate_templates_openai(
     """Generate new templates based on the provided template using OpenAI API."""
     import openai
 
+    if "provider" in model_config:
+        del model_config["provider"]
+
     client = openai.OpenAI(**model_config)
 
     prompt = (
@@ -137,8 +154,8 @@ def generate_templates_openai(
             },
             {"role": "user", "content": prompt},
         ],
-        max_tokens=500,
-        temperature=0,
+        max_tokens=100,
+        temperature=0.1,
         response_format=Templates,
     )
 

From cccb562d19f32673e6ec2c693dc0ed556375e10f Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Thu, 29 Aug 2024 20:51:01 +0530
Subject: [PATCH 22/27] changed: logging to logger from langtest

---
 langtest/datahandler/datasource.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py
index 217a06f17..c6ae54638 100644
--- a/langtest/datahandler/datasource.py
+++ b/langtest/datahandler/datasource.py
@@ -1,6 +1,5 @@
 import csv
 import importlib
-import logging
 import os
 import random
 import re
@@ -11,6 +10,7 @@
 import jsonlines
 import pandas as pd
 from langtest.tasks.task import TaskManager
+from langtest.logger import logger as logging
 
 from .format import Formatter
 from langtest.utils.custom_types import (

From 85d7e7030010f46dffc39cff9416d5212423e545 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Mon, 2 Sep 2024 18:07:29 +0530
Subject: [PATCH 23/27] added: doc lines

---
 langtest/augmentation/utils.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/langtest/augmentation/utils.py b/langtest/augmentation/utils.py
index 2f6400734..a13a8d2e2 100644
--- a/langtest/augmentation/utils.py
+++ b/langtest/augmentation/utils.py
@@ -7,6 +7,8 @@
 
 
 class OpenAIConfig(TypedDict):
+    """OpenAI Configuration for API Key and Provider."""
+
     api_key: str = os.environ.get("OPENAI_API_KEY")
     base_url: Union[str, None] = None
     organization: Union[str, None] = (None,)
@@ -15,6 +17,8 @@ class OpenAIConfig(TypedDict):
 
 
 class AzureOpenAIConfig(TypedDict):
+    """Azure OpenAI Configuration for API Key and Provider."""
+
     from openai.lib.azure import AzureADTokenProvider
 
     azure_endpoint: str
@@ -28,19 +32,24 @@ class AzureOpenAIConfig(TypedDict):
 
 
 class Templates(BaseModel):
+    """Model to validate generated templates."""
+
     templates: List[str]
 
     def __post_init__(self):
+        """Post init method to remove quotes from templates."""
         self.templates = [i.strip('"') for i in self.templates]
         logger.info(f"Generated templates: {self.templates}")
 
     @validator("templates", each_item=True, allow_reuse=True)
     def check_templates(cls, v: str):
+        """Validator to check if templates are generated."""
         if not v:
             raise ValueError("No templates generated.")
         return v.strip('"')
 
     def remove_invalid_templates(self, original_template):
+        """Remove invalid templates based on the original template."""
         # extract variable names using regex
         regexs = r"{([^{}]*)}"
         original_vars = re.findall(regexs, original_template)

From df7776e3408022cb3f01732172d55ad5650bd38d Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Tue, 3 Sep 2024 11:37:20 +0530
Subject: [PATCH 24/27] implemented: text-classification support for
 multi-label classification.

---
 langtest/datahandler/datasource.py        |  2 ++
 langtest/modelhandler/jsl_modelhandler.py | 38 +++++++++++++++++++----
 langtest/tasks/task.py                    | 18 +++++++++--
 langtest/utils/custom_types/output.py     | 22 +++++++++----
 4 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py
index 868a4152a..90e3abcb8 100644
--- a/langtest/datahandler/datasource.py
+++ b/langtest/datahandler/datasource.py
@@ -957,6 +957,8 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]:
                 import ast
 
                 i["transformations"] = ast.literal_eval(temp)
+            else:
+                i["transformations"] = None
             sample = self.task.get_sample_class(**i)
             samples.append(sample)
 
diff --git a/langtest/modelhandler/jsl_modelhandler.py b/langtest/modelhandler/jsl_modelhandler.py
index f13b18d32..9bb67fcc2 100644
--- a/langtest/modelhandler/jsl_modelhandler.py
+++ b/langtest/modelhandler/jsl_modelhandler.py
@@ -42,6 +42,7 @@
         XlmRoBertaForSequenceClassification,
         XlnetForSequenceClassification,
         MarianTransformer,
+        MultiClassifierDLModel,
     )
     from sparknlp.base import LightPipeline
     from sparknlp.pretrained import PretrainedPipeline
@@ -63,6 +64,7 @@
 
     SUPPORTED_SPARKNLP_CLASSIFERS.extend(
         [
+            MultiClassifierDLModel,
             ClassifierDLModel,
             SentimentDLModel,
             AlbertForSequenceClassification,
@@ -409,6 +411,7 @@ def __init__(
         super().__init__(model)
 
         _classifier = None
+        self.multi_label_classifier = False
         for annotator in self.model.stages:
             if self.is_classifier(annotator):
                 _classifier = annotator
@@ -417,6 +420,10 @@ def __init__(
         if _classifier is None:
             raise ValueError(Errors.E040(var="classifier"))
 
+        if isinstance(_classifier, MultiClassifierDLModel):
+            self.multi_label_classifier = True
+            self.threshold = _classifier.getThreshold()
+
         self.output_col = _classifier.getOutputCol()
         self.classes = _classifier.getClasses()
         self.model = LightPipeline(self.model)
@@ -442,13 +449,32 @@ def predict(
         Returns:
             SequenceClassificationOutput: Classification output from SparkNLP LightPipeline.
         """
-        prediction_metadata = self.model.fullAnnotate(text)[0][self.output_col][
-            0
-        ].metadata
-        prediction = [{"label": x, "score": y} for x, y in prediction_metadata.items()]
+        prediction_metadata = self.model.fullAnnotate(text)[0][self.output_col]
+
+        if self.multi_label_classifier:
+            multi_label = True
+            if len(prediction_metadata) > 0:
+                prediction_metadata = prediction_metadata[0].metadata
+
+                prediction = [
+                    {"label": x, "score": y} for x, y in prediction_metadata.items()
+                ]
+                # filter based on the threshold value with score greater than threshold
+                prediction = [x for x in prediction if float(x["score"]) > self.threshold]
+
+                return SequenceClassificationOutput(
+                    text=text,
+                    predictions=prediction,
+                    multi_label=multi_label,
+                )
+            else:
+                return SequenceClassificationOutput(
+                    text=text, predictions=[], multi_label=multi_label
+                )
 
-        if not return_all_scores:
-            prediction = [max(prediction, key=lambda x: x["score"])]
+        else:
+            if not return_all_scores:
+                prediction = [max(prediction, key=lambda x: x["score"])]
 
         return SequenceClassificationOutput(text=text, predictions=prediction)
 
diff --git a/langtest/tasks/task.py b/langtest/tasks/task.py
index 035725bb8..93af99114 100644
--- a/langtest/tasks/task.py
+++ b/langtest/tasks/task.py
@@ -1,3 +1,4 @@
+import ast
 import re
 from abc import ABC, abstractmethod
 from typing import Union
@@ -267,17 +268,28 @@ def create_sample(
         row_data: dict,
         feature_column="text",
         target_column: Union[samples.SequenceLabel, str] = "label",
+        multi_label: bool = False,
+        *args,
+        **kwargs,
     ) -> samples.SequenceClassificationSample:
         """Create a sample."""
         keys = list(row_data.keys())
         # auto-detect the default column names from the row_data
         column_mapper = cls.column_mapping(keys, [feature_column, target_column])
 
+        # is multi-label classification
+        # if "multi_label" in kwargs:
+        #     multi_label = kwargs.get("multi_label", False)
+        #     kwargs.pop("multi_label")
+
         labels = row_data.get(column_mapper[target_column])
 
         if isinstance(labels, samples.SequenceLabel):
             labels = [labels]
-        elif isinstance(labels, list):
+        elif isinstance(labels, list) or isinstance(labels, str):
+            labels = ast.literal_eval(labels)
+            if not isinstance(labels, list):
+                labels = [labels]
             labels = [
                 samples.SequenceLabel(label=label, score=1.0)
                 if isinstance(label, str)
@@ -289,7 +301,9 @@ def create_sample(
 
         return samples.SequenceClassificationSample(
             original=row_data[column_mapper[feature_column]],
-            expected_results=samples.SequenceClassificationOutput(predictions=labels),
+            expected_results=samples.SequenceClassificationOutput(
+                predictions=labels, multi_label=multi_label
+            ),
         )
 
 
diff --git a/langtest/utils/custom_types/output.py b/langtest/utils/custom_types/output.py
index bcd1e4cf0..6961e4b0f 100644
--- a/langtest/utils/custom_types/output.py
+++ b/langtest/utils/custom_types/output.py
@@ -8,6 +8,7 @@ class SequenceClassificationOutput(BaseModel):
     """Output model for text classification tasks."""
 
     predictions: List[SequenceLabel]
+    multi_label: bool = False
 
     def to_str_list(self) -> str:
         """Convert the output into list of strings.
@@ -15,18 +16,27 @@ def to_str_list(self) -> str:
         Returns:
             List[str]: predictions in form of a list of strings.
         """
-        return ",".join([x.label for x in self.predictions])
+        return ", ".join([x.label for x in self.predictions])
 
-    def __str__(self):
+    def __str__(self) -> str:
         """String representation"""
         labels = {elt.label: elt.score for elt in self.predictions}
         return f"SequenceClassificationOutput(predictions={labels})"
 
-    def __eq__(self, other):
+    def __eq__(self, other: "SequenceClassificationOutput") -> bool:
         """Equality comparison method."""
-        top_class = max(self.predictions, key=lambda x: x.score).label
-        other_top_class = max(other.predictions, key=lambda x: x.score).label
-        return top_class == other_top_class
+
+        if self.multi_label:
+            # get all labels
+            self_labels = {elt.label for elt in self.predictions}
+            other_labels = {elt.label for elt in other.predictions}
+            return set(self_labels) == set(other_labels)
+        elif len(self.predictions) == 0 and len(other.predictions) == 0:
+            return True
+        else:
+            top_class = max(self.predictions, key=lambda x: x.score).label
+            other_top_class = max(other.predictions, key=lambda x: x.score).label
+            return top_class == other_top_class
 
 
 class MinScoreOutput(BaseModel):

From 2da96b76e9d52c6e75c6c5ed2b2fe41cd9735774 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Tue, 3 Sep 2024 12:17:33 +0530
Subject: [PATCH 25/27] Refactor SequenceClassificationOutputFormatter to
 handle multi-label predictions

---
 langtest/datahandler/format.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/langtest/datahandler/format.py b/langtest/datahandler/format.py
index 0755108f0..621fe34e0 100644
--- a/langtest/datahandler/format.py
+++ b/langtest/datahandler/format.py
@@ -108,9 +108,18 @@ def to_csv(sample: SequenceClassificationSample) -> Tuple[str, str]:
             Tuple[str, str]:
                 Row formatted as a list of strings.
         """
-        if sample.test_case:
-            return [sample.test_case, sample.expected_results.predictions[0].label]
-        return [sample.original, sample.expected_results.predictions[0].label]
+        predictions = sample.expected_results.predictions
+        multi_label = sample.expected_results.multi_label
+
+        if multi_label:
+            return [
+                sample.test_case or sample.original,
+                [elt.label for elt in predictions] if predictions else [],
+            ]
+        else:
+            if sample.test_case:
+                return [sample.test_case, sample.expected_results.predictions[0].label]
+            return [sample.original, sample.expected_results.predictions[0].label]
 
 
 class NEROutputFormatter(BaseFormatter):

From 16fee46de37e09d003b170aa85c902428ccd5902 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Tue, 3 Sep 2024 12:35:54 +0530
Subject: [PATCH 26/27] Refactor CSVDataset to remove unnecessary
 transformations field

---
 langtest/datahandler/datasource.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py
index 90e3abcb8..42648338d 100644
--- a/langtest/datahandler/datasource.py
+++ b/langtest/datahandler/datasource.py
@@ -958,7 +958,7 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]:
 
                 i["transformations"] = ast.literal_eval(temp)
             else:
-                i["transformations"] = None
+                i.pop("transformations")
             sample = self.task.get_sample_class(**i)
             samples.append(sample)
 

From 258a0f7ddeb9b05ca5a99c8a196febbc1dabcbd1 Mon Sep 17 00:00:00 2001
From: Kalyan Chakravarthy <chakravarthik27@gmail.com>
Date: Tue, 3 Sep 2024 14:19:05 +0530
Subject: [PATCH 27/27] Fix UnboundLocalError and KeyError.

---
 langtest/datahandler/datasource.py        |  2 +-
 langtest/modelhandler/jsl_modelhandler.py | 38 ++++++++++-------------
 2 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py
index 42648338d..1d89303ae 100644
--- a/langtest/datahandler/datasource.py
+++ b/langtest/datahandler/datasource.py
@@ -957,7 +957,7 @@ def _import_data(self, file_name, **kwargs) -> List[Sample]:
                 import ast
 
                 i["transformations"] = ast.literal_eval(temp)
-            else:
+            elif "transformations" in i:
                 i.pop("transformations")
             sample = self.task.get_sample_class(**i)
             samples.append(sample)
diff --git a/langtest/modelhandler/jsl_modelhandler.py b/langtest/modelhandler/jsl_modelhandler.py
index 9bb67fcc2..0b703d637 100644
--- a/langtest/modelhandler/jsl_modelhandler.py
+++ b/langtest/modelhandler/jsl_modelhandler.py
@@ -450,31 +450,25 @@ def predict(
             SequenceClassificationOutput: Classification output from SparkNLP LightPipeline.
         """
         prediction_metadata = self.model.fullAnnotate(text)[0][self.output_col]
+        prediction = []
+
+        if len(prediction_metadata) > 0:
+            prediction_metadata = prediction_metadata[0].metadata
+            prediction = [
+                {"label": x, "score": y} for x, y in prediction_metadata.items()
+            ]
 
         if self.multi_label_classifier:
-            multi_label = True
-            if len(prediction_metadata) > 0:
-                prediction_metadata = prediction_metadata[0].metadata
-
-                prediction = [
-                    {"label": x, "score": y} for x, y in prediction_metadata.items()
-                ]
-                # filter based on the threshold value with score greater than threshold
-                prediction = [x for x in prediction if float(x["score"]) > self.threshold]
-
-                return SequenceClassificationOutput(
-                    text=text,
-                    predictions=prediction,
-                    multi_label=multi_label,
-                )
-            else:
-                return SequenceClassificationOutput(
-                    text=text, predictions=[], multi_label=multi_label
-                )
+            prediction = [x for x in prediction if float(x["score"]) > self.threshold]
 
-        else:
-            if not return_all_scores:
-                prediction = [max(prediction, key=lambda x: x["score"])]
+            return SequenceClassificationOutput(
+                text=text,
+                predictions=prediction,
+                multi_label=self.multi_label_classifier,
+            )
+
+        if not return_all_scores:
+            prediction = [max(prediction, key=lambda x: x["score"])]
 
         return SequenceClassificationOutput(text=text, predictions=prediction)