From 24be4be5db0182b9eab2ceca47efefa51a14e414 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 26 Aug 2024 20:30:14 +0530 Subject: [PATCH 1/6] Refactor TemplaticAugment to support multiple AI providers for template generation --- langtest/augmentation/base.py | 29 +++++++++++++++++++++++------ langtest/augmentation/types.py | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 langtest/augmentation/types.py diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index 116f30be6..167529bb2 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -10,6 +10,7 @@ import pandas as pd import yaml +from langtest.augmentation.types import AzureOpenAIConfig, OpenAIConfig from langtest.datahandler.datasource import DataFactory from langtest.transform import TestFactory from langtest.transform.utils import create_terminology @@ -324,6 +325,7 @@ def __init__( generate_templates=False, show_templates=False, num_extra_templates=10, + model_config: Union[OpenAIConfig, AzureOpenAIConfig] = None, ) -> None: """This constructor for the TemplaticAugment class. 
@@ -341,12 +343,14 @@ def __init__( given_template = self.__templates[:] for template in given_template: generated_templates: List[str] = self.__generate_templates( - template, num_extra_templates + template, num_extra_templates, model_config ) while len(generated_templates) < num_extra_templates: temp_templates = self.__generate_templates( - template, num_extra_templates + template, + num_extra_templates, + model_config, ) generated_templates.extend(temp_templates) @@ -354,8 +358,8 @@ def __init__( # Extend the existing templates list self.__templates.extend(generated_templates[:num_extra_templates]) - except Exception as e: - raise Errors.E095(msg=e) + except Exception as e_msg: + raise Errors.E095(e=e_msg) if show_templates: [print(template) for template in self.__templates] @@ -596,12 +600,25 @@ def add_spaces_around_punctuation(text: str): return text - def __generate_templates(self, template, num_extra_templates) -> List[str]: + def __generate_templates( + self, + template: str, + num_extra_templates: int, + model_config: Union[OpenAIConfig, AzureOpenAIConfig] = None, + ) -> List[str]: if try_import_lib("openai"): import openai from pydantic import BaseModel, validator - client = openai.OpenAI() + if model_config and model_config.get("provider") == "openai": + client = openai.OpenAI() + elif model_config and model_config.get("provider") == "azure": + params = model_config + del params["provider"] + + client = openai.AzureOpenAI(**params) + else: + client = openai.OpenAI() class Templates(BaseModel): templates: List[str] diff --git a/langtest/augmentation/types.py b/langtest/augmentation/types.py new file mode 100644 index 000000000..44d6386ad --- /dev/null +++ b/langtest/augmentation/types.py @@ -0,0 +1,23 @@ +from typing import TypedDict, Union +import os + + +class OpenAIConfig(TypedDict): + api_key: str = os.environ.get("OPENAI_API_KEY") + base_url: Union[str, None] = None + organization: Union[str, None] = (None,) + project: Union[str, None] = (None,) + 
provider: str = "openai" + + +class AzureOpenAIConfig(TypedDict): + from openai.lib.azure import AzureADTokenProvider + + azure_endpoint: str + api_version: str + api_key: str + provider: str + azure_deployment: Union[str, None] = None + azure_ad_token: Union[str, None] = (None,) + azure_ad_token_provider: Union[AzureADTokenProvider, None] = (None,) + organization: Union[str, None] = (None,) From 4d866f282a387de5e77e5f7572d27bacf39ea7da Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 27 Aug 2024 21:47:59 +0530 Subject: [PATCH 2/6] Integrated Azure OpenAI and OpenAI services for automated template generation. --- langtest/augmentation/base.py | 77 ++++------------- langtest/augmentation/types.py | 23 ----- langtest/augmentation/utils.py | 148 +++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 84 deletions(-) delete mode 100644 langtest/augmentation/types.py create mode 100644 langtest/augmentation/utils.py diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index 167529bb2..c81589b94 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -10,7 +10,7 @@ import pandas as pd import yaml -from langtest.augmentation.types import AzureOpenAIConfig, OpenAIConfig +from langtest.augmentation.utils import AzureOpenAIConfig, OpenAIConfig from langtest.datahandler.datasource import DataFactory from langtest.transform import TestFactory from langtest.transform.utils import create_terminology @@ -607,69 +607,24 @@ def __generate_templates( model_config: Union[OpenAIConfig, AzureOpenAIConfig] = None, ) -> List[str]: if try_import_lib("openai"): - import openai - from pydantic import BaseModel, validator + from langtest.augmentation.utils import ( + generate_templates_azoi, + generate_templates_openai, + ) if model_config and model_config.get("provider") == "openai": - client = openai.OpenAI() - elif model_config and model_config.get("provider") == "azure": params = model_config - del 
params["provider"] + if "provider" in params: + del params["provider"] - client = openai.AzureOpenAI(**params) - else: - client = openai.OpenAI() - - class Templates(BaseModel): - templates: List[str] - - def __post_init__(self): - self.templates = [i.strip('"') for i in self.templates] - - @validator("templates", each_item=True, allow_reuse=True) - def check_templates(cls, v: str): - if not v: - raise ValueError("No templates generated.") - return v.strip('"') - - def remove_invalid_templates(self, original_template): - # extract variable names using regex - regexs = r"{([^{}]*)}" - original_vars = re.findall(regexs, original_template) - original_vars = set([var.strip() for var in original_vars]) - - # remove invalid templates - valid_templates = [] - for template in self.templates: - template_vars: List[str] = re.findall(regexs, template) - template_vars = set([var.strip() for var in template_vars]) - if template_vars == original_vars: - valid_templates.append(template) - self.templates = valid_templates - - prompt = ( - f"Based on the provided template, create {num_extra_templates} new and unique templates that are " - "variations on this theme. Present these as a list, with each template as a quoted string. The list should " - "contain only the templates, without any additional text or explanation. Ensure that the structure of " - "these variables remains consistent in each generated template. 
Note: don't add any extra variables and ignore typo errors.\n\n" - "Template:\n" - f"{template}\n" - ) - response = client.beta.chat.completions.parse( - model="gpt-4o-mini", - messages=[ - { - "role": "system", - "content": f"Action: Generate up to {num_extra_templates} templates and ensure that the structure of the variables within the templates remains unchanged and don't add any extra variables.", - }, - {"role": "user", "content": prompt}, - ], - max_tokens=500, - temperature=0, - response_format=Templates, - ) + return generate_templates_openai(template, num_extra_templates, params) - generated_response = response.choices[0].message.parsed - generated_response.remove_invalid_templates(template) + elif model_config and model_config.get("provider") == "azure": + params = model_config + if "provider" in params: + del params["provider"] - return generated_response.templates[:num_extra_templates] + return generate_templates_azoi(template, num_extra_templates, params) + + else: + return generate_templates_openai(template, num_extra_templates) diff --git a/langtest/augmentation/types.py b/langtest/augmentation/types.py deleted file mode 100644 index 44d6386ad..000000000 --- a/langtest/augmentation/types.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import TypedDict, Union -import os - - -class OpenAIConfig(TypedDict): - api_key: str = os.environ.get("OPENAI_API_KEY") - base_url: Union[str, None] = None - organization: Union[str, None] = (None,) - project: Union[str, None] = (None,) - provider: str = "openai" - - -class AzureOpenAIConfig(TypedDict): - from openai.lib.azure import AzureADTokenProvider - - azure_endpoint: str - api_version: str - api_key: str - provider: str - azure_deployment: Union[str, None] = None - azure_ad_token: Union[str, None] = (None,) - azure_ad_token_provider: Union[AzureADTokenProvider, None] = (None,) - organization: Union[str, None] = (None,) diff --git a/langtest/augmentation/utils.py b/langtest/augmentation/utils.py new file mode 
100644 index 000000000..b4275f1ac --- /dev/null +++ b/langtest/augmentation/utils.py @@ -0,0 +1,148 @@ +import re +from typing import List, TypedDict, Union +import os + +from pydantic import BaseModel, validator + + +class OpenAIConfig(TypedDict): + api_key: str = os.environ.get("OPENAI_API_KEY") + base_url: Union[str, None] = None + organization: Union[str, None] = (None,) + project: Union[str, None] = (None,) + provider: str = "openai" + + +class AzureOpenAIConfig(TypedDict): + from openai.lib.azure import AzureADTokenProvider + + azure_endpoint: str + api_version: str + api_key: str + provider: str + azure_deployment: Union[str, None] = None + azure_ad_token: Union[str, None] = (None,) + azure_ad_token_provider: Union[AzureADTokenProvider, None] = (None,) + organization: Union[str, None] = (None,) + + +class Templates(BaseModel): + templates: List[str] + + def __post_init__(self): + self.templates = [i.strip('"') for i in self.templates] + + @validator("templates", each_item=True, allow_reuse=True) + def check_templates(cls, v: str): + if not v: + raise ValueError("No templates generated.") + return v.strip('"') + + def remove_invalid_templates(self, original_template): + # extract variable names using regex + regexs = r"{([^{}]*)}" + original_vars = re.findall(regexs, original_template) + original_vars = set([var.strip() for var in original_vars]) + + # remove invalid templates + valid_templates = [] + for template in self.templates: + template_vars: List[str] = re.findall(regexs, template) + template_vars = set([var.strip() for var in template_vars]) + if template_vars == original_vars: + valid_templates.append(template) + self.templates = valid_templates + + +def generate_templates_azoi( + template: str, num_extra_templates: int, model_config: AzureOpenAIConfig +): + """Generate new templates based on the provided template using Azure OpenAI API.""" + import openai + + client = openai.AzureOpenAI(**model_config) + + prompt = ( + "Based on the provided 
template, create {num_extra_templates} new and unique templates that are " + "variations on this theme. Present these as a list, with each template as a quoted string. The list should " + "contain only the templates, without any additional text or explanation. Ensure that the structure of " + "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n" + "Template:\n" + "{template}\n" + ) + + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + { + "role": "system", + "content": f"Generate new templates based on the provided template.\n\n Output Schema: {Templates.schema()}\n", + }, + { + "role": "user", + "content": prompt.format( + template="The {ORG} company is located in {LOC}", + num_extra_templates=2, + ), + }, + { + "role": "assistant", + "content": '["The {ORG} corporation is based out of {LOC}",\n "The {ORG} organization operates in {LOC}"]', + }, + { + "role": "user", + "content": prompt.format( + template=template, num_extra_templates=num_extra_templates + ), + }, + ], + temperature=0, + ) + + import json + + try: + clean_response = response.choices[0].message.content.replace("'", '"') + gen_templates = Templates(templates=json.loads(clean_response)) + gen_templates.remove_invalid_templates(template) + + return gen_templates.templates[:num_extra_templates] + + except json.JSONDecodeError as e: + raise ValueError(f"Error decoding response: {e}") + + +def generate_templates_openai( + template: str, num_extra_templates: int, model_config: OpenAIConfig = OpenAIConfig() +): + """Generate new templates based on the provided template using OpenAI API.""" + import openai + + client = openai.OpenAI(**model_config) + + prompt = ( + f"Based on the provided template, create {num_extra_templates} new and unique templates that are " + "variations on this theme. Present these as a list, with each template as a quoted string. 
The list should " + "contain only the templates, without any additional text or explanation. Ensure that the structure of " + "these variables remains consistent in each generated template. Note: don't add any extra variables and ignore typo errors.\n\n" + "Template:\n" + f"{template}\n" + ) + response = client.beta.chat.completions.parse( + model="gpt-4o-mini", + messages=[ + { + "role": "system", + "content": f"Action: Generate up to {num_extra_templates} templates and ensure that the structure of the variables within the templates remains unchanged and don't add any extra variables.", + }, + {"role": "user", "content": prompt}, + ], + max_tokens=500, + temperature=0, + response_format=Templates, + ) + + generated_response = response.choices[0].message.parsed + generated_response.remove_invalid_templates(template) + + return generated_response.templates[:num_extra_templates] From d04d5006a3fae36f6af76e4628def2536bbac500 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Tue, 27 Aug 2024 21:49:47 +0530 Subject: [PATCH 3/6] added comment for "azoi means Azure OpenAI" --- langtest/augmentation/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index c81589b94..3fd368dc4 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -608,7 +608,7 @@ def __generate_templates( ) -> List[str]: if try_import_lib("openai"): from langtest.augmentation.utils import ( - generate_templates_azoi, + generate_templates_azoi, # azoi means Azure OpenAI generate_templates_openai, ) From 29d136e6d7bd58645f9ddd5a4f7efefd105f300d Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Wed, 28 Aug 2024 18:27:14 +0530 Subject: [PATCH 4/6] updated the model_config handling. 
--- langtest/augmentation/base.py | 10 ++-------- langtest/augmentation/utils.py | 25 +++++++++++++++++++++---- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py index 3fd368dc4..ca6718133 100644 --- a/langtest/augmentation/base.py +++ b/langtest/augmentation/base.py @@ -612,18 +612,12 @@ def __generate_templates( generate_templates_openai, ) - if model_config and model_config.get("provider") == "openai": - params = model_config - if "provider" in params: - del params["provider"] + params = model_config.copy() if model_config else {} + if model_config and model_config.get("provider") == "openai": return generate_templates_openai(template, num_extra_templates, params) elif model_config and model_config.get("provider") == "azure": - params = model_config - if "provider" in params: - del params["provider"] - return generate_templates_azoi(template, num_extra_templates, params) else: diff --git a/langtest/augmentation/utils.py b/langtest/augmentation/utils.py index b4275f1ac..2f6400734 100644 --- a/langtest/augmentation/utils.py +++ b/langtest/augmentation/utils.py @@ -3,6 +3,7 @@ import os from pydantic import BaseModel, validator +from langtest.logger import logger class OpenAIConfig(TypedDict): @@ -31,6 +32,7 @@ class Templates(BaseModel): def __post_init__(self): self.templates = [i.strip('"') for i in self.templates] + logger.info(f"Generated templates: {self.templates}") @validator("templates", each_item=True, allow_reuse=True) def check_templates(cls, v: str): @@ -51,7 +53,14 @@ def remove_invalid_templates(self, original_template): template_vars = set([var.strip() for var in template_vars]) if template_vars == original_vars: valid_templates.append(template) + logger.info(f"Valid template: {template}") + else: + logger.warning( + f"Invalid Variables in template: {template} - {template_vars}" + ) + self.templates = valid_templates + logger.info(f"Valid templates: {self.templates}") def 
generate_templates_azoi( @@ -60,6 +69,9 @@ def generate_templates_azoi( """Generate new templates based on the provided template using Azure OpenAI API.""" import openai + if "provider" in model_config: + del model_config["provider"] + client = openai.AzureOpenAI(**model_config) prompt = ( @@ -76,7 +88,7 @@ def generate_templates_azoi( messages=[ { "role": "system", - "content": f"Generate new templates based on the provided template.\n\n Output Schema: {Templates.schema()}\n", + "content": f"Generate up to {num_extra_templates} templates based on the provided template.\n\n JSON Output Schema: {Templates.schema()}\n", }, { "role": "user", @@ -96,7 +108,8 @@ def generate_templates_azoi( ), }, ], - temperature=0, + temperature=0.1, + max_tokens=1000, ) import json @@ -109,6 +122,7 @@ def generate_templates_azoi( return gen_templates.templates[:num_extra_templates] except json.JSONDecodeError as e: + logger.error(f"Error decoding response: {e}") raise ValueError(f"Error decoding response: {e}") @@ -118,6 +132,9 @@ def generate_templates_openai( """Generate new templates based on the provided template using OpenAI API.""" import openai + if "provider" in model_config: + del model_config["provider"] + client = openai.OpenAI(**model_config) prompt = ( @@ -137,8 +154,8 @@ def generate_templates_openai( }, {"role": "user", "content": prompt}, ], - max_tokens=500, - temperature=0, + max_tokens=100, + temperature=0.1, response_format=Templates, ) From cccb562d19f32673e6ec2c693dc0ed556375e10f Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Thu, 29 Aug 2024 20:51:01 +0530 Subject: [PATCH 5/6] changed: logging to logger from langtest --- langtest/datahandler/datasource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langtest/datahandler/datasource.py b/langtest/datahandler/datasource.py index 217a06f17..c6ae54638 100644 --- a/langtest/datahandler/datasource.py +++ b/langtest/datahandler/datasource.py @@ -1,6 +1,5 @@ import csv import importlib 
-import logging import os import random import re @@ -11,6 +10,7 @@ import jsonlines import pandas as pd from langtest.tasks.task import TaskManager +from langtest.logger import logger as logging from .format import Formatter from langtest.utils.custom_types import ( From 85d7e7030010f46dffc39cff9416d5212423e545 Mon Sep 17 00:00:00 2001 From: Kalyan Chakravarthy Date: Mon, 2 Sep 2024 18:07:29 +0530 Subject: [PATCH 6/6] added: doc lines --- langtest/augmentation/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/langtest/augmentation/utils.py b/langtest/augmentation/utils.py index 2f6400734..a13a8d2e2 100644 --- a/langtest/augmentation/utils.py +++ b/langtest/augmentation/utils.py @@ -7,6 +7,8 @@ class OpenAIConfig(TypedDict): + """OpenAI Configuration for API Key and Provider.""" + api_key: str = os.environ.get("OPENAI_API_KEY") base_url: Union[str, None] = None organization: Union[str, None] = (None,) @@ -15,6 +17,8 @@ class OpenAIConfig(TypedDict): class AzureOpenAIConfig(TypedDict): + """Azure OpenAI Configuration for API Key and Provider.""" + from openai.lib.azure import AzureADTokenProvider azure_endpoint: str @@ -28,19 +32,24 @@ class AzureOpenAIConfig(TypedDict): class Templates(BaseModel): + """Model to validate generated templates.""" + templates: List[str] def __post_init__(self): + """Post init method to remove quotes from templates.""" self.templates = [i.strip('"') for i in self.templates] logger.info(f"Generated templates: {self.templates}") @validator("templates", each_item=True, allow_reuse=True) def check_templates(cls, v: str): + """Validator to check if templates are generated.""" if not v: raise ValueError("No templates generated.") return v.strip('"') def remove_invalid_templates(self, original_template): + """Remove invalid templates based on the original template.""" # extract variable names using regex regexs = r"{([^{}]*)}" original_vars = re.findall(regexs, original_template)