Skip to content

Commit

Permalink
Merge pull request #1122 from JohnSnowLabs/release/2.4.0
Browse files Browse the repository at this point in the history
Release/2.4.0
  • Loading branch information
chakravarthik27 authored Sep 22, 2024
2 parents b35c28a + 551cc12 commit 1b9c7db
Showing 22 changed files with 2,204 additions and 285 deletions.
1 change: 1 addition & 0 deletions demo/tutorials/llm_notebooks/Visual_QA.ipynb

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion langtest/datahandler/datasource.py
Original file line number Diff line number Diff line change
@@ -95,6 +95,12 @@
"anti-stereotype": ["anti-stereotype"],
"unrelated": ["unrelated"],
},
"visualqa": {
"image": ["image", "image_1"],
"question": ["question"],
"options": ["options"],
"answer": ["answer"],
},
}


@@ -183,7 +189,7 @@ def __init__(self, file_path: Union[str, dict], task: TaskManager, **kwargs) ->
raise ValueError(Errors.E024)

if "data_source" not in file_path:
raise ValueError(Errors.E025)
raise ValueError(Errors.E025())
self._custom_label = file_path.copy()
self._file_path = file_path.get("data_source")
self._size = None
@@ -1246,6 +1252,7 @@ class HuggingFaceDataset(BaseDataset):
"summarization",
"ner",
"question-answering",
"visualqa",
]

LIB_NAME = "datasets"
@@ -1709,6 +1716,7 @@ class PandasDataset(BaseDataset):
"legal",
"factuality",
"stereoset",
"visualqa",
]
COLUMN_NAMES = {task: COLUMN_MAPPER[task] for task in supported_tasks}

6 changes: 6 additions & 0 deletions langtest/langtest.py
Original file line number Diff line number Diff line change
@@ -605,6 +605,7 @@ def generated_results(self) -> Optional[pd.DataFrame]:
"model_name",
"category",
"test_type",
"original_image",
"original",
"context",
"prompt",
@@ -613,8 +614,10 @@ def generated_results(self) -> Optional[pd.DataFrame]:
"completion",
"test_case",
"perturbed_context",
"perturbed_image",
"perturbed_question",
"sentence",
"question",
"patient_info_A",
"patient_info_B",
"case",
@@ -838,6 +841,7 @@ def testcases(self, additional_cols=False) -> pd.DataFrame:
"model_name",
"category",
"test_type",
"original_image",
"original",
"context",
"original_context",
@@ -863,7 +867,9 @@ def testcases(self, additional_cols=False) -> pd.DataFrame:
"correct_sentence",
"incorrect_sentence",
"perturbed_context",
"perturbed_image",
"perturbed_question",
"question",
"ground_truth",
"options",
"expected_result",
57 changes: 56 additions & 1 deletion langtest/modelhandler/llm_modelhandler.py
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@
import logging
from functools import lru_cache
from langtest.utils.custom_types.helpers import HashableDict
from langchain.chat_models.base import BaseChatModel


class PretrainedModelForQA(ModelAPI):
@@ -80,7 +81,7 @@ def load_model(cls, hub: str, path: str, *args, **kwargs) -> "PretrainedModelFor
try:
cls._update_model_parameters(hub, filtered_kwargs)
if path in (
"gpt-4o",
"gpt-4o-mini",
"gpt-4",
"gpt-3.5-turbo",
"gpt-4-1106-preview",
@@ -452,3 +453,57 @@ class PretrainedModelForSycophancy(PretrainedModelForQA, ModelAPI):
"""

pass


class PretrainedModelForVisualQA(PretrainedModelForQA, ModelAPI):
"""A class representing a pretrained model for visual question answering.
Inherits:
PretrainedModelForQA: The base class for pretrained models.
"""

@lru_cache(maxsize=102400)
def predict(
self, text: Union[str, dict], prompt: dict, images: List[Any], *args, **kwargs
):
"""Perform prediction using the pretrained model.
Args:
text (Union[str, dict]): The input text or dictionary.
prompt (dict): The prompt configuration.
images (List[Any]): The list of images.
*args: Additional positional arguments.
**kwargs: Additional keyword arguments.
Returns:
dict: A dictionary containing the prediction result.
- 'result': The prediction result.
"""
try:
if not isinstance(self.model, BaseChatModel):
ValueError("visualQA task is only supported for chat models")

# prepare prompt
prompt_template = PromptTemplate(**prompt)
from langchain_core.messages import HumanMessage

images = [
{
"type": "image_url",
"image_url": {"url": image},
}
for image in images
]

messages = HumanMessage(
content=[
{"type": "text", "text": prompt_template.format(**text)},
*images,
]
)

response = self.model.invoke([messages])
return response.content

except Exception as e:
raise ValueError(Errors.E089(error_message=e))
128 changes: 128 additions & 0 deletions langtest/modelhandler/promptguard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
class PromptGuard:
_instance = None

def __new__(cls, model_name: str = "meta-llama/Prompt-Guard-86M", device="cpu"):
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance.model_name = model_name
cls._instance.device = device
(
cls._instance.model,
cls._instance.tokenizer,
) = cls._instance._load_model_and_tokenizer()
return cls._instance

def __init__(
self, model_name: str = "meta-llama/Prompt-Guard-86M", device="cpu"
) -> None:
self.model_name = "meta-llama/Prompt-Guard-86M"
self.device = "cpu"
self.model, self.tokenizer = self._load_model_and_tokenizer()

def _load_model_and_tokenizer(self):
"""
Load the model and tokenizer from Hugging Face.
"""
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained(self.model_name).to(
self.device
)
tokenizer = AutoTokenizer.from_pretrained(self.model_name)
return model, tokenizer

def _preprocess_text(self, text):
"""
Preprocess the input text by removing spaces to mitigate prompt injection tactics.
"""
cleaned_text = "".join([char for char in text if not char.isspace()])
tokens = self.tokenizer.tokenize(cleaned_text)
result = " ".join(
[self.tokenizer.convert_tokens_to_string([token]) for token in tokens]
)
return result or text

def _get_class_probabilities(self, texts, temperature=1.0, preprocess=True):
"""
Internal method to get class probabilities for a single or batch of texts.
"""
import torch
from torch.nn.functional import softmax

if preprocess:
texts = [self._preprocess_text(text) for text in texts]

inputs = self.tokenizer(
texts, return_tensors="pt", padding=True, truncation=True, max_length=512
)
inputs = inputs.to(self.device)

with torch.no_grad():
logits = self.model(**inputs).logits

probabilities = softmax(logits / temperature, dim=-1)
return probabilities

def get_jailbreak_score(self, text, temperature=1.0, preprocess=True):
"""
Get jailbreak score for a single input text.
"""
probabilities = self._get_class_probabilities([text], temperature, preprocess)
return probabilities[0, 2].item()

def get_indirect_injection_score(self, text, temperature=1.0, preprocess=True):
"""
Get indirect injection score for a single input text.
"""
probabilities = self._get_class_probabilities([text], temperature, preprocess)
return (probabilities[0, 1] + probabilities[0, 2]).item()

def _process_text_batch(
self, texts, score_indices, temperature=1.0, max_batch_size=16, preprocess=True
):
"""
Internal method to process texts in batches and return scores.
"""
import torch

num_texts = len(texts)
all_scores = torch.zeros(num_texts)

for i in range(0, num_texts, max_batch_size):
batch_texts = texts[i : i + max_batch_size]
probabilities = self._get_class_probabilities(
batch_texts, temperature, preprocess
)
batch_scores = probabilities[:, score_indices].sum(dim=1).cpu()

all_scores[i : i + max_batch_size] = batch_scores

return all_scores.tolist()

def get_jailbreak_scores_for_texts(
self, texts, temperature=1.0, max_batch_size=16, preprocess=True
):
"""
Get jailbreak scores for a batch of texts.
"""
return self._process_text_batch(
texts,
score_indices=[2],
temperature=temperature,
max_batch_size=max_batch_size,
preprocess=preprocess,
)

def get_indirect_injection_scores_for_texts(
self, texts, temperature=1.0, max_batch_size=16, preprocess=True
):
"""
Get indirect injection scores for a batch of texts.
"""
return self._process_text_batch(
texts,
score_indices=[1, 2],
temperature=temperature,
max_batch_size=max_batch_size,
preprocess=preprocess,
)
41 changes: 41 additions & 0 deletions langtest/tasks/task.py
Original file line number Diff line number Diff line change
@@ -851,3 +851,44 @@ def create_sample(

class FillMask(BaseTask):
pass


class VisualQA(BaseTask):
_name = "visualqa"
_default_col = {
"image": ["image"],
"question": ["question"],
"answer": ["answer"],
}
sample_class = samples.VisualQASample

def create_sample(
cls,
row_data: dict,
image: str = "image_1",
question: str = "question",
options: str = "options",
answer: str = "answer",
dataset_name: str = "",
) -> samples.VisualQASample:
"""Create a sample."""
keys = list(row_data.keys())

# auto-detect the default column names from the row_data
column_mapper = cls.column_mapping(keys, [image, question, options, answer])

options = row_data.get(column_mapper.get(options, "-"), "-")

if len(options) > 3 and options[0] == "[" and options[-1] == "]":
options = ast.literal_eval(row_data[column_mapper["options"]])
options = "\n".join(
[f"{chr(65 + i)}. {option}" for i, option in enumerate(options)]
)

return samples.VisualQASample(
original_image=row_data[column_mapper[image]],
question=row_data[column_mapper[question]],
options=options,
expected_result=row_data[column_mapper[answer]],
dataset_name=dataset_name,
)
3 changes: 3 additions & 0 deletions langtest/transform/__init__.py
Original file line number Diff line number Diff line change
@@ -22,6 +22,8 @@
from langtest.transform.grammar import GrammarTestFactory
from langtest.transform.safety import SafetyTestFactory

from langtest.transform import image

# Fixing the asyncio event loop
nest_asyncio.apply()

@@ -47,4 +49,5 @@
SycophancyTestFactory,
GrammarTestFactory,
SafetyTestFactory,
image,
]
Loading
Oops, something went wrong.

0 comments on commit 1b9c7db

Please sign in to comment.