This repository has been archived by the owner on Dec 6, 2023. It is now read-only.

Commit d838114
fix: stitching of messages for proper prompt creation in conversation
biswaroop1547 authored and casperdcl committed Jul 28, 2023
1 parent 02b713a commit d838114
Showing 1 changed file with 26 additions and 3 deletions.
cht-llama-v2/models.py (29 changes: 26 additions & 3 deletions)
@@ -31,6 +31,29 @@ def generate(
     def embeddings(cls, text) -> None:
         pass
 
+    @staticmethod
+    def stitch_prompt(messages: list) -> str:
+        system_prompt_template = "<s>[INST] <<SYS>>\n{}\n<</SYS>>\n\n"  # noqa
+        default_system_text = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."  # noqa
+        user_prompt_template = "{} [/INST] "  # noqa
+        assistant_prompt_template = "{} </s><s>[INST] "  # noqa
+
+        system_prompt, chat_prompt = "", ""
+        for message in messages:
+            role = message["role"]
+            content = message["content"]
+            if role == "system":
+                system_prompt = system_prompt_template.format(content)
+            elif role == "user":
+                chat_prompt += user_prompt_template.format(content)
+            elif role == "assistant":
+                chat_prompt += assistant_prompt_template.format(content)
+
+        if not system_prompt:
+            system_prompt = system_prompt_template.format(default_system_text)
+
+        return system_prompt + chat_prompt
+
 
 class LlamaBasedModel(ChatModel):
     model = None
@@ -49,10 +72,10 @@ def generate(
         stop: str = "",
         **kwargs,
     ) -> List:
-        message = messages[-1]["content"]
+        prompt = cls.stitch_prompt(messages)
         return [
             cls.model(
-                message,
+                prompt,
                 max_length=max_tokens,
                 max_new_tokens=max_new_tokens,
                 num_return_sequences=n,
@@ -62,7 +85,7 @@ def generate(
                 return_full_text=kwargs.get("return_full_text", False),
                 do_sample=kwargs.get("do_sample", True),
                 stop_sequence=stop[0] if stop else None,
-                stopping_criteria=cls.stopping_criteria(stop, message, cls.tokenizer),
+                stopping_criteria=cls.stopping_criteria(stop, prompt, cls.tokenizer),
             )[0]["generated_text"].rstrip(stop[0] if stop else "")
         ]
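
For reference, here is a minimal, self-contained sketch (not part of the commit) showing what the new stitching produces for a sample conversation. The sample messages are made up, the default system text is shortened for readability, and the helper below simply mirrors the stitch_prompt logic added in the diff above rather than importing it from the repository.

# Illustrative sketch: a standalone copy of the stitch_prompt logic from the
# diff, used to show the Llama-2 chat prompt it builds. Sample data is made up;
# the commit's default system text is much longer (the full Llama-2 default).

SYSTEM_TEMPLATE = "<s>[INST] <<SYS>>\n{}\n<</SYS>>\n\n"
USER_TEMPLATE = "{} [/INST] "
ASSISTANT_TEMPLATE = "{} </s><s>[INST] "


def stitch_prompt(messages: list) -> str:
    # A system message fills the <<SYS>> block; user/assistant turns are
    # appended in Llama-2 chat format. Falls back to a default system text.
    system_prompt, chat_prompt = "", ""
    for message in messages:
        role, content = message["role"], message["content"]
        if role == "system":
            system_prompt = SYSTEM_TEMPLATE.format(content)
        elif role == "user":
            chat_prompt += USER_TEMPLATE.format(content)
        elif role == "assistant":
            chat_prompt += ASSISTANT_TEMPLATE.format(content)
    if not system_prompt:
        system_prompt = SYSTEM_TEMPLATE.format("You are a helpful assistant.")
    return system_prompt + chat_prompt


messages = [
    {"role": "system", "content": "You are a terse assistant."},
    {"role": "user", "content": "Hi, who won the 1998 World Cup?"},
    {"role": "assistant", "content": "France."},
    {"role": "user", "content": "And who was the top scorer?"},
]

print(stitch_prompt(messages))
# <s>[INST] <<SYS>>
# You are a terse assistant.
# <</SYS>>
#
# Hi, who won the 1998 World Cup? [/INST] France. </s><s>[INST] And who was the top scorer? [/INST]

The point of the change is visible in the output: every prior turn is folded into a single Llama-2-formatted prompt, whereas before the fix only messages[-1]["content"] (the latest user message) was passed to the model, so the conversation history was dropped.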

