BUG: llama.cpp model failed when chat due to lora (#1513)
ChengjieLi28 authored May 17, 2024
1 parent 0b3e13d commit 55a0200
Showing 2 changed files with 3 additions and 0 deletions.
2 changes: 2 additions & 0 deletions xinference/model/llm/ggml/llamacpp.py
@@ -102,6 +102,8 @@ def _sanitize_generate_config(
         generate_config = LlamaCppGenerateConfig(
             **CreateCompletionLlamaCpp(**generate_config).dict()
         )
+        # Currently, llama.cpp does not support lora
+        generate_config.pop("lora_name", None)  # type: ignore
         return generate_config
 
     def _convert_ggml_to_gguf(self, model_path: str) -> str:
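The change above strips the `lora_name` option from the llama.cpp generate config instead of forwarding it, since the llama.cpp backend cannot apply a LoRA adapter at generation time. Below is a minimal sketch of the idea, assuming a plain dict config; the function name mirrors the diff but this is not the xinference implementation:

from typing import Any, Dict, Optional


def sanitize_generate_config(
    generate_config: Optional[Dict[str, Any]]
) -> Dict[str, Any]:
    """Drop options that the llama.cpp backend cannot handle."""
    generate_config = dict(generate_config or {})
    # llama.cpp does not support LoRA, so discard the key instead of
    # passing it through and failing deeper in the call stack.
    generate_config.pop("lora_name", None)
    return generate_config


# A chat request that names a LoRA adapter no longer breaks the llama.cpp
# path; the unsupported key is silently dropped.
config = sanitize_generate_config({"max_tokens": 128, "lora_name": "my-adapter"})
assert "lora_name" not in config
print(config)  # {'max_tokens': 128}

Popping with a default of None keeps the call safe whether or not the client supplied lora_name at all.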
1 change: 1 addition & 0 deletions xinference/types.py
@@ -373,6 +373,7 @@ class CreateCompletionTorch(BaseModel):
     include_fields={
         "grammar": (Optional[Any], None),
         "max_tokens": (Optional[int], max_tokens_field),
+        "lora_name": (Optional[str], None),
         "stream_options": (Optional[Union[dict, None]], None),
     },
 )
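The second half of the fix declares lora_name on the completion config model so that validation accepts the field before the llama.cpp handler pops it. The following is a hedged illustration of why the declaration matters, written with plain pydantic rather than xinference's model builder; StrictCompletionConfig and CompletionConfigWithLora are hypothetical names. A model that forbids extra fields rejects an undeclared lora_name, while declaring it lets the request validate and leaves the backend free to discard it later:

from typing import Optional

from pydantic import BaseModel, ValidationError


class StrictCompletionConfig(BaseModel):
    """Stand-in for a completion config that rejects unknown options."""

    max_tokens: Optional[int] = None

    class Config:
        extra = "forbid"


class CompletionConfigWithLora(StrictCompletionConfig):
    """Same config, but with lora_name declared as an optional field."""

    lora_name: Optional[str] = None


try:
    StrictCompletionConfig(max_tokens=128, lora_name="my-adapter")
except ValidationError as exc:
    print("rejected:", exc.errors()[0]["loc"])  # ('lora_name',)

accepted = CompletionConfigWithLora(max_tokens=128, lora_name="my-adapter")
print(accepted.dict())  # {'max_tokens': 128, 'lora_name': 'my-adapter'}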
