diff --git a/xinference/model/llm/ggml/llamacpp.py b/xinference/model/llm/ggml/llamacpp.py
index 429afa24c8..4136184fb1 100644
--- a/xinference/model/llm/ggml/llamacpp.py
+++ b/xinference/model/llm/ggml/llamacpp.py
@@ -102,6 +102,8 @@ def _sanitize_generate_config(
         generate_config = LlamaCppGenerateConfig(
             **CreateCompletionLlamaCpp(**generate_config).dict()
         )
+        # Currently, llama.cpp does not support lora
+        generate_config.pop("lora_name", None)  # type: ignore
         return generate_config
 
     def _convert_ggml_to_gguf(self, model_path: str) -> str:
diff --git a/xinference/types.py b/xinference/types.py
index d75dd58efc..923ae1ca70 100644
--- a/xinference/types.py
+++ b/xinference/types.py
@@ -373,6 +373,7 @@ class CreateCompletionTorch(BaseModel):
     include_fields={
         "grammar": (Optional[Any], None),
         "max_tokens": (Optional[int], max_tokens_field),
+        "lora_name": (Optional[str], None),
         "stream_options": (Optional[Union[dict, None]], None),
     },
 )
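A minimal sketch of the behavior this patch introduces: `lora_name` becomes an accepted optional key in the Torch completion config, while the llama.cpp path silently discards it before generation. The sketch assumes plain dict configs, and the helper name `drop_unsupported_lora` is hypothetical, not part of the diff:

# Illustrative sketch only, not part of the patch above. It mimics the
# effect of the new pop() in _sanitize_generate_config(): because llama.cpp
# does not support LoRA here, any "lora_name" key is dropped from the
# config, while all other keys pass through untouched.
def drop_unsupported_lora(generate_config: dict) -> dict:
    generate_config.pop("lora_name", None)  # no-op if the key is absent
    return generate_config


assert drop_unsupported_lora({"max_tokens": 16, "lora_name": "adapter-a"}) == {
    "max_tokens": 16
}

Popping the key rather than raising keeps a shared generate_config usable across backends: a caller can always pass `lora_name`, and only backends that support it will act on it.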