diff --git a/xinference/model/llm/ggml/llamacpp.py b/xinference/model/llm/ggml/llamacpp.py
index 429afa24c8..4136184fb1 100644
--- a/xinference/model/llm/ggml/llamacpp.py
+++ b/xinference/model/llm/ggml/llamacpp.py
@@ -102,6 +102,8 @@ def _sanitize_generate_config(
         generate_config = LlamaCppGenerateConfig(
             **CreateCompletionLlamaCpp(**generate_config).dict()
         )
+        # Currently, llama.cpp does not support lora
+        generate_config.pop("lora_name", None)  # type: ignore
         return generate_config
 
     def _convert_ggml_to_gguf(self, model_path: str) -> str:
diff --git a/xinference/types.py b/xinference/types.py
index d75dd58efc..923ae1ca70 100644
--- a/xinference/types.py
+++ b/xinference/types.py
@@ -373,6 +373,7 @@ class CreateCompletionTorch(BaseModel):
     include_fields={
         "grammar": (Optional[Any], None),
         "max_tokens": (Optional[int], max_tokens_field),
+        "lora_name": (Optional[str], None),
         "stream_options": (Optional[Union[dict, None]], None),
     },
 )
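A minimal sketch of the behavior this patch introduces: `lora_name` becomes an accepted optional key in the Torch completion config, while the llama.cpp path silently discards it before generation. The sketch assumes plain dict configs, and the helper name `drop_unsupported_lora` is hypothetical, not part of the diff:

# Illustrative sketch only, not part of the patch above. It mimics the
# effect of the new pop() in _sanitize_generate_config(): because llama.cpp
# does not support LoRA here, any "lora_name" key is dropped from the
# config, while all other keys pass through untouched.
def drop_unsupported_lora(generate_config: dict) -> dict:
    generate_config.pop("lora_name", None)  # no-op if the key is absent
    return generate_config


assert drop_unsupported_lora({"max_tokens": 16, "lora_name": "adapter-a"}) == {
    "max_tokens": 16
}

Popping the key rather than raising keeps a shared generate_config usable across backends: a caller can always pass `lora_name`, and only backends that support it will act on it.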