BUG: llama.cpp model failed when chat due to lora (#1513)
ChengjieLi28 authored May 17, 2024
1 parent 0b3e13d commit 55a0200
Showing 2 changed files with 3 additions and 0 deletions.
2 changes: 2 additions & 0 deletions xinference/model/llm/ggml/llamacpp.py
@@ -102,6 +102,8 @@ def _sanitize_generate_config(
         generate_config = LlamaCppGenerateConfig(
             **CreateCompletionLlamaCpp(**generate_config).dict()
         )
+        # Currently, llama.cpp does not support lora
+        generate_config.pop("lora_name", None)  # type: ignore
         return generate_config
 
     def _convert_ggml_to_gguf(self, model_path: str) -> str:
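The change above strips the `lora_name` option from the llama.cpp generate config instead of forwarding it, since the llama.cpp backend cannot apply a LoRA adapter at generation time. Below is a minimal sketch of the idea, assuming a plain dict config; the function name mirrors the diff but this is not the xinference implementation:

from typing import Any, Dict, Optional


def sanitize_generate_config(
    generate_config: Optional[Dict[str, Any]]
) -> Dict[str, Any]:
    """Drop options that the llama.cpp backend cannot handle."""
    generate_config = dict(generate_config or {})
    # llama.cpp does not support LoRA, so discard the key instead of
    # passing it through and failing deeper in the call stack.
    generate_config.pop("lora_name", None)
    return generate_config


# A chat request that names a LoRA adapter no longer breaks the llama.cpp
# path; the unsupported key is silently dropped.
config = sanitize_generate_config({"max_tokens": 128, "lora_name": "my-adapter"})
assert "lora_name" not in config
print(config)  # {'max_tokens': 128}

Popping with a default of None keeps the call safe whether or not the client supplied lora_name at all.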
1 change: 1 addition & 0 deletions xinference/types.py
@@ -373,6 +373,7 @@ class CreateCompletionTorch(BaseModel):
     include_fields={
         "grammar": (Optional[Any], None),
         "max_tokens": (Optional[int], max_tokens_field),
+        "lora_name": (Optional[str], None),
         "stream_options": (Optional[Union[dict, None]], None),
     },
 )
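The second half of the fix declares lora_name on the completion config model so that validation accepts the field before the llama.cpp handler pops it. The following is a hedged illustration of why the declaration matters, written with plain pydantic rather than xinference's model builder; StrictCompletionConfig and CompletionConfigWithLora are hypothetical names. A model that forbids extra fields rejects an undeclared lora_name, while declaring it lets the request validate and leaves the backend free to discard it later:

from typing import Optional

from pydantic import BaseModel, ValidationError


class StrictCompletionConfig(BaseModel):
    """Stand-in for a completion config that rejects unknown options."""

    max_tokens: Optional[int] = None

    class Config:
        extra = "forbid"


class CompletionConfigWithLora(StrictCompletionConfig):
    """Same config, but with lora_name declared as an optional field."""

    lora_name: Optional[str] = None


try:
    StrictCompletionConfig(max_tokens=128, lora_name="my-adapter")
except ValidationError as exc:
    print("rejected:", exc.errors()[0]["loc"])  # ('lora_name',)

accepted = CompletionConfigWithLora(max_tokens=128, lora_name="my-adapter")
print(accepted.dict())  # {'max_tokens': 128, 'lora_name': 'my-adapter'}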
