
Commit

Merge pull request #339 from MichaelClifford/new_default
make granite default for models/ and llamacpp/
rhatdan authored Apr 25, 2024
2 parents a680d5a + 273ab06 commit df755ca
Showing 4 changed files with 25 additions and 21 deletions.
11 changes: 6 additions & 5 deletions model_servers/llamacpp_python/Makefile
@@ -1,5 +1,6 @@
APP := llamacpp_python
PORT ?= 8001
+CHAT_FORMAT ?= openchat

include ../common/Makefile.common

@@ -9,10 +10,10 @@ CUDA_IMAGE := $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/$(APP)_cuda:latest
VULKAN_IMAGE := $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/$(APP)_vulkan:latest

MODELS_PATH := /locallm/models
-MODEL_NAME ?= mistral-7b-instruct-v0.1.Q4_K_M.gguf
+MODEL_NAME ?= granite-7b-lab-Q4_K_M.gguf

.Phony: all
-all: build download-model-mistral run
+all: build download-model-granite run

.PHONY: build-cuda
build-cuda:
@@ -22,7 +23,7 @@ build-cuda:
build-vulkan:
podman build --squash-all -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

-.PHONY: download-model-mistral # default model
-download-model-mistral:
+.PHONY: download-model-granite # default model
+download-model-granite:
cd ../../models && \
-make MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -f Makefile download-model
+make MODEL_NAME=granite-7b-lab-Q4_K_M.gguf MODEL_URL=https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf -f Makefile download-model
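
With these defaults, the quick-start flow for the llamacpp_python model server becomes the following (a sketch: the `build` and `run` targets come from `../common/Makefile.common`, and exactly how that file consumes `PORT`, `CHAT_FORMAT`, and `MODEL_NAME` is assumed rather than shown in this diff):

```bash
# Fetch the new default model (granite-7b-lab) into ../../models
make -f Makefile download-model-granite

# Build the server image and run it with the new defaults; override the
# variables on the command line to serve a different GGUF model instead
make -f Makefile build
make MODEL_NAME=granite-7b-lab-Q4_K_M.gguf CHAT_FORMAT=openchat -f Makefile run
```
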
29 changes: 16 additions & 13 deletions model_servers/llamacpp_python/README.md
@@ -76,24 +76,25 @@ podman pull quay.io/ai-lab/llamacpp_python_vulkan

There are many models to choose from these days, most of which can be found on [huggingface.co](https://huggingface.co). In order to use a model with the llamacpp_python model server, it must be in GGUF format. You can either download pre-converted GGUF models directly or convert them yourself with the [model converter utility](../../convert_models/) available in this repo.

-One of the more popular Apache-2.0 Licenesed models that we recommend using if you are just getting started is `mistral-7b-instruct-v0.1`. You can use the link below to quickly download a quantized (smaller) GGUF version of this model for use with the llamacpp_python model server.
+A performant Apache-2.0 licensed model that we recommend using if you are just getting started is
+`granite-7b-lab`. You can use the link below to quickly download a quantized (smaller) GGUF version of this model for use with the llamacpp_python model server.

-Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)
+Download URL: [https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf](https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf)

Place all models in the [models](../../models/) directory.

You can use this snippet below to download the default model:

```bash
-make -f Makefile download-model-mistral
+make -f Makefile download-model-granite
```

Or you can use the generic `download-model` target from the `/models` directory to download any model file from huggingface:

```bash
cd ../../models
make MODEL_NAME=<model_name> MODEL_URL=<model_url> -f Makefile download-model
-# EX: make MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -f Makefile download-model
+# EX: make MODEL_NAME=granite-7b-lab-Q4_K_M.gguf MODEL_URL=https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf -f Makefile download-model
```


@@ -107,9 +108,10 @@ To deploy the LLM server you must specify a volume mount `-v` where your models
podman run --rm -it \
  -p 8001:8001 \
  -v Local/path/to/locallm/models:/locallm/models:ro \
-  -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf \
+  -e MODEL_PATH=models/granite-7b-lab-Q4_K_M.gguf \
  -e HOST=0.0.0.0 \
  -e PORT=8001 \
+  -e MODEL_CHAT_FORMAT=openchat \
  llamacpp_python
```

@@ -120,31 +122,32 @@ podman run --rm -it \
  --device nvidia.com/gpu=all \
  -p 8001:8001 \
  -v Local/path/to/locallm/models:/locallm/models:ro \
-  -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf \
+  -e MODEL_PATH=models/granite-7b-lab-Q4_K_M.gguf \
  -e HOST=0.0.0.0 \
  -e PORT=8001 \
+  -e MODEL_CHAT_FORMAT=openchat \
  llamacpp_python
```
### Multiple Model Service:

To enable dynamic loading and unloading of different models present on your machine, you can start the model service with a `CONFIG_PATH` instead of a `MODEL_PATH`.

-Here is an example `models_config.json` with two quantization variants of mistral-7B.
+Here is an example `models_config.json` with two model options.

```json
{
  "host": "0.0.0.0",
  "port": 8001,
  "models": [
    {
-      "model": "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
-      "model_alias": "mistral_Q4",
-      "chat_format": "mistral",
+      "model": "models/granite-7b-lab-Q4_K_M.gguf",
+      "model_alias": "granite",
+      "chat_format": "openchat",
    },
    {
-      "model": "models/mistral-7b-instruct-v0.1.Q5_K_M.gguf",
-      "model_alias": "mistral_Q5",
-      "chat_format": "mistral",
+      "model": "models/merlinite-7b-lab-Q4_K_M.gguf",
+      "model_alias": "merlinite",
+      "chat_format": "openchat",
    },

  ]
}
```
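
Launching the service in this multi-model mode then looks much like the single-model examples above, with `CONFIG_PATH` taking the place of `MODEL_PATH` (a sketch: the config file name and its location under the mounted models directory are assumptions):

```bash
# host and port are read from models_config.json here, so only the config path is passed
podman run --rm -it \
  -p 8001:8001 \
  -v Local/path/to/locallm/models:/locallm/models:ro \
  -e CONFIG_PATH=models/models_config.json \
  llamacpp_python
```
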
2 changes: 1 addition & 1 deletion models/Containerfile
@@ -10,7 +10,7 @@
FROM registry.access.redhat.com/ubi9/ubi-micro:9.3-15

# Can be substituted using the --build-arg defined above
-ARG MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
+ARG MODEL_URL=https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf

# By default the Model Server container image uses the AI Model stored in the model/model.file file.
WORKDIR /model
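
Since the model location is declared as an `ARG`, the new granite default can still be overridden at image build time without editing the Containerfile (a sketch: the image tag and build context are illustrative, only the `MODEL_URL` build argument comes from the file itself):

```bash
# Hypothetical tag; MODEL_URL is the build argument declared in models/Containerfile
podman build \
  --build-arg MODEL_URL=https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf \
  -t ai-lab/models/granite-7b-lab:latest \
  -f models/Containerfile models/
```
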
4 changes: 2 additions & 2 deletions models/Makefile
@@ -1,5 +1,5 @@
-MODEL_URL ?= https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-MODEL_NAME ?= mistral-7b-instruct
+MODEL_URL ?= https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf
+MODEL_NAME ?= granite-7b-lab-Q4_K_M.gguf

REGISTRY ?= quay.io
REGISTRY_ORG ?= ai-lab
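
With these defaults in place, fetching the model used throughout the rest of this change needs no overrides (a sketch, assuming the `download-model` target referenced in the README is defined further down in this Makefile):

```bash
# Downloads granite-7b-lab-Q4_K_M.gguf using the new MODEL_URL/MODEL_NAME defaults;
# pass MODEL_NAME=... MODEL_URL=... to fetch a different GGUF file instead
cd models
make -f Makefile download-model
```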
