Add the huggingface token parameter, and modify the file path in llama.cpp repo. (#761)

Signed-off-by: Song Liu <soliu@redhat.com>
Co-authored-by: Song Liu <soliu@redhat.com>
containers · Aug 19, 2024 · e82e739 · e82e739
1 parent f604294
commit e82e739
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 8 deletions.
diff --git a/convert_models/README.md b/convert_models/README.md
@@ -19,10 +19,10 @@ podman build -t converter .
 
 ## Quantize and Convert 
 
-You can run the conversion image directly with podman in the terminal. You just need to provide it with the huggingface model name you want to download, the quantization level you want to use and whether or not you want to keep the raw files after conversion.
+You can run the conversion image directly with podman in the terminal. You just need to provide it with the huggingface model name you want to download, the quantization level you want to use and whether or not you want to keep the raw files after conversion. "HF_TOKEN" is optional; it is only required for private models.
 
 ```bash
-podman run -it --rm -v models:/converter/converted_models -e HF_MODEL_URL=<ORG/MODEL_NAME> -e QUANTIZATION=Q4_K_M -e KEEP_ORIGINAL_MODEL="False"
+podman run -it --rm -v models:/converter/converted_models -e HF_MODEL_URL=<ORG/MODEL_NAME> -e HF_TOKEN=<YOUR_HF_TOKEN_ID> -e QUANTIZATION=Q4_K_M -e KEEP_ORIGINAL_MODEL="False" converter
 ```
 
 You can also use the UI shown above to do the same.

diff --git a/convert_models/download_huggingface.py b/convert_models/download_huggingface.py
@@ -3,9 +3,11 @@
 
 parser = argparse.ArgumentParser()
 parser.add_argument("-m", "--model")
+parser.add_argument("-t", "--token")
 args = parser.parse_args()
 
 snapshot_download(repo_id=args.model,
+                token=args.token,
                 local_dir=f"converted_models/{args.model}",
                 local_dir_use_symlinks=True,
                 cache_dir=f"converted_models/cache")
diff --git a/convert_models/run.sh b/convert_models/run.sh
@@ -1,6 +1,7 @@
 #! /bin/bash
 
 hf_model_url=${HF_MODEL_URL}
+hf_token=${HF_TOKEN:="None"}
 model_org=$(echo $hf_model_url | sed -n 's/\(.*\)\/\(.*\)/\1/p')
 model_name=$(echo $hf_model_url | sed -n 's/\(.*\)\/\(.*\)/\2/p')
 keep_orgi=${KEEP_ORIGINAL_MODEL}
@@ -15,11 +16,11 @@ if [ -e "/opt/app-root/src/converter/converted_models/cache/models--$model_org--
 fi
 
 echo "Downloading $hf_model_url"
-python download_huggingface.py --model $hf_model_url
-python llama.cpp/convert.py /opt/app-root/src/converter/converted_models/$hf_model_url
-python llama.cpp/convert-hf-to-gguf.py /opt/app-root/src/converter/converted_models/$hf_model_url
+python download_huggingface.py --model $hf_model_url --token $hf_token
+python llama.cpp/examples/convert_legacy_llama.py /opt/app-root/src/converter/converted_models/$hf_model_url
+python llama.cpp/convert_hf_to_gguf.py /opt/app-root/src/converter/converted_models/$hf_model_url
 mkdir -p /opt/app-root/src/converter/converted_models/gguf/
-llama.cpp/quantize /opt/app-root/src/converter/converted_models/$hf_model_url/ggml-model-f16.gguf /opt/app-root/src/converter/converted_models/gguf/$model_org-$model_name-${QUANTIZATION}.gguf ${QUANTIZATION}
+llama.cpp/llama-quantize /opt/app-root/src/converter/converted_models/$hf_model_url/ggml-model-f16.gguf /opt/app-root/src/converter/converted_models/gguf/$model_org-$model_name-${QUANTIZATION}.gguf ${QUANTIZATION}
 rm -rf /opt/app-root/src/converter/converted_models/$model_org
 
 if [ $keep_orgi = "False" ]; then

diff --git a/convert_models/ui.py b/convert_models/ui.py
@@ -22,6 +22,9 @@
 
 model_name = st.text_input(label="Enter a huggingface model url to convert",
                            placeholder="org/model_name")
+token_id = st.text_input(label="Enter your huggingface token (optional)",
+                         help="huggingface token is required for private model"
+                        ) or "None"
 keep_files = st.checkbox("Keep huggingface model files after conversion?")
 submit_button = st.button(label="submit")
 if submit_button:
@@ -30,8 +33,9 @@
                         "run", 
                         "-it", 
                         "--rm", 
-                        "-v", f"{volume}:/opt/app-root/src/converter/converted_models", 
-                        "-e", f"HF_MODEL_URL={model_name}" ,
+                        "-v", f"{volume}:/converter/converted_models", 
+                        "-e", f"HF_MODEL_URL={model_name}",
+                        "-e", f"HF_TOKEN={token_id}",
                         "-e", f"QUANTIZATION={quantization}",
                         "-e", f"KEEP_ORIGINAL_MODEL={keep_files}",
                         "converter"],stdout=subprocess.PIPE)