diff --git a/inference/utils/download_llama_weights.py b/inference/utils/download_llama_weights.py index 0cf4453aa0..d2b11453e6 100644 --- a/inference/utils/download_llama_weights.py +++ b/inference/utils/download_llama_weights.py @@ -6,11 +6,12 @@ from transformers import AutoModelForCausalLM # You can pass the --use-full-precision flag to use the full-precision weight. By default, we use half precision. +# Pass "--use_13B", "--use_30B", or "--use_65B" to use the corresponding llama-13B/30B/65B model weights. parser = argparse.ArgumentParser() parser.add_argument("--use-full-precision", action="store_true", help="Use full precision") -parser.add_argument("--use_13B", action="store_true", help="Use full precision") -parser.add_argument("--use_30B", action="store_true", help="Use full precision") -parser.add_argument("--use_65B", action="store_true", help="Use full precision") +parser.add_argument("--use_13B", action="store_true", help="Use the llama-13B model weights") +parser.add_argument("--use_30B", action="store_true", help="Use the llama-30B model weights") +parser.add_argument("--use_65B", action="store_true", help="Use the llama-65B model weights") args = parser.parse_args() if not args.use_full_precision: import torch