This repository contains all scripts and resources used in the article 'How to fine-tune an open-source LLaMa using QLoRa'.
# Install PyTorch (plus torchvision/torchaudio) built against CUDA 11.7,
# pulling from the official pytorch and nvidia conda channels.
conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia
# Clone and build the CUDA branch of GPTQ-for-LLaMa (provides the 4-bit
# quantization tooling used later by llama.py).
mkdir -p repositories            # -p: idempotent — no error if it already exists
cd repositories || exit 1        # abort instead of cloning into the wrong place
git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git -b cuda
cd GPTQ-for-LLaMa || exit 1      # abort if the clone failed
pip install -r requirements.txt
python setup_cuda.py install     # compiles and installs the custom CUDA kernels
From the repository's root directory, install the fine-tuning dependencies:
# Install/upgrade this repository's Python dependencies (run from the repo root).
pip install -U -r requirements.txt
Update the dataset configuration to match your data format; the relevant section is at https://github.com/mzbac/qlora-fine-tune/blob/main/qlora.py#L521-L527
# Fine-tune the base model on the custom dataset with QLoRA, using bfloat16.
python qlora.py \
  --model_name_or_path TheBloke/wizardLM-13B-1.0-fp16 \
  --dataset my-data \
  --bf16
# Run the inference script to try out the fine-tuned model.
python inference.py
Note: change the model_name and adapters_name values to point at your own base model and trained adapters before running.
# Merge the trained PEFT/LoRA adapters back into the base model on CPU;
# the merged full-weight model is written to ./merged_models/.
python merge_peft_adapters.py \
  --device cpu \
  --base_model_name_or_path TheBloke/wizardLM-13B-1.0-fp16 \
  --peft_model_path ./output/checkpoint-2250/adapter_model \
  --output_dir ./merged_models/
# Quantize the merged model to 4-bit GPTQ (128-group size, no act-order) with
# the c4 calibration set, saving the result as a .safetensors file.
# ${MODEL_DIR} is a shell variable; {your-model-name} is a literal placeholder
# for you to replace before running.
python repositories/GPTQ-for-LLaMa/llama.py ${MODEL_DIR} c4 \
  --wbits 4 \
  --true-sequential \
  --groupsize 128 \
  --save_safetensors {your-model-name}-no-act-order-4bit-128g.safetensors