Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HuggingFace Upload Feature #1300

Closed
wants to merge 61 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit. Hold shift + click to select a range.
b205d4d
upload hf model
april-yyt Feb 13, 2024
88f9311
upload peft model
april-yyt Feb 13, 2024
5459afa
refactor uploading hf
april-yyt Feb 16, 2024
4b760ac
refactor uploading peft
april-yyt Feb 16, 2024
cdf24eb
modify upload logic and add reconvert functions for opt models
april-yyt Feb 19, 2024
6101bbf
fix opt weight name converting issues
april-yyt Feb 20, 2024
90f5d67
implement revert functions for falcon models
april-yyt Feb 20, 2024
4791907
upload method for peft class and falcon fixes
april-yyt Feb 21, 2024
48ef455
reconvert functions for llama models
april-yyt Feb 23, 2024
72e1556
finish weight convert for falcon models
april-yyt Feb 24, 2024
fffe25a
simplify upload script
april-yyt Feb 26, 2024
69c53c4
fix falcon typo
april-yyt Feb 26, 2024
e51004f
mpt models, minor errs to be fixed
april-yyt Feb 26, 2024
f32000a
starcoder models, minor errs to be fixed
april-yyt Feb 26, 2024
b6dd20b
fixed issues with mpt and starcoder models
april-yyt Feb 27, 2024
2b5c79b
modify hf uploading script
april-yyt Feb 27, 2024
403516a
modify hf uploading script
april-yyt Feb 27, 2024
af1d535
modify hf uploading main
april-yyt Feb 27, 2024
85e15d4
Merge branch 'peft' into hf-upload
april-yyt Feb 27, 2024
bea5afb
add assertion for base model
april-yyt Mar 1, 2024
0901320
upload hf model
april-yyt Feb 13, 2024
0349838
upload peft model
april-yyt Feb 13, 2024
d7a23bb
refactor uploading hf
april-yyt Feb 16, 2024
08cf15e
refactor uploading peft
april-yyt Feb 16, 2024
6b476a6
modify upload logic and add reconvert functions for opt models
april-yyt Feb 19, 2024
0a1029a
fix opt weight name converting issues
april-yyt Feb 20, 2024
5df977d
implement revert functions for falcon models
april-yyt Feb 20, 2024
dbbf1cd
upload method for peft class and falcon fixes
april-yyt Feb 21, 2024
1157e1e
reconvert functions for llama models
april-yyt Feb 23, 2024
e4ba212
finish weight convert for falcon models
april-yyt Feb 24, 2024
edaaeca
simplify upload script
april-yyt Feb 26, 2024
4aea5e8
fix falcon typo
april-yyt Feb 26, 2024
a67d824
mpt models, minor errs to be fixed
april-yyt Feb 26, 2024
6382448
starcoder models, minor errs to be fixed
april-yyt Feb 26, 2024
614de32
fixed issues with mpt and starcoder models
april-yyt Feb 27, 2024
551e119
modify hf uploading script
april-yyt Feb 27, 2024
2da5aa1
modify hf uploading script
april-yyt Feb 27, 2024
ef471ef
modify hf uploading main
april-yyt Feb 27, 2024
edb2238
add assertion for base model
april-yyt Mar 1, 2024
0074b8f
resolve conflicts after rebase
Mar 30, 2024
44f43f5
rebase on peft
april-yyt Feb 19, 2024
f051efa
some fixes
april-yyt Apr 2, 2024
c7e2429
fix issues for opt model conversion
april-yyt Apr 3, 2024
e3be6b2
fix issues for llama models
april-yyt Apr 4, 2024
ee41f3a
models/starcoder.py
april-yyt Apr 4, 2024
8efb92b
fix issues for mpt models
april-yyt Apr 5, 2024
41e0bee
some fixes
april-yyt Apr 5, 2024
3354630
some fixes for falcon, qkv weights issues remains
april-yyt Apr 10, 2024
89b6e56
peft-upload-example
april-yyt Apr 10, 2024
a2ab5ba
remove redundant code & metrics file
april-yyt Apr 10, 2024
2792e25
add back utils script
april-yyt May 6, 2024
11f4c2f
Merge branch 'peft' into hf-upload
goliaro May 8, 2024
c2ad1c5
Merge branch 'peft' into hf-upload
goliaro May 29, 2024
a1b5db8
cleanup
goliaro May 29, 2024
8711734
Merge branch 'peft' into hf-upload
goliaro May 29, 2024
613eb6d
remove submodule
goliaro May 29, 2024
f73d556
add test
goliaro Jun 5, 2024
a899501
add code to save peft weights to file
goliaro Jun 12, 2024
9eb58c3
fix print
goliaro Jun 12, 2024
f00af8b
mv fix
goliaro Jun 12, 2024
64d1e2e
Merge branch 'peft' into hf-upload
goliaro Jul 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions include/flexflow/flexflow_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,12 @@ void flexflow_request_manager_start_background_server(
void flexflow_request_manager_terminate_background_server(
flexflow_request_manager_t handle_);

void flexflow_request_manager_save_peft_weights(
flexflow_request_manager_t handle_,
flexflow_model_t model_handle_,
flexflow_peft_model_id_t peft_model_id_,
char const *destination_folder);

// -----------------------------------------------------------------------
// InferenceManager
// -----------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion include/flexflow/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ enum TaskIDs {
LINEAR_BWD2_TASK_ID,
LINEAR_UPD_TASK_ID,
LORA_LINEAR_INIT_TASK_ID,
LORA_LINEAR_REG_TASK_ID,
LORA_LINEAR_SAVE_WEIGHTS_TASK_ID,
LORA_LINEAR_INF_TASK_ID,
LORA_LINEAR_PEFT_BWD_TASK_ID,
FLAT_INIT_TASK_ID,
Expand Down
12 changes: 12 additions & 0 deletions include/flexflow/ops/lora_linear.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ class LoraLinear : public Op {
MachineView const *mv = nullptr) override;
void forward(FFModel const &) override;
void backward(FFModel const &) override;
void save_peft_weights(FFModel const &ff,
PEFTModelID const &model_id,
int rank,
std::string const &destination_folder,
std::vector<ParallelTensor> const &batch_inputs,
std::vector<ParallelTensor> const &batch_outputs,
MachineView const *mv = nullptr);
Legion::FutureMap inference(FFModel const &,
BatchConfigFuture const &,
std::vector<ParallelTensor> const &,
Expand Down Expand Up @@ -69,6 +76,11 @@ class LoraLinear : public Op {
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
Legion::Runtime *runtime);
static void
save_peft_weights_task(Legion::Task const *task,
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
Legion::Runtime *runtime);
static void forward_task(Legion::Task const *task,
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
Expand Down
7 changes: 7 additions & 0 deletions include/flexflow/request_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class InferenceManager {
static InferenceManager *get_inference_manager();
void compile_model_and_allocate_buffer(FFModel *model);
void init_operators_inference(FFModel *model);
void save_peft_weights(FFModel *model,
PEFTModelID const &model_id,
std::string const &destination_folder);
Legion::FutureMap inference(FFModel *model, int index, BatchConfig const &bc);
Legion::FutureMap
inference(FFModel *model, int index, BatchConfigFuture const &bc);
Expand Down Expand Up @@ -161,6 +164,10 @@ class RequestManager {

FFModel *get_ssm_model(int model_id);

void save_peft_weights(FFModel *model,
PEFTModelID const &model_id,
std::string const &destination_folder);

void serve_incr_decoding(FFModel *model);
void serve_spec_infer(FFModel *model);
GenerationResult get_generation_result(RequestGuid const &guid);
Expand Down
4 changes: 4 additions & 0 deletions inference/peft/peft.cc
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,10 @@ void FlexFlow::top_level_task(Task const *task,
future.get_void_result();
}

rm->save_peft_weights(&model,
*peft_model_id,
std::string("/root/.cache/flexflow/finetuned_weights"));

if (peft_model_id != nullptr) {
free(peft_model_id);
}
Expand Down
37 changes: 31 additions & 6 deletions inference/python/ff_peft.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,22 @@ def get_configs():
type=str,
default="",
)
args = parser.parse_args()
parser.add_argument(
"--publish-peft-with-id",
help="The Hugging Face model ID to upload the trained model with",
type=str,
default=""
)

args = parser.parse_args()
publish_peft_with_id = args.publish_peft_with_id
if len(publish_peft_with_id) == 0:
print(
"Please pass a --publish-peft-with-id if you want to upload the trained model"
)
else:
print(f"The trained model will be uploaded with id: {publish_peft_with_id}")

# Load configs from JSON file (if specified)
if len(args.config_file) > 0:
if not os.path.isfile(args.config_file):
Expand Down Expand Up @@ -67,18 +81,19 @@ def get_configs():
"inference_peft_model_id": "goliaro/llama-160m-lora",
"finetuning_peft_model_id": "goliaro/llama-160m-lora",
# optional parameters
"cache_path": "",
"cache_path": "~/.cache/flexflow",
"refresh_cache": False,
"full_precision": True,
"prompt": "",
"finetuning_dataset": os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../prompt/peft_dataset.json",
),
"output_file": "",
"output_file": ""
}
# Merge dictionaries
ff_init_configs.update(model_configs)
ff_init_configs["publish_peft_with_id"] = publish_peft_with_id
return ff_init_configs


Expand All @@ -98,7 +113,7 @@ def main():
data_type=ff_data_type,
cache_path=configs.cache_path,
refresh_cache=configs.refresh_cache,
output_file=configs.output_file,
output_file=configs.output_file
)
# Add inference and/or finetuning lora
lora_inference_config = None
Expand Down Expand Up @@ -146,6 +161,8 @@ def main():
)

llm.start_server()

print(f"LLM model class is: {llm.model_class}")

requests = []
# Serving
Expand Down Expand Up @@ -173,9 +190,17 @@ def main():
requests.append(finetuning_request)

llm.generate(requests)

llm.stop_server()


# upload the model back to huggingface after finetuning
# the model format would be converted from flexflow format back to huggingface format
if len(configs.publish_peft_with_id) > 0:
print(
f"Done training! Uploading the model to HF hub with id: {configs.publish_peft_with_id}..."
)
llm.upload_peft_model(configs.publish_peft_with_id, private=True)


if __name__ == "__main__":
print("flexflow PEFT example")
Expand Down
Loading
Loading