From 5203679f1fa337509e52fb15a3b0bd3548a488b5 Mon Sep 17 00:00:00 2001
From: Eliasj42 <46754803+Eliasj42@users.noreply.github.com>
Date: Tue, 8 Aug 2023 10:14:57 -0700
Subject: [PATCH] Bandaid fix 2 (#1728)

* download all mlirs

* fixed install method

* download all mlirs (#1727)

Co-authored-by: Elias Joseph

* added taggs

* fix name check for file existence

* Remove SD from all_models.csv (#1706)

Removes SD from pytests as it has its own test suite.

* gpt_langchain.py fixes for pydantic (#1722)

* removed dead code

---------

Co-authored-by: Elias Joseph
Co-authored-by: PhaneeshB
Co-authored-by: Ean Garvey <87458719+monorimet@users.noreply.github.com>
Co-authored-by: Stefan Kapusniak <121311569+one-lithe-rune@users.noreply.github.com>
---
 apps/language_models/scripts/vicuna.py | 44 ++++++++++++++++++++------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/apps/language_models/scripts/vicuna.py b/apps/language_models/scripts/vicuna.py
index d7fdf2b758..2c2d4cacf6 100644
--- a/apps/language_models/scripts/vicuna.py
+++ b/apps/language_models/scripts/vicuna.py
@@ -690,8 +690,12 @@ def compile_lmhead(
             # f_ = open(mlir_path, "wb")
             # f_.write(bytecode)
             # f_.close()
-            command = f"gsutil cp gs://shark_tank/elias/compressed_sv/lmhead.mlir lmhead.mlir"
-            subprocess.check_call(command.split())
+            filepath = Path("lmhead.mlir")
+            download_public_file(
+                "gs://shark_tank/elias/compressed_sv/lmhead.mlir",
+                filepath.absolute(),
+                single_file=True,
+            )
             f_ = open(f"lmhead.mlir", "rb")
             bytecode = f_.read()
             f_.close()
@@ -732,8 +736,12 @@ def compile_norm(self, fvn, hidden_states, device="cpu", device_idx=None):
             # use_tracing=False,
             # verbose=False,
             # )
-            command = f"gsutil cp gs://shark_tank/elias/compressed_sv/norm.mlir norm.mlir"
-            subprocess.check_call(command.split())
+            filepath = Path("norm.mlir")
+            download_public_file(
+                "gs://shark_tank/elias/compressed_sv/norm.mlir",
+                filepath.absolute(),
+                single_file=True,
+            )
             f_ = open(f"norm.mlir", "rb")
             bytecode = f_.read()
             f_.close()
@@ -779,8 +787,12 @@ def compile_embedding(self, fve, input_ids, device="cpu", device_idx=None):
             # f_ = open(mlir_path, "wb")
             # f_.write(bytecode)
             # f_.close()
-            command = f"gsutil cp gs://shark_tank/elias/compressed_sv/embedding.mlir embedding.mlir"
-            subprocess.check_call(command.split())
+            filepath = Path("embedding.mlir")
+            download_public_file(
+                "gs://shark_tank/elias/compressed_sv/embedding.mlir",
+                filepath.absolute(),
+                single_file=True,
+            )
             f_ = open(f"embedding.mlir", "rb")
             bytecode = f_.read()
             f_.close()
@@ -963,6 +975,8 @@ def compile_to_vmfb_one_model(
                 "--iree-vm-target-truncate-unsupported-floats",
                 "--iree-codegen-check-ir-before-llvm-conversion=false",
                 "--iree-vm-bytecode-module-output-format=flatbuffer-binary",
+                "--iree-opt-const-expr-hoisting=False",
+                "--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
             ],
         )
         module.load_module(vmfb_path)
@@ -986,9 +1000,12 @@ def compile_to_vmfb_one_model4(
                 f_.close()
                 mlirs.append(bytecode)
             else:
-                command = f"gsutil cp gs://shark_tank/elias/compressed_sv/{idx}_full.mlir {idx}_full.mlir"
-
-                subprocess.check_call(command.split())
+                filepath = Path(f"{idx}_full.mlir")
+                download_public_file(
+                    f"gs://shark_tank/elias/compressed_sv/{idx}_full.mlir",
+                    filepath.absolute(),
+                    single_file=True,
+                )
 
                 f_ = open(f"{idx}_full.mlir", "rb")
                 bytecode = f_.read()
@@ -1026,6 +1043,8 @@ def compile_to_vmfb_one_model4(
                 "--iree-vm-target-truncate-unsupported-floats",
                 "--iree-codegen-check-ir-before-llvm-conversion=false",
                 "--iree-vm-bytecode-module-output-format=flatbuffer-binary",
+                "--iree-opt-const-expr-hoisting=False",
+                "--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
             ],
         )
         module.load_module(vmfb_path)
@@ -1150,7 +1169,7 @@ def get_sharded_model(self, device="cpu", compressed=False):
         layers0 = [layers00, layers01, layers02, layers03]
         layers1 = [layers10, layers11, layers12, layers13]
 
-        _, modules = self.compile_to_vmfb_one_model(
+        _, modules = self.compile_to_vmfb_one_model4(
            placeholder_input0,
            layers0,
            placeholder_input1,
@@ -1177,6 +1196,9 @@ def compile(self, device="cpu"):
         return self.get_sharded_model(
             device=device, compressed=self.compressed
         )
+        return self.get_sharded_model(
+            device=device, compressed=self.compressed
+        )
 
     def generate(self, prompt, cli=False):
         # TODO: refactor for cleaner integration
@@ -1617,6 +1639,8 @@ def compile(self, download_vmfb=False):
                 "--iree-vm-target-truncate-unsupported-floats",
                 "--iree-codegen-check-ir-before-llvm-conversion=false",
                 "--iree-vm-bytecode-module-output-format=flatbuffer-binary",
+                "--iree-opt-const-expr-hoisting=False",
+                "--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
             ],
         )
         print("Saved vic vmfb at ", str(path))