Skip to content

Commit

Permalink
Bandaid fix 2 (#1728)
Browse files Browse the repository at this point in the history
* download all mlirs

* fixed install method

* download all mlirs (#1727)

Co-authored-by: Elias Joseph <elias@nod-labs.com>

* added tags

* fix name check for file existence

* Remove SD from all_models.csv (#1706)

Removes SD from pytests as it has its own test suite.

* gpt_langchain.py fixes for pydantic (#1722)

* removed dead code

---------

Co-authored-by: Elias Joseph <elias@nod-labs.com>
Co-authored-by: PhaneeshB <b.phaneesh@gmail.com>
Co-authored-by: Ean Garvey <87458719+monorimet@users.noreply.github.com>
Co-authored-by: Stefan Kapusniak <121311569+one-lithe-rune@users.noreply.github.com>
  • Loading branch information
5 people authored Aug 8, 2023
1 parent bf073f8 commit 5203679
Showing 1 changed file with 34 additions and 10 deletions.
44 changes: 34 additions & 10 deletions apps/language_models/scripts/vicuna.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,8 +690,12 @@ def compile_lmhead(
# f_ = open(mlir_path, "wb")
# f_.write(bytecode)
# f_.close()
command = f"gsutil cp gs://shark_tank/elias/compressed_sv/lmhead.mlir lmhead.mlir"
subprocess.check_call(command.split())
filepath = Path("lmhead.mlir")
download_public_file(
"gs://shark_tank/elias/compressed_sv/lmhead.mlir",
filepath.absolute(),
single_file=True,
)
f_ = open(f"lmhead.mlir", "rb")
bytecode = f_.read()
f_.close()
Expand Down Expand Up @@ -732,8 +736,12 @@ def compile_norm(self, fvn, hidden_states, device="cpu", device_idx=None):
# use_tracing=False,
# verbose=False,
# )
command = f"gsutil cp gs://shark_tank/elias/compressed_sv/norm.mlir norm.mlir"
subprocess.check_call(command.split())
filepath = Path("norm.mlir")
download_public_file(
"gs://shark_tank/elias/compressed_sv/norm.mlir",
filepath.absolute(),
single_file=True,
)
f_ = open(f"norm.mlir", "rb")
bytecode = f_.read()
f_.close()
Expand Down Expand Up @@ -779,8 +787,12 @@ def compile_embedding(self, fve, input_ids, device="cpu", device_idx=None):
# f_ = open(mlir_path, "wb")
# f_.write(bytecode)
# f_.close()
command = f"gsutil cp gs://shark_tank/elias/compressed_sv/embedding.mlir embedding.mlir"
subprocess.check_call(command.split())
filepath = Path("embedding.mlir")
download_public_file(
"gs://shark_tank/elias/compressed_sv/embedding.mlir",
filepath.absolute(),
single_file=True,
)
f_ = open(f"embedding.mlir", "rb")
bytecode = f_.read()
f_.close()
Expand Down Expand Up @@ -963,6 +975,8 @@ def compile_to_vmfb_one_model(
"--iree-vm-target-truncate-unsupported-floats",
"--iree-codegen-check-ir-before-llvm-conversion=false",
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-opt-const-expr-hoisting=False",
"--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
],
)
module.load_module(vmfb_path)
Expand All @@ -986,9 +1000,12 @@ def compile_to_vmfb_one_model4(
f_.close()
mlirs.append(bytecode)
else:
command = f"gsutil cp gs://shark_tank/elias/compressed_sv/{idx}_full.mlir {idx}_full.mlir"

subprocess.check_call(command.split())
filepath = Path(f"{idx}_full.mlir")
download_public_file(
f"gs://shark_tank/elias/compressed_sv/{idx}_full.mlir",
filepath.absolute(),
single_file=True,
)

f_ = open(f"{idx}_full.mlir", "rb")
bytecode = f_.read()
Expand Down Expand Up @@ -1026,6 +1043,8 @@ def compile_to_vmfb_one_model4(
"--iree-vm-target-truncate-unsupported-floats",
"--iree-codegen-check-ir-before-llvm-conversion=false",
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-opt-const-expr-hoisting=False",
"--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
],
)
module.load_module(vmfb_path)
Expand Down Expand Up @@ -1150,7 +1169,7 @@ def get_sharded_model(self, device="cpu", compressed=False):
layers0 = [layers00, layers01, layers02, layers03]
layers1 = [layers10, layers11, layers12, layers13]

_, modules = self.compile_to_vmfb_one_model(
_, modules = self.compile_to_vmfb_one_model4(
placeholder_input0,
layers0,
placeholder_input1,
Expand All @@ -1177,6 +1196,9 @@ def compile(self, device="cpu"):
return self.get_sharded_model(
device=device, compressed=self.compressed
)
return self.get_sharded_model(
device=device, compressed=self.compressed
)

def generate(self, prompt, cli=False):
# TODO: refactor for cleaner integration
Expand Down Expand Up @@ -1617,6 +1639,8 @@ def compile(self, download_vmfb=False):
"--iree-vm-target-truncate-unsupported-floats",
"--iree-codegen-check-ir-before-llvm-conversion=false",
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-opt-const-expr-hoisting=False",
"--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
],
)
print("Saved vic vmfb at ", str(path))
Expand Down

0 comments on commit 5203679

Please sign in to comment.