Skip to content

Commit

Permalink
Bandaid fix 2 (#1728)
Browse files Browse the repository at this point in the history
* download all mlirs

* fixed install method

* download all mlirs (#1727)

Co-authored-by: Elias Joseph <elias@nod-labs.com>

* added tags

* fix name check for file existence

* Remove SD from all_models.csv (#1706)

Removes SD from pytests as it has its own test suite.

* gpt_langchain.py fixes for pydantic (#1722)

* removed dead code

---------

Co-authored-by: Elias Joseph <elias@nod-labs.com>
Co-authored-by: PhaneeshB <b.phaneesh@gmail.com>
Co-authored-by: Ean Garvey <87458719+monorimet@users.noreply.github.com>
Co-authored-by: Stefan Kapusniak <121311569+one-lithe-rune@users.noreply.github.com>
  • Loading branch information
5 people authored Aug 8, 2023
1 parent bf073f8 commit 5203679
Showing 1 changed file with 34 additions and 10 deletions.
44 changes: 34 additions & 10 deletions apps/language_models/scripts/vicuna.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,8 +690,12 @@ def compile_lmhead(
# f_ = open(mlir_path, "wb")
# f_.write(bytecode)
# f_.close()
command = f"gsutil cp gs://shark_tank/elias/compressed_sv/lmhead.mlir lmhead.mlir"
subprocess.check_call(command.split())
filepath = Path("lmhead.mlir")
download_public_file(
"gs://shark_tank/elias/compressed_sv/lmhead.mlir",
filepath.absolute(),
single_file=True,
)
f_ = open(f"lmhead.mlir", "rb")
bytecode = f_.read()
f_.close()
Expand Down Expand Up @@ -732,8 +736,12 @@ def compile_norm(self, fvn, hidden_states, device="cpu", device_idx=None):
# use_tracing=False,
# verbose=False,
# )
command = f"gsutil cp gs://shark_tank/elias/compressed_sv/norm.mlir norm.mlir"
subprocess.check_call(command.split())
filepath = Path("norm.mlir")
download_public_file(
"gs://shark_tank/elias/compressed_sv/norm.mlir",
filepath.absolute(),
single_file=True,
)
f_ = open(f"norm.mlir", "rb")
bytecode = f_.read()
f_.close()
Expand Down Expand Up @@ -779,8 +787,12 @@ def compile_embedding(self, fve, input_ids, device="cpu", device_idx=None):
# f_ = open(mlir_path, "wb")
# f_.write(bytecode)
# f_.close()
command = f"gsutil cp gs://shark_tank/elias/compressed_sv/embedding.mlir embedding.mlir"
subprocess.check_call(command.split())
filepath = Path("embedding.mlir")
download_public_file(
"gs://shark_tank/elias/compressed_sv/embedding.mlir",
filepath.absolute(),
single_file=True,
)
f_ = open(f"embedding.mlir", "rb")
bytecode = f_.read()
f_.close()
Expand Down Expand Up @@ -963,6 +975,8 @@ def compile_to_vmfb_one_model(
"--iree-vm-target-truncate-unsupported-floats",
"--iree-codegen-check-ir-before-llvm-conversion=false",
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-opt-const-expr-hoisting=False",
"--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
],
)
module.load_module(vmfb_path)
Expand All @@ -986,9 +1000,12 @@ def compile_to_vmfb_one_model4(
f_.close()
mlirs.append(bytecode)
else:
command = f"gsutil cp gs://shark_tank/elias/compressed_sv/{idx}_full.mlir {idx}_full.mlir"

subprocess.check_call(command.split())
filepath = Path(f"{idx}_full.mlir")
download_public_file(
f"gs://shark_tank/elias/compressed_sv/{idx}_full.mlir",
filepath.absolute(),
single_file=True,
)

f_ = open(f"{idx}_full.mlir", "rb")
bytecode = f_.read()
Expand Down Expand Up @@ -1026,6 +1043,8 @@ def compile_to_vmfb_one_model4(
"--iree-vm-target-truncate-unsupported-floats",
"--iree-codegen-check-ir-before-llvm-conversion=false",
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-opt-const-expr-hoisting=False",
"--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
],
)
module.load_module(vmfb_path)
Expand Down Expand Up @@ -1150,7 +1169,7 @@ def get_sharded_model(self, device="cpu", compressed=False):
layers0 = [layers00, layers01, layers02, layers03]
layers1 = [layers10, layers11, layers12, layers13]

_, modules = self.compile_to_vmfb_one_model(
_, modules = self.compile_to_vmfb_one_model4(
placeholder_input0,
layers0,
placeholder_input1,
Expand All @@ -1177,6 +1196,9 @@ def compile(self, device="cpu"):
return self.get_sharded_model(
device=device, compressed=self.compressed
)
return self.get_sharded_model(
device=device, compressed=self.compressed
)

def generate(self, prompt, cli=False):
# TODO: refactor for cleaner integration
Expand Down Expand Up @@ -1617,6 +1639,8 @@ def compile(self, download_vmfb=False):
"--iree-vm-target-truncate-unsupported-floats",
"--iree-codegen-check-ir-before-llvm-conversion=false",
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-opt-const-expr-hoisting=False",
"--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
],
)
print("Saved vic vmfb at ", str(path))
Expand Down

0 comments on commit 5203679

Please sign in to comment.