Merge pull request #793 from febielin/master

Update test.py
ersilia-os · Aug 29, 2023 · a48060a · a48060a
2 parents 09d1ec4 + 4f4cfda
commit a48060a
Showing 1 changed file with 158 additions and 107 deletions.
diff --git a/ersilia/publish/test.py b/ersilia/publish/test.py
@@ -4,6 +4,7 @@
 import tempfile
 import types
 import subprocess
+import shutil
 import time
 import re
 from ..cli import echo
@@ -287,23 +288,6 @@ def _check_model_output_shape(self, data):
         if data["card"]["Output Shape"] not in valid_output_shapes:
             raise texc.InvalidEntry("Output Shape")
 
-    """
-    This is a helper function for the run_bash() function, and it parses through the Dockerfile to find 
-    the package installation lines.
-    """
-
-    def _parse_dockerfile(self, temp_dir, pyversion):
-        packages = set()
-        prefix = "FROM bentoml/model-server:0.11.0-py"
-        os.chdir(temp_dir)  # navigate into cloned repo
-        with open("Dockerfile", "r") as dockerfile:
-            lines = dockerfile.readlines()
-            assert lines[0].startswith(prefix)
-            pyversion[0] = lines[0][len(prefix) :]
-            lines_as_string = "\n".join(lines)
-            run_lines = re.findall(r"^\s*RUN\s+(.+)$", lines_as_string, re.MULTILINE)
-        return run_lines
-
     """
     Check the model information to make sure it's correct. Performs the following checks:
     - Checks that model ID is correct
@@ -458,15 +442,15 @@ def check_consistent_output(self):
                                     print("\n")
                                 raise texc.InconsistentOutputs(self.model_id)
                         else:
-                            if self._compare_output_strings(elem1, elem2) <= 0.95:
+                            if self._compare_output_strings(elem1, elem2) <= 95:
                                 print("output1 value:", elem1)
                                 print("output2 value:", elem2)
                                 raise texc.InconsistentOutputs(self.model_id)
                 else:
                     # if it reaches this, then the outputs are just strings
                     if (
                         self._compare_output_strings(output1[key1], output2[key2])
-                        <= 0.95
+                        <= 95
                     ):
                         print("output1 value:", output1[key1])
                         print("output2 value:", output2[key2])
@@ -486,117 +470,184 @@ def check_consistent_output(self):
         else:
             echo("Number of outputs and inputs are equal!\n")
 
-    # WITH CONDA!!!!
-    @throw_ersilia_exception
-    def run_bash(self):
-        # print("Running the model bash script...")
-        print("Cloning a temporary file and calculating model size...")
+    @staticmethod
+    def default_env():
+        if "CONDA_DEFAULT_ENV" in os.environ:
+            return os.environ["CONDA_DEFAULT_ENV"]
+        else:
+            return BASE
+
+    @staticmethod
+    def conda_prefix(is_base):
+        o = run_command_check_output("which conda").rstrip()
+        if o:
+            o = os.path.abspath(os.path.join(o, "..", ".."))
+            return o
+        if is_base:
+            o = run_command_check_output("echo $CONDA_PREFIX").rstrip()
+            return o
+        else:
+            o = run_command_check_output("echo $CONDA_PREFIX_1").rstrip()
+            return o
 
-        # Save current working directory - atm, this must be run from root directory (~)
-        # TODO: is there a way to change this so that this test command doesn't have to be run from root dir
-        current_dir = os.getcwd()
+    def is_base(self):
+        default_env = self.default_env()
+        if default_env == 'base':
+            return True
+        else:
+            return False
+
+    def _compare_tolerance(self, value1, value2, tolerance_percentage):
+        diff = abs(value1 - value2)
+        tolerance = (tolerance_percentage / 100) * max(abs(value1), abs(value2))
+        return diff <= tolerance
+
+    def _compare_string_similarity(self, str1, str2, similarity_threshold):
+        similarity = fuzz.ratio(str1, str2)
+        return similarity >= similarity_threshold
+
+    def read_csv(self, file_path):
+        data = []
+        with open(file_path, 'r') as file:
+            lines = file.readlines()
+            header = lines[0].strip().split(',')
+            for line in lines[1:]:
+                values = line.strip().split(',')
+                data.append(dict(zip(header, values)))
+        return data
 
-        # Create temp directory and clone model
+    @throw_ersilia_exception
+    def run_bash(self): 
+        click.echo(BOLD + "Calculating model size..." + RESET)
+
         with tempfile.TemporaryDirectory() as temp_dir:
-            repo_url = "https://github.com/ersilia-os/{0}.git".format(self.model_id)
-            try:
-                subprocess.run(["git", "clone", repo_url, temp_dir], check=True)
-            except subprocess.CalledProcessError as e:
-                print(f"Error while cloning the repository: {e}")
 
-            # we will remove this part later, but will keep until we get the run_bash() function working
-            self._set_model_size(temp_dir)
+            # Print size of model
+            self._set_model_size(os.path.join(self.conda_prefix(self.is_base()), "../eos/dest/{0}".format(self.model_id)))
             size_kb = self.model_size / 1024
             size_mb = size_kb / 1024
             size_gb = size_mb / 1024
             print("\nModel Size:")
             print("KB:", size_kb)
             print("MB:", size_mb)
             print("GB:", size_gb)
-            return
-
-            # halt this check if the run.sh file does not exist (e.g. eos3b5e)
-            if not os.path.exists(os.path.join(temp_dir, "model/framework/run.sh")):
-                print("Check halted: run.sh file does not exist.")
+
+            click.echo(BOLD + "\nRunning the model bash script..." + RESET)
+
+            # Create an example input
+            eg = ExampleGenerator(model_id=self.model_id)
+            input = eg.example(n_samples=NUM_SAMPLES, file_name=None, simple=True)
+
+            # Read it into a temp file
+            ex_file = os.path.abspath(os.path.join(temp_dir, "example_file.csv"))
+            with open(ex_file, "w") as f:
+                f.write("smiles")
+                for item in input:
+                    f.write(str(item) + '\n')
+
+            # Halt this check if the run.sh file does not exist (e.g. eos3b5e)
+            if not os.path.exists(os.path.join(self.conda_prefix(self.is_base()), "../eos/dest/{0}/model/framework/run.sh".format(self.model_id))):
+                print("Check halted. Either run.sh file does not exist, or model was not fetched via --from_github or --from_s3.")
                 return
 
             # Navigate into the temporary directory
-            subdirectory_path = os.path.join(temp_dir, "model/framework")
+            subdirectory_path = os.path.join(self.conda_prefix(self.is_base()), "../eos/dest/{0}/model/framework".format(self.model_id))
             os.chdir(subdirectory_path)
 
-            # Parse Dockerfile
-            # dockerfile_path = os.path.join(temp_dir, "Dockerfile")
-            pyversion = [0]
-            packages = self._parse_dockerfile(temp_dir, pyversion)
-            pyversion[0] = pyversion[0][0] + "." + pyversion[0][1:]
-
-            conda_env_name = self.model_id
             try:
-                # subprocess.run(['conda', 'create', '-n', self.model_id, 'python={0}'.format(pyversion[0])], check=True)
-                subprocess.run(
-                    [
-                        "conda",
-                        "create",
-                        "-n",
-                        self.model_id,
-                        "python={0}".format("3.10.0"),
-                    ],
-                    check=True,
-                )
-                subprocess.run(
-                    ["conda", "activate", conda_env_name], shell=True, check=True
-                )
-
-                # install packages
-                for package in packages:
-                    if "conda install" in package:
-                        # Handle conda package installation
-                        subprocess.run(package, shell=True, check=True)
-                    elif "pip install" in package:
-                        subprocess.run(package, shell=True, check=True)
-                    else:
-                        print("Invalid package command:", package)
-                print("Packages printed!")
-
-                # Create temp file
-                with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
-                    temp_file_path = temp_file.name
-
-                # Run bash script with specified args
-                output_path = temp_file_path
-                run_path = os.path.join(
-                    temp_dir, "model/framework/run.sh"
-                )  # path to run.sh
-                arg1 = os.path.join(
-                    current_dir, "ersilia/test/inputs/compound_singles.csv"
-                )  # input
-                arg2 = output_path  # output
-
+                run_path = os.path.abspath(os.path.join(self.conda_prefix(self.is_base()), "../eos/dest/{0}/model/framework/".format(self.model_id)))
+                tmp_script = os.path.abspath(os.path.join(temp_dir, "script.sh"))
+                arg1 = os.path.join(temp_dir, "bash_output.csv")
+                output_log = os.path.abspath(os.path.join(temp_dir, "output.txt"))
+                error_log = os.path.abspath(os.path.join(temp_dir, "error.txt"))
+
+                bash_script = """
+    source {0}/etc/profile.d/conda.sh 
+    conda activate {1}
+    cd {2}
+    bash run.sh . {3} {4} > {5} 2> {6}
+    conda deactivate
+    """.format(self.conda_prefix(self.is_base()), self.model_id, run_path, ex_file, arg1, output_log, error_log)
+
+                with open(tmp_script, "w") as f:
+                    f.write(bash_script)
+
+                print("Executing 'bash run.sh'...")
                 try:
-                    subprocess.run(
-                        [
-                            "bash",
-                            run_path,
-                            ".",
-                            arg1,
-                            arg2,
-                        ],
-                        check=True,
-                    )
+                    subprocess.run(['bash', tmp_script], capture_output=True, text=True, check=True)
+                    print("Bash execution completed!\n")
                 except subprocess.CalledProcessError as e:
-                    print(f"Error while running the bash script: {e}")
-
-                with open(output_path, "r") as temp_file:
-                    output_contents = temp_file.read()
+                    print("Error encountered while running the bash script.")
 
-                print("Output contents:")
-                print(output_contents)
+                with open(output_log, "r") as output_file:
+                    output_content = output_file.read()
+                    print("Captured Output:")
+                    print(output_content)
 
-                deactivate_command = "conda deactivate"
-                subprocess.run(deactivate_command, shell=True, check=True)
+                with open(error_log, "r") as error_file:
+                    error_content = error_file.read()
+                    print("Captured Error:")
+                    print(error_content)
 
             except Exception as e:
-                print(f"Error while creating or activating the conda environment: {e}")
+                    print(f"Error while activating the conda environment: {e}")
+
+            print("Executing ersilia run...")
+            output_file = os.path.abspath(os.path.join(temp_dir, "ersilia_output.csv"))
+
+            session = Session(config_json=None)
+            service_class = session.current_service_class()
+            mdl = ErsiliaModel(self.model_id, service_class=service_class, config_json=None)
+            result = mdl.run(input=ex_file, output=output_file, batch_size=100)
+            print("Ersilia run completed!\n")
+
+            ersilia_run = self.read_csv(output_file)
+            remove_cols = ['key', 'input']
+            for row in ersilia_run:
+                for col in remove_cols:
+                    if col in row:
+                        del row[col]
+            bash_run = self.read_csv(arg1)
+            print("Bash output:\n", bash_run)
+            print("\nErsilia output:\n", ersilia_run)
+
+            # Select common columns for comparison
+            ersilia_columns = set()
+            for row in ersilia_run:
+                ersilia_columns.update(row.keys())
+
+            bash_columns = set()
+            for row in bash_run:
+                bash_columns.update(row.keys())
+
+            common_columns = ersilia_columns & bash_columns
+
+            # Compare values in the common columns within a 5% tolerance`
+            for column in common_columns:
+                for i in range(len(ersilia_run)):
+                    if isinstance(ersilia_run[i][column], (float, int)) and isinstance(ersilia_run[i][column], (float, int)): 
+                        if not all(self._compare_tolerance(a, b, DIFFERENCE_THRESHOLD) for a, b in zip(ersilia_run[i][column], bash_run[i][column])):
+                            click.echo(BOLD + "\nBash run and Ersilia run produce inconsistent results." + RESET)
+                            print("Error in the following column: ", column)
+                            print(ersilia_run[i][column])
+                            print(bash_run[i][column])
+                            raise texc.InconsistentOutputs(self.model_id)
+                    elif isinstance(ersilia_run[i][column], str) and isinstance(ersilia_run[i][column], str):
+                        if not all(self._compare_string_similarity(a, b, 95) for a, b in zip(ersilia_run[i][column], bash_run[i][column])):
+                            click.echo(BOLD + "\nBash run and Ersilia run produce inconsistent results." + RESET)
+                            print("Error in the following column: ", column)
+                            print(ersilia_run[i][column])
+                            print(bash_run[i][column])
+                            raise texc.InconsistentOutputs(self.model_id)
+                    elif isinstance(ersilia_run[i][column], bool) and isinstance(ersilia_run[i][column], bool):    
+                        if not ersilia_run[i][column].equals(bash_run[i][column]):
+                            click.echo(BOLD + "\nBash run and Ersilia run produce inconsistent results." + RESET)
+                            print("Error in the following column: ", column)
+                            print(ersilia_run[i][column])
+                            print(bash_run[i][column])
+                            raise texc.InconsistentOutputs(self.model_id)
+
+            click.echo(BOLD + "\nSUCCESS! Bash run and Ersilia run produce consistent results." + RESET)
 
     """
     writes to the .json file all the basic information received from the test module: