Skip to content

Commit

Permalink
Merge pull request #793 from febielin/master
Browse files Browse the repository at this point in the history
Update test.py
  • Loading branch information
GemmaTuron authored Aug 29, 2023
2 parents 09d1ec4 + 4f4cfda commit a48060a
Showing 1 changed file with 158 additions and 107 deletions.
265 changes: 158 additions & 107 deletions ersilia/publish/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import tempfile
import types
import subprocess
import shutil
import time
import re
from ..cli import echo
Expand Down Expand Up @@ -287,23 +288,6 @@ def _check_model_output_shape(self, data):
if data["card"]["Output Shape"] not in valid_output_shapes:
raise texc.InvalidEntry("Output Shape")

"""
This is a helper function for the run_bash() function, and it parses through the Dockerfile to find
the package installation lines.
"""

def _parse_dockerfile(self, temp_dir, pyversion):
packages = set()
prefix = "FROM bentoml/model-server:0.11.0-py"
os.chdir(temp_dir) # navigate into cloned repo
with open("Dockerfile", "r") as dockerfile:
lines = dockerfile.readlines()
assert lines[0].startswith(prefix)
pyversion[0] = lines[0][len(prefix) :]
lines_as_string = "\n".join(lines)
run_lines = re.findall(r"^\s*RUN\s+(.+)$", lines_as_string, re.MULTILINE)
return run_lines

"""
Check the model information to make sure it's correct. Performs the following checks:
- Checks that model ID is correct
Expand Down Expand Up @@ -458,15 +442,15 @@ def check_consistent_output(self):
print("\n")
raise texc.InconsistentOutputs(self.model_id)
else:
if self._compare_output_strings(elem1, elem2) <= 0.95:
if self._compare_output_strings(elem1, elem2) <= 95:
print("output1 value:", elem1)
print("output2 value:", elem2)
raise texc.InconsistentOutputs(self.model_id)
else:
# if it reaches this, then the outputs are just strings
if (
self._compare_output_strings(output1[key1], output2[key2])
<= 0.95
<= 95
):
print("output1 value:", output1[key1])
print("output2 value:", output2[key2])
Expand All @@ -486,117 +470,184 @@ def check_consistent_output(self):
else:
echo("Number of outputs and inputs are equal!\n")

# WITH CONDA!!!!
@throw_ersilia_exception
def run_bash(self):
# print("Running the model bash script...")
print("Cloning a temporary file and calculating model size...")
@staticmethod
def default_env():
if "CONDA_DEFAULT_ENV" in os.environ:
return os.environ["CONDA_DEFAULT_ENV"]
else:
return BASE

@staticmethod
def conda_prefix(is_base):
    """Return a conda prefix directory.

    The location of the ``conda`` executable (as reported by ``which conda``)
    is tried first: when found, the directory two levels above it is
    returned as an absolute path. Otherwise the shell is asked to echo
    ``$CONDA_PREFIX`` (when ``is_base`` is true) or ``$CONDA_PREFIX_1``.

    Args:
        is_base: Whether the active environment is the base environment.

    Returns:
        The resolved path, with trailing whitespace removed.
    """
    # presumably run_command_check_output returns the command's stdout as
    # text - verify against its definition
    conda_executable = run_command_check_output("which conda").rstrip()
    if conda_executable:
        return os.path.abspath(os.path.join(conda_executable, "..", ".."))
    echo_command = "echo $CONDA_PREFIX" if is_base else "echo $CONDA_PREFIX_1"
    return run_command_check_output(echo_command).rstrip()

# Save current working directory - atm, this must be run from root directory (~)
# TODO: is there a way to change this so that this test command doesn't have to be run from root dir
current_dir = os.getcwd()
def is_base(self):
    """Return ``True`` when the active conda environment is named ``base``."""
    return self.default_env() == 'base'

def _compare_tolerance(self, value1, value2, tolerance_percentage):
diff = abs(value1 - value2)
tolerance = (tolerance_percentage / 100) * max(abs(value1), abs(value2))
return diff <= tolerance

def _compare_string_similarity(self, str1, str2, similarity_threshold):
    """Tell whether two strings are at least ``similarity_threshold`` similar.

    The score comes from ``fuzz.ratio(str1, str2)`` and the comparison is
    inclusive (a score equal to the threshold passes).
    """
    return fuzz.ratio(str1, str2) >= similarity_threshold

def read_csv(self, file_path):
    """Read a CSV file into a list of row dictionaries.

    The first row is taken as the header. Every following non-blank row
    becomes a ``dict`` mapping header names to the row's (string) values;
    when a row is shorter or longer than the header, the extra names or
    values are dropped.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        list[dict[str, str]]: One dictionary per data row, in file order.
        An empty file yields an empty list.
    """
    # Function-scope import: the csv module handles quoted fields (for
    # example values that contain commas), which a plain split(',') does not.
    import csv

    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        header = next(reader, None)
        if header is None:
            # Empty file: no header, hence no rows.
            return []
        # csv.reader yields [] for blank lines; skip them.
        return [dict(zip(header, row)) for row in reader if row]

# Create temp directory and clone model
@throw_ersilia_exception
def run_bash(self):
# NOTE(review): this span is a rendered diff hunk without +/- markers and without
# indentation, so lines of the pre-change and post-change versions of run_bash appear
# interleaved (e.g. two consecutive assignments of subdirectory_path, two consecutive
# error prints). Which side each line belongs to cannot be told with certainty from
# here; the notes below are hedged accordingly.
#
# Visible intent: compute and print the model size, run the model's run.sh through a
# generated bash script inside the model's conda environment, run the same input
# through the Ersilia API, and compare both CSV outputs column by column.
click.echo(BOLD + "Calculating model size..." + RESET)

with tempfile.TemporaryDirectory() as temp_dir:
# Clone of the model repository into temp_dir; a failed clone is only printed.
# NOTE(review): presumably pre-change code - confirm.
repo_url = "https://github.com/ersilia-os/{0}.git".format(self.model_id)
try:
subprocess.run(["git", "clone", repo_url, temp_dir], check=True)
except subprocess.CalledProcessError as e:
print(f"Error while cloning the repository: {e}")

# we will remove this part later, but will keep until we get the run_bash() function working
self._set_model_size(temp_dir)
# Print size of model
# NOTE(review): second _set_model_size call, this time on the fetched model folder
# "<conda prefix>/../eos/dest/<model_id>"; presumably the post-change replacement
# of the call above - confirm.
self._set_model_size(os.path.join(self.conda_prefix(self.is_base()), "../eos/dest/{0}".format(self.model_id)))
size_kb = self.model_size / 1024
size_mb = size_kb / 1024
size_gb = size_mb / 1024
print("\nModel Size:")
print("KB:", size_kb)
print("MB:", size_mb)
print("GB:", size_gb)
# NOTE(review): a bare return here would skip everything below; presumably a
# pre-change line - confirm.
return

# halt this check if the run.sh file does not exist (e.g. eos3b5e)
if not os.path.exists(os.path.join(temp_dir, "model/framework/run.sh")):
print("Check halted: run.sh file does not exist.")

click.echo(BOLD + "\nRunning the model bash script..." + RESET)

# Create an example input
eg = ExampleGenerator(model_id=self.model_id)
input = eg.example(n_samples=NUM_SAMPLES, file_name=None, simple=True)

# Read it into a temp file
ex_file = os.path.abspath(os.path.join(temp_dir, "example_file.csv"))
with open(ex_file, "w") as f:
# NOTE(review): the header is written without a trailing newline, so the first
# item ends up on the same line as "smiles".
f.write("smiles")
for item in input:
f.write(str(item) + '\n')

# Halt this check if the run.sh file does not exist (e.g. eos3b5e)
if not os.path.exists(os.path.join(self.conda_prefix(self.is_base()), "../eos/dest/{0}/model/framework/run.sh".format(self.model_id))):
print("Check halted. Either run.sh file does not exist, or model was not fetched via --from_github or --from_s3.")
return

# Navigate into the temporary directory
# NOTE(review): two consecutive assignments of subdirectory_path; the second one wins
# and points at the fetched model folder rather than temp_dir.
subdirectory_path = os.path.join(temp_dir, "model/framework")
subdirectory_path = os.path.join(self.conda_prefix(self.is_base()), "../eos/dest/{0}/model/framework".format(self.model_id))
os.chdir(subdirectory_path)

# Parse Dockerfile
# dockerfile_path = os.path.join(temp_dir, "Dockerfile")
# pyversion is a one-element list used as an output parameter of _parse_dockerfile;
# the next line inserts a dot after the first character of the version suffix.
pyversion = [0]
packages = self._parse_dockerfile(temp_dir, pyversion)
pyversion[0] = pyversion[0][0] + "." + pyversion[0][1:]

conda_env_name = self.model_id
try:
# subprocess.run(['conda', 'create', '-n', self.model_id, 'python={0}'.format(pyversion[0])], check=True)
# NOTE(review): the Python version is hard-coded to 3.10.0 here; the parsed
# pyversion is not used.
subprocess.run(
[
"conda",
"create",
"-n",
self.model_id,
"python={0}".format("3.10.0"),
],
check=True,
)
subprocess.run(
["conda", "activate", conda_env_name], shell=True, check=True
)

# install packages
# Each RUN line taken from the Dockerfile is executed through the shell when it
# contains "conda install" or "pip install"; anything else is only reported.
for package in packages:
if "conda install" in package:
# Handle conda package installation
subprocess.run(package, shell=True, check=True)
elif "pip install" in package:
subprocess.run(package, shell=True, check=True)
else:
print("Invalid package command:", package)
print("Packages printed!")

# Create temp file
with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
temp_file_path = temp_file.name

# Run bash script with specified args
output_path = temp_file_path
run_path = os.path.join(
temp_dir, "model/framework/run.sh"
) # path to run.sh
arg1 = os.path.join(
current_dir, "ersilia/test/inputs/compound_singles.csv"
) # input
arg2 = output_path # output

# NOTE(review): run_path and arg1 are reassigned below; in these assignments run_path
# is the model's framework directory and arg1 is the CSV the bash run writes to.
run_path = os.path.abspath(os.path.join(self.conda_prefix(self.is_base()), "../eos/dest/{0}/model/framework/".format(self.model_id)))
tmp_script = os.path.abspath(os.path.join(temp_dir, "script.sh"))
arg1 = os.path.join(temp_dir, "bash_output.csv")
output_log = os.path.abspath(os.path.join(temp_dir, "output.txt"))
error_log = os.path.abspath(os.path.join(temp_dir, "error.txt"))

# Script template: activate the model's conda environment, run run.sh on the example
# file, redirect stdout/stderr to the two log files, then deactivate.
# NOTE(review): the template has placeholders {0}..{6} (seven) and format() receives
# seven arguments, so {5}/{6} map to output_log/error_log.
bash_script = """
source {0}/etc/profile.d/conda.sh
conda activate {1}
cd {2}
bash run.sh . {3} {4} > {5} 2> {6}
conda deactivate
""".format(self.conda_prefix(self.is_base()), self.model_id, run_path, ex_file, arg1, output_log, error_log)

with open(tmp_script, "w") as f:
f.write(bash_script)

print("Executing 'bash run.sh'...")
try:
subprocess.run(
[
"bash",
run_path,
".",
arg1,
arg2,
],
check=True,
)
subprocess.run(['bash', tmp_script], capture_output=True, text=True, check=True)
print("Bash execution completed!\n")
except subprocess.CalledProcessError as e:
print(f"Error while running the bash script: {e}")

with open(output_path, "r") as temp_file:
output_contents = temp_file.read()
print("Error encountered while running the bash script.")

print("Output contents:")
print(output_contents)
# Dump the stdout log written by the generated script.
with open(output_log, "r") as output_file:
output_content = output_file.read()
print("Captured Output:")
print(output_content)

deactivate_command = "conda deactivate"
subprocess.run(deactivate_command, shell=True, check=True)
# Dump the stderr log written by the generated script.
with open(error_log, "r") as error_file:
error_content = error_file.read()
print("Captured Error:")
print(error_content)

except Exception as e:
print(f"Error while creating or activating the conda environment: {e}")
print(f"Error while activating the conda environment: {e}")

print("Executing ersilia run...")
output_file = os.path.abspath(os.path.join(temp_dir, "ersilia_output.csv"))

session = Session(config_json=None)
service_class = session.current_service_class()
mdl = ErsiliaModel(self.model_id, service_class=service_class, config_json=None)
result = mdl.run(input=ex_file, output=output_file, batch_size=100)
print("Ersilia run completed!\n")

# Load both outputs as lists of row dicts; the 'key' and 'input' columns are dropped
# from the Ersilia rows before comparing.
ersilia_run = self.read_csv(output_file)
remove_cols = ['key', 'input']
for row in ersilia_run:
for col in remove_cols:
if col in row:
del row[col]
bash_run = self.read_csv(arg1)
print("Bash output:\n", bash_run)
print("\nErsilia output:\n", ersilia_run)

# Select common columns for comparison
ersilia_columns = set()
for row in ersilia_run:
ersilia_columns.update(row.keys())

bash_columns = set()
for row in bash_run:
bash_columns.update(row.keys())

common_columns = ersilia_columns & bash_columns

# Compare values in the common columns within a 5% tolerance
# NOTE(review): in each of the three isinstance pairs below both operands are
# ersilia_run[i][column]; presumably the second was meant to be bash_run[i][column].
# NOTE(review): read_csv returns string values, so only the str branch can match;
# there, zip() over two strings pairs them character by character.
# NOTE(review): bash_run[i] is indexed with ersilia_run's row index; assumes both
# outputs have the same number of rows - confirm.
for column in common_columns:
for i in range(len(ersilia_run)):
if isinstance(ersilia_run[i][column], (float, int)) and isinstance(ersilia_run[i][column], (float, int)):
if not all(self._compare_tolerance(a, b, DIFFERENCE_THRESHOLD) for a, b in zip(ersilia_run[i][column], bash_run[i][column])):
click.echo(BOLD + "\nBash run and Ersilia run produce inconsistent results." + RESET)
print("Error in the following column: ", column)
print(ersilia_run[i][column])
print(bash_run[i][column])
raise texc.InconsistentOutputs(self.model_id)
elif isinstance(ersilia_run[i][column], str) and isinstance(ersilia_run[i][column], str):
if not all(self._compare_string_similarity(a, b, 95) for a, b in zip(ersilia_run[i][column], bash_run[i][column])):
click.echo(BOLD + "\nBash run and Ersilia run produce inconsistent results." + RESET)
print("Error in the following column: ", column)
print(ersilia_run[i][column])
print(bash_run[i][column])
raise texc.InconsistentOutputs(self.model_id)
elif isinstance(ersilia_run[i][column], bool) and isinstance(ersilia_run[i][column], bool):
# NOTE(review): Python bool has no .equals() method; this line would raise
# AttributeError if the branch were ever reached.
if not ersilia_run[i][column].equals(bash_run[i][column]):
click.echo(BOLD + "\nBash run and Ersilia run produce inconsistent results." + RESET)
print("Error in the following column: ", column)
print(ersilia_run[i][column])
print(bash_run[i][column])
raise texc.InconsistentOutputs(self.model_id)

click.echo(BOLD + "\nSUCCESS! Bash run and Ersilia run produce consistent results." + RESET)

"""
writes to the .json file all the basic information received from the test module:
Expand Down

0 comments on commit a48060a

Please sign in to comment.