Skip to content

Commit

Permalink
Enable Ersilia to serve multiple models simultaneously (#1201)
Browse files Browse the repository at this point in the history
* WIP Add session management utils

* Create a session at ersilia cli initialization

* Remove unused code

* Start an ersilia session in a dedicated sessions dir that is mapped to the ID of the parent process that ran the given ersilia command. This will generally be a shell process inside a terminal, but it can also be a process from a bash script

* Declare session-specific defaults

* Run all ersilia commands within a single process during the standard example run; otherwise the ersilia run command does not find a served model, because it runs in a different process and therefore in a different session

* Move the currently served model's pid to its dedicated session directory

* WIP Logging

* Redirect tmp logs to model's session logs

* catch permission error

* Redirect tmp logs to model's session logs

* Don't use ersilia exception management, because we don't want the ersilia process to exit
  • Loading branch information
DhanshreeA authored Jul 26, 2024
1 parent a1dd2b2 commit 552b053
Show file tree
Hide file tree
Showing 36 changed files with 208 additions and 102 deletions.
3 changes: 2 additions & 1 deletion ersilia/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .create_cli import create_ersilia_cli
from .echo import echo
from ..utils.session import create_session_dir

cli = create_ersilia_cli()

create_session_dir()
if __name__ == "__main__":
cli()
7 changes: 0 additions & 7 deletions ersilia/cli/commands/utils/utils.py

This file was deleted.

5 changes: 3 additions & 2 deletions ersilia/cli/echo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
import click
import os
import json
from ..default import EOS, SILENCE_FILE
from ..default import SILENCE_FILE
from ..utils.session import get_session_dir


class Silencer(object):
def __init__(self):
self.silence_file = os.path.join(EOS, SILENCE_FILE)
self.silence_file = os.path.join(get_session_dir(), SILENCE_FILE)
if not os.path.exists(self.silence_file):
self.speak()

Expand Down
3 changes: 2 additions & 1 deletion ersilia/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from ..utils.exceptions_utils.api_exceptions import ApiSpecifiedOutputError
from ..default import FETCHED_MODELS_FILENAME, MODEL_SIZE_FILE, CARD_FILE, EOS
from ..default import DEFAULT_BATCH_SIZE, APIS_LIST_FILE, INFORMATION_FILE
from ..utils.logging import make_temp_dir

try:
import pandas as pd
Expand Down Expand Up @@ -222,7 +223,7 @@ def _api_runner_return(self, api, input, output, batch_size):
R += [r]
return json.dumps(R, indent=4)
else:
tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
tmp_folder = make_temp_dir(prefix="ersilia-")
is_h5_serializable = self.api_schema.is_h5_serializable(
api_name=api.api_name
)
Expand Down
6 changes: 4 additions & 2 deletions ersilia/core/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@
import time
import uuid
import shutil
from ..utils.session import get_session_dir

from ..default import EOS
from ..default import SESSIONS_DIR
from .base import ErsiliaBase


class Session(ErsiliaBase):
def __init__(self, config_json):
ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None)
self.session_file = os.path.join(EOS, "session.json")
session_dir = get_session_dir()
self.session_file = os.path.join(session_dir, "session.json")

def current_model_id(self):
data = self.get()
Expand Down
19 changes: 10 additions & 9 deletions ersilia/db/environments/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
from ...utils.identifiers.short import ShortIdentifier
from ...utils.ports import find_free_port
from .localdb import EnvironmentDb

from ...default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG
from ...utils.session import get_session_dir
from ...utils.logging import make_temp_dir

import sys

Expand Down Expand Up @@ -99,7 +100,7 @@ def containers_of_model(self, model_id, only_run, only_latest=True):

def build_with_bentoml(self, model_id, use_cache=True):
bundle_path = self._get_bundle_location(model_id)
tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
tmp_folder = make_temp_dir(prefix="ersilia-")
tmp_file = os.path.join(tmp_folder, "build.sh")
cmdlines = ["cd {0}".format(bundle_path)]
if use_cache:
Expand Down Expand Up @@ -128,7 +129,7 @@ def _model_deploy_dockerfiles_url(self):

def _build_ersilia_base(self):
self.logger.debug("Creating docker image of ersilia base")
path = tempfile.mkdtemp(prefix="ersilia-")
path = make_temp_dir(prefix="ersilia-")
base_folder = os.path.join(path, "base")
os.mkdir(base_folder)
base_files = [
Expand All @@ -153,7 +154,7 @@ def build_with_ersilia(self, model_id, docker_user, docker_pwd):
pass
else:
self._build_ersilia_base()
path = tempfile.mkdtemp(prefix="ersilia-model")
path = make_temp_dir(prefix="ersilia-model")
model_folder = os.path.join(path, model_id)
os.mkdir(model_folder)
cmd = "cd {0}; wget {1}/model/Dockerfile".format(
Expand Down Expand Up @@ -281,7 +282,7 @@ def _stop_containers_with_model_id(self, model_id):
return
if not self.is_installed():
return
tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
tmp_folder = make_temp_dir(prefix="ersilia-")
tmp_file = os.path.join(tmp_folder, "docker-ps.txt")
cmd = "docker ps > {0}".format(tmp_file)
self.logger.debug("Running {0}".format(cmd))
Expand All @@ -305,7 +306,7 @@ def _stop_containers_with_entrypoint_sh(self):
return
if not self.is_installed():
return
tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
tmp_folder = make_temp_dir(prefix="ersilia-")
tmp_file = os.path.join(tmp_folder, "docker-ps.txt")
cmd = "docker ps > {0}".format(tmp_file)
self.logger.debug("Running {0}".format(cmd))
Expand All @@ -332,7 +333,7 @@ def stop_containers(self, model_id):
self._stop_containers_with_model_id(model_id)
self._stop_containers_with_entrypoint_sh()
self.remove_stopped_containers()
tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
tmp_folder = make_temp_dir(prefix="ersilia-")
tmp_file = os.path.join(tmp_folder, "docker-ps.txt")
cmd = "docker ps > {0}".format(tmp_file)
self.logger.debug("Running {0}".format(cmd))
Expand All @@ -356,7 +357,7 @@ def prune(self):
run_command(cmd)

def delete_image(self, img):
fn = os.path.join(self._tmp_dir, "rm_image_output.txt")
fn = os.path.join(get_session_dir(), "rm_image_output.txt")
cmd = "docker image rm {0} --force 2> {1}".format(img, fn)
run_command(cmd)
with open(fn, "r") as f:
Expand All @@ -382,7 +383,7 @@ def delete_images(self, model_id, purge_unnamed=True):
return
self.stop_containers(model_id)
self.prune()
tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
tmp_folder = make_temp_dir(prefix="ersilia-")
tmp_file = os.path.join(tmp_folder, "docker-images.txt")
cmd = "docker images > {0}".format(tmp_file)
self.logger.debug("Running {0}".format(cmd))
Expand Down
18 changes: 14 additions & 4 deletions ersilia/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@
DEFAULT_VENV = "env"
DEFAULT_API_NAME = "run"
PACKMODE_FILE = "pack_mode.txt"
LOGGING_FILE = "console.log"
CURRENT_LOGGING_FILE = "current.log"
CARD_FILE = "card.json"
SILENCE_FILE = ".silence.json"
VERBOSE_FILE = ".verbose.json"
API_SCHEMA_FILE = "api_schema.json"
MODEL_SIZE_FILE = "size.json"
DEFAULT_BATCH_SIZE = 100
Expand Down Expand Up @@ -61,6 +57,20 @@
PACK_METHOD_FASTAPI = "fastapi"
PACK_METHOD_BENTOML = "bentoml"

# Session and logging
SESSIONS_DIR = os.path.join(EOS, "sessions")
if not os.path.exists(SESSIONS_DIR):
os.makedirs(SESSIONS_DIR, exist_ok=True)
SESSION_HISTORY_FILE = "history.txt"
SESSION_JSON = "session.json"
LOGS_DIR = "logs"
CONTAINER_LOGS_TMP_DIR = "_logs/tmp"
CONTAINER_LOGS_EOS_DIR = "_logs/eos" # This is not used
LOGGING_FILE = "console.log"
CURRENT_LOGGING_FILE = "current.log"
SILENCE_FILE = ".silence.json"
VERBOSE_FILE = ".verbose.json"

# Isaura data lake
H5_EXTENSION = ".h5"
H5_DATA_FILE = "data.h5"
Expand Down
3 changes: 2 additions & 1 deletion ersilia/hub/content/card.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
MemoryGbBaseInformationError,
)
from ...utils.identifiers.model import ModelIdentifier
from ...utils.logging import make_temp_dir

try:
from isaura.core.hdf5 import Hdf5Explorer
Expand Down Expand Up @@ -622,7 +623,7 @@ def _raw_readme_url(self, model_id):
return url

def _gh_view(self, model_id):
tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
tmp_folder = make_temp_dir(prefix="ersilia-")
tmp_file = os.path.join(tmp_folder, "view.md")
cmd = "gh repo view {0}/{1} > {2}".format("ersilia-os", model_id, tmp_file)
run_command(cmd)
Expand Down
5 changes: 3 additions & 2 deletions ersilia/hub/delete/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,9 @@ def __init__(self, config_json=None):
ErsiliaBase.__init__(self, config_json=config_json)

def delete(self):
os.rmdir(self._tmp_dir)
os.makedirs(self._tmp_dir)
if os.path.exists(self._tmp_dir):
os.rmdir(self._tmp_dir)
os.makedirs(self._tmp_dir)


class ModelFullDeleter(ErsiliaBase):
Expand Down
3 changes: 2 additions & 1 deletion ersilia/hub/fetch/actions/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from .template_resolver import TemplateResolver

from ....default import S3_BUCKET_URL_ZIP, PREDEFINED_EXAMPLE_FILES
from ....utils.logging import make_temp_dir

MODEL_DIR = "model"
ROOT = os.path.basename(os.path.abspath(__file__))
Expand Down Expand Up @@ -207,7 +208,7 @@ def _copy_from_github(self, dst):

def _copy_zip_from_s3(self, dst):
self.logger.debug("Downloading model from S3 in zipped format")
tmp_file = os.path.join(tempfile.mkdtemp("ersilia-"), "model.zip")
tmp_file = os.path.join(make_temp_dir("ersilia-"), "model.zip")
self.s3_down.download_from_s3(
bucket_url=S3_BUCKET_URL_ZIP,
file_name=self.model_id + ".zip",
Expand Down
5 changes: 3 additions & 2 deletions ersilia/hub/fetch/actions/modify.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
BundleRequirementsFile,
)
from ...bundle.repo import DockerfileFile
from ....utils.logging import make_temp_dir


class ModelModifier(BaseAction):
Expand All @@ -24,7 +25,7 @@ def __init__(self, model_id, config_json):
def _bundle_uses_ersilia(self, model_id):
"""Check if the bundle imports ersilia"""
src = os.path.join(self._get_bundle_location(model_id), model_id, "src")
tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
tmp_folder = make_temp_dir(prefix="ersilia-")
tmp_file = os.path.join(tmp_folder, "grep.txt")
cmd = "grep -R 'ersilia' {0}/* > {1}".format(src, tmp_file)
run_command(cmd)
Expand Down Expand Up @@ -86,7 +87,7 @@ def _bundle_dockerfile_has_ersilia(self, model_id):
dockerfile = os.path.join(dir, DOCKERFILE)
if not os.path.exists(dockerfile):
return None
tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
tmp_folder = make_temp_dir(prefix="ersilia-")
tmp_file = os.path.join(tmp_folder, "grep.txt")
cmd = "grep -R 'ersilia' {0} > {1}".format(dockerfile, tmp_file)
run_command(cmd)
Expand Down
3 changes: 2 additions & 1 deletion ersilia/hub/fetch/inner_template/src/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import subprocess
import csv

from .....utils.logging import make_temp_dir
CHECKPOINTS_BASEDIR = "checkpoints"
FRAMEWORK_BASEDIR = "framework"

Expand Down Expand Up @@ -62,7 +63,7 @@ def set_framework_dir(self, dest):
self.framework_dir = os.path.abspath(dest)

def run(self, input_list):
tmp_folder = tempfile.mkdtemp(prefix="eos-")
tmp_folder = make_temp_dir(prefix="eos-")
data_file = os.path.join(tmp_folder, self.DATA_FILE)
output_file = os.path.join(tmp_folder, self.OUTPUT_FILE)
log_file = os.path.join(tmp_folder, self.LOG_FILE)
Expand Down
10 changes: 6 additions & 4 deletions ersilia/hub/fetch/lazy_fetchers/dockerhub.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def modify_information(self, model_id):
:param service_class_file: File containing the model service class.
:size_file: File containing the size of the pulled docker image.
"""
information_file = "{0}/dest/{1}/{2}".format(EOS, model_id, INFORMATION_FILE)
information_file = os.path.join(self._model_path(model_id), INFORMATION_FILE)
mp = ModelPuller(model_id=model_id, config_json=self.config_json)
try:
with open(information_file, "r") as infile:
Expand All @@ -115,9 +115,11 @@ def modify_information(self, model_id):
self.logger.error("Information file not found, not modifying anything")
return None

data["service_class"] = "pulled_docker" # Using this literal here to prevent a file read
# from service class file for a model fetched through DockerHub since we already know the service class.
data["size"] = mp._get_size_of_local_docker_image_in_mb()
# Using this literal here to prevent a file read
# from service class file for a model fetched through DockerHub
# since we already know the service class.
data["service_class"] = "pulled_docker"
data["size"] = mp._get_size_of_local_docker_image_in_mb() # TODO this should probably be a util function
with open(information_file, "w") as outfile:
json.dump(data, outfile, indent=4)

Expand Down
3 changes: 2 additions & 1 deletion ersilia/hub/fetch/pack/bentoml_pack/runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ... import MODEL_INSTALL_COMMANDS_FILE
from ..... import throw_ersilia_exception
from .....utils.exceptions_utils.fetch_exceptions import CondaEnvironmentExistsError
from .....utils.logging import make_temp_dir

USE_CHECKSUM = False

Expand Down Expand Up @@ -199,7 +200,7 @@ def _run(self):
self.logger.debug("Executing container {0}".format(name))
self.docker.exec_container(name, "python %s" % self.cfg.HUB.PACK_SCRIPT)
self.logger.debug("Copying bundle from docker image to host")
tmp_dir = tempfile.mkdtemp(prefix="ersilia-")
tmp_dir = make_temp_dir(prefix="ersilia-")
self.logger.debug("Using this temporary directory: {0}".format(tmp_dir))
self.docker.cp_from_container(
name, "/root/bentoml/repository/%s" % model_id, tmp_dir
Expand Down
5 changes: 2 additions & 3 deletions ersilia/hub/fetch/register/standard_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,8 @@ def run(self):
self.logger.debug(cmd_output)
if "Welcome to Ersilia" in cmd_output:
self.logger.debug("No need to use Conda!")
for cmd in commands:
self.logger.debug(cmd)
run_command(cmd)
cmd = " && ".join(commands)
run_command(cmd)
else:
self.logger.debug("Will run this through Conda")
env_name = os.environ.get("CONDA_DEFAULT_ENV")
Expand Down
3 changes: 2 additions & 1 deletion ersilia/hub/pull/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from ...utils.docker import SimpleDocker
from ...default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG, EOS, MODEL_SIZE_FILE
from ...utils.logging import make_temp_dir

PULL_IMAGE = os.environ.get("PULL_IMAGE", "Y")

Expand Down Expand Up @@ -112,7 +113,7 @@ def pull(self):
"Trying to pull image {0}/{1}".format(DOCKERHUB_ORG, self.model_id)
)
tmp_file = os.path.join(
tempfile.mkdtemp(prefix="ersilia-"), "docker_pull.log"
make_temp_dir(prefix="ersilia-"), "docker_pull.log"
)
self.logger.debug("Keeping logs of pull in {0}".format(tmp_file))
run_command(
Expand Down
3 changes: 2 additions & 1 deletion ersilia/io/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from ..db.hubdata.json_models_interface import JsonModelsInterface
from ..default import FEATURE_MERGE_PATTERN, PACK_METHOD_FASTAPI
from ..utils.paths import resolve_pack_method
from ..utils.logging import make_temp_dir


class DataFrame(object):
Expand Down Expand Up @@ -437,7 +438,7 @@ def __init__(self, config_json):
GenericOutputAdapter.__init__(self, config_json=config_json)

def dictlist2dataframe(self, dl, model_id, api_name):
tmp_dir = tempfile.mkdtemp(prefix="ersilia-")
tmp_dir = make_temp_dir(prefix="ersilia-")
df_file = os.path.join(tmp_dir, "data.csv")
self.adapt(dl, df_file, model_id, api_name)
df = Dataframe()
Expand Down
8 changes: 4 additions & 4 deletions ersilia/io/readers/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

from ..shape import InputShape
from ..shape import InputShapeSingle, InputShapeList, InputShapePairOfLists

from ... import logger
from ...utils.logging import make_temp_dir

MIN_COLUMN_VALIDITY = 0.8
FLATTENED_EVIDENCE = 0.2
Expand Down Expand Up @@ -68,7 +68,7 @@ def get_extension(self):

class BatchCacher(object):
def __init__(self):
self.tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
self.tmp_folder = make_temp_dir(prefix="ersilia-")

def get_cached_files(self, prefix):
idx2fn = {}
Expand Down Expand Up @@ -554,7 +554,7 @@ def split_in_cache(self):
class TabularFileReader(StandardTabularFileReader):
def __init__(self, path, IO, sniff_line_limit=100):
self.src_path = os.path.abspath(path)
self.tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
self.tmp_folder = make_temp_dir(prefix="ersilia-")
self.dst_path = os.path.join(self.tmp_folder, "standard_input_file.csv")
self.path = self.dst_path
self.IO = IO
Expand Down Expand Up @@ -720,7 +720,7 @@ def split_in_cache(self):
class JsonFileReader(StandardJsonFileReader):
def __init__(self, path, IO):
self.src_path = os.path.abspath(path)
self.tmp_folder = tempfile.mkdtemp(prefix="ersilia-")
self.tmp_folder = make_temp_dir(prefix="ersilia-")
self.dst_path = os.path.join(self.tmp_folder, "standard_input_file.json")
self.path = self.dst_path
self.IO = IO
Expand Down
Loading

0 comments on commit 552b053

Please sign in to comment.