Skip to content

Commit

Permalink
Fix issue with running Metaflow with the runner environment (#45)
Browse files Browse the repository at this point in the history
In some cases, the outside environment would "leak" to the inner execution
  • Loading branch information
romain-intel authored Aug 20, 2024
1 parent 3d51d04 commit a59f9f0
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def is_disabled(self) -> Optional[bool]:
or self.from_pathspec
or self.packages
or self.sources
or self.is_fetch_at_exec
):
return False
return None
Expand Down
40 changes: 18 additions & 22 deletions metaflow_extensions/netflix_ext/plugins/conda/conda_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,30 +321,26 @@ def resolve_fetch_at_exec_env(

@classmethod
def get_env_id(cls, conda: Conda, step_name: str) -> Optional[Union[str, EnvID]]:
# _METAFLOW_CONDA_ENV is set:
# - by remote_bootstrap:
# - if executing on a scheduler (no runtime), this will set it to
# the fully resolved ID even in the case of fetch_at_exec
# - if executing on a remote node with the runtime, it will also set it but
# it is unused
# - in runtime_step_cli:
# - if executing locally, we can use this in our actual execution to create
# the environment in runtime_step_cli
# - if executing remotely, we use this to determine the executable and
# it is used in bootstrap_commands to build the command line
# We either look in _result_for_step for the env id (this only works for the
# initial process that launched Metaflow ie: the first CLI process). This is what
# happens when deploying to argo/airflow or any other scheduler. For everything
# else, we look at _METAFLOW_CONDA_ENV:
# - when deploying to a scheduler: use _result_for_step to build the command line
# - when executing for scheduler, _METAFLOW_CONDA_ENV is set by remote_bootstrap
# and will also fully resolve fetch_at_exec and used in task_pre_step. Note that
# it relies on the value that was hardcoded in the command line when deploying
# to the scheduler. The value may still not be the same due to fetch_at_exec
# - when using the runtime:
# - if executing locally, the environment is created in runtime_step_cli
# and sets _METAFLOW_CONDA_ENV which is then read in task_pre_step (in the
# next process)
# - if executing remotely, _METAFLOW_CONDA_ENV is set by runtime_step_cli. It
# is then used by bootstrap_commands (in conda_environment.py) to build the
# command line. The command line will then call remote_bootstrap which will
# set again _METAFLOW_CONDA_ENV to the fully resolved environment which is
# then used in task_pre_step.
step_info = cls._result_for_step.get(step_name)
if step_info is None:
# _METAFLOW_CONDA_ENV is set:
# - by remote_bootstrap:
# - if executing on a scheduler (no runtime), this will set it to
# the fully resolved ID even in the case of fetch_at_exec
# - if executing on a remote node with the runtime, it will also set it but
# it is unused
# - in runtime_step_cli:
# - if executing locally, we can use this in our actual execution to create
# the environment in runtime_step_cli
# - if executing remotely, we use this to determine the executable and
# it is used in bootstrap_commands to build the command line
resolved_env_id = None
t = os.environ.get("_METAFLOW_CONDA_ENV")
if t:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -500,31 +500,26 @@ def runtime_step_cli(
max_user_code_retries: int,
ubf_context: str,
):
# We also set the env var in remote case for is_fetch_at_exec
# so that it can be used to fill out the bootstrap command with
# the proper environment
if self._is_enabled(UBF_TASK) or self._is_fetch_at_exec():
# Export this for local runs, we will use it to read the "resolved"
# environment ID in task_pre_step as well as in get_env_id in
# conda_environment.py. This makes it compatible with the remote
# bootstrap which also exports it. We do this even for UBF control tasks as
# this environment variable is then passed to the actual tasks. We don't
# always create the environment for the control task. Note that this is
# determined by _is_enabled

# Note that in the case of a fetch_at_exec, self._env_id is fully resolved
# (it was resolved in runtime_task_created) so this is the env_id we need
# to use for our task.
# Check if there is a conda environment to use. We check for UBF_TASK (instead of
# ubf_context) because, even if we don't create the environment for a control task,
# we want to pass down the environment variable for the mapper tasks.
if self._is_enabled(UBF_TASK):
cli_args.env["_METAFLOW_CONDA_ENV"] = json.dumps(self._env_id)

# If we are executing remotely, we now have _METAFLOW_CONDA_ENV set-up
# properly so we will be able to use it in _get_env_id in conda_environment
# to figure out what environment we need to execute remotely
if self._is_remote or not self._is_enabled(ubf_context):
# If we are executing remotely, we don't need to actually create the
# environment at all. We also don't create it if we are local and
# this is not enabled for the current UBF context.
return
else:
# In this case, there is no need for a conda environment. Since we can
# be running with a runner *inside* another Metaflow step that *does* have
# a conda environment, we make sure to clear _METAFLOW_CONDA_ENV so that
# the outside environment doesn't "leak" (it could have been set for the external
# process in which case it would get passed down).
cli_args.env["_METAFLOW_CONDA_ENV"] = ""
return

# Create the environment only if executing locally (ie: not self._is_remote)
conda = cast(Conda, self._env.conda)
assert self._env_id
entrypoint = None # type: Optional[str]
Expand Down

0 comments on commit a59f9f0

Please sign in to comment.