Skip to content

Commit

Permalink
Remove installed environments and packages to try to keep saving more…
Browse files Browse the repository at this point in the history
… space
  • Loading branch information
romain-intel committed Aug 13, 2024
1 parent 9e0a582 commit b843ec6
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 31 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ on:
- main

jobs:

pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1

test:
runs-on: ${{ matrix.os }}
strategy:
Expand Down
7 changes: 5 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
exclude: /vendor/
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
- id: check-yaml
- id: check-json
- repo: https://github.com/ambv/black
rev: 22.8.0
rev: 24.4.2
hooks:
- id: black
language_version: python3
exclude: "^/metaflow_extensions/netflix_ext/vendor/"
args: [-t, py34, -t, py35, -t, py36, -t, py37, -t, py38, -t, py39, -t, py310, -t, py311, -t, py312]


42 changes: 18 additions & 24 deletions metaflow_extensions/netflix_ext/plugins/conda/conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def call_conda(
or binary == "micromamba"
)
):
args.extend(["-r", self._info["root_prefix"], "--json"])
args.extend(["-r", self.root_prefix, "--json"])
debug.conda_exec("Conda call: %s" % str([self._bins[binary]] + args))
return cast(
bytes,
Expand Down Expand Up @@ -2060,9 +2060,13 @@ def _check_match(dir_name: str) -> Optional[EnvID]:
return None

if self._conda_executable_type == "micromamba" or CONDA_LOCAL_PATH is not None:
# Micromamba does not record created environments so we look around for them
# in the root env directory. We also do this if we had a local installation
# because we don't want to look around at other environments created outside
# of that local installation
# For micromamba OR if we are using a specific conda installation
# (so with CONDA_LOCAL_PATH), only search there
env_dir = os.path.join(self._info["root_prefix"], "envs")
env_dir = self._root_env_dir
with CondaLock(self.echo, self._env_lock_file(os.path.join(env_dir, "_"))):
# Grab a lock *once* on the parent directory so we pick any name for
# the "directory".
Expand All @@ -2072,6 +2076,7 @@ def _check_match(dir_name: str) -> Optional[EnvID]:
if possible_env_id:
ret.setdefault(possible_env_id, []).append(entry.path)
else:
# Else we iterate over all the environments that the installation knows about
envs = self._info["envs"] # type: List[str]
for env in envs:
with CondaLock(self.echo, self._env_lock_file(env)):
Expand Down Expand Up @@ -2189,19 +2194,20 @@ def _remote_env_fetch_alias(
@property
def _package_dirs(self) -> List[str]:
info = self._info
if self._conda_executable_type == "micromamba":
pkg_dir = os.path.join(info["root_prefix"], "pkgs")
if not os.path.exists(pkg_dir):
os.makedirs(pkg_dir)
return [pkg_dir]
# We rely on the first directory existing. This should be a fairly
# easy check.
if not os.path.exists(info["pkgs_dirs"][0]):
os.makedirs(info["pkgs_dirs"][0])
return info["pkgs_dirs"]

@property
def _root_env_dir(self) -> str:
info = self._info
if self._conda_executable_type == "micromamba":
return os.path.join(info["root_prefix"], "envs")
return info["envs_dirs"][0]
# We rely on the first directory existing. This should be a fairly
# easy check.
if not os.path.exists(info["envs_dirs"][0]):
os.makedirs(info["envs_dirs"][0])
return info["envs_dirs"]

@property
def _info(self) -> Dict[str, Any]:
Expand All @@ -2213,22 +2219,10 @@ def _info(self) -> Dict[str, Any]:
def _info_no_lock(self) -> Dict[str, Any]:
if self._cached_info is None:
self._cached_info = json.loads(self.call_conda(["info", "--json"]))
# Micromamba is annoying because if there are multiple installations of it
# executing the binary doesn't necessarily point us to the root directory
# we are in so we kind of look for it heuristically
if self._conda_executable_type == "micromamba":
# Best info if we don't have something else
self._cached_info["root_prefix"] = self._cached_info["base environment"]
cur_dir = os.path.dirname(self._bins[self._conda_executable_type])
while True:
if os.path.isdir(os.path.join(cur_dir, "pkgs")) and os.path.isdir(
os.path.join(cur_dir, "envs")
):
self._cached_info["root_prefix"] = cur_dir
break
if cur_dir == "/":
break
cur_dir = os.path.dirname(cur_dir)
self._cached_info["envs_dirs"] = self._cached_info["envs directories"]
self._cached_info["pkgs_dirs"] = self._cached_info["package cache"]

return self._cached_info

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def resolve(

addl_env = {
"CONDA_SUBDIR": architecture,
"CONDA_PKGS_DIRS": mamba_dir,
"CONDA_PKGS_DIRS": os.path.join(mamba_dir, "pkgs"),
"CONDA_ENVS_DIRS": os.path.join(mamba_dir, "envs"),
"CONDA_ROOT": self._conda.root_prefix,
"CONDA_UNSATISFIABLE_HINTS_CHECK_DEPTH": "0",
}
Expand Down
11 changes: 7 additions & 4 deletions tests/test_env_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,12 @@ def test_resolve_and_check_env(capsys, python_version, file_type, file_name, ali
env_dict["METAFLOW_CONDA_ENVS_DIRNAME"] = "testing/envs_%s" % conda_rand
env_dict["METAFLOW_CONDA_PACKAGES_DIRNAME"] = "testing/packages_%s" % conda_rand
env_dict["METAFLOW_CONDA_MAGIC_FILE_V2"] = "condav2-%s.cnd" % conda_rand
env_dict[
"METAFLOW_CONDA_LOCK_TIMEOUT"
] = "7200" # Increase to make sure we resolve everything
env_dict["METAFLOW_CONDA_LOCK_TIMEOUT"] = (
"7200" # Increase to make sure we resolve everything
)
env_dict["METAFLOW_DEBUG_CONDA"] = "1"
env_dict["CONDA_ENVS_DIRS"] = "/tmp/mfcondaenvs-%s" % conda_rand
env_dict["CONDA_PKGS_DIRS"] = "/tmp/mfcondapkgs-%s" % conda_rand
check_command = sh.Command("./check_env.sh").bake(["-e", sys.executable])
metaflow_command = sh.Command(sys.executable).bake(
["-m", "metaflow.cmd.main_cli"]
Expand Down Expand Up @@ -121,7 +123,6 @@ def test_resolve_and_check_env(capsys, python_version, file_type, file_name, ali
finally:
os.chdir(cwd)
# Runners run out of space so clear out all the packages and environments we created/downloaded
# This does not remove the conda/mamba/micromamba environments though
shutil.rmtree(
os.path.join(
os.environ["METAFLOW_DATASTORE_SYSROOT_LOCAL"],
Expand All @@ -140,3 +141,5 @@ def test_resolve_and_check_env(capsys, python_version, file_type, file_name, ali
),
ignore_errors=True,
)
shutil.rmtree(env_dict["CONDA_ENVS_DIRS"], ignore_errors=True)
shutil.rmtree(env_dict["CONDA_PKGS_DIRS"], ignore_errors=True)

0 comments on commit b843ec6

Please sign in to comment.