From b843ec60e71364c2a7a1c043675280094997ed0c Mon Sep 17 00:00:00 2001 From: Romain Cledat Date: Tue, 13 Aug 2024 13:49:03 -0700 Subject: [PATCH] Remove installed environments and packages to try to keep saving more space --- .github/workflows/test.yml | 8 ++++ .pre-commit-config.yaml | 7 +++- .../netflix_ext/plugins/conda/conda.py | 42 ++++++++----------- .../plugins/conda/resolvers/conda_resolver.py | 3 +- tests/test_env_create.py | 11 +++-- 5 files changed, 40 insertions(+), 31 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cf1e5da..c3131fa 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,6 +6,14 @@ on: - main jobs: + + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 + test: runs-on: ${{ matrix.os }} strategy: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 00ce501..f8ed033 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,3 @@ -exclude: /vendor/ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.1.0 @@ -6,7 +5,11 @@ repos: - id: check-yaml - id: check-json - repo: https://github.com/ambv/black - rev: 22.8.0 + rev: 24.4.2 hooks: - id: black language_version: python3 + exclude: "^/metaflow_extensions/netflix_ext/vendor/" + args: [-t, py34, -t, py35, -t, py36, -t, py37, -t, py38, -t, py39, -t, py310, -t, py311, -t, py312] + + diff --git a/metaflow_extensions/netflix_ext/plugins/conda/conda.py b/metaflow_extensions/netflix_ext/plugins/conda/conda.py index 7f009ca..d28abfb 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/conda.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/conda.py @@ -274,7 +274,7 @@ def call_conda( or binary == "micromamba" ) ): - args.extend(["-r", 
self._info["root_prefix"], "--json"]) + args.extend(["-r", self.root_prefix, "--json"]) debug.conda_exec("Conda call: %s" % str([self._bins[binary]] + args)) return cast( bytes, @@ -2060,9 +2060,13 @@ def _check_match(dir_name: str) -> Optional[EnvID]: return None if self._conda_executable_type == "micromamba" or CONDA_LOCAL_PATH is not None: + # Micromamba does not record created environments so we look around for them + # in the root env directory. We also do this if we had a local installation + # because we don't want to look around at other environments created outside + # of that local installation # For micromamba OR if we are using a specific conda installation # (so with CONDA_LOCAL_PATH), only search there - env_dir = os.path.join(self._info["root_prefix"], "envs") + env_dir = self._root_env_dir with CondaLock(self.echo, self._env_lock_file(os.path.join(env_dir, "_"))): # Grab a lock *once* on the parent directory so we pick anyname for # the "directory". @@ -2072,6 +2076,7 @@ def _check_match(dir_name: str) -> Optional[EnvID]: if possible_env_id: ret.setdefault(possible_env_id, []).append(entry.path) else: + # Else we iterate over all the environments that the installation knows about envs = self._info["envs"] # type: List[str] for env in envs: with CondaLock(self.echo, self._env_lock_file(env)): @@ -2189,19 +2194,20 @@ def _remote_env_fetch_alias( @property def _package_dirs(self) -> List[str]: info = self._info - if self._conda_executable_type == "micromamba": - pkg_dir = os.path.join(info["root_prefix"], "pkgs") - if not os.path.exists(pkg_dir): - os.makedirs(pkg_dir) - return [pkg_dir] + # We rely on the first directory existing. This should be a fairly + # easy check. 
+ if not os.path.exists(info["pkgs_dirs"][0]): + os.makedirs(info["pkgs_dirs"][0]) return info["pkgs_dirs"] @property def _root_env_dir(self) -> str: info = self._info - if self._conda_executable_type == "micromamba": - return os.path.join(info["root_prefix"], "envs") - return info["envs_dirs"][0] + # We rely on the first directory existing. This should be a fairly + # easy check. + if not os.path.exists(info["envs_dirs"][0]): + os.makedirs(info["envs_dirs"][0]) + return info["envs_dirs"][0] @property def _info(self) -> Dict[str, Any]: @@ -2213,22 +2219,10 @@ def _info_no_lock(self) -> Dict[str, Any]: if self._cached_info is None: self._cached_info = json.loads(self.call_conda(["info", "--json"])) - # Micromamba is annoying because if there are multiple installations of it - # executing the binary doesn't necessarily point us to the root directory - # we are in so we kind of look for it heuristically if self._conda_executable_type == "micromamba": - # Best info if we don't have something else self._cached_info["root_prefix"] = self._cached_info["base environment"] - cur_dir = os.path.dirname(self._bins[self._conda_executable_type]) - while True: - if os.path.isdir(os.path.join(cur_dir, "pkgs")) and os.path.isdir( - os.path.join(cur_dir, "envs") - ): - self._cached_info["root_prefix"] = cur_dir - break - if cur_dir == "/": - break - cur_dir = os.path.dirname(cur_dir) + self._cached_info["envs_dirs"] = self._cached_info["envs directories"] + self._cached_info["pkgs_dirs"] = self._cached_info["package cache"] return self._cached_info diff --git a/metaflow_extensions/netflix_ext/plugins/conda/resolvers/conda_resolver.py b/metaflow_extensions/netflix_ext/plugins/conda/resolvers/conda_resolver.py index 60516f0..077164e 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/resolvers/conda_resolver.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/resolvers/conda_resolver.py @@ -63,7 +63,8 @@ def resolve( addl_env = { 
"CONDA_SUBDIR": architecture, - "CONDA_PKGS_DIRS": mamba_dir, + "CONDA_PKGS_DIRS": os.path.join(mamba_dir, "pkgs"), + "CONDA_ENVS_DIRS": os.path.join(mamba_dir, "envs"), "CONDA_ROOT": self._conda.root_prefix, "CONDA_UNSATISFIABLE_HINTS_CHECK_DEPTH": "0", } diff --git a/tests/test_env_create.py b/tests/test_env_create.py index ec44958..3be9f2e 100644 --- a/tests/test_env_create.py +++ b/tests/test_env_create.py @@ -61,10 +61,12 @@ def test_resolve_and_check_env(capsys, python_version, file_type, file_name, ali env_dict["METAFLOW_CONDA_ENVS_DIRNAME"] = "testing/envs_%s" % conda_rand env_dict["METAFLOW_CONDA_PACKAGES_DIRNAME"] = "testing/packages_%s" % conda_rand env_dict["METAFLOW_CONDA_MAGIC_FILE_V2"] = "condav2-%s.cnd" % conda_rand - env_dict[ - "METAFLOW_CONDA_LOCK_TIMEOUT" - ] = "7200" # Increase to make sure we resolve everything + env_dict["METAFLOW_CONDA_LOCK_TIMEOUT"] = ( + "7200" # Increase to make sure we resolve everything + ) env_dict["METAFLOW_DEBUG_CONDA"] = "1" + env_dict["CONDA_ENVS_DIRS"] = "/tmp/mfcondaenvs-%s" % conda_rand + env_dict["CONDA_PKGS_DIRS"] = "/tmp/mfcondapkgs-%s" % conda_rand check_command = sh.Command("./check_env.sh").bake(["-e", sys.executable]) metaflow_command = sh.Command(sys.executable).bake( ["-m", "metaflow.cmd.main_cli"] @@ -121,7 +123,6 @@ def test_resolve_and_check_env(capsys, python_version, file_type, file_name, ali finally: os.chdir(cwd) # Runners run out of space so clear out all the packages and environments we created/downloaded - # This does not remove the conda/mamba/micromamba environments though shutil.rmtree( os.path.join( os.environ["METAFLOW_DATASTORE_SYSROOT_LOCAL"], @@ -140,3 +141,5 @@ def test_resolve_and_check_env(capsys, python_version, file_type, file_name, ali ), ignore_errors=True, ) + shutil.rmtree(env_dict["CONDA_ENVS_DIRS"], ignore_errors=True) + shutil.rmtree(env_dict["CONDA_PKGS_DIRS"], ignore_errors=True)