Bake QGreenland tasks into Docker image

* Remove mamba from docker image
* Support micromamba-specific environment activation behavior
* Stop mounting code into docker container
* Add a dev compose file which mounts code
nsidc · Jul 11, 2023 · c133d1d · c133d1d
1 parent 48c65d7
commit c133d1d
Show file tree

Hide file tree

Showing 5 changed files with 53 additions and 27 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,41 +1,40 @@
 # This docker image simply runs luigi's centralized scheduler with the needed
-# dependencies installed into a conda environment. It's expected that task code
-# will always be mounted in using volumes!
+# dependencies installed into a conda environment. All QGreenland tasks are
+# available at `TASKS_DIR`.
 
 FROM axiom/docker-luigi:3.0.3-alpine AS luigi
 # This build stage only exists to grab the luigi run script. Luigi dependency
 # itself is specified in `environment.yml`
+# TODO: Why is this necessary? Does `luigid` not come along with the conda package?
 
 FROM mambaorg/micromamba:1.4.2 AS micromamba
 COPY --from=luigi /bin/run /usr/local/bin/luigid
 USER root
 
-ENV TASKS_MOUNT_DIR=/luigi/tasks/qgreenland
-
 # `libgl1-mesa-glx` is required for pyqgis
 # `git` is required for analyzing the current version
 # `make` is required for building sphinx docs
 # `texlive-latex-extra` is required for pdf doc builds
+# TODO: Remove `make`
 RUN apt-get update && apt-get install -y \
   git \
   make \
   libgl1-mesa-glx \
   texlive-latex-extra
 
-# Enable our code (which runs git commands) to run as a different user than the
-# current user on the host machine (who will be the owner of the mounted git
-# repository)
-RUN git config --global --add safe.directory "${TASKS_MOUNT_DIR}"
-
-# TODO: Why are we copying these files to /tmp?
-COPY --chown=$MAMBA_USER:$MAMBA_USER conda-lock.yml /tmp/conda-lock.yml
-RUN micromamba install -y -n base -f /tmp/conda-lock.yml
+ENV TASKS_DIR=/luigi/tasks/qgreenland
+WORKDIR "${TASKS_DIR}"
+COPY --chown=$MAMBA_USER:$MAMBA_USER . .
 
-# Install mamba. It is missing after installing `conda-lock.yml`
-RUN micromamba install -y -c conda-forge -n base conda mamba~=1.4.2
+# Our code needs to run git commands (for example, to determine a full version
+# string), but if the tasks repo is mounted from the host machine, the owner of
+# the repo won't match the container user. A "safe directory" allows Git to
+# tolerate this user mismatch.
+RUN git config --global --add safe.directory "${TASKS_DIR}"
 
-COPY --chown=$MAMBA_USER:$MAMBA_USER environment.cmd.yml /tmp/environment.cmd.yml
-RUN micromamba create -y -f /tmp/environment.cmd.yml
+# Set up the Luigi task environment and the command environment
+RUN micromamba install -y -n base -f conda-lock.yml
+RUN micromamba create -y -f environment.cmd.yml
 
 # Cleanup
 RUN micromamba clean --all --yes
@@ -47,7 +46,7 @@ WORKDIR /luigi
 # gets populated. Additionally, /luigi/tasks is where we expect python code to
 # be mounted.
 # TODO: With modern micromamba, can we clean this up?
-ENV PYTHONPATH "${TASKS_MOUNT_DIR}:/opt/conda/share/qgis/python/plugins:/opt/conda/share/qgis/python"
+ENV PYTHONPATH "${TASKS_DIR}:/opt/conda/share/qgis/python/plugins:/opt/conda/share/qgis/python"
 ENV PATH "/opt/conda/bin:${PATH}"
 
 CMD ["/usr/local/bin/luigid"]
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
@@ -0,0 +1,10 @@
+version: "3.4"
+
+services:
+
+  luigi:
+    image: "nsidc/luigi:dev"
+    build: "."
+    volumes:
+      # Code
+      - "./:/luigi/tasks/qgreenland:ro"
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -5,8 +5,7 @@ services:
   # Luigi runs as a service and must have jobs submitted to it
   # (`scripts/run.sh`)
   luigi:
-    image: "nsidc/luigi:dev"
-    build: "."
+    image: "nsidc/luigi:local"
     container_name: "luigi"
     volumes:
       # Code
@@ -23,10 +22,12 @@ services:
       # locations temporarily, and we haven't re-tested yet.
       - "${DATA_WORKING_STORAGE_TMP:-./data/working-storage}:/working-storage:rw"
     environment:
-      - "LUIGI_CONFIG_PARSER=toml"
       - "ENVIRONMENT"
       - "EARTHDATA_USERNAME"
       - "EARTHDATA_PASSWORD"
+      - "QGREENLAND_ENV_MANAGER=micromamba"
+      # Configure Luigi to find its config in luigi/conf/luigi.toml
+      - "LUIGI_CONFIG_PARSER=toml"
       # Set `export PYTHONBREAKPOINT=ipdb.set_trace` to use `ipdb` by default
       # instead of `pdb`.
       - "PYTHONBREAKPOINT"

diff --git a/qgreenland/constants/project.py b/qgreenland/constants/project.py
@@ -2,6 +2,7 @@
 
 PROJECT = "QGreenland"
 ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
+ENV_MANAGER = os.environ.get("QGREENLAND_ENV_MANAGER", "conda")
 
 # In seconds. See
 # https://2.python-requests.org/en/master/user/quickstart/#timeouts

diff --git a/qgreenland/util/command.py b/qgreenland/util/command.py
@@ -3,6 +3,7 @@
 from collections.abc import Sequence
 
 import qgreenland.exceptions as exc
+from qgreenland.constants.project import ENV_MANAGER
 from qgreenland.util.runtime_vars import EvalStr
 
 logger = logging.getLogger("luigi-interface")
@@ -16,19 +17,33 @@ def interpolate_args(
     return [arg.eval(**kwargs) for arg in args]
 
 
-def run_qgr_command(args: list[str]):
+def run_qgr_command(args: list[str]) -> None:
     """Run a command in the `qgreenland-cmd` environment."""
-    cmd = [".", "activate", "qgreenland-cmd", "&&"]
-    cmd.extend(args)
+    conda_env_name = "qgreenland-cmd"
+    # With conda or mamba, `. activate myenv` works as expected, but micromamba needs
+    # its shell hook evaluated first, followed by `micromamba activate myenv`.
+    if ENV_MANAGER == "micromamba":
+        cmd = [
+            "eval",
+            '"$(micromamba shell hook -s posix)"',
+            "&&",
+            "micromamba",
+            "activate",
+            conda_env_name,
+            "&&",
+            *args,
+        ]
+
+    else:
+        cmd = [".", "activate", conda_env_name, "&&", *args]
 
     run_cmd(cmd)
+    return
 
 
-def run_cmd(args: list[str]):
+def run_cmd(args: list[str]) -> subprocess.CompletedProcess:
     """Run a command and log it."""
-    # Hack. The activation of a conda environment does not work as a list.
-    # `subprocess.run(..., shell=True, ...)` enables running commands from
-    # strings.
+    # Hack. The activation of a conda environment does not work without `shell=True`.
     cmd_str = " ".join(str(arg) for arg in args)
 
     logger.info("Running command:")