Skip to content

Commit

Permalink
Bake QGreenland tasks into Docker image
Browse files Browse the repository at this point in the history
* Remove mamba from docker image
    * Support micromamba-specific environment activation behavior
* Stop mounting code into docker container
    * Add a dev compose file which mounts code
  • Loading branch information
mfisher87 committed Jul 11, 2023
1 parent 48c65d7 commit c133d1d
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 27 deletions.
33 changes: 16 additions & 17 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,41 +1,40 @@
# This docker image simply runs luigi's centralized scheduler with the needed
# dependencies installed into a conda environment. All QGreenland tasks are
# baked into the image and available at `TASKS_DIR`. For development, the task
# code may instead be mounted over that directory using volumes (see
# `docker-compose.dev.yml`).

FROM axiom/docker-luigi:3.0.3-alpine AS luigi
# This build stage only exists to grab the luigi run script. Luigi dependency
# itself is specified in `environment.yml`
# TODO: Why is this necessary? Does `luigid` not come along with the conda package?

FROM mambaorg/micromamba:1.4.2 AS micromamba
COPY --from=luigi /bin/run /usr/local/bin/luigid
USER root

ENV TASKS_MOUNT_DIR=/luigi/tasks/qgreenland

# `git` is required for analyzing the current version
# `libgl1-mesa-glx` is required for pyqgis
# `make` is required for building sphinx docs
# `texlive-latex-extra` is required for pdf doc builds
# TODO: Remove `make`
# The apt package lists are removed in the same layer that fetched them so they
# do not persist in the image.
RUN apt-get update && apt-get install -y \
        git \
        libgl1-mesa-glx \
        make \
        texlive-latex-extra \
    && rm -rf /var/lib/apt/lists/*

# Enable our code (which runs git commands) to run as a different user than the
# current user on the host machine (who will be the owner of the mounted git
# repository)
RUN git config --global --add safe.directory "${TASKS_MOUNT_DIR}"

ENV TASKS_DIR=/luigi/tasks/qgreenland
WORKDIR "${TASKS_DIR}"

# Our code needs to run git commands (for example, to determine a full version
# string), but if tasks repo is mounted from the host machine, the owner of
# the repo won't match the container user. A "safe directory" allows Git to
# tolerate this user mismatch.
RUN git config --global --add safe.directory "${TASKS_DIR}"

# Set up the Luigi task environment (`base`) and the command environment.
# Only the environment specifications are copied before installing, so this
# expensive layer is rebuilt when the specifications change rather than on every
# edit to task code. The package cache is cleaned in the same layer that
# populated it; cleaning in a later `RUN` would not shrink the image.
COPY --chown=$MAMBA_USER:$MAMBA_USER conda-lock.yml environment.cmd.yml ./
RUN micromamba install -y -n base -f conda-lock.yml \
    && micromamba create -y -f environment.cmd.yml \
    && micromamba clean --all --yes

# Bake the QGreenland tasks into the image.
COPY --chown=$MAMBA_USER:$MAMBA_USER . .
Expand All @@ -47,7 +46,7 @@ WORKDIR /luigi
# gets populated. Additionally, /luigi/tasks is where we expect python code to
# be mounted.
# TODO: With modern micromamba, can we clean this up?
# Make the task code at `TASKS_DIR` and the QGIS Python directories importable.
ENV PYTHONPATH="${TASKS_DIR}:/opt/conda/share/qgis/python/plugins:/opt/conda/share/qgis/python"
# Put the conda environment's executables first on the search path.
ENV PATH="/opt/conda/bin:${PATH}"

CMD ["/usr/local/bin/luigid"]
10 changes: 10 additions & 0 deletions docker-compose.dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Development compose file: builds the image locally and mounts the working
# tree (read-only) at the path where the tasks live inside the container.
version: "3.4"

services:

  luigi:
    image: "nsidc/luigi:dev"
    build: "."
    volumes:
      # Code
      - "./:/luigi/tasks/qgreenland:ro"
7 changes: 4 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ services:
# Luigi runs as a service and must have jobs submitted to it
# (`scripts/run.sh`)
luigi:
image: "nsidc/luigi:dev"
build: "."
image: "nsidc/luigi:local"
container_name: "luigi"
volumes:
# Code
Expand All @@ -23,10 +22,12 @@ services:
# locations temporarily, and we haven't re-tested yet.
- "${DATA_WORKING_STORAGE_TMP:-./data/working-storage}:/working-storage:rw"
environment:
- "LUIGI_CONFIG_PARSER=toml"
- "ENVIRONMENT"
- "EARTHDATA_USERNAME"
- "EARTHDATA_PASSWORD"
- "QGREENLAND_ENV_MANAGER=micromamba"
# Configure Luigi to find its config in luigi/conf/luigi.toml
- "LUIGI_CONFIG_PARSER=toml"
# Set `export PYTHONBREAKPOINT=ipdb.set_trace` to use `ipdb` by default
# instead of `pdb`.
- "PYTHONBREAKPOINT"
Expand Down
1 change: 1 addition & 0 deletions qgreenland/constants/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

PROJECT = "QGreenland"
ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
ENV_MANAGER = os.environ.get("QGREENLAND_ENV_MANAGER", "conda")

# In seconds. See
# https://2.python-requests.org/en/master/user/quickstart/#timeouts
Expand Down
29 changes: 22 additions & 7 deletions qgreenland/util/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections.abc import Sequence

import qgreenland.exceptions as exc
from qgreenland.constants.project import ENV_MANAGER
from qgreenland.util.runtime_vars import EvalStr

logger = logging.getLogger("luigi-interface")
Expand All @@ -16,19 +17,33 @@ def interpolate_args(
return [arg.eval(**kwargs) for arg in args]


def run_qgr_command(args: list[str]) -> None:
    """Run a command in the `qgreenland-cmd` environment.

    The environment activation and the command itself are chained with `&&`
    into a single argument list, which is handed to `run_cmd`.

    Args:
        args: The command to run, one element per argument.
    """
    conda_env_name = "qgreenland-cmd"
    # With conda or mamba, `. activate myenv` works as expected, but with micromamba, we
    # need something a little different.
    if ENV_MANAGER == "micromamba":
        cmd = [
            "eval",
            '"$(micromamba shell hook -s posix)"',
            "&&",
            "micromamba",
            "activate",
            conda_env_name,
            "&&",
            *args,
        ]
    else:
        cmd = [".", "activate", conda_env_name, "&&", *args]

    run_cmd(cmd)


def run_cmd(args: list[str]):
def run_cmd(args: list[str]) -> subprocess.CompletedProcess:
"""Run a command and log it."""
# Hack. The activation of a conda environment does not work as a list.
# `subprocess.run(..., shell=True, ...)` enables running commands from
# strings.
# Hack. The activation of a conda environment does not work without `shell=True`.
cmd_str = " ".join(str(arg) for arg in args)

logger.info("Running command:")
Expand Down

0 comments on commit c133d1d

Please sign in to comment.