Skip to content

Commit

Permalink
Merge pull request #11 from ebmdatalab/docker
Browse files Browse the repository at this point in the history
Dockerise the CLI
  • Loading branch information
ghickman authored Nov 8, 2023
2 parents a10da2d + ff06b1a commit 0c6ab02
Show file tree
Hide file tree
Showing 12 changed files with 390 additions and 17 deletions.
14 changes: 14 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Paths excluded from the docker build context.

# version control metadata
.git/

# editor backup / lock / autosave files
**/*~
**/.#*
**/*#
# coverage report output and python bytecode caches
**/htmlcov
**/__pycache__
**/*.pyc
# local, per-developer environment files and virtualenvs
**/.python-version
**/.env
**/.venv
**/venv
# coverage data file and packaging metadata
**/.coverage
**/*.egg-info/
94 changes: 94 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
---
name: CI

# Workflow-level environment: available to every job and step below.
env:
  # name of the locally built image
  IMAGE_NAME: "metrics"
  # fully qualified name the image is tagged and pushed as
  PUBLIC_IMAGE_NAME: "ghcr.io/ebmdatalab/metrics"
  # registry host passed to `docker login`
  REGISTRY: "ghcr.io"
  # socket path that ssh-agent is bound to in the deploy job
  SSH_AUTH_SOCK: "/tmp/agent.sock"
on:
push:

Expand Down Expand Up @@ -30,12 +36,100 @@ jobs:
run: |
just test
# Lint docker/Dockerfile with hadolint.
lint-dockerfile:
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4
    # pinned to a full commit SHA; the trailing comment records the release tag
    - uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0
      with:
        dockerfile: "docker/Dockerfile"

# Build both image flavours, smoke test the prod one and hand it to the
# deploy job as a workflow artifact.
docker-test-and-build:
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4
    - uses: opensafely-core/setup-action@v1
      with:
        install-just: true

    - name: Build docker image for both prod and dev
      run: |
        just docker-build prod
        just docker-build dev

    - name: Run smoke test on prod
      run: |
        just docker-run prod python -m metrics

    - name: Save docker image
      run: |
        docker save "$IMAGE_NAME" | gzip > /tmp/metrics.tar.gz

    # upload-artifact and download-artifact must stay on the same major
    # version: artifacts written by one major cannot be read by another.
    - name: Upload docker image
      uses: actions/upload-artifact@v4
      with:
        name: metrics-image
        path: /tmp/metrics.tar.gz

# Publish the image built above and deploy it. Only runs on main, after all
# other jobs have passed.
deploy:
  needs:
    - check
    - test
    - docker-test-and-build
    - lint-dockerfile

  runs-on: ubuntu-latest

  permissions:
    contents: read
    # needed to push the image to ghcr.io with GITHUB_TOKEN
    packages: write

  if: github.ref == 'refs/heads/main'

  # all deploys share one concurrency group
  concurrency: deploy-production

  steps:
    - uses: actions/checkout@v4
    - uses: opensafely-core/setup-action@v1
      with:
        install-just: true

    - name: Download docker image
      uses: actions/download-artifact@v4
      with:
        name: metrics-image
        path: /tmp/image

    - name: Import docker image
      run: gunzip -c /tmp/image/metrics.tar.gz | docker load

    - name: Test image we imported from previous job works
      run: |
        SKIP_BUILD=1 just docker-run prod python -m metrics

    # Secrets and context values are passed through the step environment
    # rather than interpolated into the script text, so the shell never
    # parses their contents as code.
    - name: Publish image
      env:
        REGISTRY_USER: ${{ github.actor }}
        REGISTRY_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        echo "$REGISTRY_TOKEN" | docker login "$REGISTRY" -u "$REGISTRY_USER" --password-stdin
        docker tag "$IMAGE_NAME" "$PUBLIC_IMAGE_NAME":latest
        docker push "$PUBLIC_IMAGE_NAME":latest

    - name: Deploy image
      env:
        DEPLOY_SSH_KEY: ${{ secrets.DOKKU3_DEPLOY_SSH_KEY }}
      run: |
        ssh-agent -a "$SSH_AUTH_SOCK" > /dev/null
        ssh-add - <<< "$DEPLOY_SSH_KEY"
        # deploy by digest so the host pulls exactly the image pushed above
        SHA=$(docker inspect --format='{{index .RepoDigests 0}}' "$PUBLIC_IMAGE_NAME":latest)
        # NOTE(review): host key verification is disabled here; consider
        # pinning the deploy host's key in a known_hosts file instead.
        ssh -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" dokku@dokku3.ebmdatalab.net git:from-image metrics "$SHA"
required-checks:
if: always()

needs:
- check
- test
- docker-test-and-build
- lint-dockerfile

runs-on: Ubuntu-latest

Expand Down
53 changes: 52 additions & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,63 @@ services:
# TimescaleDB instance for local development.
timescaledb:
  image: timescale/timescaledb-ha:pg14-latest
  environment:
    # database name and credentials match the TIMESCALEDB_URL in dotenv-sample;
    # POSTGRES_PASSWORD must appear only once (duplicate keys are invalid YAML
    # and most parsers silently keep the last value)
    POSTGRES_DB: metrics
    POSTGRES_PASSWORD: pass
    POSTGRES_USER: user
  ports:
    # host 5433 -> container 5432; quoted so it is always parsed as a string
    - "5433:5432"
  volumes:
    - timescaledb:/home/postgres/pgdata/data

metrics-prod:
  # image name, used both locally and publicly
  image: metrics
  build:
    dockerfile: docker/Dockerfile
    # the prod stage in the Dockerfile
    target: metrics-prod
    # should speed up the build in CI, where we have a cold cache
    cache_from:
      - ghcr.io/opensafely-core/base-docker
      - ghcr.io/ebmdatalab/metrics
    args:
      # this makes the image work for later cache_from: usage
      - BUILDKIT_INLINE_CACHE=1
      # no value given here: these env vars should be supplied by just
      - BUILD_DATE
      - GITREF
  # use docker's built-in init process
  init: true
  # placeholder values for every setting listed in dotenv-sample
  environment:
    - GITHUB_TOKEN=dummy
    - SLACK_SIGNING_SECRET=dummy
    - SLACK_TECH_SUPPORT_CHANNEL_ID=dummy
    - SLACK_TOKEN=dummy
    - TIMESCALEDB_URL=dummy

# main development service
metrics-dev:
  extends:
    service: metrics-prod
  image: metrics-dev
  container_name: metrics-dev
  # run as an explicit uid/gid so that files the container writes to mounted
  # volumes are owned by the host user, for convenience
  user: "${DEV_USERID:-1000}:${DEV_GROUPID:-1000}"
  build:
    # the dev stage in the Dockerfile
    target: metrics-dev
    # pass the uid/gid through as build args
    args:
      - DEV_USERID=${DEV_USERID:-1000}
      - DEV_GROUPID=${DEV_GROUPID:-1000}
  volumes:
    # mount our current code
    - .:/app
  # NOTE(review): the `environment` entries inherited from metrics-prod may
  # take precedence over values loaded from env_file - confirm that .env
  # settings actually reach the container.
  env_file:
    - .env

volumes:
postgres:
grafana:
Expand Down
160 changes: 160 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# syntax=docker/dockerfile:1.2
#################################################
#
# Base image: base-docker plus python3.11 and our runtime system packages.
#
# DL3007 is ignored because, by design, we always want to build on the most
# recent base-docker image.
#
# hadolint ignore=DL3007
FROM ghcr.io/opensafely-core/base-docker:22.04 AS base-python

# The apt cache lives on the host (see the cache mounts below), so remove the
# default docker clean-up hook that would wipe it on every apt install.
RUN rm -f /etc/apt/apt.conf.d/docker-clean

# Register the deadsnakes ppa so a fully working python3.11 can be installed.
# see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e
# The signing key is fetched with apt's own space efficient download helper
# from the base image.
RUN --mount=type=cache,target=/var/cache/apt \
    echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/deadsnakes-ppa.list && \
    /usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc

# Install the runtime system dependencies listed in docker/dependencies.txt
COPY docker/dependencies.txt /tmp/dependencies.txt
RUN --mount=type=cache,target=/var/cache/apt \
    /root/docker-apt-install.sh /tmp/dependencies.txt


##################################################
#
# Build image
#
# Ok, now we have local base image with python and our system dependencies on.
# We'll use this as the base for our builder image, where we'll build and
# install any python packages needed.
#
# We use a separate, disposable build image to avoid carrying the build
# dependencies into the production image.
FROM base-python AS builder

# System packages that are only needed to build python dependencies
COPY docker/build-dependencies.txt /tmp/build-dependencies.txt
RUN --mount=type=cache,target=/var/cache/apt \
    /root/docker-apt-install.sh /tmp/build-dependencies.txt

# Everything goes into a venv, isolated from the system python libraries
RUN python3.11 -m venv /opt/venv
ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH"

COPY requirements.prod.txt /tmp/requirements.prod.txt

# The cache mount means a) /root/.cache is not in the image, and b) it is
# preserved between local docker builds, for faster dev rebuilds.
#
# DL3042: using cache mount instead
# DL3013: we always want latest pip/setuptools/wheel, at least for now
# hadolint ignore=DL3042,DL3013
RUN --mount=type=cache,target=/root/.cache \
    /opt/venv/bin/python -m pip install -U pip setuptools wheel && \
    /opt/venv/bin/python -m pip install --no-deps --require-hashes --requirement /tmp/requirements.prod.txt


##################################################
#
# Base project image
#
# Ok, we've built everything we need, build an image with all dependencies but
# no code.
#
# Not including the code at this stage has two benefits:
#
# 1) this image only rebuilds when the handful of files needed to build metrics-base
# change. If we do `COPY . /app` now, this will rebuild when *any* file changes.
#
# 2) Ensures we *have* to mount the volume for dev image, as there's no embedded
# version of the code. Otherwise, we could end up accidentally using the
# version of the code included when the prod image was built.
FROM base-python AS metrics-base

# Non-root user (with a home directory and matching group) to run the app as
RUN useradd --create-home --user-group metrics

# Bring the venv across from the builder stage. The files are owned by
# root:root but are readable by all.
COPY --from=builder /opt/venv /opt/venv

# Make the venv the default python environment
ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH"

RUN mkdir /app
WORKDIR /app

# CMD rather than ENTRYPOINT, so it is easy to run different things from
# the cli
CMD ["/opt/venv/bin/python", "-m", "metrics"]

# This may not be necessary, but it probably doesn't hurt
ENV PYTHONPATH=/app

# From here on, run as the unprivileged user
USER metrics


##################################################
#
# Production image
#
# Copy code in, add proper metadata
FROM metrics-base AS metrics-prod

# Adjust this metadata to fit project. Note that the base-docker image does set
# some basic metadata.
LABEL org.opencontainers.image.title="metrics" \
      org.opencontainers.image.description="Bennett Institute internal metrics transformation tool" \
      org.opencontainers.image.source="https://github.com/ebmdatalab/metrics"

# copy application code
COPY metrics /app/metrics

# finally, tag with build information. These will change regularly, therefore
# we do them as the last action so earlier layers stay cacheable.
ARG BUILD_DATE=unknown
LABEL org.opencontainers.image.created=$BUILD_DATE
ARG GITREF=unknown
LABEL org.opencontainers.image.revision=$GITREF



##################################################
#
# Dev image
#
# Now we build a dev image from our metrics-base image. This is basically
# installing dev dependencies and matching local UID/GID. It is expected that
# the current code will be mounted in /app when this is run
#
FROM metrics-base AS metrics-dev

# switch back to root to install the dev requirements and edit the user
USER root

# install development requirements
COPY requirements.dev.txt /tmp/requirements.dev.txt
# using cache mount instead
# hadolint ignore=DL3042
RUN --mount=type=cache,target=/root/.cache \
    python -m pip install --requirement /tmp/requirements.dev.txt

# in dev, ensure metrics uid matches host user id
ARG DEV_USERID=1000
ARG DEV_GROUPID=1000
# build args are quoted so an empty or malformed value fails in usermod /
# groupmod itself instead of being word-split by the shell
RUN usermod -u "$DEV_USERID" metrics
# Modify the metrics group only if the group id does not already exist. We run
# dev containers with an explicit group id anyway, so file permissions on the
# host will be correct, and we do not actually rely on named metrics group
# access to anything.
RUN grep -q ":$DEV_GROUPID:" /etc/group || groupmod -g "$DEV_GROUPID" metrics


# switch back to metrics
USER metrics
4 changes: 4 additions & 0 deletions docker/build-dependencies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# list ubuntu packages needed to build dependencies, one per line
build-essential
libpq-dev
python3.11-dev
7 changes: 7 additions & 0 deletions docker/dependencies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# list ubuntu packages needed in production, one per line
git
postgresql-client
python3.11
python3.11-distutils
python3.11-venv
tzdata
17 changes: 17 additions & 0 deletions dotenv-sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# The DSN for accessing the timescaledb database
TIMESCALEDB_URL=postgres://user:pass@localhost:5433/metrics

# API token for pulling data from Github
GITHUB_TOKEN=

# Slack API access credentials.
# The slack app used for this will need the following OAuth scopes:
# * channels:history
# * groups:history
# * im:history
# * npim:history
SLACK_SIGNING_SECRET=
SLACK_TOKEN=

# Slack channel ID for tech-support-channel
SLACK_TECH_SUPPORT_CHANNEL_ID=C0270Q313H7
Loading

0 comments on commit 0c6ab02

Please sign in to comment.