Skip to content

Commit

Permalink
[DPE-3344] Charmed Spark <> KubeFlow integration (#68)
Browse files Browse the repository at this point in the history
  • Loading branch information
deusebio authored Feb 5, 2024
1 parent 53c2f88 commit 0a9bc66
Show file tree
Hide file tree
Showing 12 changed files with 427 additions and 142 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ jobs:
- name: Build image
run: sudo make build

- name: Build image (Jupyter)
run: sudo make build FLAVOUR=jupyter

- name: Get Artifact Name
id: artifact
run: |
Expand Down
13 changes: 10 additions & 3 deletions .github/workflows/integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,16 @@ jobs:
# Import artifact into microk8s to be used in integration tests
sudo make import TARGET=microk8s PREFIX=test- REPOSITORY=ghcr.io/canonical/ \
-o $(find .make_cache -name "*.tag")
sg microk8s -c "make tests"
- name: Run tests (Jupyter)
run: |
# Import artifact into docker with new tag
sudo make jupyter TARGET=docker REPOSITORY=ghcr.io/canonical/ PREFIX=test- \
sudo make import \
FLAVOUR=jupyter TARGET=microk8s \
TAG=$(yq .version rockcraft.yaml) \
REPOSITORY=ghcr.io/canonical/ PREFIX=test- \
-o $(find .make_cache -name "*.tag")
sg microk8s -c "make tests"
sg microk8s -c "make tests FLAVOUR=jupyter"
35 changes: 17 additions & 18 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ jobs:
TRACK=${{ needs.release_checks.outputs.track }}
if [ ! -z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi
IMAGE_NAME=$(make REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs)
IMAGE_NAME=$(make help REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs)
# Import artifact into docker with new tag
sudo make import TARGET=docker REPOSITORY=${REPOSITORY} TAG=${TAG}\
Expand All @@ -107,29 +107,28 @@ jobs:

- name: Publish JupyterLab Image to Channel
run: |
REPOSITORY="ghcr.io/canonical/"
RISK=${{ needs.release_checks.outputs.risk }}
TRACK=${{ needs.release_checks.outputs.track }}
if [ ! -z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi

# Import artifact into docker with new tag
sudo make jupyter REPOSITORY=${REPOSITORY} TAG=${TAG}\
sudo make import TARGET=docker FLAVOUR=jupyter \
REPOSITORY=${REPOSITORY} TAG=${TAG}\
-o $(find .make_cache -name "*.tag")
IMAGE_NAME=$(make REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Jupyter\:" | cut -d ":" -f2 | xargs)
echo "Publishing ${IMAGE_NAME}:${TAG}"
IMAGE_NAME=$(make help FLAVOUR=jupyter REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs)
echo "Publishing ${IMAGE_NAME}:${TAG}"
docker push ${IMAGE_NAME}:${TAG}
if [[ "$RISK" == "edge" ]]; then
VERSION_TAG="${{ needs.release_checks.outputs.version }}-${{ needs.release_checks.outputs.base }}_edge"
docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG}
echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}"

if [[ "$RISK" == "edge" ]]; then
VERSION_LONG=$(make help FLAVOUR=jupyter | grep "Tag\:" | cut -d ":" -f2 | xargs)
VERSION_TAG="${VERSION_LONG}-${{ needs.release_checks.outputs.base }}_edge"

docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG}

echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}"
docker push ${IMAGE_NAME}:${VERSION_TAG}
fi
92 changes: 55 additions & 37 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ REPOSITORY :=
# Optional prefix inserted between REPOSITORY and the image name
# (e.g. "test-" for images used only by integration tests).
PREFIX :=
# Where `make import` loads the image: "docker" or "microk8s".
TARGET := docker
# Platform component of the generated artifact file names.
PLATFORM := amd64
# Image flavour to build/import/test: "spark" (default) or "jupyter".
# Make does not strip quotes, so the value is written bare: a quoted default
# would store the quote characters in the variable and break any Make-level
# comparison against the word spark.
FLAVOUR := spark

# ======================
# INTERNAL VARIABLES
Expand All @@ -26,33 +27,45 @@ $(shell mkdir -p $(_MAKE_DIR))
K8S_TAG := $(_MAKE_DIR)/.k8s_tag

IMAGE_NAME := $(shell yq .name rockcraft.yaml)
VERSION := $(shell yq .version rockcraft.yaml)

TAG := $(VERSION)
VERSION := $(shell yq .version rockcraft.yaml)

BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar
VERSION_FLAVOUR=$(shell grep "version:$(FLAVOUR)" rockcraft.yaml | sed "s/^#//" | cut -d ":" -f3)

_ROCK_OCI=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).rock

_TMP_OCI_NAME := stage-$(IMAGE_NAME)
_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG).tag

CHARMED_OCI_FULL_NAME=$(REPOSITORY)$(PREFIX)$(IMAGE_NAME)
CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tag
CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab

ifeq ($(FLAVOUR), jupyter)
NAME=$(CHARMED_OCI_JUPYTER)
TAG=$(VERSION)-$(VERSION_FLAVOUR)
BASE_NAME=$(IMAGE_NAME)-jupyterlab_$(VERSION)_$(PLATFORM).tar
else
NAME=$(CHARMED_OCI_FULL_NAME)
TAG=$(VERSION)
BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar
endif

FTAG=$(_MAKE_DIR)/$(NAME)/$(TAG)

CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG)
CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG)

CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab4
CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tag
_TMP_OCI_NAME := stage-$(IMAGE_NAME)
_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG)

help:
@echo "---------------HELP-----------------"
@echo "Name: $(IMAGE_NAME)"
@echo "Version: $(VERSION)"
@echo "Platform: $(PLATFORM)"
@echo " "
@echo "Artifact: $(BASE_NAME)"
@echo "Flavour: $(FLAVOUR)"
@echo " "
@echo "Image: $(CHARMED_OCI_FULL_NAME)"
@echo "Jupyter: $(CHARMED_OCI_JUPYTER)"
@echo "Image: $(NAME)"
@echo "Tag: $(TAG)"
@echo "Artifact: $(BASE_NAME)"
@echo " "
@echo "Type 'make' followed by one of these keywords:"
@echo " "
Expand All @@ -67,18 +80,13 @@ $(_ROCK_OCI): rockcraft.yaml
@echo "=== Building Charmed Image ==="
rockcraft pack

$(_TMP_OCI_TAG): $(_ROCK_OCI)
$(_TMP_OCI_TAG).tag: $(_ROCK_OCI)
skopeo --insecure-policy \
copy \
oci-archive:"$(_ROCK_OCI)" \
docker-daemon:"$(_TMP_OCI_NAME):$(TAG)"
if [ ! -d "$(_MAKE_DIR)/$(_TMP_OCI_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(_TMP_OCI_NAME)"; fi
touch $(_TMP_OCI_TAG)

$(CHARMED_OCI_TAG): $(_TMP_OCI_TAG) build/Dockerfile
docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" -f build/Dockerfile .
if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi
touch $(CHARMED_OCI_TAG)
touch $(_TMP_OCI_TAG).tag

$(K8S_TAG):
@echo "=== Setting up and configure local Microk8s cluster ==="
Expand All @@ -88,42 +96,52 @@ $(K8S_TAG):

microk8s: $(K8S_TAG)

$(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag
docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar
$(CHARMED_OCI_TAG).tag: $(_TMP_OCI_TAG).tag build/Dockerfile
docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" \
--build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" \
-f build/Dockerfile .
if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi
touch $(CHARMED_OCI_TAG).tag

$(CHARMED_OCI_JUPYTER_TAG): $(CHARMED_OCI_TAG) build/Dockerfile.jupyter
docker build -t "$(CHARMED_OCI_JUPYTER):$(TAG)" --build-arg BASE_IMAGE="$(CHARMED_OCI_FULL_NAME):$(TAG)" -f build/Dockerfile.jupyter .
$(CHARMED_OCI_JUPYTER_TAG).tag: $(CHARMED_OCI_TAG).tag build/Dockerfile.jupyter files/jupyter
docker build -t "$(CHARMED_OCI_JUPYTER):$(TAG)" \
--build-arg BASE_IMAGE="$(CHARMED_OCI_FULL_NAME):$(TAG)" \
--build-arg JUPYTERLAB_VERSION="$(VERSION_FLAVOUR)" \
-f build/Dockerfile.jupyter .
if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)"; fi
touch $(CHARMED_OCI_JUPYTER_TAG)
touch $(CHARMED_OCI_JUPYTER_TAG).tag

$(BASE_NAME): $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar
@echo "=== Creating $(BASE_NAME) OCI archive ==="
cp $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar $(BASE_NAME)
# Dump a tagged docker image to a tarball inside the make cache.
# The stem ($*) is the image name; the matching .tag stamp file is the
# prerequisite, and the archive is written to the rule's own target ($@).
$(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag
	docker save $*:$(TAG) > $@

build: $(BASE_NAME)
# Copy the flavour-specific image tarball to its artifact file name.
# $< is the cached archive ($(FTAG).tar); $@ is the artifact ($(BASE_NAME)).
$(BASE_NAME): $(FTAG).tar
	@echo "=== Creating $(BASE_NAME) OCI archive (flavour: $(FLAVOUR)) ==="
	cp $< $@

jupyter: $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tar
@echo "=== Creating $(BASE_NAME) OCI jupyter archive ==="
cp $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tar $(IMAGE_NAME)-jupyter_$(VERSION)_$(PLATFORM).tar
build: $(BASE_NAME)

ifeq ($(TARGET), docker)
import: build
@echo "=== Importing image $(CHARMED_OCI_FULL_NAME):$(TAG) into docker ==="
@echo "=== Importing image $(NAME):$(TAG) into docker ==="
$(eval IMAGE := $(shell docker load -i $(BASE_NAME)))
docker tag $(lastword $(IMAGE)) $(CHARMED_OCI_FULL_NAME):$(TAG)
if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi
touch $(CHARMED_OCI_TAG)
docker tag $(lastword $(IMAGE)) $(NAME):$(TAG)
if [ ! -d "$(_MAKE_DIR)/$(NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(NAME)"; fi
touch $(FTAG).tag
endif

ifeq ($(TARGET), microk8s)
import: $(K8S_TAG) build
@echo "=== Importing image $(CHARMED_OCI_FULL_NAME):$(TAG) into Microk8s container registry ==="
microk8s ctr images import --base-name $(CHARMED_OCI_FULL_NAME):$(TAG) $(BASE_NAME)
@echo "=== Importing image $(NAME):$(TAG) into Microk8s container registry ==="
microk8s ctr images import --base-name $(NAME):$(TAG) $(BASE_NAME)
endif

# Run the integration test suite for the selected FLAVOUR
# (jupyter -> integration-tests-jupyter.sh, anything else -> integration-tests.sh).
#
# The recipe runs scripts under ./tests/, i.e. a directory named like this
# target exists. Without .PHONY, Make would consider `tests` up to date and
# skip the recipe. Declaring it again is harmless if it is already listed in
# another .PHONY line of this file.
.PHONY: tests
tests:
	@echo "=== Running Integration Tests ==="
ifeq ($(FLAVOUR), jupyter)
	/bin/bash ./tests/integration/integration-tests-jupyter.sh
else
	/bin/bash ./tests/integration/integration-tests.sh
endif

clean:
@echo "=== Cleaning environment ==="
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ https://github.com/canonical/charmed-spark-rock/pkgs/container/charmed-spark
The image can be used straight away when running Spark on Kubernetes by setting the appropriate configuration property:

```shell
spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge
spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4-22.04_edge
```

### Using `spark8t` CLI

The `spark8t` CLI tooling interacts with the K8s API to create, manage and delete K8s resources representing the Spark service account.
Make sure that the kube config file is correctly loaded into the container, e.g.
```shell
docker run --name charmed-spark -v /path/to/kube/config:/var/lib/spark/.kube/config ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge
docker run --name charmed-spark -v /path/to/kube/config:/var/lib/spark/.kube/config ghcr.io/canonical/charmed-spark:3.4-22.04_edge
```

Note that this will start the image and a long-living service, allowing you to exec commands:
Expand All @@ -59,7 +59,7 @@ docker exec charmed-spark spark-client.service-account-registry list

If you prefer to run one-shot commands, without having the Charmed Spark image running, use `\; exec` prefix, e.g.
```shell
docker run -v ... ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge \; exec spark-client.service-account-registry list
docker run -v ... ghcr.io/canonical/charmed-spark:3.4-22.04_edge \; exec spark-client.service-account-registry list
```

For more information about spark-client API and `spark8t` tooling, please refer to [here](https://discourse.charmhub.io/t/spark-client-snap-how-to-manage-spark-accounts/8959).
Expand All @@ -71,7 +71,7 @@ Charmed Spark Rock Image is delivered with Pebble already included in order to m
#### Starting History Server

```shell
docker run ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge \; start history-server
docker run ghcr.io/canonical/charmed-spark:3.4-22.04_edge \; start history-server
```

### Running Jupyter Lab
Expand All @@ -86,7 +86,7 @@ To start a JupyterLab server using the `charmed-spark-jupyter` image, use
docker run \
-v /path/to/kube/config:/var/lib/spark/.kube/config \
-p <port>:8888 \
ghcr.io/canonical/charmed-spark-jupyter:3.4.1-22.04_edge \
ghcr.io/canonical/charmed-spark-jupyter:3.4-22.04_edge \
--username <spark-service-account> --namespace <spark-namespace>
```

Expand Down
9 changes: 7 additions & 2 deletions build/Dockerfile.jupyter
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
ARG BASE_IMAGE=base-charmed-spark:latest
ARG JUPYTERLAB_VERSION=4.0.0
FROM $BASE_IMAGE

ARG JUPYTERLAB_VERSION
USER root
RUN rm /var/lib/pebble/default/layers/*.yaml

RUN python3 -m pip install "jupyterlab~=4.0"
RUN rm /var/lib/pebble/default/layers/*.yaml /opt/pebble/*.sh

RUN python3 -m pip install "jupyterlab==$JUPYTERLAB_VERSION"
COPY ./files/jupyter/pebble/layers.yaml /var/lib/pebble/default/layers/001-charmed-jupyter.yaml
COPY ./files/jupyter/bin/jupyterlab-server.sh /opt/pebble/jupyterlab-server.sh
RUN chown _daemon_:_daemon_ /opt/pebble/jupyterlab-server.sh

USER _daemon_

Expand Down
16 changes: 16 additions & 0 deletions files/jupyter/bin/jupyterlab-server.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Launcher for the Spark-powered JupyterLab service.
#
# Configures PySpark to use `jupyter lab` as its driver front-end and then
# starts `spark-client.pyspark`, forwarding every argument given to this
# script unchanged.

# NOTE(review): the reason for this start-up delay is not documented in this
# file -- confirm what it is waiting for before changing or removing it.
sleep 5

export PYSPARK_DRIVER_PYTHON=jupyter

# Start from an empty value so a stale NB_PREFIX_ARG inherited from the
# environment can never leak into the JupyterLab options.
NB_PREFIX_ARG=""

# This variable is injected when running a notebook from Kubeflow.
if [ -n "${NB_PREFIX}" ]; then
    NB_PREFIX_ARG="--NotebookApp.base_url '${NB_PREFIX}'"
fi

export PYSPARK_DRIVER_PYTHON_OPTS="lab --no-browser --port=8888 ${NB_PREFIX_ARG} --ip=0.0.0.0 --NotebookApp.token='' --notebook-dir=/var/lib/spark/notebook"

echo "PYSPARK_DRIVER_PYTHON_OPTS: ${PYSPARK_DRIVER_PYTHON_OPTS}"

# "$@" (not $*) keeps each forwarded argument as a single word, so values
# containing spaces or glob characters reach spark-client.pyspark intact.
spark-client.pyspark "$@"
5 changes: 1 addition & 4 deletions files/jupyter/pebble/layers.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
services:
jupyter:
command: "spark-client.pyspark"
command: "/opt/pebble/jupyterlab-server.sh"
summary: "This is the Spark-powered Jupyter service"
override: replace
startup: enabled
environment:
PYSPARK_DRIVER_PYTHON: jupyter
PYSPARK_DRIVER_PYTHON_OPTS: "lab --no-browser --port=8888 --ip=0.0.0.0 --NotebookApp.token='' --notebook-dir=/var/lib/spark/notebook"
7 changes: 5 additions & 2 deletions rockcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ description: Spark ROCK
license: Apache-2.0

version: "3.4.2"
# version:spark:3.4.2
# version:jupyter:4.0.11

base: ubuntu@22.04

platforms:
amd64:

Expand All @@ -17,7 +21,6 @@ environment:
PYTHONPATH: /opt/spark/python:/opt/spark8t/python/dist:/usr/lib/python3.10/site-packages
PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark:/opt/spark/bin:/opt/spark/python/bin:/opt/spark-client/python/bin
HOME: /var/lib/spark
KUBECONFIG: /var/lib/spark/.kube/config
SPARK_USER_DATA: /var/lib/spark
SPARK_LOG_DIR: /var/log/spark

Expand Down Expand Up @@ -133,7 +136,7 @@ parts:
- python3-pip
overlay-script: |
mkdir -p $CRAFT_PART_INSTALL/opt/spark8t/python/dist
pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.2/spark8t-0.0.2-py3-none-any.whl
pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.3/spark8t-0.0.3-py3-none-any.whl
rm usr/bin/pip*
stage:
- opt/spark8t/python/dist
Expand Down
Loading

0 comments on commit 0a9bc66

Please sign in to comment.