From fcd90f44b6c3bdbfcff00e8da37de1ceb0c8e7a9 Mon Sep 17 00:00:00 2001 From: Enrico Deusebio Date: Fri, 19 Jan 2024 20:22:06 +0100 Subject: [PATCH 1/3] [DPE-3344] Charmed Spark <> KubeFlow integration --- .github/workflows/integration.yaml | 10 +- .github/workflows/publish.yaml | 50 ++--- Makefile | 65 +++--- build/Dockerfile.jupyter | 9 +- files/jupyter/bin/jupyterlab-server.sh | 16 ++ files/jupyter/pebble/layers.yaml | 5 +- rockcraft.yaml | 4 +- .../integration/integration-tests-jupyter.sh | 190 ++++++++++++++++++ tests/integration/integration-tests.sh | 155 +++++++------- tests/integration/resources/jupyter.yaml | 34 ++++ 10 files changed, 403 insertions(+), 135 deletions(-) create mode 100755 files/jupyter/bin/jupyterlab-server.sh create mode 100755 tests/integration/integration-tests-jupyter.sh create mode 100644 tests/integration/resources/jupyter.yaml diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index 07e0fc04..08709a4d 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -52,9 +52,13 @@ jobs: # Import artifact into microk8s to be used in integration tests sudo make import TARGET=microk8s PREFIX=test- REPOSITORY=ghcr.io/canonical/ \ -o $(find .make_cache -name "*.tag") + + sg microk8s -c "make tests" + - name: Run tests (Jupyter) + run: | # Import artifact into docker with new tag - sudo make jupyter TARGET=docker REPOSITORY=ghcr.io/canonical/ PREFIX=test- \ + sudo make import FLAVOUR=jupyter TARGET=microk8s REPOSITORY=ghcr.io/canonical/ PREFIX=test- \ -o $(find .make_cache -name "*.tag") - - sg microk8s -c "make tests" + + sg microk8s -c "make tests FLAVOUR=jupyter" diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 4868dcec..2d961f79 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -105,31 +105,31 @@ jobs: docker push ${IMAGE_NAME}:${VERSION_TAG} fi - - name: Publish JupyterLab Image to Channel - run: | - - 
REPOSITORY="ghcr.io/canonical/" - RISK=${{ needs.release_checks.outputs.risk }} - TRACK=${{ needs.release_checks.outputs.track }} - if [ ! -z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi - - # Import artifact into docker with new tag - sudo make jupyter REPOSITORY=${REPOSITORY} TAG=${TAG}\ - -o $(find .make_cache -name "*.tag") - - IMAGE_NAME=$(make REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Jupyter\:" | cut -d ":" -f2 | xargs) - - echo "Publishing ${IMAGE_NAME}:${TAG}" - docker push ${IMAGE_NAME}:${TAG} - - if [[ "$RISK" == "edge" ]]; then - VERSION_TAG="${{ needs.release_checks.outputs.version }}-${{ needs.release_checks.outputs.base }}_edge" - - docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG} - - echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" - docker push ${IMAGE_NAME}:${VERSION_TAG} - fi + # - name: Publish JupyterLab Image to Channel + # run: | + # + # REPOSITORY="ghcr.io/canonical/" + # RISK=${{ needs.release_checks.outputs.risk }} + # TRACK=${{ needs.release_checks.outputs.track }} + # if [ ! 
-z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi + # + # # Import artifact into docker with new tag + # sudo make jupyter REPOSITORY=${REPOSITORY} TAG=${TAG}\ + # -o $(find .make_cache -name "*.tag") + # + # IMAGE_NAME=$(make REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Jupyter\:" | cut -d ":" -f2 | xargs) + # + # echo "Publishing ${IMAGE_NAME}:${TAG}" + # docker push ${IMAGE_NAME}:${TAG} + # + # if [[ "$RISK" == "edge" ]]; then + # VERSION_TAG="${{ needs.release_checks.outputs.version }}-${{ needs.release_checks.outputs.base }}_edge" + # + # docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG} + # + # echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" + # docker push ${IMAGE_NAME}:${VERSION_TAG} + # fi diff --git a/Makefile b/Makefile index a1348b27..fd447ea0 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,7 @@ REPOSITORY := PREFIX := TARGET := docker PLATFORM := amd64 +FLAVOUR := "base" # ====================== # INTERNAL VARIABLES @@ -35,13 +36,21 @@ BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar _ROCK_OCI=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).rock _TMP_OCI_NAME := stage-$(IMAGE_NAME) -_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG).tag +_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG) CHARMED_OCI_FULL_NAME=$(REPOSITORY)$(PREFIX)$(IMAGE_NAME) -CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tag +CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG) -CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab4 -CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tag +CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab +CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG) + +ifeq ($(FLAVOUR), jupyter) +NAME=$(CHARMED_OCI_JUPYTER) +FTAG=$(CHARMED_OCI_JUPYTER_TAG) +else +NAME=$(CHARMED_OCI_FULL_NAME) +FTAG=$(CHARMED_OCI_TAG) +endif help: @echo "---------------HELP-----------------" @@ -67,18 +76,13 @@ $(_ROCK_OCI): rockcraft.yaml @echo "=== 
Building Charmed Image ===" rockcraft pack -$(_TMP_OCI_TAG): $(_ROCK_OCI) +$(_TMP_OCI_TAG).tag: $(_ROCK_OCI) skopeo --insecure-policy \ copy \ oci-archive:"$(_ROCK_OCI)" \ docker-daemon:"$(_TMP_OCI_NAME):$(TAG)" if [ ! -d "$(_MAKE_DIR)/$(_TMP_OCI_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(_TMP_OCI_NAME)"; fi - touch $(_TMP_OCI_TAG) - -$(CHARMED_OCI_TAG): $(_TMP_OCI_TAG) build/Dockerfile - docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" -f build/Dockerfile . - if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi - touch $(CHARMED_OCI_TAG) + touch $(_TMP_OCI_TAG).tag $(K8S_TAG): @echo "=== Setting up and configure local Microk8s cluster ===" @@ -88,42 +92,47 @@ $(K8S_TAG): microk8s: $(K8S_TAG) -$(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag - docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar +$(CHARMED_OCI_TAG).tag: $(_TMP_OCI_TAG).tag build/Dockerfile + docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" -f build/Dockerfile . + if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi + touch $(CHARMED_OCI_TAG).tag -$(CHARMED_OCI_JUPYTER_TAG): $(CHARMED_OCI_TAG) build/Dockerfile.jupyter +$(CHARMED_OCI_JUPYTER_TAG).tag: $(CHARMED_OCI_TAG).tag build/Dockerfile.jupyter files/jupyter docker build -t "$(CHARMED_OCI_JUPYTER):$(TAG)" --build-arg BASE_IMAGE="$(CHARMED_OCI_FULL_NAME):$(TAG)" -f build/Dockerfile.jupyter . if [ ! 
-d "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)"; fi - touch $(CHARMED_OCI_JUPYTER_TAG) + touch $(CHARMED_OCI_JUPYTER_TAG).tag -$(BASE_NAME): $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar - @echo "=== Creating $(BASE_NAME) OCI archive ===" - cp $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar $(BASE_NAME) +$(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag + docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar -build: $(BASE_NAME) +$(BASE_NAME): $(FTAG).tar + @echo "=== Creating $(BASE_NAME) OCI archive (flavour: $(FLAVOUR)) ===" + cp $(FTAG).tar $(BASE_NAME) -jupyter: $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tar - @echo "=== Creating $(BASE_NAME) OCI jupyter archive ===" - cp $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tar $(IMAGE_NAME)-jupyter_$(VERSION)_$(PLATFORM).tar +build: $(BASE_NAME) ifeq ($(TARGET), docker) import: build - @echo "=== Importing image $(CHARMED_OCI_FULL_NAME):$(TAG) into docker ===" + @echo "=== Importing image $(NAME):$(TAG) into docker ===" $(eval IMAGE := $(shell docker load -i $(BASE_NAME))) - docker tag $(lastword $(IMAGE)) $(CHARMED_OCI_FULL_NAME):$(TAG) - if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi - touch $(CHARMED_OCI_TAG) + docker tag $(lastword $(IMAGE)) $(NAME):$(TAG) + if [ ! 
-d "$(_MAKE_DIR)/$(NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(NAME)"; fi + touch $(FTAG).tag endif ifeq ($(TARGET), microk8s) import: $(K8S_TAG) build - @echo "=== Importing image $(CHARMED_OCI_FULL_NAME):$(TAG) into Microk8s container registry ===" - microk8s ctr images import --base-name $(CHARMED_OCI_FULL_NAME):$(TAG) $(BASE_NAME) + @echo "=== Importing image $(NAME):$(TAG) into Microk8s container registry ===" + microk8s ctr images import --base-name $(NAME):$(TAG) $(BASE_NAME) endif tests: @echo "=== Running Integration Tests ===" +ifeq ($(FLAVOUR), jupyter) + /bin/bash ./tests/integration/integration-tests-jupyter.sh +else /bin/bash ./tests/integration/integration-tests.sh +endif clean: @echo "=== Cleaning environment ===" diff --git a/build/Dockerfile.jupyter b/build/Dockerfile.jupyter index b87d49e0..a32c7153 100644 --- a/build/Dockerfile.jupyter +++ b/build/Dockerfile.jupyter @@ -1,11 +1,16 @@ ARG BASE_IMAGE=base-charmed-spark:latest +ARG JUPYTERLAB_VERSION=4.0.11 FROM $BASE_IMAGE +ARG JUPYTERLAB_VERSION USER root -RUN rm /var/lib/pebble/default/layers/*.yaml -RUN python3 -m pip install "jupyterlab~=4.0" +RUN rm /var/lib/pebble/default/layers/*.yaml /opt/pebble/*.sh + +RUN python3 -m pip install "jupyterlab==$JUPYTERLAB_VERSION" COPY ./files/jupyter/pebble/layers.yaml /var/lib/pebble/default/layers/001-charmed-jupyter.yaml +COPY ./files/jupyter/bin/jupyterlab-server.sh /opt/pebble/jupyterlab-server.sh +RUN chown _daemon_:_daemon_ /opt/pebble/jupyterlab-server.sh USER _daemon_ diff --git a/files/jupyter/bin/jupyterlab-server.sh b/files/jupyter/bin/jupyterlab-server.sh new file mode 100755 index 00000000..05a960a1 --- /dev/null +++ b/files/jupyter/bin/jupyterlab-server.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +sleep 5 + +export PYSPARK_DRIVER_PYTHON=jupyter + +# This variable is injected when running a notebook from Kubeflow. +if [ ! 
-z "${NB_PREFIX}" ]; then + NB_PREFIX_ARG="--NotebookApp.base_url '${NB_PREFIX}'" +fi + +export PYSPARK_DRIVER_PYTHON_OPTS="lab --no-browser --port=8888 ${NB_PREFIX_ARG} --ip=0.0.0.0 --NotebookApp.token='' --notebook-dir=/var/lib/spark/notebook" + +echo "PYSPARK_DRIVER_PYTHON_OPTS: ${PYSPARK_DRIVER_PYTHON_OPTS}" + +spark-client.pyspark $* diff --git a/files/jupyter/pebble/layers.yaml b/files/jupyter/pebble/layers.yaml index d2fe4ba4..3ecb33a1 100644 --- a/files/jupyter/pebble/layers.yaml +++ b/files/jupyter/pebble/layers.yaml @@ -1,9 +1,6 @@ services: jupyter: - command: "spark-client.pyspark" + command: "/opt/pebble/jupyterlab-server.sh" summary: "This is the Spark-powered Jupyter service" override: replace startup: enabled - environment: - PYSPARK_DRIVER_PYTHON: jupyter - PYSPARK_DRIVER_PYTHON_OPTS: "lab --no-browser --port=8888 --ip=0.0.0.0 --NotebookApp.token='' --notebook-dir=/var/lib/spark/notebook" diff --git a/rockcraft.yaml b/rockcraft.yaml index 579f70d4..0bf4fcff 100644 --- a/rockcraft.yaml +++ b/rockcraft.yaml @@ -17,7 +17,7 @@ environment: PYTHONPATH: /opt/spark/python:/opt/spark8t/python/dist:/usr/lib/python3.10/site-packages PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark:/opt/spark/bin:/opt/spark/python/bin:/opt/spark-client/python/bin HOME: /var/lib/spark - KUBECONFIG: /var/lib/spark/.kube/config + # KUBECONFIG: /var/lib/spark/.kube/config SPARK_USER_DATA: /var/lib/spark SPARK_LOG_DIR: /var/log/spark @@ -133,7 +133,7 @@ parts: - python3-pip overlay-script: | mkdir -p $CRAFT_PART_INSTALL/opt/spark8t/python/dist - pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.2/spark8t-0.0.2-py3-none-any.whl + pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist git+https://github.com/canonical/spark-k8s-toolkit-py.git@dpe-3398-fix-service-account-permission rm usr/bin/pip* stage: - opt/spark8t/python/dist diff --git 
a/tests/integration/integration-tests-jupyter.sh b/tests/integration/integration-tests-jupyter.sh new file mode 100755 index 00000000..c6d8617e --- /dev/null +++ b/tests/integration/integration-tests-jupyter.sh @@ -0,0 +1,190 @@ +#!/bin/bash + +# The integration tests are designed to test that Spark Jobs can be submitted and/or shell processes are +# working properly with restricted permission of the service account starting the process. For this reason, +# in the tests we spawn two pods: +# +# 1. Admin pod, that is used to create and delete service accounts +# 2. User pod, that is used to start and execute Spark Jobs +# +# The Admin pod is created once at the beginning of the tests and it is used to manage Spark service accounts +# throughout the integration tests. On the other hand, the User pod(s) are created together with the creation +# of the Spark user (service accounts and secrets) at the beginning of each test, and they are destroyed at the +# end of the test. + +NAMESPACE=tests + +get_spark_version(){ + SPARK_VERSION=$(yq '(.version)' rockcraft.yaml) + echo "$SPARK_VERSION" +} + +spark_image(){ + echo "ghcr.io/canonical/test-charmed-spark-jupyterlab:$(get_spark_version)" +} + +setup_jupyter() { + echo "setup_jupyter() ${1} ${2}" + + USERNAME=$1 + NAMESPACE=$2 + + kubectl -n $NAMESPACE exec testpod-admin -- env UU="$USERNAME" NN="$NAMESPACE" \ + /bin/bash -c 'spark-client.service-account-registry create --username $UU --namespace $NN' + + IMAGE=$(spark_image) + echo $IMAGE + + # Create the pod with the Spark service account + sed -e "s%%${IMAGE}%g" \ + -e "s//${USERNAME}/g" \ + -e "s//${NAMESPACE}/g" \ + ./tests/integration/resources/jupyter.yaml | \ + kubectl -n tests apply -f - + + wait_for_pod charmed-spark-jupyter $NAMESPACE + + # WAIT FOR SERVER TO BE UP AND RUNNING + sleep 10 +} + +cleanup_user() { + EXIT_CODE=$1 + USERNAME=$2 + NAMESPACE=$3 + + kubectl -n $NAMESPACE delete pod charmed-spark-jupyter --wait=true + + kubectl -n $NAMESPACE exec 
testpod-admin -- env UU="$USERNAME" NN="$NAMESPACE" \ + /bin/bash -c 'spark-client.service-account-registry delete --username $UU --namespace $NN' + + OUTPUT=$(kubectl -n $NAMESPACE exec testpod-admin -- /bin/bash -c 'spark-client.service-account-registry list') + + EXISTS=$(echo -e "$OUTPUT" | grep "$NAMESPACE:$USERNAME" | wc -l) + + if [ "${EXISTS}" -ne "0" ]; then + exit 2 + fi + + if [ "${EXIT_CODE}" -ne "0" ]; then + kubectl delete ns $NAMESPACE + exit 1 + fi +} + +cleanup_user_success() { + echo "cleanup_user_success()......" + cleanup_user 0 spark $NAMESPACE +} + +cleanup_user_failure() { + echo "cleanup_user_failure()......" + cleanup_user 1 spark $NAMESPACE +} + +wait_for_pod() { + + POD=$1 + NAMESPACE=$2 + + SLEEP_TIME=1 + for i in {1..5} + do + pod_status=$(kubectl -n ${NAMESPACE} get pod ${POD} | awk '{ print $3 }' | tail -n 1) + echo $pod_status + if [[ "${pod_status}" == "Running" ]] + then + echo "testpod is Running now!" + break + elif [[ "${i}" -le "5" ]] + then + echo "Waiting for the pod to come online..." + sleep $SLEEP_TIME + else + echo "testpod did not come up. Test Failed!" 
+ exit 3 + fi + SLEEP_TIME=$(expr $SLEEP_TIME \* 2); + done +} + +setup_admin_test_pod() { + kubectl create ns $NAMESPACE + + echo "Creating admin test-pod" + + # Create a pod with admin service account + yq ea '.spec.containers[0].env[0].name = "KUBECONFIG" | .spec.containers[0].env[0].value = "/var/lib/spark/.kube/config" | .metadata.name = "testpod-admin"' \ + ./tests/integration/resources/testpod.yaml | \ + kubectl -n tests apply -f - + + wait_for_pod testpod-admin $NAMESPACE + + MY_KUBE_CONFIG=$(cat /home/${USER}/.kube/config) + + kubectl -n $NAMESPACE exec testpod-admin -- /bin/bash -c 'mkdir -p ~/.kube' + kubectl -n $NAMESPACE exec testpod-admin -- env KCONFIG="$MY_KUBE_CONFIG" /bin/bash -c 'echo "$KCONFIG" > ~/.kube/config' +} + +teardown_test_pod() { + kubectl -n $NAMESPACE delete pod testpod-admin + kubectl delete namespace $NAMESPACE +} + +get_status_code() { + URL=$1 + + STATUS_CODE=$(curl -X GET -o /dev/null --silent --head --write-out '%{http_code}\n' "${URL}") + + echo $STATUS_CODE +} + +test_connection(){ + SERVICE_IP=$(kubectl get svc jupyter-service -n $NAMESPACE -o yaml | yq .spec.clusterIP) + + echo "Jupyter service IP: ${SERVICE_IP}" + + STATUS_CODE=$(get_status_code "http://${SERVICE_IP}:8888/jupyter-test/lab") + + if [[ "${STATUS_CODE}" -ne "200" ]]; then + echo "200 exit code NOT returned" + exit 1 + fi + + STATUS_CODE=$(get_status_code "http://${SERVICE_IP}:8888/jupyter-test") + + if [[ "${STATUS_CODE}" -ne "302" ]]; then + echo "302 exit code NOT returned" + exit 1 + fi + + STATUS_CODE=$(get_status_code "http://${SERVICE_IP}:8888") + + if [[ "${STATUS_CODE}" -ne "404" ]]; then + echo "404 exit code NOT returned" + exit 1 + fi + +} + +echo -e "##################################" +echo -e "SETUP TEST POD" +echo -e "##################################" + +setup_admin_test_pod + +echo -e "##################################" +echo -e "START JUPYTER SERVICE" +echo -e "##################################" + +(setup_jupyter spark tests && 
test_connection && cleanup_user_success) || cleanup_user_failure + +echo -e "##################################" +echo -e "TEARDOWN ADMIN POD" +echo -e "##################################" + +teardown_test_pod + +echo -e "##################################" +echo -e "END OF THE TEST" +echo -e "##################################" diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index a9267613..ead36ffe 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -1,5 +1,20 @@ #!/bin/bash +# The integration tests are designed to test that Spark Jobs can be submitted and/or shell processes are +# working properly with restricted permission of the service account starting the process. For this reason, +# in the tests we spawn two pods: +# +# 1. Admin pod, that is used to create and delete service accounts +# 2. User pod, that is used to start and execute Spark Jobs +# +# The Admin pod is created once at the beginning of the tests and it is used to manage Spark service accounts +# throughout the integration tests. On the other hand, the User pod(s) are created together with the creation +# of the Spark user (service accounts and secrets) at the beginning of each test, and they are destroyed at the +# end of the test. 
+ + +NAMESPACE=tests + get_spark_version(){ SPARK_VERSION=$(yq '(.version)' rockcraft.yaml) echo "$SPARK_VERSION" @@ -27,39 +42,30 @@ validate_metrics() { fi } -test_restricted_account() { - - kubectl config set-context spark-context --namespace=tests --cluster=prod --user=spark - - run_example_job tests spark -} - setup_user() { - echo "setup_user() ${1} ${2} ${3}" + echo "setup_user() ${1} ${2}" USERNAME=$1 NAMESPACE=$2 - kubectl create namespace ${NAMESPACE} + kubectl -n $NAMESPACE exec testpod-admin -- env UU="$USERNAME" NN="$NAMESPACE" \ + /bin/bash -c 'spark-client.service-account-registry create --username $UU --namespace $NN' - if [ "$#" -gt 2 ] - then - CONTEXT=$3 - kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CX="$CONTEXT" \ - /bin/bash -c 'spark-client.service-account-registry create --context $CX --username $UU --namespace $NN' - else - kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" \ - /bin/bash -c 'spark-client.service-account-registry create --username $UU --namespace $NN' - fi + # Create the pod with the Spark service account + yq ea ".spec.serviceAccountName = \"${USERNAME}\"" \ + ./tests/integration/resources/testpod.yaml | \ + kubectl -n tests apply -f - -} + wait_for_pod testpod $NAMESPACE -setup_user_admin_context() { - setup_user spark tests + TEST_POD_TEMPLATE=$(cat tests/integration/resources/podTemplate.yaml) + + kubectl -n $NAMESPACE exec testpod -- /bin/bash -c 'cp -r /opt/spark/python /var/lib/spark/' + kubectl -n $NAMESPACE exec testpod -- env PTEMPLATE="$TEST_POD_TEMPLATE" /bin/bash -c 'echo "$PTEMPLATE" > /etc/spark/conf/podTemplate.yaml' } -setup_user_restricted_context() { - setup_user spark tests microk8s +setup_user_context() { + setup_user spark $NAMESPACE } cleanup_user() { @@ -67,10 +73,12 @@ cleanup_user() { USERNAME=$2 NAMESPACE=$3 - kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" \ + kubectl -n $NAMESPACE delete pod testpod --wait=true + + kubectl -n $NAMESPACE exec testpod-admin -- env 
UU="$USERNAME" NN="$NAMESPACE" \ /bin/bash -c 'spark-client.service-account-registry delete --username $UU --namespace $NN' - OUTPUT=$(kubectl exec testpod -- /bin/bash -c 'spark-client.service-account-registry list') + OUTPUT=$(kubectl -n $NAMESPACE exec testpod-admin -- /bin/bash -c 'spark-client.service-account-registry list') EXISTS=$(echo -e "$OUTPUT" | grep "$NAMESPACE:$USERNAME" | wc -l) @@ -78,8 +86,6 @@ cleanup_user() { exit 2 fi - kubectl delete namespace ${NAMESPACE} - if [ "${EXIT_CODE}" -ne "0" ]; then exit 1 fi @@ -87,27 +93,29 @@ cleanup_user() { cleanup_user_success() { echo "cleanup_user_success()......" - cleanup_user 0 spark tests + cleanup_user 0 spark $NAMESPACE } cleanup_user_failure() { echo "cleanup_user_failure()......" - cleanup_user 1 spark tests + cleanup_user 1 spark $NAMESPACE } -setup_test_pod() { - kubectl apply -f ./tests/integration/resources/testpod.yaml +wait_for_pod() { + + POD=$1 + NAMESPACE=$2 SLEEP_TIME=1 for i in {1..5} do - pod_status=$(kubectl get pod testpod | awk '{ print $3 }' | tail -n 1) + pod_status=$(kubectl -n ${NAMESPACE} get pod ${POD} | awk '{ print $3 }' | tail -n 1) echo $pod_status - if [ "${pod_status}" == "Running" ] + if [[ "${pod_status}" == "Running" ]] then echo "testpod is Running now!" break - elif [ "${i}" -le "5" ] + elif [[ "${i}" -le "5" ]] then echo "Waiting for the pod to come online..." 
sleep $SLEEP_TIME @@ -117,29 +125,41 @@ setup_test_pod() { fi SLEEP_TIME=$(expr $SLEEP_TIME \* 2); done +} + +setup_admin_test_pod() { + kubectl create ns $NAMESPACE + + echo "Creating admin test-pod" + + # Create a pod with admin service account + yq ea '.spec.containers[0].env[0].name = "KUBECONFIG" | .spec.containers[0].env[0].value = "/var/lib/spark/.kube/config" | .metadata.name = "testpod-admin"' \ + ./tests/integration/resources/testpod.yaml | \ + kubectl -n tests apply -f - + + wait_for_pod testpod-admin $NAMESPACE MY_KUBE_CONFIG=$(cat /home/${USER}/.kube/config) - TEST_POD_TEMPLATE=$(cat tests/integration/resources/podTemplate.yaml) - kubectl exec testpod -- /bin/bash -c 'mkdir -p ~/.kube' - kubectl exec testpod -- env KCONFIG="$MY_KUBE_CONFIG" /bin/bash -c 'echo "$KCONFIG" > ~/.kube/config' - kubectl exec testpod -- /bin/bash -c 'cat ~/.kube/config' - kubectl exec testpod -- /bin/bash -c 'cp -r /opt/spark/python /var/lib/spark/' - kubectl exec testpod -- env PTEMPLATE="$TEST_POD_TEMPLATE" /bin/bash -c 'echo "$PTEMPLATE" > /etc/spark/conf/podTemplate.yaml' + kubectl -n $NAMESPACE exec testpod-admin -- /bin/bash -c 'mkdir -p ~/.kube' + kubectl -n $NAMESPACE exec testpod-admin -- env KCONFIG="$MY_KUBE_CONFIG" /bin/bash -c 'echo "$KCONFIG" > ~/.kube/config' } teardown_test_pod() { - kubectl delete pod testpod + kubectl -n $NAMESPACE delete pod testpod + kubectl -n $NAMESPACE delete pod testpod-admin + + kubectl delete namespace $NAMESPACE } run_example_job_in_pod() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" - PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1) + PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods | grep driver | tail -n 1 | cut -d' ' -f1) NAMESPACE=$1 USERNAME=$2 - kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \ + kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" 
\ /bin/bash -c 'spark-client.spark-submit \ --username $UU --namespace $NN \ --conf spark.kubernetes.driver.request.cores=100m \ @@ -169,11 +189,11 @@ run_example_job_in_pod() { run_example_job_in_pod_with_pod_templates() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" - PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1) + PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods | grep driver | tail -n 1 | cut -d' ' -f1) NAMESPACE=$1 USERNAME=$2 - kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \ + kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \ /bin/bash -c 'spark-client.spark-submit \ --username $UU --namespace $NN \ --conf spark.kubernetes.driver.request.cores=100m \ @@ -215,7 +235,7 @@ run_example_job_in_pod_with_metrics() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" LOG_FILE="/tmp/server.log" SERVER_PORT=9091 - PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1) + PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods | grep driver | tail -n 1 | cut -d' ' -f1) # start simple http server python3 tests/integration/resources/test_web_server.py $SERVER_PORT > $LOG_FILE & HTTP_SERVER_PID=$! 
@@ -224,7 +244,7 @@ run_example_job_in_pod_with_metrics() { echo "IP: $IP_ADDRESS" NAMESPACE=$1 USERNAME=$2 - kubectl exec testpod -- env PORT="$SERVER_PORT" IP="$IP_ADDRESS" UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \ + kubectl -n $NAMESPACE exec testpod -- env PORT="$SERVER_PORT" IP="$IP_ADDRESS" UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \ /bin/bash -c 'spark-client.spark-submit \ --username $UU --namespace $NN \ --conf spark.kubernetes.driver.request.cores=100m \ @@ -262,11 +282,11 @@ run_example_job_in_pod_with_metrics() { run_example_job_with_error_in_pod() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" - PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1) + PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods | grep driver | tail -n 1 | cut -d' ' -f1) NAMESPACE=$1 USERNAME=$2 - kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \ + kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \ /bin/bash -c 'spark-client.spark-submit \ --username $UU --namespace $NN \ --conf spark.kubernetes.driver.request.cores=100m \ @@ -303,21 +323,21 @@ run_example_job_with_error_in_pod() { } test_example_job_in_pod_with_errors() { - run_example_job_with_error_in_pod tests spark + run_example_job_with_error_in_pod $NAMESPACE spark } test_example_job_in_pod_with_templates() { - run_example_job_in_pod_with_pod_templates tests spark + run_example_job_in_pod_with_pod_templates $NAMESPACE spark } test_example_job_in_pod() { - run_example_job_in_pod tests spark + run_example_job_in_pod $NAMESPACE spark } test_example_job_in_pod_with_metrics() { - run_example_job_in_pod_with_metrics tests spark + run_example_job_in_pod_with_metrics $NAMESPACE spark } @@ -334,7 +354,7 @@ run_spark_shell_in_pod() { # Sample output # "Pi is roughly 3.13956232343" 
- echo -e "$(kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$SPARK_SHELL_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.spark-shell --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > spark-shell.out + echo -e "$(kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$SPARK_SHELL_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.spark-shell --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > spark-shell.out pi=$(cat spark-shell.out | grep "^Pi is roughly" | rev | cut -d' ' -f1 | rev | cut -c 1-3) echo -e "Spark-shell Pi Job Output: \n ${pi}" @@ -343,7 +363,7 @@ run_spark_shell_in_pod() { } test_spark_shell_in_pod() { - run_spark_shell_in_pod tests spark + run_spark_shell_in_pod $NAMESPACE spark } run_pyspark_in_pod() { @@ -358,7 +378,7 @@ run_pyspark_in_pod() { # Sample output # "Pi is roughly 3.13956232343" - echo -e "$(kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$PYSPARK_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.pyspark --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > pyspark.out + echo -e "$(kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$PYSPARK_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.pyspark --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > pyspark.out cat pyspark.out pi=$(cat pyspark.out | grep "Pi is roughly" | tail -n 1 | rev | cut -d' ' -f1 | rev | cut -c 1-3) @@ -368,14 +388,7 @@ run_pyspark_in_pod() { } test_pyspark_in_pod() { - run_pyspark_in_pod tests spark -} - -test_restricted_account_in_pod() { - - kubectl config set-context spark-context --namespace=tests --cluster=prod --user=spark - - run_example_job_in_pod tests spark + run_pyspark_in_pod $NAMESPACE spark } cleanup_user_failure_in_pod() { @@ -387,44 +400,44 @@ echo -e 
"##################################" echo -e "SETUP TEST POD" echo -e "##################################" -setup_test_pod +setup_admin_test_pod echo -e "##################################" echo -e "RUN EXAMPLE JOB" echo -e "##################################" -(setup_user_admin_context && test_example_job_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_example_job_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "RUN SPARK SHELL IN POD" echo -e "##################################" -(setup_user_admin_context && test_spark_shell_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_spark_shell_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "RUN PYSPARK IN POD" echo -e "##################################" -(setup_user_admin_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "RUN EXAMPLE JOB WITH POD TEMPLATE" echo -e "##################################" -(setup_user_admin_context && test_example_job_in_pod_with_templates && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_example_job_in_pod_with_templates && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "########################################" echo -e "RUN EXAMPLE JOB WITH PROMETHEUS METRICS" echo -e "########################################" -(setup_user_admin_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "########################################" echo -e "RUN EXAMPLE JOB WITH ERRORS" 
echo -e "########################################" -(setup_user_admin_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "TEARDOWN TEST POD" echo -e "##################################" diff --git a/tests/integration/resources/jupyter.yaml b/tests/integration/resources/jupyter.yaml new file mode 100644 index 00000000..44785433 --- /dev/null +++ b/tests/integration/resources/jupyter.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: Pod +metadata: + name: charmed-spark-jupyter + labels: + app.kubernetes.io/name: charmed-spark-jupyter +spec: + serviceAccountName: "" + containers: + - image: + name: spark + # IT WOULD BE NICE IF THESE PARAMETERS COULD BE AUTO-INFERRED FROM THE + # SERVICE ACCOUNT USED TO RUN THE JOB + # (JIRA TICKET https://warthogs.atlassian.net/browse/DPE-3460) + args: ["--username", "", "--namespace", ""] + ports: + - containerPort: 8888 + env: + - name: NB_PREFIX + value: "jupyter-test" +--- +apiVersion: v1 +kind: Service +metadata: + name: jupyter-service + labels: + app: charmed-spark-jupyter +spec: + ports: + - port: 8888 + protocol: TCP + targetPort: 8888 + selector: + app.kubernetes.io/name: charmed-spark-jupyter \ No newline at end of file From 5ec1c76cc6e4b0a8583a4e17ae5d7a9a41a24ec2 Mon Sep 17 00:00:00 2001 From: Enrico Deusebio Date: Sat, 3 Feb 2024 00:12:57 +0100 Subject: [PATCH 2/3] improve makefile logic for building and tagging jupyter --- .github/workflows/build.yaml | 3 ++ .github/workflows/integration.yaml | 5 ++- .github/workflows/publish.yaml | 57 +++++++++++++++--------------- Makefile | 43 +++++++++++++--------- build/Dockerfile.jupyter | 2 +- rockcraft.yaml | 7 ++-- 6 files changed, 67 insertions(+), 50 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 82b26dc8..3e1ca71d 
100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -38,6 +38,9 @@ jobs: - name: Build image run: sudo make build + - name: Build image (Jupyter) + run: sudo make build FLAVOUR=jupyter + - name: Get Artifact Name id: artifact run: | diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index 08709a4d..c15f0c23 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -58,7 +58,10 @@ jobs: - name: Run tests (Jupyter) run: | # Import artifact into docker with new tag - sudo make import FLAVOUR=jupyter TARGET=microk8s REPOSITORY=ghcr.io/canonical/ PREFIX=test- \ + sudo make import \ + FLAVOUR=jupyter TARGET=microk8s \ + TAG=$(yq .version rockcraft.yaml) \ + REPOSITORY=ghcr.io/canonical/ PREFIX=test- \ -o $(find .make_cache -name "*.tag") sg microk8s -c "make tests FLAVOUR=jupyter" diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 2d961f79..870cd88e 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -87,7 +87,7 @@ jobs: TRACK=${{ needs.release_checks.outputs.track }} if [ ! -z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi - IMAGE_NAME=$(make REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs) + IMAGE_NAME=$(make help REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs) # Import artifact into docker with new tag sudo make import TARGET=docker REPOSITORY=${REPOSITORY} TAG=${TAG}\ @@ -105,31 +105,30 @@ jobs: docker push ${IMAGE_NAME}:${VERSION_TAG} fi - # - name: Publish JupyterLab Image to Channel - # run: | - # - # REPOSITORY="ghcr.io/canonical/" - # RISK=${{ needs.release_checks.outputs.risk }} - # TRACK=${{ needs.release_checks.outputs.track }} - # if [ ! 
-z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi - # - # # Import artifact into docker with new tag - # sudo make jupyter REPOSITORY=${REPOSITORY} TAG=${TAG}\ - # -o $(find .make_cache -name "*.tag") - # - # IMAGE_NAME=$(make REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Jupyter\:" | cut -d ":" -f2 | xargs) - # - # echo "Publishing ${IMAGE_NAME}:${TAG}" - # docker push ${IMAGE_NAME}:${TAG} - # - # if [[ "$RISK" == "edge" ]]; then - # VERSION_TAG="${{ needs.release_checks.outputs.version }}-${{ needs.release_checks.outputs.base }}_edge" - # - # docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG} - # - # echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" - # docker push ${IMAGE_NAME}:${VERSION_TAG} - # fi - - - + - name: Publish JupyterLab Image to Channel + run: | + + REPOSITORY="ghcr.io/canonical/" + RISK=${{ needs.release_checks.outputs.risk }} + TRACK=${{ needs.release_checks.outputs.track }} + if [ ! -z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi + + # Import artifact into docker with new tag + sudo make import TARGET=docker FLAVOUR=jupyter \ + REPOSITORY=${REPOSITORY} TAG=${TAG}\ + -o $(find .make_cache -name "*.tag") + + IMAGE_NAME=$(make help FLAVOUR=jupyter REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs) + + echo "Publishing ${IMAGE_NAME}:${TAG}" + docker push ${IMAGE_NAME}:${TAG} + + if [[ "$RISK" == "edge" ]]; then + VERSION_LONG=$(make help FLAVOUR=jupyter | grep "Tag\:" | cut -d ":" -f2 | xargs) + VERSION_TAG="${VERSION_LONG}-${{ needs.release_checks.outputs.base }}_edge" + + docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG} + + echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" + docker push ${IMAGE_NAME}:${VERSION_TAG} + fi diff --git a/Makefile b/Makefile index fd447ea0..31dc5e99 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ REPOSITORY := PREFIX := TARGET := docker PLATFORM := amd64 -FLAVOUR := "base" 
+FLAVOUR := "spark" # ====================== # INTERNAL VARIABLES @@ -27,41 +27,45 @@ $(shell mkdir -p $(_MAKE_DIR)) K8S_TAG := $(_MAKE_DIR)/.k8s_tag IMAGE_NAME := $(shell yq .name rockcraft.yaml) -VERSION := $(shell yq .version rockcraft.yaml) -TAG := $(VERSION) +VERSION := $(shell yq .version rockcraft.yaml) -BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar +VERSION_FLAVOUR=$(shell grep "version:$(FLAVOUR)" rockcraft.yaml | sed "s/^#//" | cut -d ":" -f3) _ROCK_OCI=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).rock -_TMP_OCI_NAME := stage-$(IMAGE_NAME) -_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG) - CHARMED_OCI_FULL_NAME=$(REPOSITORY)$(PREFIX)$(IMAGE_NAME) -CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG) - CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab -CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG) ifeq ($(FLAVOUR), jupyter) NAME=$(CHARMED_OCI_JUPYTER) -FTAG=$(CHARMED_OCI_JUPYTER_TAG) +TAG=$(VERSION)-$(VERSION_FLAVOUR) +BASE_NAME=$(IMAGE_NAME)-jupyterlab_$(VERSION)_$(PLATFORM).tar else NAME=$(CHARMED_OCI_FULL_NAME) -FTAG=$(CHARMED_OCI_TAG) +TAG=$(VERSION) +BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar endif +FTAG=$(_MAKE_DIR)/$(NAME)/$(TAG) + +CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG) +CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG) + +_TMP_OCI_NAME := stage-$(IMAGE_NAME) +_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG) + help: @echo "---------------HELP-----------------" @echo "Name: $(IMAGE_NAME)" @echo "Version: $(VERSION)" @echo "Platform: $(PLATFORM)" @echo " " - @echo "Artifact: $(BASE_NAME)" + @echo "Flavour: $(FLAVOUR)" @echo " " - @echo "Image: $(CHARMED_OCI_FULL_NAME)" - @echo "Jupyter: $(CHARMED_OCI_JUPYTER)" + @echo "Image: $(NAME)" + @echo "Tag: $(TAG)" + @echo "Artifact: $(BASE_NAME)" @echo " " @echo "Type 'make' followed by one of these keywords:" @echo " " @@ -93,12 +97,17 @@ $(K8S_TAG): microk8s: $(K8S_TAG) $(CHARMED_OCI_TAG).tag: 
$(_TMP_OCI_TAG).tag build/Dockerfile - docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" -f build/Dockerfile . + docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" \ + --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" \ + -f build/Dockerfile . if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi touch $(CHARMED_OCI_TAG).tag $(CHARMED_OCI_JUPYTER_TAG).tag: $(CHARMED_OCI_TAG).tag build/Dockerfile.jupyter files/jupyter - docker build -t "$(CHARMED_OCI_JUPYTER):$(TAG)" --build-arg BASE_IMAGE="$(CHARMED_OCI_FULL_NAME):$(TAG)" -f build/Dockerfile.jupyter . + docker build -t "$(CHARMED_OCI_JUPYTER):$(TAG)" \ + --build-arg BASE_IMAGE="$(CHARMED_OCI_FULL_NAME):$(TAG)" \ + --build-arg JUPYTERLAB_VERSION="$(VERSION_FLAVOUR)" \ + -f build/Dockerfile.jupyter . if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)"; fi touch $(CHARMED_OCI_JUPYTER_TAG).tag diff --git a/build/Dockerfile.jupyter b/build/Dockerfile.jupyter index a32c7153..e73abd4e 100644 --- a/build/Dockerfile.jupyter +++ b/build/Dockerfile.jupyter @@ -1,5 +1,5 @@ ARG BASE_IMAGE=base-charmed-spark:latest -ARG JUPYTERLAB_VERSION=4.0.11 +ARG JUPYTERLAB_VERSION=4.0.0 FROM $BASE_IMAGE ARG JUPYTERLAB_VERSION diff --git a/rockcraft.yaml b/rockcraft.yaml index 0bf4fcff..3a80677e 100644 --- a/rockcraft.yaml +++ b/rockcraft.yaml @@ -4,7 +4,11 @@ description: Spark ROCK license: Apache-2.0 version: "3.4.2" +# version:spark:3.4.2 +# version:jupyter:4.0.11 + base: ubuntu@22.04 + platforms: amd64: @@ -17,7 +21,6 @@ environment: PYTHONPATH: /opt/spark/python:/opt/spark8t/python/dist:/usr/lib/python3.10/site-packages PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark:/opt/spark/bin:/opt/spark/python/bin:/opt/spark-client/python/bin HOME: /var/lib/spark - # KUBECONFIG: /var/lib/spark/.kube/config SPARK_USER_DATA: /var/lib/spark SPARK_LOG_DIR: 
/var/log/spark @@ -133,7 +136,7 @@ parts: - python3-pip overlay-script: | mkdir -p $CRAFT_PART_INSTALL/opt/spark8t/python/dist - pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist git+https://github.com/canonical/spark-k8s-toolkit-py.git@dpe-3398-fix-service-account-permission + pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.3/spark8t-0.0.3-py3-none-any.whl rm usr/bin/pip* stage: - opt/spark8t/python/dist From 0161c66c258624f3cd3c4fbe49dbd06c3b46480f Mon Sep 17 00:00:00 2001 From: Enrico Deusebio Date: Sat, 3 Feb 2024 00:16:18 +0100 Subject: [PATCH 3/3] fix readme file --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 922426f1..960bb501 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ https://github.com/canonical/charmed-spark-rock/pkgs/container/charmed-spark The image can be used straight away when running Spark on Kubernetes by setting the appropriate configuration property: ```shell -spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge +spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4-22.04_edge ``` ### Using `spark8t` CLI @@ -49,7 +49,7 @@ spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4.2-22.04_edg The `spark8t` CLI tooling interacts with the K8s API to create, manage and delete K8s resources representing the Spark service account. Make sure that the kube config file is correctly loaded into the container, e.g. 
```shell -docker run --name chamed-spark -v /path/to/kube/config:/var/lib/spark/.kube/config ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge +docker run --name charmed-spark -v /path/to/kube/config:/var/lib/spark/.kube/config ghcr.io/canonical/charmed-spark:3.4-22.04_edge ``` Note that this will start the image and a long-living service, allowing you to exec commands: @@ -59,7 +59,7 @@ docker exec charmed-spark spark-client.service-account-registry list If you prefer to run one-shot commands, without having the Charmed Spark image running, use `\; exec` prefix, e.g. ```shell -docker run -v ... ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge \; exec spark-client.service-account-registry list +docker run -v ... ghcr.io/canonical/charmed-spark:3.4-22.04_edge \; exec spark-client.service-account-registry list ``` For more information about spark-client API and `spark8t` tooling, please refer to [here](https://discourse.charmhub.io/t/spark-client-snap-how-to-manage-spark-accounts/8959). @@ -71,7 +71,7 @@ Charmed Spark Rock Image is delivered with Pebble already included in order to m #### Starting History Server ```shell -docker run ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge \; start history-server +docker run ghcr.io/canonical/charmed-spark:3.4-22.04_edge \; start history-server ``` ### Running Jupyter Lab @@ -86,7 +86,7 @@ To start a JupyterLab server using the `charmed-spark-jupyter` image, use docker run \ -v /path/to/kube/config:/var/lib/spark/.kube/config \ -p :8888 - ghcr.io/canonical/charmed-spark-jupyter:3.4.1-22.04_edge \ + ghcr.io/canonical/charmed-spark-jupyter:3.4-22.04_edge \ --username --namespace ```