Skip to content
This repository has been archived by the owner on Jul 20, 2023. It is now read-only.

Commit

Permalink
Merge pull request #6 from katulu-io/GerardoGR/local-kind-cluster
Browse files Browse the repository at this point in the history
Local kind cluster setup
  • Loading branch information
GerardoGR authored Jun 7, 2022
2 parents 0a562d0 + 4cec822 commit 4a57e10
Show file tree
Hide file tree
Showing 17 changed files with 403 additions and 11 deletions.
12 changes: 12 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,18 @@ RUN url="https://raw.githubusercontent.com/python-poetry/poetry/master/install-p
wget -q -O- "$url" | POETRY_VERSION=1.1.13 python3 -; \
poetry --version;

# Install kind (Kubernetes-in-Docker) v0.13.0, verifying the download against a
# pinned SHA-256 digest before placing the binary on the PATH.
# Note the two spaces between digest and filename below: GNU sha256sum's
# --check mode requires the "<digest>  <file>" line format; a single space is
# rejected as an improperly formatted line and, with --strict, fails the build.
RUN url="https://kind.sigs.k8s.io/dl/v0.13.0/kind-linux-amd64"; \
    sha256="c80c6d1013337cbbe226c2eda0a3dc2d75af16e5fa8af4ce3fc9fedcf1f9d2dc"; \
    \
    wget -O kind "$url" --progress=dot:giga; \
    echo "$sha256  kind" | sha256sum --strict --check -; \
    \
    chmod +x kind; \
    mv kind /usr/local/bin; \
    \
    kind version;

WORKDIR /workspace

CMD [ "/bin/bash" ]
10 changes: 9 additions & 1 deletion .devcontainer/targets.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,18 @@ DEV_CONTAINER_WORKING_DIR = /workspace$(CURDIR:$(DEV_CONTAINER_MOUNT)%=%)
DOCKER_CONFIG_JSON = $(HOME)/.docker/config.json
DOCKER_LINUX_OPTS = -u $(shell id -u):$(shell id -g) --group-add=$(shell getent group docker | cut -d: -f3)
DOCKER_OTHER_OS_OPTS =
DOCKER_NETWORK_NAME ?=
DOCKER_NETWORK_OPTS =

ifeq ($(shell uname), Linux)
DOCKER_OS_OPTS = $(DOCKER_LINUX_OPTS)
else
DOCKER_OS_OPTS = $(DOCKER_OTHER_OS_OPTS)
endif

ifneq ($(DOCKER_NETWORK_NAME),)
DOCKER_NETWORK_OPTS = --network=$(DOCKER_NETWORK_NAME)
endif

--pull-devcontainer:
@docker pull ${MAKEVAR_REGISTRY}/${DEV_CONTAINER_IMAGE_NAME}:${DEV_CONTAINER_TAG} || true
Expand All @@ -35,7 +40,10 @@ devcontainer-%: ENVFILE := $(shell mktemp)
devcontainer-%: devcontainer
env | grep -e 'AWS_' -e 'ARM_' -e 'GITHUB_' -e 'MAKEVAR_' -e 'SKIP_' -e 'TF_' >> ${ENVFILE} || true
echo MAKEVAR_DIND=true >> ${ENVFILE}
docker run --rm $(DOCKER_OS_OPTS) \
@if [ -n "$(DOCKER_NETWORK_NAME)" ]; then \
docker network create $(DOCKER_NETWORK_NAME) || true; \
fi
docker run --rm $(DOCKER_OS_OPTS) $(DOCKER_NETWORK_OPTS) \
-v ~/.kube/config:$(DEV_CONTAINER_WORKING_DIR)/.kube/config \
-v /var/run/docker.sock:/var/run/docker.sock \
-v $(DEV_CONTAINER_MOUNT):/workspace \
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ Already cloned without recursive options? Run the following command to initializ
```sh
git submodule update --init --recursive
```

### Local kubernetes cluster

For instructions on how to spin up a local Kubernetes environment, please see the [develop README.md](develop/README.md).
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@ spec:
type: object
status:
description: FlOperatorStatus defines the observed state of FlOperator
properties:
running-servers:
items:
type: object
type: array
type: object
type: object
served: true
Expand Down
8 changes: 6 additions & 2 deletions components/fl-operator/pkg/resources/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ func NewDeployment(task *pb.OrchestratorMessage_TaskSpec, name types.NamespacedN
labels := map[string]string{
FlClientDeploymentLabelKey: FlClientDeploymentLabelValue,
"run-id": string(task.ID),
"spire-workload": "flower-client",
}

envoyConfigVolumeKey := "envoy-config"
Expand All @@ -63,6 +64,7 @@ func NewDeployment(task *pb.OrchestratorMessage_TaskSpec, name types.NamespacedN
ObjectMeta: metav1.ObjectMeta{
Name: name.Name,
Namespace: name.Namespace,
Labels: labels,
},
Spec: appsv1.DeploymentSpec{
Replicas: utils.Int32Ptr(1),
Expand Down Expand Up @@ -158,7 +160,8 @@ func NewDeployment(task *pb.OrchestratorMessage_TaskSpec, name types.NamespacedN
// Creates a new envoy proxy deployment
func NewEnvoyproxyDeployment(name types.NamespacedName) *appsv1.Deployment {
labels := map[string]string{
"app": name.Name,
"app": name.Name,
"spire-workload": "fl-operator",
}

const envoyConfigVolumeKey = "envoy-config"
Expand All @@ -167,6 +170,7 @@ func NewEnvoyproxyDeployment(name types.NamespacedName) *appsv1.Deployment {
ObjectMeta: metav1.ObjectMeta{
Name: name.Name,
Namespace: name.Namespace,
Labels: labels,
},
Spec: appsv1.DeploymentSpec{
Replicas: utils.Int32Ptr(1),
Expand Down Expand Up @@ -244,7 +248,7 @@ func NewEnvoyproxyService(name types.NamespacedName) *corev1.Service {
Spec: corev1.ServiceSpec{
Ports: []corev1.ServicePort{
{
Name: "http",
Name: "grpc",
Port: 9080,
Protocol: "TCP",
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ spec:
args: ["-t", "30", "spire-server:8081"]
containers:
- name: spire-agent
image: gcr.io/spiffe-io/spire-agent:1.1.2
image: gcr.io/spiffe-io/spire-agent:1.3.0
args:
- -expandEnv
- -config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ spec:
shareProcessNamespace: true
containers:
- name: spire-server
image: gcr.io/spiffe-io/spire-server:1.1.2
image: gcr.io/spiffe-io/spire-server:1.3.0
args:
- -config
- /run/spire/config/server.conf
Expand Down Expand Up @@ -53,7 +53,7 @@ spec:
readOnly: false

- name: k8s-workload-registrar
image: gcr.io/spiffe-io/k8s-workload-registrar:1.1.0
image: gcr.io/spiffe-io/k8s-workload-registrar:1.3.0
args:
- -config
- /run/spire/config/k8s-workload-registrar.conf
Expand Down
3 changes: 3 additions & 0 deletions develop/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# KUBECONFIG
/local.fl-suite.kubeconfig.yaml
/.local.fl-suite-internal.kubeconfig.yaml
20 changes: 20 additions & 0 deletions develop/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Docker network shared by the kind cluster and the local registry; picked up
# by the devcontainer targets included below.
DOCKER_NETWORK_NAME = kind

-include ../.devcontainer/targets.mk

# Standard pipeline targets do not apply to this directory; stub them out so a
# recursive `make <target>` from the repository root does not fail here.
dependencies lint test build dist push:
	@echo "$@ not implemented"
.PHONY: dependencies lint test build dist push

# Start a local container registry (registry:2) on the kind docker network so
# images pushed to localhost:5000 are pullable from inside the kind cluster.
# Fix: use $(DOCKER_NETWORK_NAME) instead of a hardcoded "kind" so the network
# name is defined in exactly one place. `docker network create` fails if the
# network already exists — the `|| true` makes the target idempotent.
local-registry:
	docker network create $(DOCKER_NETWORK_NAME) || true
	docker run -d --name registry --restart=always -p 5000:5000 --net=$(DOCKER_NETWORK_NAME) registry:2
.PHONY: local-registry

# Create/update the local kind cluster and apply the manifests (see README.md).
provision:
	@./provision.sh
.PHONY: provision

# Tear the local kind cluster back down.
teardown:
	@./teardown.sh
.PHONY: teardown
104 changes: 104 additions & 0 deletions develop/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Local kubernetes cluster

## Requirements

* [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl).
* [kustomize](https://kubectl.docs.kubernetes.io/installation/kustomize/).
* [kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation).

> 🌻 Prefix any of the make targets with `devcontainer-` to use a container image with the requirements already pre-installed.

## Deploy

To deploy a local kubernetes cluster we will use kind (Kubernetes In Docker).

### 1. Setup a local container registry

The kind cluster uses a local registry to host the fl-suite container images. To deploy this run:

```shell
make local-registry
```


### 2. Push the fl-suite images to the local container registry

Build, dist and push the fl-suite's container images. This needs to be done at the root of the project:

On Linux (with all the tools to build all the components of the fl-suite):

```
cd /path/to/katulu-io/fl-suite/
export MAKEVAR_REGISTRY=localhost:5000
make build dist push
```

On any other platform:

```
cd /path/to/katulu-io/fl-suite/
export MAKEVAR_REGISTRY=localhost:5000
make devcontainer-build devcontainer-dist push
```

### 3. Provision the kind cluster

```shell
make provision
```

That step will show some errors like:

```
Error from server (NotFound): error when creating "STDIN": namespaces "katulu-fl" not found
```

This and other CRD-related errors are expected. The "katulu-fl" namespace gets created once a Kubeflow Profile is reconciled in Kubernetes, which might take some time; the other CRD errors (e.g. cert-manager's Certificate CRDs) have the same cause. The `provision` target retries as many times as needed. Provisioning normally takes around 20 minutes, but this depends on local resources such as CPU and network.

> 🌻 The same make target can be used to update the cluster with the latest kustomize changes.

A kubeconfig file is generated which can be used to configure `kubectl` and access the kind cluster:

```shell
export KUBECONFIG=local.fl-suite.kubeconfig.yaml
kubectl get nodes
NAME STATUS ROLES AGE VERSION
local.fl-suite-control-plane Ready control-plane,master 5m00s v1.21.10
```

### 4. Wait for all the pods to be ready

```shell
export KUBECONFIG=local.fl-suite.kubeconfig.yaml
kubectl get pods -n cert-manager
kubectl get pods -n istio-system
kubectl get pods -n auth
kubectl get pods -n knative-eventing
kubectl get pods -n knative-serving
kubectl get pods -n kubeflow
kubectl get pods -n katulu-fl
kubectl get pods -n spire
kubectl get pods -n container-registry
```

### 5. Login to the fl-suite central dashboard

Once all pods are ready, you can access the fl-suite via:

* On Linux: http://localhost
* On macOS: http://docker.for.mac.localhost

The credentials are:

```
Username: user@example.com
Password: 12341234
```

## Teardown

```shell
make teardown
```
25 changes: 25 additions & 0 deletions develop/kind-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
---
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
name: local.fl-suite
# Registry mirrors for containerd inside the kind node:
# - "localhost:5000" is redirected to the "registry" container reachable over
#   the shared kind docker network (see develop/Makefile `local-registry`).
# - the in-cluster registry ("container-registry.container-registry:5000") is
#   reached via the control-plane node's NodePort 30080.
containerdConfigPatches:
- |-
  [plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:5000"]
    endpoint = ["http://registry:5000"]
  [plugins."io.containerd.grpc.v1.cri".registry.mirrors."container-registry.container-registry:5000"]
    endpoint = ["http://local.fl-suite-control-plane:30080"]
nodes:
- role: control-plane
  # Using kubernetes version 1.21 to avoid "no matches for kind "CustomResourceDefinition"" error (related issue: https://github.com/kubeflow/manifests/issues/2028)
  image: kindest/node:v1.21.10@sha256:84709f09756ba4f863769bdcabe5edafc2ada72d3c8c44d6515fc581b66b029c
  # Map NodePorts 30080/30443 to the host's standard HTTP/HTTPS ports so the
  # cluster is reachable at http://localhost (per develop/README.md).
  extraPortMappings:
  - containerPort: 30080
    hostPort: 80
  - containerPort: 30443
    hostPort: 443
  # TODO: Remove this extra mount. The FLOperator pods expect a /dataset directory in the kubernetes node. To
  # force-create one we let kind do it
  extraMounts:
  - hostPath: dataset/
    containerPath: /dataset
    readOnly: true
11 changes: 11 additions & 0 deletions develop/kustomize/allow-fl-operator-access.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Istio AuthorizationPolicy permitting the fl-operator controller-manager
# service account (namespace fl-operator-system) to reach workloads in the
# katulu-fl namespace. No `action` field is set, so Istio's default (ALLOW)
# applies — TODO confirm this is the intended scope (policy matches all
# workloads in the namespace, as no selector is given).
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
  name: allow-fl-operator-access
  namespace: katulu-fl
spec:
  rules:
  - from:
    - source:
        principals: ["cluster.local/ns/fl-operator-system/sa/fl-operator-controller-manager"]
7 changes: 7 additions & 0 deletions develop/kustomize/config/internal-registry-credentials.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"auths":{
"container-registry.container-registry:5000": {
"auth":"cmVnaXN0cnk6cmVnaXN0cnk="
}
}
}
33 changes: 33 additions & 0 deletions develop/kustomize/config/spire-agent.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# SPIRE agent configuration for the local development cluster.
agent {
  data_dir = "/run/spire"
  log_level = "DEBUG"
  # In-cluster SPIRE server service and port the agent dials.
  server_address = "spire-server"
  server_port = "8081"
  # Unix domain socket over which workloads fetch their SVIDs.
  socket_path = "/run/spire/sockets/agent.sock"
  trust_bundle_path = "/run/spire/bundle/bundle.crt"
  trust_domain = "katulu.io"
}

plugins {
  # Node attestation via Kubernetes Projected Service Account Tokens.
  # NOTE(review): the cluster name presumably must match the server's
  # k8s_psat configuration — confirm against the spire-server config.
  NodeAttestor "k8s_psat" {
    plugin_data {
      cluster = "local-k8s"
    }
  }

  # Keys are kept in memory only, so they do not survive an agent restart.
  KeyManager "memory" {
    plugin_data {
    }
  }

  # Attest workloads using Kubernetes pod/container metadata.
  WorkloadAttestor "k8s" {
    plugin_data {
      # Local dev convenience: do not verify the kubelet's serving certificate.
      skip_kubelet_verification = true
    }
  }

  # Also attest plain Unix processes (UID/GID-based selectors).
  WorkloadAttestor "unix" {
    plugin_data {
    }
  }
}
Loading

0 comments on commit 4a57e10

Please sign in to comment.