Inspired by Kelsey Hightower's kubernetes-the-hard-way, this tutorial walks you through setting up Kafka on K8s using the BanzaiCloud Kafka Operator on a local kind cluster.
- Intro
- Install Kafka in a `kind` k8s cluster - BanzaiCloud KOperator
- Disaster scenarios
This is a quick tutorial on how to run the BanzaiCloud Kafka-Operator, a fine piece of software, in a local multi-node kind cluster.
- Check https://github.com/kubernetes-sigs/kind
- Docker required (kind runs nodes and control plane as local docker containers)
curl -Lo ./kind "https://github.com/kubernetes-sigs/kind/releases/download/v0.14.0/kind-$(uname)-amd64"
chmod +x ./kind
mv ./kind ~/bin
mkdir ~/.kind
# Create a cluster configuration with 1 control-plane node and 6 worker nodes
cat > ~/.kind/kind-config.yaml <<EOF
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
- role: worker
- role: worker
- role: worker
- role: worker
- role: worker
EOF
kind create cluster \
--name kafka \
--config ~/.kind/kind-config.yaml \
`# renovate: datasource=docker depName=kindest/node ` \
--image kindest/node:v1.24.1
Once the cluster is created, your `KUBECONFIG` is updated to include the new `kind-kafka` cluster context.
Run `kubectl cluster-info --context kind-kafka` to get more info.
Debug: `kind` clusters run in Docker; check the containers with `docker ps`.
# switch to kind cluster context
kubectl config use-context kind-kafka
# test (`k` is assumed to be a shell alias for kubectl, e.g. alias k=kubectl)
k get nodes
# To dump kubeconfig
kind get kubeconfig --name kafka
# place all nodes in the same region
kubectl label nodes kafka-worker kafka-worker2 kafka-worker3 kafka-worker4 kafka-worker5 kafka-worker6 topology.kubernetes.io/region=region1
# emulate 3 AZs:
kubectl label nodes kafka-worker kafka-worker2 topology.kubernetes.io/zone=az1
kubectl label nodes kafka-worker3 kafka-worker4 topology.kubernetes.io/zone=az2
kubectl label nodes kafka-worker5 kafka-worker6 topology.kubernetes.io/zone=az3
# check
kubectl get nodes --label-columns topology.kubernetes.io/region,topology.kubernetes.io/zone
Note: The installation below assumes you're using helm3
See https://cert-manager.io/docs/installation/kubernetes/#steps
# Install the CRDs separately
# renovate: datasource=github-releases depName=cert-manager/cert-manager
kubectl create --validate=false -f https://github.com/cert-manager/cert-manager/releases/download/v1.8.2/cert-manager.crds.yaml
kubectl create namespace cert-manager
# Install cert-manager using helm3
helm repo add cert-manager https://charts.jetstack.io
helm repo update
# renovate: datasource=github-releases depName=cert-manager/cert-manager
helm install cert-manager cert-manager/cert-manager --namespace cert-manager --version v1.8.2
Make sure you use helm3
rm -rf /tmp/zookeeper-operator
git clone --single-branch --branch master https://github.com/adobe/zookeeper-operator /tmp/zookeeper-operator
cd /tmp/zookeeper-operator
kubectl create ns zookeeper
kubectl create -f https://raw.githubusercontent.com/adobe/zookeeper-operator/master/config/crd/bases/zookeeper.pravega.io_zookeeperclusters.yaml
helm template zookeeper-operator --namespace=zookeeper --set crd.create=false \
--set image.repository='adobe/zookeeper-operator' \
`# renovate: datasource=docker depName=adobe/zookeeper-operator ` \
--set image.tag='0.2.14-adobe-20220610' \
./charts/zookeeper-operator | kubectl create -n zookeeper -f -
kubectl create --namespace zookeeper -f - <<EOF
apiVersion: zookeeper.pravega.io/v1beta1
kind: ZookeeperCluster
metadata:
name: zk
namespace: zookeeper
spec:
replicas: 3
image:
repository: adobe/zookeeper
# renovate: datasource=docker depName=adobe/zookeeper
tag: 3.7.1-0.2.14-adobe-20220610
pullPolicy: IfNotPresent
config:
initLimit: 10
tickTime: 2000
syncLimit: 5
probes:
livenessProbe:
initialDelaySeconds: 41
persistence:
reclaimPolicy: Delete
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 20Gi
EOF
# Check
kubectl get all -n zookeeper
# ZookeeperCluster up?
kubectl get -w zookeepercluster -n zookeeper -o wide
# Good
See more at https://github.com/pravega/zookeeper-operator
kubectl create -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/master/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
kubectl create -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/master/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
kubectl create -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/master/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
kubectl create -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/master/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
kubectl create -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/master/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
kubectl create -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/master/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
kubectl create -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/master/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
kubectl create -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/master/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm install monitoring --namespace=default prometheus-community/kube-prometheus-stack --set prometheusOperator.createCustomResource=false
Prometheus, Grafana, and Alertmanager dashboards can be accessed quickly using kubectl port-forward
after running the quickstart via the commands below.
kubectl --namespace default port-forward svc/monitoring-kube-prometheus-prometheus 9090
Then access via http://localhost:9090
http://localhost:9090/api/v1/label/__name__/values
# get admin password
kubectl get secret --namespace default monitoring-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
# proxy
kubectl --namespace default port-forward svc/monitoring-grafana 3000:80
Then access via http://localhost:3000 and use the Grafana user:password of `admin:prom-operator`.
kubectl --namespace default port-forward svc/monitoring-kube-prometheus-alertmanager 9093
Then access via http://localhost:9093
# check all resources created by helm release
k get all -A -l release=monitoring
# new kafka NS
kubectl create ns kafka
# install operator using upstream helm chart
rm -rf /tmp/kafka-operator
git clone --single-branch --branch master https://github.com/banzaicloud/koperator /tmp/kafka-operator
cd /tmp/kafka-operator
kubectl create -f config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml
kubectl create -f config/base/crds/kafka.banzaicloud.io_kafkatopics.yaml
kubectl create -f config/base/crds/kafka.banzaicloud.io_kafkausers.yaml
helm template kafka-operator \
--namespace=kafka \
--set webhook.enabled=false \
--set operator.image.repository=adobe/kafka-operator \
`# renovate: datasource=docker depName=adobe/kafka-operator ` \
--set operator.image.tag=0.21.3-adobe-20220712 \
charts/kafka-operator > kafka-operator.yaml
kubectl create -n kafka -f kafka-operator.yaml
# Check
kubectl get all -n kafka
# Good
kubectl create -n kafka -f https://raw.githubusercontent.com/amuraru/k8s-kafka-the-hard-way/master/simplekafkacluster.yaml
# Check that the KafkaCluster custom resource was created
k get KafkaCluster kafka -n kafka -w -o wide
# See the KafkaCluster custom resource state
k describe KafkaCluster kafka -n kafka
kubectl apply -n kafka -f https://raw.githubusercontent.com/amuraru/k8s-kafka-operator/master/kafkacluster-prometheus-monitoring.yaml
kubectl apply -n kafka -f https://raw.githubusercontent.com/amuraru/k8s-kafka-operator/master/kafkacluster-prometheus-autoscale.yaml
kubectl apply -n default -f https://raw.githubusercontent.com/amuraru/k8s-kafka-operator/master/grafana-dashboard.yaml
This needs to be created in the same namespace as Grafana (`default`).
Dashboard should be automatically loaded and available at http://127.0.0.1:3000/d/1a1a1a1a1/kafka-looking-glass
kubectl port-forward -n kafka svc/kafka-cruisecontrol-svc 18090:8090
kubectl port-forward -n default svc/prometheus-operated 19090:9090
# http://10.131.236.142:19090/graph?g0.range_input=1h&g0.expr=%7B__name__%20%3D~%27kafka.*%27%7D&g0.tab=1
kubectl config set-context --current --namespace=kafka
# See operator logs
k logs -l app.kubernetes.io/instance=kafka-operator -c manager -f
kubectl get pod -o=custom-columns='NAME:.metadata.name,IMAGE:.spec.containers[*].image' --all-namespaces
kubectl run kafka-topics --rm -i --tty=true \
--image=adobe/kafka:2.13-2.8.1 \
--restart=Never \
-- /opt/kafka/bin/kafka-topics.sh \
--bootstrap-server kafka-headless:29092 \
--list
kubectl run kafka-topics --rm -i --tty=true \
--image=adobe/kafka:2.13-2.8.1 \
--restart=Never \
-- /opt/kafka/bin/kafka-topics.sh \
--bootstrap-server kafka-headless:29092 \
--topic perf_topic \
--replica-assignment 101:201:301,102:202:302,101:201:301,102:202:302,101:201:301,102:202:302,101:201:301,102:202:302,101:201:301,102:202:302,101:201:301,102:202:302 \
--create
kubectl run kafka-topics --rm -i --tty=true \
--image=adobe/kafka:2.13-2.8.1 \
--restart=Never \
-- /opt/kafka/bin/kafka-configs.sh \
--zookeeper zk-client.zookeeper:2181/kafka \
--alter --entity-name perf_topic \
--entity-type topics \
--add-config retention.ms=720000
kubectl run kafka-topics --rm -i --tty=true \
--image=adobe/kafka:2.13-2.8.1 \
--restart=Never \
-- /opt/kafka/bin/kafka-topics.sh \
--bootstrap-server kafka-headless:29092 \
--topic perf_topic \
--describe
kubectl run kafka-producer-topic \
--image=adobe/kafka:2.13-2.8.1 \
--restart=Never \
-- /opt/kafka/bin/kafka-producer-perf-test.sh \
--producer-props bootstrap.servers=kafka-headless:29092 acks=all \
--topic perf_topic \
--record-size 1000 \
--throughput 29000 \
--num-records 21100000000
kubectl run kafka-consumer-test \
--image=adobe/kafka:2.13-2.8.1 \
--restart=Never \
-- /opt/kafka/bin/kafka-consumer-perf-test.sh \
--broker-list kafka-headless:29092 \
--group perf-consume \
--messages 10000000000 \
--topic perf_topic \
--show-detailed-stats \
--from-latest \
--timeout 100000
http://127.0.0.1:3000/d/1a1a1a1a1/kafka-looking-glass
# Get Kafka broker pods
k get pod -l kafka_cr=kafka
NAME READY STATUS RESTARTS AGE
kafka7fwkf 1/1 Running 0 6h3m
kafka8dksv 1/1 Running 0 6h
kafka9kp6q 1/1 Running 0 6h1m
kafkas6gh4 1/1 Running 0 6h2m
kafkavbsff 1/1 Running 0 6h3m
kafkawn4l6 1/1 Running 0 6h2m
# Get PV and PVC
k get pv,pvc | grep standard
persistentvolume/pvc-0965737b-0886-4599-99e7-a5dec34b29bb 10Gi RWO Delete Bound kafka/kafka-301-storage-0-v2w79 standard 19m
persistentvolume/pvc-1adee862-67a3-4be3-85a2-d2048aa71330 10Gi RWO Delete Bound kafka/kafka-101-storage-0-pss4m standard 24m
persistentvolume/pvc-2fc31ae1-de91-43dd-a029-d938b7fc9b67 20Gi RWO Delete Bound zookeeper/data-zk-2 standard 37m
persistentvolume/pvc-314bca5b-8b1e-47cd-b928-a4a2dd85a6c4 10Gi RWO Delete Bound kafka/kafka-202-storage-0-2r8vm standard 19m
persistentvolume/pvc-485d4ca7-4129-43a5-b98f-db23ea87a36c 10Gi RWO Delete Bound kafka/kafka-201-storage-0-nw8vd standard 19m
persistentvolume/pvc-4e99bb66-161c-4573-bdab-1d44455eeb4f 10Gi RWO Delete Bound kafka/kafka-302-storage-0-8tsdt standard 19m
persistentvolume/pvc-9d53fa44-ca21-4bb8-a036-feecee95500a 20Gi RWO Delete Bound zookeeper/data-zk-1 standard 38m
persistentvolume/pvc-a4b70cdf-28be-4c7c-b18b-1fb070624649 20Gi RWO Delete Bound zookeeper/data-zk-0 standard 39m
persistentvolume/pvc-fb0571c5-3fe8-4dbc-8ea8-9cf93b8ad2e9 10Gi RWO Delete Bound kafka/kafka-102-storage-0-qc8bj standard 19m
persistentvolumeclaim/kafka-101-storage-0-pss4m Bound pvc-1adee862-67a3-4be3-85a2-d2048aa71330 10Gi RWO standard 24m
persistentvolumeclaim/kafka-102-storage-0-qc8bj Bound pvc-fb0571c5-3fe8-4dbc-8ea8-9cf93b8ad2e9 10Gi RWO standard 24m
persistentvolumeclaim/kafka-201-storage-0-nw8vd Bound pvc-485d4ca7-4129-43a5-b98f-db23ea87a36c 10Gi RWO standard 24m
persistentvolumeclaim/kafka-202-storage-0-2r8vm Bound pvc-314bca5b-8b1e-47cd-b928-a4a2dd85a6c4 10Gi RWO standard 24m
persistentvolumeclaim/kafka-301-storage-0-v2w79 Bound pvc-0965737b-0886-4599-99e7-a5dec34b29bb 10Gi RWO standard 24m
persistentvolumeclaim/kafka-302-storage-0-8tsdt Bound pvc-4e99bb66-161c-4573-bdab-1d44455eeb4f 10Gi RWO standard 24m
Are the underlying PV/PVC retained and is the broker pod rescheduled? PASSED
# Kill one broker JVM
k exec -it kafka7fwkf -- kill 1
# Pod is recreated
NAME READY STATUS RESTARTS AGE
kafka8dksv 1/1 Running 0 6h2m
kafka9kp6q 1/1 Running 0 6h4m
kafkap4h7p 1/1 Running 0 56s # <----
kafkas6gh4 1/1 Running 0 6h4m
kafkavbsff 1/1 Running 0 6h5m
kafkawn4l6 1/1 Running 0 6h4m
# PV/PVC reused, attached to the new POD : Good!
k get pv,pvc | grep examplestorageclass
persistentvolume/pvc-1e0b15df-0236-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storager5t9v examplestorageclass 7h7m
persistentvolume/pvc-3ab64754-0236-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storage7g8xd examplestorageclass 7h6m
persistentvolume/pvc-3ae3ae0c-0236-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storage6sss6 examplestorageclass 7h6m
persistentvolume/pvc-3b5fd2ad-0236-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storagezs7r7 examplestorageclass 7h6m
persistentvolume/pvc-a102806f-0239-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storagecp57b examplestorageclass 6h42m
persistentvolume/pvc-a12dafe5-0239-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storagekg5j8 examplestorageclass 6h42m
persistentvolumeclaim/kafka-storage6sss6 Bound pvc-3ae3ae0c-0236-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 7h6m
persistentvolumeclaim/kafka-storage7g8xd Bound pvc-3ab64754-0236-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 7h6m
persistentvolumeclaim/kafka-storagecp57b Bound pvc-a102806f-0239-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 6h42m
persistentvolumeclaim/kafka-storagekg5j8 Bound pvc-a12dafe5-0239-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 6h42m
persistentvolumeclaim/kafka-storager5t9v Bound pvc-1e0b15df-0236-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 7h7m
persistentvolumeclaim/kafka-storagezs7r7 Bound pvc-3b5fd2ad-0236-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 7h6m
PASSED - PV is reattached to the new pod
$ k get pod -l kafka_cr=kafka
NAME READY STATUS RESTARTS AGE
kafka8dksv 1/1 Running 0 6h12m
kafka9kp6q 1/1 Running 0 6h13m
kafkabvx7m 1/1 Running 0 6m59s
kafkap4h7p 1/1 Running 0 10m
kafkavbsff 1/1 Running 0 6h14m
kafkawn4l6 1/1 Running 0 6h14m
$ k delete pod kafka8dksv
pod "kafka8dksv" deleted
$ k get pod -l kafka_cr=kafka
NAME READY STATUS RESTARTS AGE
kafka8hn4d 1/1 Running 0 31s # <--recreated
kafka9kp6q 1/1 Running 0 6h14m
kafkabvx7m 1/1 Running 0 7m54s
kafkap4h7p 1/1 Running 0 11m
kafkavbsff 1/1 Running 0 6h15m
kafkawn4l6 1/1 Running 0 6h14m
$ k get pv,pvc -o wide | grep examplestorageclass
# Same PV/PVCs
persistentvolume/pvc-1e0b15df-0236-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storager5t9v examplestorageclass 7h17m
persistentvolume/pvc-3ab64754-0236-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storage7g8xd examplestorageclass 7h16m
persistentvolume/pvc-3ae3ae0c-0236-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storage6sss6 examplestorageclass 7h16m
persistentvolume/pvc-3b5fd2ad-0236-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storagezs7r7 examplestorageclass 7h16m
persistentvolume/pvc-a102806f-0239-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storagecp57b examplestorageclass 6h52m
persistentvolume/pvc-a12dafe5-0239-11ea-93d3-0242ac110002 100Gi RWO Retain Bound kafka/kafka-storagekg5j8 examplestorageclass 6h52m
persistentvolumeclaim/kafka-storage6sss6 Bound pvc-3ae3ae0c-0236-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 7h16m
persistentvolumeclaim/kafka-storage7g8xd Bound pvc-3ab64754-0236-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 7h16m
persistentvolumeclaim/kafka-storagecp57b Bound pvc-a102806f-0239-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 6h52m
persistentvolumeclaim/kafka-storagekg5j8 Bound pvc-a12dafe5-0239-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 6h52m
persistentvolumeclaim/kafka-storager5t9v Bound pvc-1e0b15df-0236-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 7h17m
persistentvolumeclaim/kafka-storagezs7r7 Bound pvc-3b5fd2ad-0236-11ea-93d3-0242ac110002 100Gi RWO examplestorageclass 7h16m