Cleanups and minor improvements in lima provider #1568

Merged
12 commits, merged on Oct 7, 2024
5 changes: 4 additions & 1 deletion test/Makefile
@@ -5,6 +5,9 @@
# hardware acceleration for VMs.
DRIVER ?= vm

# drenv start timeout in seconds
TIMEOUT ?= 600

env := envs/$(DRIVER).yaml
prefix := drenv-test-

@@ -50,7 +53,7 @@ coverage-html:
xdg-open htmlcov/index.html

cluster:
drenv start --name-prefix $(prefix) $(env) -v
drenv start --name-prefix $(prefix) $(env) --verbose --timeout $(TIMEOUT)

clean:
drenv delete --name-prefix $(prefix) $(env)
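
The new TIMEOUT variable can be overridden per invocation, so slower hosts do not need to edit the Makefile. A minimal sketch, assuming make is run from the test directory; the 900 is only an illustrative value:

    make cluster TIMEOUT=900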
7 changes: 6 additions & 1 deletion test/drenv/__main__.py
@@ -76,6 +76,11 @@ def parse_args():
metavar="N",
help="maximum number of workers per profile",
)
p.add_argument(
"--timeout",
type=int,
help="time in seconds to wait until clsuter is started",
)

p = add_command(sp, "stop", do_stop, help="stop an environment")
p.add_argument(
@@ -379,7 +384,7 @@ def start_cluster(profile, hooks=(), args=None, **options):
provider = providers.get(profile["provider"])
existing = provider.exists(profile)

provider.start(profile, verbose=args.verbose)
provider.start(profile, verbose=args.verbose, timeout=args.timeout)
provider.configure(profile, existing=existing)

if existing:
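Outside the Makefile, the timeout can be passed directly on the drenv command line. A sketch matching the values used in test/Makefile above; if --timeout is omitted, args.timeout stays None and each provider falls back to its own default:

    drenv start --name-prefix drenv-test- envs/vm.yaml --verbose --timeout 600
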
9 changes: 6 additions & 3 deletions test/drenv/envfile.py
@@ -46,11 +46,11 @@
},
"darwin": {
PROVIDER: {
"x86_64": "minikube",
"x86_64": "lima",
"arm64": "lima",
},
VM: {
"x86_64": "hyperkit",
"x86_64": "",
"arm64": "",
},
CONTAINER: "podman",
@@ -136,7 +136,7 @@ def _validate_profile(profile, addons_root):
# If True, this is an external cluster and we don't have to start it.
profile.setdefault("external", False)

# Properties for drenv managed cluster.
# Common properties.
profile.setdefault("provider", PROVIDER)
profile.setdefault("driver", VM)
profile.setdefault("container_runtime", "")
@@ -155,6 +155,9 @@ def _validate_profile(profile, addons_root):
profile.setdefault("containerd", None)
profile.setdefault("workers", [])

# Lima provider properties.
profile.setdefault("rosetta", True)

_validate_platform_defaults(profile)

for i, worker in enumerate(profile["workers"]):
2 changes: 1 addition & 1 deletion test/drenv/providers/external.py
@@ -25,7 +25,7 @@ def exists(profile):
return True


def start(profile, verbose=False):
def start(profile, verbose=False, timeout=None):
start = time.monotonic()
logging.info("[%s] Checking external cluster status", profile["name"])

16 changes: 11 additions & 5 deletions test/drenv/providers/lima/__init__.py
@@ -61,7 +61,7 @@ def exists(profile):
return False


def start(profile, verbose=False):
def start(profile, verbose=False, timeout=None):
start = time.monotonic()
logging.info("[%s] Starting lima cluster", profile["name"])

@@ -76,7 +76,7 @@ def start(profile, verbose=False):
# Get vm before starting to detect a stopped vm.
vm = _get_vm(profile)

_start_vm(profile)
_start_vm(profile, timeout=timeout)
_add_kubeconfig(profile, vm)

debug = partial(logging.debug, f"[{profile['name']}] %s")
@@ -181,7 +181,9 @@ def _write_config(profile, path):
# The "vz" type is required to support amd64 images on arm64, needed for
# OCM, and also provides the best performance.
config["vmType"] = "vz"
config["rosetta"] = {"enabled": True, "binfmt": True}

if profile["rosetta"]:
config["rosetta"] = {"enabled": True, "binfmt": True}

# We always use socket_vmnet to get shared network.
config["networks"] = [{"socket": "/var/run/socket_vmnet"}]
@@ -270,8 +272,12 @@ def _create_vm(profile, config):
_watch("create", "--name", profile["name"], config, context=profile["name"])


def _start_vm(profile):
_watch("start", profile["name"], context=profile["name"])
def _start_vm(profile, timeout=None):
args = ["start"]
if timeout:
args.append(f"--timeout={timeout}s")
args.append(profile["name"])
_watch(*args, context=profile["name"])
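
With timeout=600 and a profile named cluster, _start_vm builds the argument list ["start", "--timeout=600s", "cluster"]. Assuming _watch prepends the limactl binary, the command run on the host would be roughly:

    limactl start --timeout=600s cluster

The flag is only appended when a timeout was given, so callers that do not pass one keep limactl's own default.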


def _stop_vm(profile):
55 changes: 30 additions & 25 deletions test/drenv/providers/lima/k8s.yaml
@@ -13,6 +13,8 @@
images:
- location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img"
arch: "aarch64"
- location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img"
arch: "x86_64"

mounts: []

@@ -24,9 +26,8 @@ containerd:
forwarding cannot work for multiple clusters since the same port from
multiple clusters is mapped to the same host port.
portForwards:
- guestPortRange: [1, 65535]
guestIP: "0.0.0.0"
ignore: true
- ignore: true
proto: any

provision:

@@ -48,20 +49,20 @@ provision:
set -eux -o pipefail
command -v kubeadm >/dev/null 2>&1 && exit 0
# Install and configure prerequisites
cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
cat <<EOF | tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
modprobe overlay
modprobe br_netfilter
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
cat <<EOF | tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
# Avoid "failed to creating a fsnotify watcher: too many open files"
# errors with bigger setups.
cat <<EOF | sudo tee /etc/sysctl.d/99-fs-inotify.conf
cat <<EOF | tee /etc/sysctl.d/99-fs-inotify.conf
fs.inotify.max_user_instances = 8192
fs.inotify.max_user_watches = 65536
EOF
@@ -71,12 +72,12 @@ provision:
apt-get update
apt-get install -y apt-transport-https ca-certificates curl
VERSION=$(curl -L -s https://dl.k8s.io/release/stable.txt | sed -e 's/v//' | cut -d'.' -f1-2)
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v${VERSION}/deb/ /" | sudo tee /etc/apt/sources.list.d/kubernetes.list
curl -fsSL https://pkgs.k8s.io/core:/stable:/v${VERSION}/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v${VERSION}/deb/ /" | tee /etc/apt/sources.list.d/kubernetes.list
curl -fsSL https://pkgs.k8s.io/core:/stable:/v${VERSION}/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
apt-get update
# cri-tools
apt-get install -y cri-tools
cat <<EOF | sudo tee /etc/crictl.yaml
cat <<EOF | tee /etc/crictl.yaml
runtime-endpoint: unix:///run/containerd/containerd.sock
EOF
# cni-plugins
@@ -126,12 +127,12 @@ provision:
criSocket: unix:///run/containerd/containerd.sock
kubeletExtraArgs:
# Ramen: use specific network
node-ip: $ADVERTISE_ADDRESS
node-ip: "$ADVERTISE_ADDRESS"
# Ramen: speed up image pulls
serialize-image-pulls: "false"
# Ramen: serve specific network.
localAPIEndpoint:
advertiseAddress: $ADVERTISE_ADDRESS
advertiseAddress: "$ADVERTISE_ADDRESS"
---
kind: ClusterConfiguration
apiVersion: kubeadm.k8s.io/v1beta3
@@ -147,25 +148,35 @@ provision:
featureGates:
StatefulSetAutoDeletePVC: true
EOF
kubeadm init --config kubeadm-config.yaml

# We ignore the NumCPU preflight error when running a minimal cluster in
# github actions and for testing drenv.
# [ERROR NumCPU]: the number of available CPUs 1 is less than the required 2
kubeadm init --config kubeadm-config.yaml --ignore-preflight-errors NumCPU

# Scale down coredns like minikube
kubectl scale deploy coredns -n kube-system --replicas=1

# Installing a Pod network add-on
kubectl apply -f https://github.com/flannel-io/flannel/releases/download/v0.24.0/kube-flannel.yml

# Control plane node isolation
kubectl taint nodes --all node-role.kubernetes.io/control-plane-
mkdir -p ${HOME:-/root}/.kube && cp -f $KUBECONFIG ${HOME:-/root}/.kube/config

- mode: system
script: |
#!/bin/bash
set -eux -o pipefail
export KUBECONFIG=/etc/kubernetes/admin.conf
KUBECONFIG=/etc/kubernetes/admin.conf
mkdir -p ${HOME:-/root}/.kube
cp -f $KUBECONFIG ${HOME:-/root}/.kube/config
mkdir -p {{.Home}}/.kube
cp -f $KUBECONFIG {{.Home}}/.kube/config
chown -R {{.User}} {{.Home}}/.kube

probes:

- description: "kubeadm to be installed"
- description: "kubeadm installed"
script: |
#!/bin/bash
set -eux -o pipefail
@@ -176,7 +187,7 @@ probes:
hint: |
See "/var/log/cloud-init-output.log". in the guest

- description: "kubeadm to be completed"
- description: "kubeadm completed"
script: |
#!/bin/bash
set -eux -o pipefail
@@ -187,21 +198,15 @@ probes:
hint: |
The k8s kubeconfig file has not yet been created.

- description: "kubernetes cluster to be running"
- description: "kubernetes cluster is ready"
script: |
#!/bin/bash
set -eux -o pipefail
if ! timeout 300s bash -c "until kubectl version >/dev/null 2>&1; do sleep 3; done"; then
echo >&2 "kubernetes cluster is not up and running yet"
if ! timeout 300s bash -c "until kubectl get --raw /readyz >/dev/null 2>&1; do sleep 3; done"; then
echo >&2 "kubernetes cluster is not ready yet"
exit 1
fi

- description: "coredns deployment to be running"
script: |
#!/bin/bash
set -eux -o pipefail
kubectl wait -n kube-system --timeout=180s --for=condition=available deploy coredns

copyToHost:
- guest: "/etc/kubernetes/admin.conf"
host: "{{.Dir}}/copied-from-guest/kubeconfig.yaml"
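The last probe polls the API server's /readyz endpoint from inside the guest. The same check can be repeated from the host using the kubeconfig that copyToHost places in the instance directory; a sketch, assuming the default LIMA_HOME of ~/.lima and an instance named cluster:

    export KUBECONFIG=~/.lima/cluster/copied-from-guest/kubeconfig.yaml
    kubectl get --raw /readyz    # prints "ok" once the API server is ready
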
8 changes: 4 additions & 4 deletions test/drenv/providers/minikube.py
@@ -61,7 +61,7 @@ def exists(profile):
return False


def start(profile, verbose=False):
def start(profile, verbose=False, timeout=None):
start = time.monotonic()
logging.info("[%s] Starting minikube cluster", profile["name"])

@@ -119,7 +119,7 @@ def start(profile, verbose=False):
# TODO: Use --interactive=false when the bug is fixed.
# https://github.com/kubernetes/minikube/issues/19518

_watch("start", *args, profile=profile["name"])
_watch("start", *args, profile=profile["name"], timeout=timeout)

logging.info(
"[%s] Cluster started in %.2f seconds",
@@ -364,11 +364,11 @@ def _run(command, *args, profile=None, output=None):
return commands.run(*cmd)


def _watch(command, *args, profile=None):
def _watch(command, *args, profile=None, timeout=None):
cmd = ["minikube", command, "--profile", profile]
cmd.extend(args)
logging.debug("[%s] Running %s", profile, cmd)
for line in commands.watch(*cmd):
for line in commands.watch(*cmd, timeout=timeout):
logging.debug("[%s] %s", profile, line)


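Here the timeout is forwarded to commands.watch rather than to minikube itself, so it is presumably enforced on the host side while the output is streamed. The effect would be roughly that of wrapping the command with coreutils timeout; an illustrative sketch only, not what drenv actually runs:

    timeout 600 minikube start --profile drenv-test-cluster
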
4 changes: 3 additions & 1 deletion test/envs/vm.yaml
@@ -8,7 +8,9 @@ profiles:
- name: cluster
driver: $vm
container_runtime: containerd
memory: "3g"
cpus: 1
memory: "2g"
rosetta: false
workers:
- addons:
- name: example