Cleanups and minor improvements in lima provider #1568

Merged
12 commits, merged on Oct 7, 2024
5 changes: 4 additions & 1 deletion test/Makefile
@@ -5,6 +5,9 @@
# hardware acceleration for VMs.
DRIVER ?= vm

# drenv start timeout in seconds
TIMEOUT ?= 600

env := envs/$(DRIVER).yaml
prefix := drenv-test-

@@ -50,7 +53,7 @@ coverage-html:
xdg-open htmlcov/index.html

cluster:
drenv start --name-prefix $(prefix) $(env) -v
drenv start --name-prefix $(prefix) $(env) --verbose --timeout $(TIMEOUT)

clean:
drenv delete --name-prefix $(prefix) $(env)
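
The new TIMEOUT variable can be overridden per invocation, so slower hosts do not need to edit the Makefile. A minimal sketch, assuming make is run from the test directory; the 900 is only an illustrative value:

    make cluster TIMEOUT=900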
7 changes: 6 additions & 1 deletion test/drenv/__main__.py
@@ -76,6 +76,11 @@ def parse_args():
metavar="N",
help="maximum number of workers per profile",
)
p.add_argument(
"--timeout",
type=int,
help="time in seconds to wait until clsuter is started",
)

p = add_command(sp, "stop", do_stop, help="stop an environment")
p.add_argument(
@@ -379,7 +384,7 @@ def start_cluster(profile, hooks=(), args=None, **options):
provider = providers.get(profile["provider"])
existing = provider.exists(profile)

provider.start(profile, verbose=args.verbose)
provider.start(profile, verbose=args.verbose, timeout=args.timeout)
provider.configure(profile, existing=existing)

if existing:
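Outside the Makefile, the timeout can be passed directly on the drenv command line. A sketch matching the values used in test/Makefile above; if --timeout is omitted, args.timeout stays None and each provider falls back to its own default:

    drenv start --name-prefix drenv-test- envs/vm.yaml --verbose --timeout 600
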
9 changes: 6 additions & 3 deletions test/drenv/envfile.py
@@ -46,11 +46,11 @@
},
"darwin": {
PROVIDER: {
"x86_64": "minikube",
"x86_64": "lima",
"arm64": "lima",
},
VM: {
"x86_64": "hyperkit",
"x86_64": "",
"arm64": "",
},
CONTAINER: "podman",
@@ -136,7 +136,7 @@ def _validate_profile(profile, addons_root):
# If True, this is an external cluster and we don't have to start it.
profile.setdefault("external", False)

# Properties for drenv managed cluster.
# Common properties.
profile.setdefault("provider", PROVIDER)
profile.setdefault("driver", VM)
profile.setdefault("container_runtime", "")
@@ -155,6 +155,9 @@ def _validate_profile(profile, addons_root):
profile.setdefault("containerd", None)
profile.setdefault("workers", [])

# Lima provider properties.
profile.setdefault("rosetta", True)

_validate_platform_defaults(profile)

for i, worker in enumerate(profile["workers"]):
2 changes: 1 addition & 1 deletion test/drenv/providers/external.py
@@ -25,7 +25,7 @@ def exists(profile):
return True


def start(profile, verbose=False):
def start(profile, verbose=False, timeout=None):
start = time.monotonic()
logging.info("[%s] Checking external cluster status", profile["name"])

16 changes: 11 additions & 5 deletions test/drenv/providers/lima/__init__.py
@@ -61,7 +61,7 @@ def exists(profile):
return False


def start(profile, verbose=False):
def start(profile, verbose=False, timeout=None):
start = time.monotonic()
logging.info("[%s] Starting lima cluster", profile["name"])

@@ -76,7 +76,7 @@ def start(profile, verbose=False):
# Get vm before starting to detect a stopped vm.
vm = _get_vm(profile)

_start_vm(profile)
_start_vm(profile, timeout=timeout)
_add_kubeconfig(profile, vm)

debug = partial(logging.debug, f"[{profile['name']}] %s")
@@ -181,7 +181,9 @@ def _write_config(profile, path):
# The "vz" type is required to support amd64 images on arm64, needed for
# OCM, and also provides the best performance.
config["vmType"] = "vz"
config["rosetta"] = {"enabled": True, "binfmt": True}

if profile["rosetta"]:
config["rosetta"] = {"enabled": True, "binfmt": True}

# We always use socket_vmnet to get shared network.
config["networks"] = [{"socket": "/var/run/socket_vmnet"}]
@@ -270,8 +272,12 @@ def _create_vm(profile, config):
_watch("create", "--name", profile["name"], config, context=profile["name"])


def _start_vm(profile):
_watch("start", profile["name"], context=profile["name"])
def _start_vm(profile, timeout=None):
args = ["start"]
if timeout:
args.append(f"--timeout={timeout}s")
args.append(profile["name"])
_watch(*args, context=profile["name"])
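
With timeout=600 and a profile named cluster, _start_vm builds the argument list ["start", "--timeout=600s", "cluster"]. Assuming _watch prepends the limactl binary, the command run on the host would be roughly:

    limactl start --timeout=600s cluster

The flag is only appended when a timeout was given, so callers that do not pass one keep limactl's own default.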


def _stop_vm(profile):
55 changes: 30 additions & 25 deletions test/drenv/providers/lima/k8s.yaml
@@ -13,6 +13,8 @@
images:
- location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img"
arch: "aarch64"
- location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img"
arch: "x86_64"

mounts: []

@@ -24,9 +26,8 @@ containerd:
forwarding cannot work for multiple clusters since the same port from
multiple clusters is mapped to the same host port.
portForwards:
- guestPortRange: [1, 65535]
guestIP: "0.0.0.0"
ignore: true
- ignore: true
proto: any

provision:

@@ -48,20 +49,20 @@ provision:
set -eux -o pipefail
command -v kubeadm >/dev/null 2>&1 && exit 0
# Install and configure prerequisites
cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
cat <<EOF | tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
modprobe overlay
modprobe br_netfilter
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
cat <<EOF | tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
# Avoid "failed to creating a fsnotify watcher: too many open files"
# errors with bigger setups.
cat <<EOF | sudo tee /etc/sysctl.d/99-fs-inotify.conf
cat <<EOF | tee /etc/sysctl.d/99-fs-inotify.conf
fs.inotify.max_user_instances = 8192
fs.inotify.max_user_watches = 65536
EOF
@@ -71,12 +72,12 @@ provision:
apt-get update
apt-get install -y apt-transport-https ca-certificates curl
VERSION=$(curl -L -s https://dl.k8s.io/release/stable.txt | sed -e 's/v//' | cut -d'.' -f1-2)
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v${VERSION}/deb/ /" | sudo tee /etc/apt/sources.list.d/kubernetes.list
curl -fsSL https://pkgs.k8s.io/core:/stable:/v${VERSION}/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v${VERSION}/deb/ /" | tee /etc/apt/sources.list.d/kubernetes.list
curl -fsSL https://pkgs.k8s.io/core:/stable:/v${VERSION}/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
apt-get update
# cri-tools
apt-get install -y cri-tools
cat <<EOF | sudo tee /etc/crictl.yaml
cat <<EOF | tee /etc/crictl.yaml
runtime-endpoint: unix:///run/containerd/containerd.sock
EOF
# cni-plugins
@@ -126,12 +127,12 @@ provision:
criSocket: unix:///run/containerd/containerd.sock
kubeletExtraArgs:
# Ramen: use specific network
node-ip: $ADVERTISE_ADDRESS
node-ip: "$ADVERTISE_ADDRESS"
# Ramen: speed up image pulls
serialize-image-pulls: "false"
# Ramen: serve specific network.
localAPIEndpoint:
advertiseAddress: $ADVERTISE_ADDRESS
advertiseAddress: "$ADVERTISE_ADDRESS"
---
kind: ClusterConfiguration
apiVersion: kubeadm.k8s.io/v1beta3
@@ -147,25 +148,35 @@ provision:
featureGates:
StatefulSetAutoDeletePVC: true
EOF
kubeadm init --config kubeadm-config.yaml

# We ignore the NumCPU preflight error when running a minimal cluster in
# github actions and for testing drenv.
# [ERROR NumCPU]: the number of available CPUs 1 is less than the required 2
kubeadm init --config kubeadm-config.yaml --ignore-preflight-errors NumCPU

# Scale down coredns like minikube
kubectl scale deploy coredns -n kube-system --replicas=1

# Installing a Pod network add-on
kubectl apply -f https://github.com/flannel-io/flannel/releases/download/v0.24.0/kube-flannel.yml

# Control plane node isolation
kubectl taint nodes --all node-role.kubernetes.io/control-plane-
mkdir -p ${HOME:-/root}/.kube && cp -f $KUBECONFIG ${HOME:-/root}/.kube/config

- mode: system
script: |
#!/bin/bash
set -eux -o pipefail
export KUBECONFIG=/etc/kubernetes/admin.conf
KUBECONFIG=/etc/kubernetes/admin.conf
mkdir -p ${HOME:-/root}/.kube
cp -f $KUBECONFIG ${HOME:-/root}/.kube/config
mkdir -p {{.Home}}/.kube
cp -f $KUBECONFIG {{.Home}}/.kube/config
chown -R {{.User}} {{.Home}}/.kube

probes:

- description: "kubeadm to be installed"
- description: "kubeadm installed"
script: |
#!/bin/bash
set -eux -o pipefail
@@ -176,7 +187,7 @@ probes:
hint: |
See "/var/log/cloud-init-output.log". in the guest

- description: "kubeadm to be completed"
- description: "kubeadm completed"
script: |
#!/bin/bash
set -eux -o pipefail
@@ -187,21 +198,15 @@ probes:
hint: |
The k8s kubeconfig file has not yet been created.

- description: "kubernetes cluster to be running"
- description: "kubernetes cluster is ready"
script: |
#!/bin/bash
set -eux -o pipefail
if ! timeout 300s bash -c "until kubectl version >/dev/null 2>&1; do sleep 3; done"; then
echo >&2 "kubernetes cluster is not up and running yet"
if ! timeout 300s bash -c "until kubectl get --raw /readyz >/dev/null 2>&1; do sleep 3; done"; then
echo >&2 "kubernetes cluster is not ready yet"
exit 1
fi

- description: "coredns deployment to be running"
script: |
#!/bin/bash
set -eux -o pipefail
kubectl wait -n kube-system --timeout=180s --for=condition=available deploy coredns

copyToHost:
- guest: "/etc/kubernetes/admin.conf"
host: "{{.Dir}}/copied-from-guest/kubeconfig.yaml"
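The last probe polls the API server's /readyz endpoint from inside the guest. The same check can be repeated from the host using the kubeconfig that copyToHost places in the instance directory; a sketch, assuming the default LIMA_HOME of ~/.lima and an instance named cluster:

    export KUBECONFIG=~/.lima/cluster/copied-from-guest/kubeconfig.yaml
    kubectl get --raw /readyz    # prints "ok" once the API server is ready
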
8 changes: 4 additions & 4 deletions test/drenv/providers/minikube.py
@@ -61,7 +61,7 @@ def exists(profile):
return False


def start(profile, verbose=False):
def start(profile, verbose=False, timeout=None):
start = time.monotonic()
logging.info("[%s] Starting minikube cluster", profile["name"])

@@ -119,7 +119,7 @@ def start(profile, verbose=False):
# TODO: Use --interactive=false when the bug is fixed.
# https://github.com/kubernetes/minikube/issues/19518

_watch("start", *args, profile=profile["name"])
_watch("start", *args, profile=profile["name"], timeout=timeout)

logging.info(
"[%s] Cluster started in %.2f seconds",
@@ -364,11 +364,11 @@ def _run(command, *args, profile=None, output=None):
return commands.run(*cmd)


def _watch(command, *args, profile=None):
def _watch(command, *args, profile=None, timeout=None):
cmd = ["minikube", command, "--profile", profile]
cmd.extend(args)
logging.debug("[%s] Running %s", profile, cmd)
for line in commands.watch(*cmd):
for line in commands.watch(*cmd, timeout=timeout):
logging.debug("[%s] %s", profile, line)


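Here the timeout is forwarded to commands.watch rather than to minikube itself, so it is presumably enforced on the host side while the output is streamed. The effect would be roughly that of wrapping the command with coreutils timeout; an illustrative sketch only, not what drenv actually runs:

    timeout 600 minikube start --profile drenv-test-cluster
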
4 changes: 3 additions & 1 deletion test/envs/vm.yaml
@@ -8,7 +8,9 @@ profiles:
- name: cluster
driver: $vm
container_runtime: containerd
memory: "3g"
cpus: 1
memory: "2g"
rosetta: false
workers:
- addons:
- name: example