From 6af4fb91a865bbfa03528e7f87620989703e62ea Mon Sep 17 00:00:00 2001 From: Praveen Kumar Date: Thu, 18 Jul 2024 17:51:10 +0530 Subject: [PATCH] Add WaitForKubeAPIServerCertRenewal helper During 4.17.0-ec.1 testing we found out that it is possible that the kubelet certs are not expired but the aggregator one is, and due to that our apiserver is not able to get the node info and fails with the following error. ``` INFO Verifying validity of the kubelet certificates... DEBU Running SSH command: date --date="$(sudo openssl x509 -in /var/lib/kubelet/pki/kubelet-client-current.pem -noout -enddate | cut -d= -f 2)" --iso-8601=seconds DEBU SSH command results: err: , output: 2025-07-05T03:48:39+00:00 DEBU Running SSH command: date --date="$(sudo openssl x509 -in /var/lib/kubelet/pki/kubelet-server-current.pem -noout -enddate | cut -d= -f 2)" --iso-8601=seconds DEBU SSH command results: err: , output: 2025-07-05T03:49:24+00:00 DEBU Running SSH command: date --date="$(sudo openssl x509 -in /etc/kubernetes/static-pod-resources/kube-apiserver-certs/configmaps/aggregator-client-ca/ca-bundle.crt -noout -enddate | cut -d= -f 2)" --iso-8601=seconds DEBU SSH command results: err: , output: 2024-07-11T05:50:37+00:00 DEBU Certs have expired, they were valid till: 11 Jul 24 05:50 +0000 DEBU Running SSH command: timeout 5s oc get nodes --context admin --cluster crc --kubeconfig /opt/kubeconfig DEBU SSH command results: err: Process exited with status 1, output: DEBU E0722 10:21:40.601631 10967 memcache.go:265] couldn't get current server API group list: Get "https://api.crc.testing:6443/api?timeout=32s": tls: failed to verify certificate: x509: certificate has expired or is not yet valid: current time 2024-07-22T10:21:40Z is after 2024-07-10T05:27:06Z E0722 10:21:40.604575 10967 memcache.go:265] couldn't get current server API group list: Get "https://api.crc.testing:6443/api?timeout=32s": tls: failed to verify certificate: x509: certificate has expired or is not yet valid: current time 
2024-07-22T10:21:40Z is after 2024-07-10T05:27:06Z ``` This PR makes sure we also wait for the API server related certs to be recovered before checking that the apiserver is responding. --- pkg/crc/cluster/cert_renewal.go | 6 +++++- pkg/crc/machine/start.go | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/crc/cluster/cert_renewal.go b/pkg/crc/cluster/cert_renewal.go index 1766450522..f15eb4ec1d 100644 --- a/pkg/crc/cluster/cert_renewal.go +++ b/pkg/crc/cluster/cert_renewal.go @@ -43,7 +43,7 @@ func approvePendingCSRs(ctx context.Context, ocConfig oc.Config, expectedSignerN }, time.Second*5) } -func ApproveCSRAndWaitForCertsRenewal(ctx context.Context, sshRunner *ssh.Runner, ocConfig oc.Config, client, server bool) error { +func ApproveCSRAndWaitForCertsRenewal(ctx context.Context, sshRunner *ssh.Runner, ocConfig oc.Config, client, server, aggregratorClient bool) error { const ( kubeletClientSignerName = "kubernetes.io/kube-apiserver-client-kubelet" kubeletServingSignerName = "kubernetes.io/kubelet-serving" @@ -77,6 +77,10 @@ func ApproveCSRAndWaitForCertsRenewal(ctx context.Context, sshRunner *ssh.Runner logging.Info("Kubelet serving certificate has expired, waiting for automatic renewal... [will take up to 5 minutes]") return crcerrors.Retry(ctx, 5*time.Minute, waitForCertRenewal(sshRunner, KubeletServerCert), time.Second*5) } + if aggregratorClient { + logging.Info("Kube API server certificate has expired, waiting for automatic renewal... 
[will take up to 8 minutes]") + return crcerrors.Retry(ctx, 8*time.Minute, waitForCertRenewal(sshRunner, AggregatorClientCert), time.Second*5) + } return nil } diff --git a/pkg/crc/machine/start.go b/pkg/crc/machine/start.go index 8d0295d081..e0cb87deb8 100644 --- a/pkg/crc/machine/start.go +++ b/pkg/crc/machine/start.go @@ -552,7 +552,7 @@ func (client *client) Start(ctx context.Context, startConfig types.StartConfig) ocConfig := oc.UseOCWithSSH(sshRunner) - if err := cluster.ApproveCSRAndWaitForCertsRenewal(ctx, sshRunner, ocConfig, certsExpired[cluster.KubeletClientCert], certsExpired[cluster.KubeletServerCert]); err != nil { + if err := cluster.ApproveCSRAndWaitForCertsRenewal(ctx, sshRunner, ocConfig, certsExpired[cluster.KubeletClientCert], certsExpired[cluster.KubeletServerCert], certsExpired[cluster.AggregatorClientCert]); err != nil { logBundleDate(vm.bundle) return nil, errors.Wrap(err, "Failed to renew TLS certificates: please check if a newer CRC release is available") }