From 9c826c9c208149b2ce6309792c68158e7ff14986 Mon Sep 17 00:00:00 2001
From: kubevirt-bot
Date: Tue, 15 Mar 2022 10:35:30 +0100
Subject: [PATCH] Fix cache miss on resource recreation (#1818)

The k8s client is configured with label-based selectors so that it
watches only a subset of certain resources, limiting memory consumption.
If the user explicitly removes that label from one of the watched
objects, the client cache will miss it, so the operator tries to
recreate it and then fails with AlreadyExists.
Explicitly detect this corner case and fix it by setting the missing
label through a custom client that bypasses the cache.

Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=2032837
Signed-off-by: Simone Tiraboschi
Co-authored-by: Simone Tiraboschi
---
 hack/run-tests.sh                  |  5 ++++
 pkg/controller/operands/operand.go | 47 +++++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/hack/run-tests.sh b/hack/run-tests.sh
index 8f42793cb9..c10a51cdc4 100755
--- a/hack/run-tests.sh
+++ b/hack/run-tests.sh
@@ -50,5 +50,10 @@ KUBECTL_BINARY=${KUBECTL_BINARY} ./hack/check_defaults.sh
 # check golden images
 KUBECTL_BINARY=${KUBECTL_BINARY} ./hack/check_golden_images.sh
 
+# check if HCO is able to correctly add back a label used as a label selector
+${KUBECTL_BINARY} label priorityclass kubevirt-cluster-critical app-
+sleep 10
+[[ $(${KUBECTL_BINARY} get priorityclass kubevirt-cluster-critical -o=jsonpath='{.metadata.labels.app}') == 'kubevirt-hyperconverged' ]]
+
 # Check the webhook, to see if it allow deleteing of the HyperConverged CR
 ./hack/retry.sh 10 30 "${KUBECTL_BINARY} delete hco -n ${INSTALLED_NAMESPACE} kubevirt-hyperconverged"
diff --git a/pkg/controller/operands/operand.go b/pkg/controller/operands/operand.go
index 6f96aae118..901e544d96 100644
--- a/pkg/controller/operands/operand.go
+++ b/pkg/controller/operands/operand.go
@@ -8,6 +8,8 @@ import (
 	"reflect"
 	"strings"
 
+	"sigs.k8s.io/controller-runtime/pkg/client/config"
+
 	conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
 
 	jsonpatch "github.com/evanphx/json-patch"
@@ -93,7 +95,16 @@ func (h *genericOperand) ensure(req *common.HcoRequest) *EnsureResult {
 	found := h.hooks.getEmptyCr()
 	err = h.Client.Get(req.Ctx, key, found)
 	if err != nil {
-		return h.createNewCr(req, err, cr, res)
+		result := h.createNewCr(req, err, cr, res)
+
+		if apierrors.IsAlreadyExists(result.Err) {
+			// we failed to create the object because of a stale cache,
+			// or we didn't even try because we know it is already there,
+			// yet we cannot get it due to a cache miss.
+			// Let's try updating it, bypassing the client cache mechanism
+			return h.handleExistingCrSkipCache(req, key, found, cr, res)
+		}
+		return result
 	}
 
 	return h.handleExistingCr(req, key, found, cr, res)
@@ -126,6 +137,40 @@ func (h *genericOperand) handleExistingCr(req *common.HcoRequest, key client.Obj
 	return res.SetUpgradeDone(req.ComponentUpgradeInProgress)
 }
 
+func (h *genericOperand) handleExistingCrSkipCache(req *common.HcoRequest, key client.ObjectKey, found client.Object, cr client.Object, res *EnsureResult) *EnsureResult {
+	cfg, configerr := config.GetConfig()
+	if configerr != nil {
+		req.Logger.Error(configerr, "failed creating a config for a custom client")
+		return &EnsureResult{
+			Err: configerr,
+		}
+	}
+	apiClient, acerr := client.New(cfg, client.Options{
+		Scheme: h.Scheme,
+	})
+	if acerr != nil {
+		req.Logger.Error(acerr, "failed creating a custom client to bypass the cache")
+		return &EnsureResult{
+			Err: acerr,
+		}
+	}
+	geterr := apiClient.Get(req.Ctx, key, found)
+	if geterr != nil {
+		req.Logger.Error(geterr, "failed trying to get the object bypassing the cache")
+		return &EnsureResult{
+			Err: geterr,
+		}
+	}
+	originalClient := h.Client
+	// this is not exactly thread safe,
+	// but the handler for a single CR is not
+	// supposed to be called twice in parallel
+	h.Client = apiClient
+	existingcrresult := h.handleExistingCr(req, key, found, cr, res)
+	h.Client = originalClient
+	return existingcrresult
+}
+
 func (h *genericOperand) completeEnsureOperands(req *common.HcoRequest, opr hcoOperandHooks, found client.Object, res *EnsureResult) *EnsureResult {
 	// Handle KubeVirt resource conditions
 	isReady := handleComponentConditions(req, h.crType, opr.getConditions(found))
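For reference, a minimal standalone sketch (not part of the patch) of the same cache-bypass pattern: it builds a direct controller-runtime client with config.GetConfig() and client.New(), reads the kubevirt-cluster-critical PriorityClass straight from the API server, and re-adds the app=kubevirt-hyperconverged selector label when it is missing, roughly the situation run-tests.sh provokes above. The main package, the scheme wiring and the label-restoring logic are illustrative assumptions, not operator code.

package main

import (
	"context"
	"fmt"

	schedulingv1 "k8s.io/api/scheduling/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/config"
)

func main() {
	// Build a rest.Config the usual way (KUBECONFIG, in-cluster config, ...).
	cfg, err := config.GetConfig()
	if err != nil {
		panic(err)
	}

	// Register only the types this sketch needs.
	scheme := runtime.NewScheme()
	if err := schedulingv1.AddToScheme(scheme); err != nil {
		panic(err)
	}

	// client.New returns a direct, uncached client: every Get/Update goes to the
	// API server, so a label-selector-filtered cache cannot hide the object.
	apiClient, err := client.New(cfg, client.Options{Scheme: scheme})
	if err != nil {
		panic(err)
	}

	ctx := context.Background()
	pc := &schedulingv1.PriorityClass{}
	if err := apiClient.Get(ctx, client.ObjectKey{Name: "kubevirt-cluster-critical"}, pc); err != nil {
		panic(err)
	}

	// Re-add the label used as the cache selector if someone removed it
	// (what run-tests.sh does with "kubectl label ... app-").
	if pc.Labels == nil {
		pc.Labels = map[string]string{}
	}
	if pc.Labels["app"] != "kubevirt-hyperconverged" {
		pc.Labels["app"] = "kubevirt-hyperconverged"
		if err := apiClient.Update(ctx, pc); err != nil {
			panic(err)
		}
	}
	fmt.Println("selector label ensured on kubevirt-cluster-critical")
}

Because a client built with client.New talks to the API server on every call, the patch only uses it for this corner case; the cached client stays the default path in the operator.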