Skip to content

Commit

Permalink
fix node not-ready condition when leader disconnect with cloud (openy…
Browse files Browse the repository at this point in the history
  • Loading branch information
JameKeal authored Mar 31, 2023
1 parent 6996b92 commit 2ad49de
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 45 deletions.
6 changes: 6 additions & 0 deletions charts/openyurt/templates/yurt-controller-manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,12 @@ rules:
- get
- list
- watch
- apiGroups:
- ""
resources:
- nodes/status
verbs:
- update
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
Expand Down
17 changes: 2 additions & 15 deletions pkg/controller/gateway/gateway/gateway_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import (
common "github.com/openyurtio/openyurt/pkg/controller/gateway"
"github.com/openyurtio/openyurt/pkg/controller/gateway/config"
"github.com/openyurtio/openyurt/pkg/controller/gateway/utils"
nodeutil "github.com/openyurtio/openyurt/pkg/controller/util/node"
utilclient "github.com/openyurtio/openyurt/pkg/util/client"
utildiscovery "github.com/openyurtio/openyurt/pkg/util/discovery"
)
Expand Down Expand Up @@ -264,25 +265,11 @@ func (r *ReconcileGateway) electActiveEndpoint(nodeList corev1.NodeList, gw *rav

// isNodeReady reports whether `node` carries a `corev1.NodeReady` condition
// whose status is `corev1.ConditionTrue`.
// NOTE(review): the two consecutive `_, nc :=` assignments below look like the
// before/after lines of a diff rendered without +/- markers; confirm which one
// the real file keeps.
func isNodeReady(node corev1.Node) bool {
_, nc := getNodeCondition(&node.Status, corev1.NodeReady)
_, nc := nodeutil.GetNodeCondition(&node.Status, corev1.NodeReady)
// The lookup yields a nil condition (and index -1) when the condition is not
// present, so a node without a Ready condition is reported as not ready.
return nc != nil && nc.Status == corev1.ConditionTrue
}

// getNodeCondition looks up the condition of type conditionType in status.
// On a match it returns the condition's index in status.Conditions together
// with a pointer to that element; when status is nil or no condition of that
// type exists it returns -1 and nil.
func getNodeCondition(status *corev1.NodeStatus, conditionType corev1.NodeConditionType) (int, *corev1.NodeCondition) {
	if status != nil {
		for idx := range status.Conditions {
			// Take the address of the slice element so callers can modify the
			// condition in place.
			if cond := &status.Conditions[idx]; cond.Type == conditionType {
				return idx, cond
			}
		}
	}
	return -1, nil
}

// getPodCIDRs returns the pod IP ranges assigned to the node.
func (r *ReconcileGateway) getPodCIDRs(ctx context.Context, node corev1.Node) ([]string, error) {
podCIDRs := make([]string, 0)
Expand Down
17 changes: 2 additions & 15 deletions pkg/controller/nodepool/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (

"github.com/openyurtio/openyurt/pkg/apis/apps"
appsv1beta1 "github.com/openyurtio/openyurt/pkg/apis/apps/v1beta1"
nodeutil "github.com/openyurtio/openyurt/pkg/controller/util/node"
)

var timeSleep = time.Sleep
Expand Down Expand Up @@ -272,23 +273,9 @@ func containTaint(taint corev1.Taint, taints []corev1.Taint) (int, bool) {
return 0, false
}

// GetNodeCondition searches status.Conditions for the entry whose Type equals
// conditionType. It returns the entry's index and a pointer to it, or -1 and
// nil when status is nil or no such entry exists.
func GetNodeCondition(status *corev1.NodeStatus, conditionType corev1.NodeConditionType) (int, *corev1.NodeCondition) {
	if status == nil {
		return -1, nil
	}
	conditions := status.Conditions
	for pos := 0; pos < len(conditions); pos++ {
		if conditions[pos].Type != conditionType {
			continue
		}
		// conditions shares its backing array with status.Conditions, so the
		// returned pointer refers to the element stored in status.
		return pos, &conditions[pos]
	}
	return -1, nil
}

// isNodeReady reports whether `node` carries a `corev1.NodeReady` condition
// whose status is `corev1.ConditionTrue`.
// NOTE(review): the two consecutive `_, nc :=` assignments below look like the
// before/after lines of a diff rendered without +/- markers; confirm which one
// the real file keeps.
func isNodeReady(node corev1.Node) bool {
_, nc := GetNodeCondition(&node.Status, corev1.NodeReady)
_, nc := nodeutil.GetNodeCondition(&node.Status, corev1.NodeReady)
// GetNodeCondition returns a nil condition (and index -1) when the condition
// is not present, so a node without a Ready condition is reported as not ready.
return nc != nil && nc.Status == corev1.ConditionTrue
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (

"github.com/openyurtio/openyurt/pkg/controller/poolcoordinator/constant"
"github.com/openyurtio/openyurt/pkg/controller/poolcoordinator/utils"
nodeutil "github.com/openyurtio/openyurt/pkg/controller/util/node"
)

const (
Expand Down Expand Up @@ -191,6 +192,7 @@ func (c *Controller) syncHandler(key string) error {
c.ldc.Inc(nl.Name)
if c.ldc.Counter(nl.Name) >= constant.LeaseDelegationThreshold {
c.taintNodeNotSchedulable(nl.Name)
c.checkNodeReadyConditionAndSetIt(nl.Name)
c.delLdc.Reset(nl.Name)
}
} else {
Expand Down Expand Up @@ -233,3 +235,38 @@ func (c *Controller) Run(stopCh <-chan struct{}) {

<-stopCh
}

// checkNodeReadyConditionAndSetIt resets the Ready condition of the node named
// name from Unknown to True once the node's lease has been delegated.
//
// When the Ready condition is Unknown, native Kubernetes adds the
// node.kubernetes.io/unreachable taint to the node and its pods are evicted
// after 300s, which is not what lease delegation is meant to allow. So far this
// only happens when the leader of a node pool is disconnected from the cloud:
// during an election cycle its lease is not delegated to the cloud, and after
// 40s Kubernetes sets the condition to Unknown, making the node not-ready.
//
// Nodes whose Ready condition is missing or is anything other than Unknown are
// left untouched. Errors are logged and not returned.
func (c *Controller) checkNodeReadyConditionAndSetIt(name string) {
	node, err := c.nodeLister.Get(name)
	if err != nil {
		klog.Error(err)
		return
	}

	// Work on a deep copy so the object handed out by the lister is not modified.
	newNode := node.DeepCopy()
	_, currentCondition := nodeutil.GetNodeCondition(&newNode.Status, corev1.NodeReady)
	// GetNodeCondition returns a nil condition when the node has no Ready
	// condition at all; dereferencing it would panic, and there is nothing to
	// reset in that case anyway.
	if currentCondition == nil || currentCondition.Status != corev1.ConditionUnknown {
		// don't need to reset node ready condition
		return
	}

	// currentCondition points into newNode.Status.Conditions, so these
	// assignments modify the copy that is sent to the API server below.
	currentCondition.Status = corev1.ConditionTrue
	currentCondition.Reason = "NodeDelegateLease"
	currentCondition.Message = "Node disconnect with ApiServer and lease delegate."
	currentCondition.LastTransitionTime = metav1.NewTime(time.Now())

	// Persist the change through the nodes/status subresource.
	if _, err := c.client.CoreV1().Nodes().UpdateStatus(context.TODO(), newNode, metav1.UpdateOptions{}); err != nil {
		klog.Errorf("Error updating node %s: %v", newNode.Name, err)
		return
	}
	klog.Infof("successful set node %s ready condition with true", newNode.Name)
}
14 changes: 14 additions & 0 deletions pkg/controller/util/node/controller_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,17 @@ func UpdatePodCondition(status *v1.PodStatus, condition *v1.PodCondition) bool {
// Return true if one of the fields have changed.
return !isEqual
}

// GetNodeCondition finds the condition of type conditionType in status.
// It returns the index of that condition within status.Conditions and a
// pointer to it. If status is nil or holds no condition of that type, it
// returns -1 and nil.
func GetNodeCondition(status *v1.NodeStatus, conditionType v1.NodeConditionType) (int, *v1.NodeCondition) {
	if status != nil {
		for idx := range status.Conditions {
			// Point at the stored element rather than a copy so that callers
			// can update the condition in place.
			found := &status.Conditions[idx]
			if found.Type == conditionType {
				return idx, found
			}
		}
	}
	return -1, nil
}
17 changes: 2 additions & 15 deletions pkg/node-servant/preflight/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"k8s.io/klog/v2"
utilsexec "k8s.io/utils/exec"

nodeutil "github.com/openyurtio/openyurt/pkg/controller/util/node"
"github.com/openyurtio/openyurt/pkg/node-servant/components"
"github.com/openyurtio/openyurt/pkg/projectinfo"
kubeutil "github.com/openyurtio/openyurt/pkg/yurtadm/util/kubernetes"
Expand Down Expand Up @@ -430,20 +431,6 @@ func setHasItemOrAll(s sets.String, item string) bool {
}

// isNodeReady reports whether `status` contains a `v1.NodeReady` condition
// whose status is `v1.ConditionTrue`; a missing condition counts as not ready.
// NOTE(review): the two consecutive `_, condition :=` assignments below look
// like the before/after lines of a diff rendered without +/- markers; confirm
// which one the real file keeps.
func isNodeReady(status *v1.NodeStatus) bool {
_, condition := getNodeCondition(status, v1.NodeReady)
_, condition := nodeutil.GetNodeCondition(status, v1.NodeReady)
return condition != nil && condition.Status == v1.ConditionTrue
}

// getNodeCondition returns the position of the condition with type
// conditionType inside status.Conditions, plus a pointer to that condition.
// A nil status, or a status without such a condition, yields -1 and nil.
func getNodeCondition(status *v1.NodeStatus, conditionType v1.NodeConditionType) (int, *v1.NodeCondition) {
	index, match := -1, (*v1.NodeCondition)(nil)
	if status == nil {
		return index, match
	}
	for i := range status.Conditions {
		if status.Conditions[i].Type == conditionType {
			// Stop at the first condition of the requested type.
			index, match = i, &status.Conditions[i]
			break
		}
	}
	return index, match
}

0 comments on commit 2ad49de

Please sign in to comment.