Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: introduce the available probe and condition for the workload #8211

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions apis/apps/v1alpha1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1744,15 +1744,13 @@ func GetComponentTerminalPhases() []ClusterComponentPhase {
RunningClusterCompPhase,
StoppedClusterCompPhase,
FailedClusterCompPhase,
AbnormalClusterCompPhase,
}
}

// GetComponentUpRunningPhase returns component running or partially running phases.
//
// NOTE(review): this span is rendered from a unified diff whose +/- markers were
// stripped; the hunk metadata ("0 additions & 2 deletions") suggests
// AbnormalClusterCompPhase is the line being removed by this change — confirm
// against the merged file before relying on the element set below.
func GetComponentUpRunningPhase() []ClusterComponentPhase {
	return []ClusterComponentPhase{
		RunningClusterCompPhase,
		AbnormalClusterCompPhase,
		FailedClusterCompPhase,
	}
}
Expand Down
2 changes: 0 additions & 2 deletions apis/apps/v1alpha1/cluster_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,14 +180,12 @@ var _ = Describe("", func() {
RunningClusterCompPhase,
StoppedClusterCompPhase,
FailedClusterCompPhase,
AbnormalClusterCompPhase,
}))
})

It("GetComponentUpRunningPhase", func() {
Expect(GetComponentUpRunningPhase()).Should(ContainElements([]ClusterComponentPhase{
RunningClusterCompPhase,
AbnormalClusterCompPhase,
FailedClusterCompPhase,
}))
})
Expand Down
7 changes: 7 additions & 0 deletions apis/apps/v1alpha1/componentdefinition_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,13 @@ type ComponentLifecycleActions struct {
// +optional
RoleProbe *RoleProbe `json:"roleProbe,omitempty"`

// Defines the procedure that is invoked periodically to assess the availability of the component.
//
// Note: This field is immutable once it has been set.
//
// +optional
AvailableProbe *Probe `json:"availableProbe,omitempty"`

// Defines the procedure for a controlled transition of leadership from the current leader to a new replica.
// This approach aims to minimize downtime and maintain availability in systems with a leader-follower topology,
// during events such as planned maintenance or when performing stop, shutdown, restart, or upgrade operations
Expand Down
8 changes: 2 additions & 6 deletions apis/apps/v1alpha1/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,21 +291,17 @@ const (
DeletingClusterCompPhase ClusterComponentPhase = "Deleting"

// FailedClusterCompPhase indicates the component has more than zero replicas, but there are some failed pods.
// The component is not functioning.
FailedClusterCompPhase ClusterComponentPhase = "Failed"

// AbnormalClusterCompPhase indicates the component has more than zero replicas, but there are some failed pods.
// The component is functioning, but it is in a fragile state.
AbnormalClusterCompPhase ClusterComponentPhase = "Abnormal"
)

const (
	// Cluster status condition types.
	ConditionTypeHaltRecovery        = "HaltRecovery"        // ConditionTypeHaltRecovery describes the Halt recovery processing stage.
	ConditionTypeProvisioningStarted = "ProvisioningStarted" // ConditionTypeProvisioningStarted means the operator has started resource provisioning to create or change the cluster.
	ConditionTypeApplyResources      = "ApplyResources"      // ConditionTypeApplyResources means the operator has started to apply resources to create or change the cluster.
	ConditionTypeReplicasReady       = "ReplicasReady"       // ConditionTypeReplicasReady means all pods of components are ready.
	ConditionTypeReady               = "Ready"               // ConditionTypeReady means all components are running.
	// ConditionTypeAvailable indicates whether the target object is available for serving.
	ConditionTypeAvailable = "Available"
)

// Phase represents the current status of the ClusterDefinition CR.
Expand Down
5 changes: 5 additions & 0 deletions apis/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

296 changes: 296 additions & 0 deletions config/crd/bases/apps.kubeblocks.io_componentdefinitions.yaml

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions controllers/apps/transformer_cluster_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func (t *clusterStatusTransformer) reconcileClusterPhase(cluster *appsv1.Cluster
if !isPhaseIn(phase, appsv1.FailedClusterCompPhase) {
isAllComponentFailed = false
}
if isPhaseIn(phase, appsv1.AbnormalClusterCompPhase, appsv1.FailedClusterCompPhase) {
if isPhaseIn(phase, appsv1.FailedClusterCompPhase) {
hasComponentAbnormalOrFailed = true
}
}
Expand Down Expand Up @@ -217,8 +217,11 @@ func (t *clusterStatusTransformer) doAnalysisAndUpdateSynchronizer(cluster *apps
// t.replicasNotReadyCompNames[k] = struct{}{}
// t.notReadyCompNames[k] = struct{}{}
// }
switch v.Phase {
case appsv1.AbnormalClusterCompPhase, appsv1.FailedClusterCompPhase:
// switch v.Phase {
// case appsv1.FailedClusterCompPhase:
// t.notReadyCompNames[k] = struct{}{}
// }
if v.Phase == appsv1.FailedClusterCompPhase {
t.notReadyCompNames[k] = struct{}{}
}
}
Expand Down
36 changes: 3 additions & 33 deletions controllers/apps/transformer_component_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,18 +159,15 @@ func (t *componentStatusTransformer) reconcileStatus(transCtx *componentTransfor
return hasFailedPod || isScaleOutFailed || hasFailedVolumeExpansion
}()

// check if the component is available
isComponentAvailable := t.isComponentAvailable()

// check if the component is in creating phase
isInCreatingPhase := func() bool {
phase := t.comp.Status.Phase
return phase == "" || phase == appsv1.CreatingClusterCompPhase
}()

transCtx.Logger.Info(
fmt.Sprintf("status conditions, creating: %v, available: %v, its running: %v, has failure: %v, updating: %v, config synced: %v",
isInCreatingPhase, isComponentAvailable, isITSUpdatedNRunning, hasFailure, hasRunningVolumeExpansion, isAllConfigSynced))
fmt.Sprintf("status conditions, creating: %v, its running: %v, has failure: %v, updating: %v, config synced: %v",
isInCreatingPhase, isITSUpdatedNRunning, hasFailure, hasRunningVolumeExpansion, isAllConfigSynced))

switch {
case isDeleting:
Expand All @@ -185,10 +182,8 @@ func (t *componentStatusTransformer) reconcileStatus(transCtx *componentTransfor
t.setComponentStatusPhase(transCtx, appsv1.CreatingClusterCompPhase, nil, "component is Creating")
case !hasFailure:
t.setComponentStatusPhase(transCtx, appsv1.UpdatingClusterCompPhase, nil, "component is Updating")
case !isComponentAvailable:
t.setComponentStatusPhase(transCtx, appsv1.FailedClusterCompPhase, messages, "component is Failed")
default:
t.setComponentStatusPhase(transCtx, appsv1.AbnormalClusterCompPhase, nil, "component is Abnormal")
t.setComponentStatusPhase(transCtx, appsv1.FailedClusterCompPhase, messages, "component is Failed")
}

return nil
Expand All @@ -202,31 +197,6 @@ func (t *componentStatusTransformer) isWorkloadUpdated() bool {
return generation == strconv.FormatInt(t.comp.Generation, 10)
}

// isComponentAvailable reports whether the component is basically available,
// either working well or in a fragile state:
//  1. the underlying workload is at the latest generation and revision,
//  2. at least one pod is available,
//  3. and, when roles are defined, a leader role has been claimed by a member.
func (t *componentStatusTransformer) isComponentAvailable() bool {
	its := t.runningITS
	switch {
	case !t.isWorkloadUpdated():
		return false // workload spec not reconciled to the latest generation yet
	case its.Status.CurrentRevision != its.Status.UpdateRevision:
		return false // rollout still in progress
	case its.Status.AvailableReplicas <= 0:
		return false // no pod is available at all
	case len(t.synthesizeComp.Roles) == 0:
		return true // role-less component: available pods are sufficient
	}
	// Role-ful component: at least one member must hold the leader role.
	for _, member := range its.Status.MembersStatus {
		if member.ReplicaRole.IsLeader {
			return true
		}
	}
	return false
}

// isInstanceSetRunning checks if the component's underlying workload is running.
func (t *componentStatusTransformer) isInstanceSetRunning() bool {
if t.runningITS == nil {
Expand Down
16 changes: 13 additions & 3 deletions controllers/k8score/event_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,16 @@ import (
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"

"github.com/apecloud/kubeblocks/pkg/controller/component"
"github.com/apecloud/kubeblocks/pkg/controller/instanceset"
"github.com/apecloud/kubeblocks/pkg/controller/multicluster"
intctrlutil "github.com/apecloud/kubeblocks/pkg/controllerutil"
)

// eventHandler is the interface implemented by the per-source event handlers
// (e.g. instanceset.PodRoleEventHandler and component.AvailableProbeEventHandler);
// EventReconciler.Reconcile invokes every registered handler for each observed
// core Event.
type eventHandler interface {
	Handle(cli client.Client, reqCtx intctrlutil.RequestCtx, recorder record.EventRecorder, event *corev1.Event) error
}

// EventReconciler reconciles an Event object
type EventReconciler struct {
client.Client
Expand Down Expand Up @@ -65,9 +70,14 @@ func (r *EventReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "getEventError")
}

handler := &instanceset.PodRoleEventHandler{}
if err := handler.Handle(r.Client, reqCtx, r.Recorder, event); err != nil && !apierrors.IsNotFound(err) {
return intctrlutil.RequeueWithError(err, reqCtx.Log, "handleEventError")
handlers := []eventHandler{
&instanceset.PodRoleEventHandler{},
&component.AvailableProbeEventHandler{},
}
for _, handler := range handlers {
if err := handler.Handle(r.Client, reqCtx, r.Recorder, event); err != nil && !apierrors.IsNotFound(err) {
return intctrlutil.RequeueWithError(err, reqCtx.Log, "handleEventError")
}
}
return intctrlutil.Reconciled()
}
Expand Down
Loading
Loading