From f5196bd42dea75c25f299d433e6e68463f0be757 Mon Sep 17 00:00:00 2001 From: Sergey Kanzhelev Date: Thu, 19 Sep 2024 21:02:59 +0000 Subject: [PATCH] updates to device failures for alpha2 --- .../README.md | 113 +++++++++++++++--- .../kep.yaml | 6 +- 2 files changed, 98 insertions(+), 21 deletions(-) diff --git a/keps/sig-node/4680-add-resource-health-to-pod-status/README.md b/keps/sig-node/4680-add-resource-health-to-pod-status/README.md index 1e2dc2df7a7..1135a8e6a42 100644 --- a/keps/sig-node/4680-add-resource-health-to-pod-status/README.md +++ b/keps/sig-node/4680-add-resource-health-to-pod-status/README.md @@ -44,9 +44,9 @@ Items marked with (R) are required *prior to targeting to a milestone / release*. - [X] (R) Enhancement issue in release milestone, which links to KEP dir in [kubernetes/enhancements] (not the initial KEP PR) -- [ ] (R) KEP approvers have approved the KEP status as `implementable` -- [ ] (R) Design details are appropriately documented -- [ ] (R) Test plan is in place, giving consideration to SIG Architecture and SIG Testing input (including test refactors) +- [X] (R) KEP approvers have approved the KEP status as `implementable` +- [X] (R) Design details are appropriately documented +- [X] (R) Test plan is in place, giving consideration to SIG Architecture and SIG Testing input (including test refactors) - [ ] e2e Tests for all Beta API Operations (endpoints) - [ ] (R) Ensure GA e2e tests meet requirements for [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md) - [ ] (R) Minimum Two Week Window for GA e2e tests to prove flake free @@ -120,31 +120,77 @@ The proposal is to introduce an additional field: ``` // AllocatedResourcesStatus represents the status of various resources // allocated for this Pod. -AllocatedResourcesStatus ResourcesStatus +// +featureGate=ResourceHealthStatus +// +optional +// +patchMergeKey=name +// +patchStrategy=merge +// +listType=map +// +listMapKey=name +AllocatedResourcesStatus []ResourceStatus `json:"allocatedResourcesStatus,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,14,rep,name=allocatedResourcesStatus"` +``` -type ResourcesStatus map[ResourceName]ResourceStatus +In alpha (kubernetes 1.31) the `ResourceStatus` is defined as: +``` type ResourceStatus struct { - // map of unique Resource ID to its health. - // At a minimum, ResourceID must uniquely identify the Resource - // allocated to the Pod on the Node for the lifetime of a Pod. - // See ResourceID type for it's definition. - Resources map[ResourceID] ResourceHealth - - // allow to extend this struct in future with the overall health fields or things like Device Plugin version + // Name of the resource. Must be unique within the pod and match one of the resources from the pod spec. + // +required + Name ResourceName `json:"name" protobuf:"bytes,1,opt,name=name"` + // List of unique Resources health. Each element in the list contains an unique resource ID and resource health. + // At a minimum, ResourceID must uniquely identify the Resource + // allocated to the Pod on the Node for the lifetime of a Pod. + // See ResourceID type for it's definition. + // +listType=map + // +listMapKey=resourceID + Resources []ResourceHealth `json:"resources,omitempty" protobuf:"bytes,2,rep,name=resources"` } +type ResourceHealthStatus string + +const ( + ResourceHealthStatusHealthy ResourceHealthStatus = "Healthy" + ResourceHealthStatusUnhealthy ResourceHealthStatus = "Unhealthy" + ResourceHealthStatusUnknown ResourceHealthStatus = "Unknown" +) + // ResourceID is calculated based on the source of this resource health information. // For DevicePlugin: -// deviceplugin:DeviceID, where DeviceID is from the Device structure of DevicePlugin's ListAndWatchResponse type: https://github.com/kubernetes/kubernetes/blob/eda1c780543a27c078450e2f17d674471e00f494/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1alpha/api.proto#L61-L73 +// +// deviceplugin:DeviceID, where DeviceID is from the Device structure of DevicePlugin's ListAndWatchResponse type: https://github.com/kubernetes/kubernetes/blob/eda1c780543a27c078450e2f17d674471e00f494/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1alpha/api.proto#L61-L73 +// // DevicePlugin ID is usually a constant for the lifetime of a Node and typically can be used to uniquely identify the device on the node. // For DRA: -// dra://: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster. +// +// dra://: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster. type ResourceID string +// ResourceHealth represents the health of a resource. It has the latest device health information. +// This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP. type ResourceHealth struct { - // List of conditions with the transition times - Conditions []ResourceHealthCondition + // ResourceID is the unique identifier of the resource. See the ResourceID type for more information. + ResourceID ResourceID `json:"resourceID" protobuf:"bytes,1,opt,name=resourceID"` + // Health of the resource. + // can be one of: + // - Healthy: operates as normal + // - Unhealthy: reported unhealthy. We consider this a temporary health issue + // since we do not have a mechanism today to distinguish + // temporary and permanent issues. + // - Unknown: The status cannot be determined. + // For example, Device Plugin got unregistered and hasn't been re-registered since. + // + // In future we may want to introduce the PermanentlyUnhealthy Status. + Health ResourceHealthStatus `json:"health,omitempty" protobuf:"bytes,2,name=health"` +} +``` + +In alpha2 we will enhance the structure `ResourceHealth` to include past conditions when devices were +moving to unhealthy and back. The structure will be following the structure of other conditions in Pod Status: + +``` +type ResourceHealth struct { + ... + // List of conditions with the transition times + Conditions []ResourceHealthCondition } // This condition type is replicating other condition types exposed by various status APIs @@ -233,7 +279,6 @@ We should consider introducing another field to the Status that will be a free f ### DRA implementation details Today DRA does not return the health of the device back to kubelet. The proposal is to extend the - type `NamedResourcesInstance` (from [pkg/apis/resource/namedresources.go](https://github.com/kubernetes/kubernetes/blob/790dfdbe386e4a115f41d38058c127d2dd0e6f44/pkg/apis/resource/namedresources.go#L29-L37)) to include the Health field the same way it is done in the Device Plugin as well as a device ID. @@ -245,7 +290,39 @@ The API will be limited to "prepared" devices and include the claim `name/namesp Kubelet will react on this field the same way as we propose to do it for the Device Plugin. -Specific implementation details will be added for the beta. +The new method will be added to [Node service](https://github.com/kubernetes/kubernetes/blob/04bba3c222bb2c5b1b1565713de4bf334ee7fbe4/staging/src/k8s.io/kubelet/pkg/apis/dra/v1alpha4/api.proto#L34) +alongside `NodePrepareResources` and `NodeUnprepareResources` and a `health` field for `Device` structure: + +``` +service Node { + ... + + // WatchDevicesStatus returns a stream of List of Devices + // Whenever a Device state change or a Device disappears, WatchDevicesStatus + // returns the new list. + // This method is optional and may not be implemented. + rpc WatchDevicesStatus(Empty) returns (stream DevicesStatusResponse) {} +} + +// ListAndWatch returns a stream of List of Devices +// Whenever a Device state change or a Device disappears, ListAndWatch +// returns the new list +message DevicesStatusResponse { + repeated Device devices = 1; +} + +message Device { + ... existing fields ... + // The device itself. Required. + string device_name = 3; + ... existing fields ... + + // Health of the device, can be Healthy or Unhealthy. + string Health = 5; +} +``` + +Implementation will ignore the `Unimplemented` error when calling this method. ### Test Plan diff --git a/keps/sig-node/4680-add-resource-health-to-pod-status/kep.yaml b/keps/sig-node/4680-add-resource-health-to-pod-status/kep.yaml index e0f1700fbd8..d50e31f5796 100644 --- a/keps/sig-node/4680-add-resource-health-to-pod-status/kep.yaml +++ b/keps/sig-node/4680-add-resource-health-to-pod-status/kep.yaml @@ -32,9 +32,9 @@ latest-milestone: "v1.31" # The milestone at which this feature was, or is targeted to be, at each stage. milestone: - alpha: "v1.31" - beta: "v1.32" - stable: "v1.34" + alpha: "v1.31" # 1.32 will contain an alpha2 with more features + beta: "v1.33" + stable: "v1.35" # The following PRR answers are required at alpha release # List the feature gate name and the components for which it must be enabled