From 43677caf9a7e8eb186c58220821107d02b523f3e Mon Sep 17 00:00:00 2001 From: Simon Murray Date: Tue, 24 Sep 2024 10:20:56 +0100 Subject: [PATCH] Refactor GPU Operator Support This reduces the Kubernetes API interface down to "please install autoscaler/GPU or not". The actual logic is delegated to the controller proper that's able to make the intelligent descision based on presence of a workload pool that requires the feature, and specifics of that flavor. While in the area, I've removed the CAPO specific hacks from the K8S API and pushed them down into the `clusteropenstack` provisioner where they belong, likewise scheduling hints for the autoscaler. --- charts/kubernetes/Chart.yaml | 4 +- ....org_clustermanagerapplicationbundles.yaml | 2 +- .../unikorn-cloud.org_clustermanagers.yaml | 2 +- ...g_kubernetesclusterapplicationbundles.yaml | 2 +- .../unikorn-cloud.org_kubernetesclusters.yaml | 89 +++--------- charts/kubernetes/templates/applications.yaml | 26 ++++ .../kubernetesclusterapplicationbundles.yaml | 5 + pkg/apis/unikorn/v1alpha1/helpers.go | 6 +- pkg/apis/unikorn/v1alpha1/types.go | 59 ++------ .../unikorn/v1alpha1/zz_generated.deepcopy.go | 107 +-------------- .../amdgpuoperator/provisioner.go | 35 +++++ .../clusteropenstack/provisioner.go | 127 +++++++++++++----- .../managers/cluster/provisioner.go | 89 +++++++++++- pkg/provisioners/types.go | 6 + pkg/server/handler/cluster/conversion.go | 92 +++---------- 15 files changed, 307 insertions(+), 344 deletions(-) create mode 100644 pkg/provisioners/helmapplications/amdgpuoperator/provisioner.go diff --git a/charts/kubernetes/Chart.yaml b/charts/kubernetes/Chart.yaml index d8acb4b5..e7fb4530 100644 --- a/charts/kubernetes/Chart.yaml +++ b/charts/kubernetes/Chart.yaml @@ -4,8 +4,8 @@ description: A Helm chart for deploying Unikorn Kubernetes Service type: application -version: v0.2.37 -appVersion: v0.2.37 +version: v0.2.38 +appVersion: v0.2.38 icon: 
https://raw.githubusercontent.com/unikorn-cloud/assets/main/images/logos/dark-on-light/icon.png diff --git a/charts/kubernetes/crds/unikorn-cloud.org_clustermanagerapplicationbundles.yaml b/charts/kubernetes/crds/unikorn-cloud.org_clustermanagerapplicationbundles.yaml index 0c299d64..75acd913 100644 --- a/charts/kubernetes/crds/unikorn-cloud.org_clustermanagerapplicationbundles.yaml +++ b/charts/kubernetes/crds/unikorn-cloud.org_clustermanagerapplicationbundles.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.16.3 name: clustermanagerapplicationbundles.unikorn-cloud.org spec: group: unikorn-cloud.org diff --git a/charts/kubernetes/crds/unikorn-cloud.org_clustermanagers.yaml b/charts/kubernetes/crds/unikorn-cloud.org_clustermanagers.yaml index d5bb32fd..2780ee8a 100644 --- a/charts/kubernetes/crds/unikorn-cloud.org_clustermanagers.yaml +++ b/charts/kubernetes/crds/unikorn-cloud.org_clustermanagers.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.16.3 name: clustermanagers.unikorn-cloud.org spec: group: unikorn-cloud.org diff --git a/charts/kubernetes/crds/unikorn-cloud.org_kubernetesclusterapplicationbundles.yaml b/charts/kubernetes/crds/unikorn-cloud.org_kubernetesclusterapplicationbundles.yaml index 67364b62..ab23c778 100644 --- a/charts/kubernetes/crds/unikorn-cloud.org_kubernetesclusterapplicationbundles.yaml +++ b/charts/kubernetes/crds/unikorn-cloud.org_kubernetesclusterapplicationbundles.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.16.3 name: kubernetesclusterapplicationbundles.unikorn-cloud.org 
spec: group: unikorn-cloud.org diff --git a/charts/kubernetes/crds/unikorn-cloud.org_kubernetesclusters.yaml b/charts/kubernetes/crds/unikorn-cloud.org_kubernetesclusters.yaml index 7fa1ab93..819176d7 100644 --- a/charts/kubernetes/crds/unikorn-cloud.org_kubernetesclusters.yaml +++ b/charts/kubernetes/crds/unikorn-cloud.org_kubernetesclusters.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.16.3 name: kubernetesclusters.unikorn-cloud.org spec: group: unikorn-cloud.org @@ -278,18 +278,14 @@ spec: description: |- DiskSize is the persistent root disk size to deploy with. This overrides the default ephemeral disk size defined in the flavor. + This is irrelevant for baremetal machine flavors. pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true flavorId: - description: Flavor is the OpenStack Nova flavor to deploy with. - type: string - flavorName: - description: |- - FlavorName is the name of the flavor. - CAPO is broken and doesn't accept an ID, so we need to use this. + description: Flavor is the regions service flavor to deploy with. type: string imageId: - description: Image is the OpenStack Glance image to deploy with. + description: Image is the region service image to deploy with. type: string replicas: default: 3 @@ -298,7 +294,6 @@ spec: type: integer required: - flavorId - - flavorName - imageId type: object features: @@ -307,23 +302,22 @@ spec: properties: autoscaling: description: |- - Autoscaling, if true, provisions a cluster autoscaler - and allows workload pools to specify autoscaling configuration. + Autoscaling enables the provision of a cluster autoscaler. + This is only installed if a workload pool has autoscaling enabled. 
type: boolean - nvidiaOperator: + gpuOperator: description: |- - NvidiaOperator, if false do not install the Nvidia Operator, otherwise - install if GPU flavors are detected + GPUOperator enables the provision of a GPU operator. + This is only installed if a workload pool has a flavor that defines + a valid GPU specification and vendor. type: boolean type: object network: description: Network defines the Kubernetes networking. properties: dnsNameservers: - description: |- - DNSNameservers sets the DNS nameservers for pods. - At present due to some technical challenges, this must contain - only one DNS server. + description: DNSNameservers sets the DNS nameservers for hosts + on the network. items: pattern: ^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])$ type: string @@ -331,7 +325,9 @@ spec: type: array x-kubernetes-list-type: set nodeNetwork: - description: NodeNetwork is the IPv4 prefix for the node network. + description: |- + NodeNetwork is the IPv4 prefix for the node network. + This is tyically used to populate a physical network address range. pattern: ^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])\/(?:3[0-2]|[1-2]?[0-9])$ type: string podNetwork: @@ -389,50 +385,6 @@ spec: this pool can be scaled down to. minimum: 0 type: integer - scheduler: - description: |- - Scheduler is required when scale-from-zero support is requested - i.e. MimumumReplicas is 0. This provides scheduling hints to - the autoscaler as it cannot derive CPU/memory constraints from - the machine flavor. - properties: - cpu: - description: CPU defines the number of CPUs for - the pool flavor. - minimum: 1 - type: integer - gpu: - description: |- - GPU needs to be set when the pool contains GPU resources so - the autoscaler can make informed choices when scaling up. - properties: - count: - description: Count is the number of GPUs for - the pool flavor. 
- minimum: 1 - type: integer - type: - description: Type is the type of GPU. - enum: - - nvidia.com/gpu - type: string - required: - - count - - type - type: object - memory: - anyOf: - - type: integer - - type: string - description: |- - Memory defines the amount of memory for the pool flavor. - Internally this will be rounded down to the nearest Gi. - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - required: - - cpu - - memory - type: object required: - maximumReplicas - minimumReplicas @@ -447,6 +399,7 @@ spec: description: |- DiskSize is the persistent root disk size to deploy with. This overrides the default ephemeral disk size defined in the flavor. + This is irrelevant for baremetal machine flavors. pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true files: @@ -471,16 +424,11 @@ spec: type: object type: array flavorId: - description: Flavor is the OpenStack Nova flavor to deploy + description: Flavor is the regions service flavor to deploy with. type: string - flavorName: - description: |- - FlavorName is the name of the flavor. - CAPO is broken and doesn't accept an ID, so we need to use this. - type: string imageId: - description: Image is the OpenStack Glance image to deploy + description: Image is the region service image to deploy with. 
type: string labels: @@ -500,7 +448,6 @@ spec: type: integer required: - flavorId - - flavorName - imageId - name type: object diff --git a/charts/kubernetes/templates/applications.yaml b/charts/kubernetes/templates/applications.yaml index 45fed6a7..41a2e646 100644 --- a/charts/kubernetes/templates/applications.yaml +++ b/charts/kubernetes/templates/applications.yaml @@ -303,6 +303,32 @@ spec: --- apiVersion: unikorn-cloud.org/v1alpha1 kind: HelmApplication +metadata: + name: {{ include "resource.id" "amd-gpu-operator" }} + labels: + {{- include "unikorn.labels" . | nindent 4 }} + unikorn-cloud.org/name: amd-gpu-operator + annotations: + unikorn-cloud.org/description: |- + Provides AMD GPU support in Kubernetes clusters. +spec: + documentation: https://github.com/ROCm/gpu-operator + license: Apache-2.0 License + icon: todo + tags: + - infrastructure + versions: + - version: 0.13.0 + repo: https://rocm.github.io/k8s-device-plugin + chart: amd-gpu + parameters: + - name: nfd.enabled + value: 'true' + - name: labeller.enabled + value: 'true' +--- +apiVersion: unikorn-cloud.org/v1alpha1 +kind: HelmApplication metadata: name: {{ include "resource.id" "cluster-autoscaler" }} labels: diff --git a/charts/kubernetes/templates/kubernetesclusterapplicationbundles.yaml b/charts/kubernetes/templates/kubernetesclusterapplicationbundles.yaml index 446df8d2..32c8939b 100644 --- a/charts/kubernetes/templates/kubernetesclusterapplicationbundles.yaml +++ b/charts/kubernetes/templates/kubernetesclusterapplicationbundles.yaml @@ -37,6 +37,11 @@ spec: kind: HelmApplication name: {{ include "resource.id" "nvidia-gpu-operator" }} version: v23.9.1 + - name: amd-gpu-operator + reference: + kind: HelmApplication + name: {{ include "resource.id" "amd-gpu-operator" }} + version: 0.13.0 - name: cluster-autoscaler reference: kind: HelmApplication diff --git a/pkg/apis/unikorn/v1alpha1/helpers.go b/pkg/apis/unikorn/v1alpha1/helpers.go index 46d97840..d82b7e61 100644 --- 
a/pkg/apis/unikorn/v1alpha1/helpers.go +++ b/pkg/apis/unikorn/v1alpha1/helpers.go @@ -142,9 +142,9 @@ func (c *KubernetesCluster) AutoscalingEnabled() bool { return c.Spec.Features != nil && c.Spec.Features.Autoscaling != nil && *c.Spec.Features.Autoscaling } -// NvidiaOperatorEnabled indicates whether to install the Nvidia GPU operator. -func (c *KubernetesCluster) NvidiaOperatorEnabled() bool { - return c.Spec.Features != nil && c.Spec.Features.NvidiaOperator != nil && *c.Spec.Features.NvidiaOperator +// GPUOperatorEnabled indicates whether to install the GPU operator. +func (c *KubernetesCluster) GPUOperatorEnabled() bool { + return c.Spec.Features != nil && c.Spec.Features.GPUOperator != nil && *c.Spec.Features.GPUOperator } func CompareClusterManager(a, b ClusterManager) int { diff --git a/pkg/apis/unikorn/v1alpha1/types.go b/pkg/apis/unikorn/v1alpha1/types.go index 4dcb1406..ef88885f 100644 --- a/pkg/apis/unikorn/v1alpha1/types.go +++ b/pkg/apis/unikorn/v1alpha1/types.go @@ -20,7 +20,6 @@ package v1alpha1 import ( unikornv1core "github.com/unikorn-cloud/core/pkg/apis/unikorn/v1alpha1" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -71,15 +70,6 @@ type ClusterManagerStatus struct { Conditions []unikornv1core.Condition `json:"conditions,omitempty"` } -// MachineGeneric contains common things across all pool types, including -// Kubernetes cluster manager nodes and workload pools. -type MachineGeneric struct { - unikornv1core.MachineGeneric `json:",inline"` - // FlavorName is the name of the flavor. - // CAPO is broken and doesn't accept an ID, so we need to use this. - FlavorName *string `json:"flavorName"` -} - // File is a file that can be deployed to a cluster node on creation. type File struct { // Path is the absolute path to create the file in. @@ -99,42 +89,12 @@ type MachineGenericAutoscaling struct { // this pool can be scaled up to. 
// +kubebuilder:validation:Minimum=1 MaximumReplicas *int `json:"maximumReplicas"` - // Scheduler is required when scale-from-zero support is requested - // i.e. MimumumReplicas is 0. This provides scheduling hints to - // the autoscaler as it cannot derive CPU/memory constraints from - // the machine flavor. - Scheduler *MachineGenericAutoscalingScheduler `json:"scheduler,omitempty"` -} - -// MachineGenericAutoscalingScheduler defines generic autoscaling scheduling -// constraints. -type MachineGenericAutoscalingScheduler struct { - // CPU defines the number of CPUs for the pool flavor. - // +kubebuilder:validation:Minimum=1 - CPU *int `json:"cpu"` - // Memory defines the amount of memory for the pool flavor. - // Internally this will be rounded down to the nearest Gi. - Memory *resource.Quantity `json:"memory"` - // GPU needs to be set when the pool contains GPU resources so - // the autoscaler can make informed choices when scaling up. - GPU *MachineGenericAutoscalingSchedulerGPU `json:"gpu,omitempty"` -} - -// MachineGenericAutoscalingSchedulerGPU defines generic autoscaling -// scheduling constraints for GPUs. -type MachineGenericAutoscalingSchedulerGPU struct { - // Type is the type of GPU. - // +kubebuilder:validation:Enum=nvidia.com/gpu - Type *string `json:"type"` - // Count is the number of GPUs for the pool flavor. - // +kubebuilder:validation:Minimum=1 - Count *int `json:"count"` } // KubernetesWorkloadPoolSpec defines the requested machine pool // state. type KubernetesWorkloadPoolSpec struct { - MachineGeneric `json:",inline"` + unikornv1core.MachineGeneric `json:",inline"` // Name is the name of the pool. Name string `json:"name"` // Labels is the set of node labels to apply to the pool on @@ -193,7 +153,7 @@ type KubernetesClusterSpec struct { // API defines Kubernetes API specific options. API *KubernetesClusterAPISpec `json:"api,omitempty"` // ControlPlane defines the cluster manager topology. 
- ControlPlane *KubernetesClusterControlPlaneSpec `json:"controlPlane"` + ControlPlane *unikornv1core.MachineGeneric `json:"controlPlane"` // WorkloadPools defines the workload cluster topology. WorkloadPools *KubernetesClusterWorkloadPoolsSpec `json:"workloadPools"` // Features defines add-on features that can be enabled for the cluster. @@ -227,16 +187,13 @@ type KubernetesClusterNetworkSpec struct { } type KubernetesClusterFeaturesSpec struct { - // Autoscaling, if true, provisions a cluster autoscaler - // and allows workload pools to specify autoscaling configuration. + // Autoscaling enables the provision of a cluster autoscaler. + // This is only installed if a workload pool has autoscaling enabled. Autoscaling *bool `json:"autoscaling,omitempty"` - // NvidiaOperator, if false do not install the Nvidia Operator, otherwise - // install if GPU flavors are detected - NvidiaOperator *bool `json:"nvidiaOperator,omitempty"` -} - -type KubernetesClusterControlPlaneSpec struct { - MachineGeneric `json:",inline"` + // GPUOperator enables the provision of a GPU operator. + // This is only installed if a workload pool has a flavor that defines + // a valid GPU specification and vendor. + GPUOperator *bool `json:"gpuOperator,omitempty"` } type KubernetesClusterWorkloadPoolsPoolSpec struct { diff --git a/pkg/apis/unikorn/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/unikorn/v1alpha1/zz_generated.deepcopy.go index d40a9fb1..a71f199b 100644 --- a/pkg/apis/unikorn/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/unikorn/v1alpha1/zz_generated.deepcopy.go @@ -507,23 +507,6 @@ func (in *KubernetesClusterApplicationBundleList) DeepCopyObject() runtime.Objec return nil } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *KubernetesClusterControlPlaneSpec) DeepCopyInto(out *KubernetesClusterControlPlaneSpec) { - *out = *in - in.MachineGeneric.DeepCopyInto(&out.MachineGeneric) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubernetesClusterControlPlaneSpec. -func (in *KubernetesClusterControlPlaneSpec) DeepCopy() *KubernetesClusterControlPlaneSpec { - if in == nil { - return nil - } - out := new(KubernetesClusterControlPlaneSpec) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *KubernetesClusterFeaturesSpec) DeepCopyInto(out *KubernetesClusterFeaturesSpec) { *out = *in @@ -532,8 +515,8 @@ func (in *KubernetesClusterFeaturesSpec) DeepCopyInto(out *KubernetesClusterFeat *out = new(bool) **out = **in } - if in.NvidiaOperator != nil { - in, out := &in.NvidiaOperator, &out.NvidiaOperator + if in.GPUOperator != nil { + in, out := &in.GPUOperator, &out.GPUOperator *out = new(bool) **out = **in } @@ -628,7 +611,7 @@ func (in *KubernetesClusterSpec) DeepCopyInto(out *KubernetesClusterSpec) { } if in.ControlPlane != nil { in, out := &in.ControlPlane, &out.ControlPlane - *out = new(KubernetesClusterControlPlaneSpec) + *out = new(unikornv1alpha1.MachineGeneric) (*in).DeepCopyInto(*out) } if in.WorkloadPools != nil { @@ -763,28 +746,6 @@ func (in *KubernetesWorkloadPoolSpec) DeepCopy() *KubernetesWorkloadPoolSpec { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineGeneric) DeepCopyInto(out *MachineGeneric) { - *out = *in - in.MachineGeneric.DeepCopyInto(&out.MachineGeneric) - if in.FlavorName != nil { - in, out := &in.FlavorName, &out.FlavorName - *out = new(string) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineGeneric. 
-func (in *MachineGeneric) DeepCopy() *MachineGeneric { - if in == nil { - return nil - } - out := new(MachineGeneric) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MachineGenericAutoscaling) DeepCopyInto(out *MachineGenericAutoscaling) { *out = *in @@ -798,11 +759,6 @@ func (in *MachineGenericAutoscaling) DeepCopyInto(out *MachineGenericAutoscaling *out = new(int) **out = **in } - if in.Scheduler != nil { - in, out := &in.Scheduler, &out.Scheduler - *out = new(MachineGenericAutoscalingScheduler) - (*in).DeepCopyInto(*out) - } return } @@ -815,60 +771,3 @@ func (in *MachineGenericAutoscaling) DeepCopy() *MachineGenericAutoscaling { in.DeepCopyInto(out) return out } - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineGenericAutoscalingScheduler) DeepCopyInto(out *MachineGenericAutoscalingScheduler) { - *out = *in - if in.CPU != nil { - in, out := &in.CPU, &out.CPU - *out = new(int) - **out = **in - } - if in.Memory != nil { - in, out := &in.Memory, &out.Memory - x := (*in).DeepCopy() - *out = &x - } - if in.GPU != nil { - in, out := &in.GPU, &out.GPU - *out = new(MachineGenericAutoscalingSchedulerGPU) - (*in).DeepCopyInto(*out) - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineGenericAutoscalingScheduler. -func (in *MachineGenericAutoscalingScheduler) DeepCopy() *MachineGenericAutoscalingScheduler { - if in == nil { - return nil - } - out := new(MachineGenericAutoscalingScheduler) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineGenericAutoscalingSchedulerGPU) DeepCopyInto(out *MachineGenericAutoscalingSchedulerGPU) { - *out = *in - if in.Type != nil { - in, out := &in.Type, &out.Type - *out = new(string) - **out = **in - } - if in.Count != nil { - in, out := &in.Count, &out.Count - *out = new(int) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineGenericAutoscalingSchedulerGPU. -func (in *MachineGenericAutoscalingSchedulerGPU) DeepCopy() *MachineGenericAutoscalingSchedulerGPU { - if in == nil { - return nil - } - out := new(MachineGenericAutoscalingSchedulerGPU) - in.DeepCopyInto(out) - return out -} diff --git a/pkg/provisioners/helmapplications/amdgpuoperator/provisioner.go b/pkg/provisioners/helmapplications/amdgpuoperator/provisioner.go new file mode 100644 index 00000000..b36f2b07 --- /dev/null +++ b/pkg/provisioners/helmapplications/amdgpuoperator/provisioner.go @@ -0,0 +1,35 @@ +/* +Copyright 2024 the Unikorn Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package amdgpuoperator + +import ( + "github.com/unikorn-cloud/core/pkg/provisioners/application" +) + +const ( + // defaultNamespace is where to install the component. + defaultNamespace = "kube-system" +) + +// New returns a new initialized provisioner object. 
+func New(getApplication application.GetterFunc) *application.Provisioner { + p := &Provisioner{} + + return application.New(getApplication).WithGenerator(p).InNamespace(defaultNamespace) +} + +type Provisioner struct{} diff --git a/pkg/provisioners/helmapplications/clusteropenstack/provisioner.go b/pkg/provisioners/helmapplications/clusteropenstack/provisioner.go index c6b36e6b..430a0a7e 100644 --- a/pkg/provisioners/helmapplications/clusteropenstack/provisioner.go +++ b/pkg/provisioners/helmapplications/clusteropenstack/provisioner.go @@ -20,12 +20,20 @@ package clusteropenstack import ( "context" "encoding/base64" + "errors" "fmt" + "slices" + unikornv1core "github.com/unikorn-cloud/core/pkg/apis/unikorn/v1alpha1" "github.com/unikorn-cloud/core/pkg/constants" "github.com/unikorn-cloud/core/pkg/provisioners/application" unikornv1 "github.com/unikorn-cloud/kubernetes/pkg/apis/unikorn/v1alpha1" kubernetesprovisioners "github.com/unikorn-cloud/kubernetes/pkg/provisioners" + regionapi "github.com/unikorn-cloud/region/pkg/openapi" +) + +var ( + ErrReference = errors.New("reference error") ) // Provisioner encapsulates control plane provisioning. @@ -51,12 +59,33 @@ var _ application.ReleaseNamer = &Provisioner{} var _ application.ValuesGenerator = &Provisioner{} var _ application.PostProvisionHook = &Provisioner{} +// getFlavor looks up a flavor. +func (p *Provisioner) getFlavor(flavorID string) (*regionapi.Flavor, error) { + callback := func(flavor regionapi.Flavor) bool { + return flavor.Metadata.Id == flavorID + } + + index := slices.IndexFunc(p.options.Flavors, callback) + if index < 0 { + return nil, fmt.Errorf("%w: unable to find requested flavor %s", ErrReference, flavorID) + } + + return &p.options.Flavors[index], nil +} + // generateMachineHelmValues translates the API's idea of a machine into what's // expected by the underlying Helm chart. 
-func (p *Provisioner) generateMachineHelmValues(machine *unikornv1.MachineGeneric, controlPlane bool) map[string]interface{} { +func (p *Provisioner) generateMachineHelmValues(machine *unikornv1core.MachineGeneric, controlPlane bool) (map[string]interface{}, error) { + flavor, err := p.getFlavor(*machine.FlavorID) + if err != nil { + return nil, err + } + + // Translate from flavor ID to flavor name, because CAPO cannot accept one... + // https://github.com/kubernetes-sigs/cluster-api-provider-openstack/pull/2148 object := map[string]interface{}{ "imageID": *machine.ImageID, - "flavorID": *machine.FlavorName, + "flavorID": flavor.Metadata.Name, } if controlPlane && p.options.ServerGroupID != nil { @@ -71,24 +100,34 @@ func (p *Provisioner) generateMachineHelmValues(machine *unikornv1.MachineGeneri object["disk"] = disk } - return object + return object, nil } // generateWorkloadPoolHelmValues translates the API's idea of a workload pool into // what's expected by the underlying Helm chart. 
-func (p *Provisioner) generateWorkloadPoolHelmValues(cluster *unikornv1.KubernetesCluster) map[string]interface{} { +func (p *Provisioner) generateWorkloadPoolHelmValues(cluster *unikornv1.KubernetesCluster) (map[string]interface{}, error) { workloadPools := map[string]interface{}{} for i := range cluster.Spec.WorkloadPools.Pools { workloadPool := &cluster.Spec.WorkloadPools.Pools[i] + machine, err := p.generateMachineHelmValues(&workloadPool.MachineGeneric, false) + if err != nil { + return nil, err + } + object := map[string]interface{}{ "replicas": *workloadPool.Replicas, - "machine": p.generateMachineHelmValues(&workloadPool.MachineGeneric, false), + "machine": machine, } if cluster.AutoscalingEnabled() && workloadPool.Autoscaling != nil { - object["autoscaling"] = generateWorkloadPoolSchedulerHelmValues(workloadPool) + autoscaling, err := p.generateWorkloadPoolSchedulerHelmValues(workloadPool) + if err != nil { + return nil, err + } + + object["autoscaling"] = autoscaling } if len(workloadPool.Labels) != 0 { @@ -117,43 +156,57 @@ func (p *Provisioner) generateWorkloadPoolHelmValues(cluster *unikornv1.Kubernet workloadPools[workloadPool.Name] = object } - return workloadPools + return workloadPools, nil } // generateWorkloadPoolSchedulerHelmValues translates from Kubernetes API scheduling // parameters into ones acceptable by Helm. -func generateWorkloadPoolSchedulerHelmValues(p *unikornv1.KubernetesClusterWorkloadPoolsPoolSpec) map[string]interface{} { - // When enabled, scaling limits are required. - values := map[string]interface{}{ - "limits": map[string]interface{}{ - "minReplicas": *p.Autoscaling.MinimumReplicas, - "maxReplicas": *p.Autoscaling.MaximumReplicas, - }, - } - +func (p *Provisioner) generateWorkloadPoolSchedulerHelmValues(pool *unikornv1.KubernetesClusterWorkloadPoolsPoolSpec) (map[string]interface{}, error) { // When scaler from zero is enabled, you need to provide CPU and memory hints, // the autoscaler cannot guess the flavor attributes. 
- if p.Autoscaling.Scheduler != nil { - scheduling := map[string]interface{}{ - "cpu": *p.Autoscaling.Scheduler.CPU, - "memory": fmt.Sprintf("%dG", p.Autoscaling.Scheduler.Memory.Value()>>30), - } + flavor, err := p.getFlavor(*pool.FlavorID) + if err != nil { + return nil, err + } - // If the flavor has a GPU, then we also need to inform the autoscaler - // about the GPU scheduling information. - if p.Autoscaling.Scheduler.GPU != nil { - gpu := map[string]interface{}{ - "type": *p.Autoscaling.Scheduler.GPU.Type, - "count": *p.Autoscaling.Scheduler.GPU.Count, - } + scheduling := map[string]interface{}{ + "cpu": flavor.Spec.Cpus, + "memory": fmt.Sprintf("%dG", flavor.Spec.Memory), + } - scheduling["gpu"] = gpu + // Add in labels for GPUs so the autoscaler can correctly scale when more GPUs + // are requested. To derive this value, each vendor will probably provide a + // node-labeller that you should install on the system. It should then appear + // in the status.capacity map. + if flavor.Spec.Gpu != nil { + var t string + + switch flavor.Spec.Gpu.Vendor { + case regionapi.NVIDIA: + t = "nvidia.com/gpu" + case regionapi.AMD: + t = "amd.com/gpu" + default: + return nil, fmt.Errorf("%w: unhandled gpu vendor case %s", ErrReference, flavor.Spec.Gpu.Vendor) } - values["scheduler"] = scheduling + gpu := map[string]interface{}{ + "type": t, + "count": flavor.Spec.Gpu.Count, + } + + scheduling["gpu"] = gpu } - return values + values := map[string]interface{}{ + "limits": map[string]interface{}{ + "minReplicas": *pool.Autoscaling.MinimumReplicas, + "maxReplicas": *pool.Autoscaling.MaximumReplicas, + }, + "scheduler": scheduling, + } + + return values, nil } func (p *Provisioner) generateNetworkValues(cluster *unikornv1.KubernetesCluster) interface{} { @@ -202,7 +255,10 @@ func (p *Provisioner) Values(ctx context.Context, version *string) (interface{}, //nolint:forcetypeassert cluster := application.FromContext(ctx).(*unikornv1.KubernetesCluster) - workloadPools := 
p.generateWorkloadPoolHelmValues(cluster) + workloadPools, err := p.generateWorkloadPoolHelmValues(cluster) + if err != nil { + return nil, err + } openstackValues := map[string]interface{}{ "cloud": p.options.Cloud, @@ -233,6 +289,11 @@ func (p *Provisioner) Values(ctx context.Context, version *string) (interface{}, "organizationID": labels[constants.OrganizationLabel], } + machine, err := p.generateMachineHelmValues(cluster.Spec.ControlPlane, true) + if err != nil { + return nil, err + } + // TODO: generate types from the Helm values schema. values := map[string]interface{}{ "version": string(*cluster.Spec.Version), @@ -254,7 +315,7 @@ func (p *Provisioner) Values(ctx context.Context, version *string) (interface{}, }, "controlPlane": map[string]interface{}{ "replicas": *cluster.Spec.ControlPlane.Replicas, - "machine": p.generateMachineHelmValues(&cluster.Spec.ControlPlane.MachineGeneric, true), + "machine": machine, }, "workloadPools": workloadPools, "network": p.generateNetworkValues(cluster), diff --git a/pkg/provisioners/managers/cluster/provisioner.go b/pkg/provisioners/managers/cluster/provisioner.go index 3921bb75..9059f730 100644 --- a/pkg/provisioners/managers/cluster/provisioner.go +++ b/pkg/provisioners/managers/cluster/provisioner.go @@ -22,6 +22,7 @@ import ( "errors" "fmt" "net/http" + "slices" "github.com/spf13/pflag" @@ -41,6 +42,7 @@ import ( unikornv1 "github.com/unikorn-cloud/kubernetes/pkg/apis/unikorn/v1alpha1" "github.com/unikorn-cloud/kubernetes/pkg/constants" kubernetesprovisioners "github.com/unikorn-cloud/kubernetes/pkg/provisioners" + "github.com/unikorn-cloud/kubernetes/pkg/provisioners/helmapplications/amdgpuoperator" "github.com/unikorn-cloud/kubernetes/pkg/provisioners/helmapplications/cilium" "github.com/unikorn-cloud/kubernetes/pkg/provisioners/helmapplications/clusterautoscaler" "github.com/unikorn-cloud/kubernetes/pkg/provisioners/helmapplications/clusterautoscaleropenstack" @@ -119,6 +121,10 @@ func (a 
*ApplicationReferenceGetter) nvidiaGPUOperator(ctx context.Context) (*un
 	return a.getApplication(ctx, "nvidia-gpu-operator")
 }
 
+func (a *ApplicationReferenceGetter) amdGPUOperator(ctx context.Context) (*unikornv1core.ApplicationReference, error) {
+	return a.getApplication(ctx, "amd-gpu-operator")
+}
+
 func (a *ApplicationReferenceGetter) clusterAutoscaler(ctx context.Context) (*unikornv1core.ApplicationReference, error) {
 	return a.getApplication(ctx, "cluster-autoscaler")
 }
@@ -202,9 +208,62 @@ func (p *Provisioner) getClusterManager(ctx context.Context) (*unikornv1.Cluster
 	return &clusterManager, nil
 }
 
+// provisionerOptions are derived facts about a cluster used to generate the provisioner.
+type provisionerOptions struct {
+	// autoscaling tells whether any pools have autoscaling enabled.
+	autoscaling bool
+	// gpuVendorNvidia defines whether NVIDIA GPUs are present.
+	gpuVendorNvidia bool
+	// gpuVendorAMD defines whether AMD GPUs are present.
+	gpuVendorAMD bool
+}
+
+func (p *Provisioner) getProvisionerOptions(options *kubernetesprovisioners.ClusterOpenstackOptions) (*provisionerOptions, error) {
+	provisionerOptions := &provisionerOptions{}
+
+	if options == nil {
+		return provisionerOptions, nil
+	}
+
+	for _, pool := range p.cluster.Spec.WorkloadPools.Pools {
+		if pool.Autoscaling != nil {
+			provisionerOptions.autoscaling = true
+		}
+
+		callback := func(flavor regionapi.Flavor) bool {
+			return flavor.Metadata.Id == *pool.FlavorID
+		}
+
+		index := slices.IndexFunc(options.Flavors, callback)
+		if index < 0 {
+			return nil, fmt.Errorf("%w: unable to lookup flavor %s", ErrResourceDependency, *pool.FlavorID)
+		}
+
+		flavor := &options.Flavors[index]
+
+		if flavor.Spec.Gpu != nil {
+			switch flavor.Spec.Gpu.Vendor {
+			case regionapi.NVIDIA:
+				provisionerOptions.gpuVendorNvidia = true
+			case regionapi.AMD:
+				provisionerOptions.gpuVendorAMD = true
+			default:
+				return nil, fmt.Errorf("%w: unhandled GPU vendor %v", ErrResourceDependency, flavor.Spec.Gpu.Vendor)
+ } + } + } + + return provisionerOptions, nil +} + func (p *Provisioner) getProvisioner(ctx context.Context, options *kubernetesprovisioners.ClusterOpenstackOptions) (provisioners.Provisioner, error) { apps := newApplicationReferenceGetter(&p.cluster) + provisionerOptions, err := p.getProvisionerOptions(options) + if err != nil { + return nil, err + } + clusterManager, err := p.getClusterManager(ctx) if err != nil { return nil, err @@ -230,7 +289,7 @@ func (p *Provisioner) getProvisioner(ctx context.Context, options *kubernetespro ) clusterAutoscalerProvisioner := conditional.New("cluster-autoscaler", - p.cluster.AutoscalingEnabled, + func() bool { return p.cluster.AutoscalingEnabled() && provisionerOptions.autoscaling }, concurrent.New("cluster-autoscaler", clusterautoscaler.New(apps.clusterAutoscaler).InNamespace(p.cluster.Name), clusterautoscaleropenstack.New(apps.clusterAutoscalerOpenstack).InNamespace(p.cluster.Name), @@ -240,7 +299,14 @@ func (p *Provisioner) getProvisioner(ctx context.Context, options *kubernetespro addonsProvisioner := concurrent.New("cluster add-ons", openstackplugincindercsi.New(apps.openstackPluginCinderCSI, options), metricsserver.New(apps.metricsServer), - conditional.New("nvidia-gpu-operator", p.cluster.NvidiaOperatorEnabled, nvidiagpuoperator.New(apps.nvidiaGPUOperator)), + conditional.New("nvidia-gpu-operator", + func() bool { return p.cluster.GPUOperatorEnabled() && provisionerOptions.gpuVendorNvidia }, + nvidiagpuoperator.New(apps.nvidiaGPUOperator), + ), + conditional.New("amd-gpu-operator", + func() bool { return p.cluster.GPUOperatorEnabled() && provisionerOptions.gpuVendorAMD }, + amdgpuoperator.New(apps.amdGPUOperator), + ), ) // Create the cluster and the boostrap components in parallel, the cluster will @@ -316,6 +382,19 @@ func (p *Provisioner) getRegionClient(ctx context.Context, traceName string) (co return ctx, client, nil } +func (p *Provisioner) getFlavors(ctx context.Context, client 
regionapi.ClientWithResponsesInterface) (regionapi.Flavors, error) { + response, err := client.GetApiV1OrganizationsOrganizationIDRegionsRegionIDFlavorsWithResponse(ctx, p.cluster.Labels[coreconstants.OrganizationLabel], p.cluster.Spec.RegionID) + if err != nil { + return nil, err + } + + if response.StatusCode() != http.StatusOK { + return nil, fmt.Errorf("%w: flavors GET expected 200 got %d", coreerrors.ErrAPIStatus, response.StatusCode()) + } + + return *response.JSON200, nil +} + func (p *Provisioner) getIdentity(ctx context.Context, client regionapi.ClientWithResponsesInterface) (*regionapi.IdentityRead, error) { log := log.FromContext(ctx) @@ -426,12 +505,18 @@ func (p *Provisioner) identityOptions(ctx context.Context, client regionapi.Clie return nil, err } + flavors, err := p.getFlavors(ctx, client) + if err != nil { + return nil, err + } + options := &kubernetesprovisioners.ClusterOpenstackOptions{ CloudConfig: *identity.Spec.Openstack.CloudConfig, Cloud: *identity.Spec.Openstack.Cloud, ServerGroupID: identity.Spec.Openstack.ServerGroupId, SSHKeyName: identity.Spec.Openstack.SshKeyName, ExternalNetworkID: &externalNetwork.Id, + Flavors: flavors, } physicalNetwork, err := p.getPhysicalNetwork(ctx, client) diff --git a/pkg/provisioners/types.go b/pkg/provisioners/types.go index 73dc8539..970d419c 100644 --- a/pkg/provisioners/types.go +++ b/pkg/provisioners/types.go @@ -16,6 +16,10 @@ limitations under the License. package provisioners +import ( + regionapi "github.com/unikorn-cloud/region/pkg/openapi" +) + // ClusterOpenstackOptions are acquired from the region controller at // reconcile time as the identity provisioning is asynchronous. type ClusterOpenstackOptions struct { @@ -35,6 +39,8 @@ type ClusterOpenstackOptions struct { // SSHKeyName is the SSH key name, either user provided, or we // can inject one generated by the cloud identity provider. SSHKeyName *string + // Flavors are the flavors that are referenced by the cluster. 
+	Flavors regionapi.Flavors
 }
 
 type ClusterOpenstackProviderOptions struct {
diff --git a/pkg/server/handler/cluster/conversion.go b/pkg/server/handler/cluster/conversion.go
index 7758085f..ec1dd5c2 100644
--- a/pkg/server/handler/cluster/conversion.go
+++ b/pkg/server/handler/cluster/conversion.go
@@ -41,7 +41,12 @@ import (
 )
 
 var (
+	// ErrResourceLookup is raised when we are looking for a referenced resource
+	// but cannot find it.
 	ErrResourceLookup = goerrors.New("could not find the requested resource")
+
+	// ErrUnhandledCase is raised when an unhandled switch case is encountered.
+	ErrUnhandledCase = goerrors.New("unhandled case")
 )
 
 // generator wraps up the myriad things we need to pass around as an object
@@ -73,7 +78,7 @@ func newGenerator(client client.Client, options *Options, region regionapi.Clien
 }
 
 // convertMachine converts from a custom resource into the API definition.
-func convertMachine(in *unikornv1.MachineGeneric) *openapi.MachinePool {
+func convertMachine(in *unikornv1core.MachineGeneric) *openapi.MachinePool {
 	machine := &openapi.MachinePool{
 		Replicas: in.Replicas,
 		FlavorId: in.FlavorID,
@@ -249,7 +254,7 @@ func (g *generator) generateNetwork() *unikornv1.KubernetesClusterNetworkSpec {
 }
 
 // generateMachineGeneric generates a generic machine part of the cluster.
-func (g *generator) generateMachineGeneric(ctx context.Context, request *openapi.KubernetesClusterWrite, m *openapi.MachinePool, flavor *regionapi.Flavor) (*unikornv1.MachineGeneric, error) { +func (g *generator) generateMachineGeneric(ctx context.Context, request *openapi.KubernetesClusterWrite, m *openapi.MachinePool, flavor *regionapi.Flavor) (*unikornv1core.MachineGeneric, error) { if m.Replicas == nil { m.Replicas = util.ToPointer(3) } @@ -259,14 +264,10 @@ func (g *generator) generateMachineGeneric(ctx context.Context, request *openapi return nil, err } - machine := &unikornv1.MachineGeneric{ - MachineGeneric: unikornv1core.MachineGeneric{ - Replicas: m.Replicas, - ImageID: util.ToPointer(image.Metadata.Id), - FlavorID: &flavor.Metadata.Id, - }, - // TODO: remove with https://github.com/kubernetes-sigs/cluster-api-provider-openstack/pull/2148 - FlavorName: &flavor.Metadata.Name, + machine := &unikornv1core.MachineGeneric{ + Replicas: m.Replicas, + ImageID: util.ToPointer(image.Metadata.Id), + FlavorID: &flavor.Metadata.Id, } if m.Disk != nil { @@ -282,7 +283,7 @@ func (g *generator) generateMachineGeneric(ctx context.Context, request *openapi } // generateControlPlane generates the control plane part of a cluster. -func (g *generator) generateControlPlane(ctx context.Context, request *openapi.KubernetesClusterWrite) (*unikornv1.KubernetesClusterControlPlaneSpec, error) { +func (g *generator) generateControlPlane(ctx context.Context, request *openapi.KubernetesClusterWrite) (*unikornv1core.MachineGeneric, error) { // Add in any missing defaults. resource, err := g.defaultControlPlaneFlavor(ctx, request) if err != nil { @@ -298,11 +299,7 @@ func (g *generator) generateControlPlane(ctx context.Context, request *openapi.K return nil, err } - spec := &unikornv1.KubernetesClusterControlPlaneSpec{ - MachineGeneric: *machine, - } - - return spec, nil + return machine, nil } // generateWorkloadPools generates the workload pools part of a cluster. 
@@ -337,29 +334,9 @@ func (g *generator) generateWorkloadPools(ctx context.Context, request *openapi. // the flavor used in validation, this prevents having to surface this // complexity to the client via the API. if pool.Autoscaling != nil { - memory, err := resource.ParseQuantity(fmt.Sprintf("%dGi", flavor.Spec.Memory)) - if err != nil { - return nil, err - } - workloadPool.Autoscaling = &unikornv1.MachineGenericAutoscaling{ MinimumReplicas: &pool.Autoscaling.MinimumReplicas, MaximumReplicas: pool.Machine.Replicas, - Scheduler: &unikornv1.MachineGenericAutoscalingScheduler{ - CPU: &flavor.Spec.Cpus, - Memory: &memory, - }, - } - - if flavor.Spec.Gpu != nil { - // TODO: this is needed for scale from zero, at no point do the docs - // mention AMD... - t := "nvidia.com/gpu" - - workloadPool.Autoscaling.Scheduler.GPU = &unikornv1.MachineGenericAutoscalingSchedulerGPU{ - Type: &t, - Count: &flavor.Spec.Gpu.Count, - } } } @@ -390,38 +367,6 @@ func (g *generator) lookupFlavor(ctx context.Context, request *openapi.Kubernete return &flavors[index], nil } -// installNvidiaOperator installs the nvidia operator if any workload pool flavor -// has a GPU in it. -func (g *generator) installNvidiaOperator(ctx context.Context, request *openapi.KubernetesClusterWrite, cluster *unikornv1.KubernetesCluster) error { - for _, pool := range request.Spec.WorkloadPools { - flavor, err := g.lookupFlavor(ctx, request, *pool.Machine.FlavorId) - if err != nil { - return err - } - - if flavor.Spec.Gpu != nil && flavor.Spec.Gpu.Vendor == regionapi.NVIDIA { - cluster.Spec.Features.NvidiaOperator = util.ToPointer(true) - - return nil - } - } - - return nil -} - -// installClusterAutoscaler installs the cluster autoscaler if any workload pool has -// autoscaling enabled. -// TODO: probably push this down into the cluster manager. 
-func installClusterAutoscaler(cluster *unikornv1.KubernetesCluster) { - for _, pool := range cluster.Spec.WorkloadPools.Pools { - if pool.Autoscaling != nil { - cluster.Spec.Features.Autoscaling = util.ToPointer(true) - - return - } - } -} - // generate generates the full cluster custom resource. // TODO: there are a lot of parameters being passed about, we should make this // a struct and pass them as a single blob. @@ -457,15 +402,12 @@ func (g *generator) generate(ctx context.Context, request *openapi.KubernetesClu Network: g.generateNetwork(), ControlPlane: kubernetesControlPlane, WorkloadPools: kubernetesWorkloadPools, - Features: &unikornv1.KubernetesClusterFeaturesSpec{}, + Features: &unikornv1.KubernetesClusterFeaturesSpec{ + Autoscaling: util.ToPointer(true), + GPUOperator: util.ToPointer(true), + }, }, } - installClusterAutoscaler(cluster) - - if err := g.installNvidiaOperator(ctx, request, cluster); err != nil { - return nil, err - } - return cluster, nil }