From 3816b91634dbccfed13adfdd881c48eb78337c31 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 12 Jun 2023 15:04:16 +0200 Subject: [PATCH 1/4] Use envvars instead of flags for configuring runtime config and socket paths in toolkit container Signed-off-by: Carlos Eduardo Arango Gutierrez --- controllers/object_controls.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 0809d7e0f..2a0b29d1e 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -1114,7 +1114,8 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n return fmt.Errorf("error getting path to runtime config file: %v", err) } sourceConfigFileName := path.Base(runtimeConfigFile) - runtimeArgs := "--config " + DefaultRuntimeConfigTargetDir + sourceConfigFileName + // update runtime args + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "RUNTIME_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName) volMountConfigName := fmt.Sprintf("%s-config", runtime) volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir} @@ -1130,7 +1131,8 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n return fmt.Errorf("error getting path to runtime socket: %v", err) } sourceSocketFileName := path.Base(runtimeSocketFile) - runtimeArgs += " --socket " + DefaultRuntimeSocketTargetDir + sourceSocketFileName + // update runtime args + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "RUNTIME_SOCKET", DefaultRuntimeSocketTargetDir+sourceSocketFileName) volMountSocketName := fmt.Sprintf("%s-socket", runtime) volMountSocket := corev1.VolumeMount{Name: volMountSocketName, MountPath: DefaultRuntimeSocketTargetDir} @@ -1140,9 +1142,6 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, socketVol) } - // update runtime args - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "RUNTIME_ARGS", runtimeArgs) - // Update CRI-O hooks path to use default path for non OCP cases if n.openshift == "" && n.runtime == gpuv1.CRIO { for index, volume := range obj.Spec.Template.Spec.Volumes { From afd6af5d2ab7fc7f996e3d02fae827c157082a26 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 12 Jun 2023 16:07:11 +0200 Subject: [PATCH 2/4] Address review comments Signed-off-by: Carlos Eduardo Arango Gutierrez --- controllers/object_controls.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 2a0b29d1e..f406223ce 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -135,6 +135,12 @@ const ( DeviceListStrategyEnvName = "DEVICE_LIST_STRATEGY" // CDIAnnotationPrefixEnvName is the name of the device-plugin envvar for configuring the CDI annotation prefix CDIAnnotationPrefixEnvName = "CDI_ANNOTATION_PREFIX" + // NvidiaCtrRuntimeSocket is the EnvVar name for the Container toolkit runtime socket + NvidiaCtrRuntimeSocket = "RUNTIME_SOCKET" + // NvidiaCtrRuntimeSocket is the EnvVar name for the Container toolkit runtime config + NvidiaCtrRuntimeConfig = "RUNTIME_CONFIG" + // NvidiaCtrRuntimeSocket is the EnvVar name for the Container toolkit runtime class name + NvidiaCtrRuntimeName = "NVIDIA_RUNTIME_NAME" ) // ContainerProbe defines container probe types @@ -1105,7 +1111,7 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n if runtime == gpuv1.Containerd.String() { // Set the runtime class name that is to be configured for containerd - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_RUNTIME_CLASS", getRuntimeClass(config)) + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeName, getRuntimeClass(config)) } // setup mounts for runtime config file @@ -1115,7 +1121,7 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n } sourceConfigFileName := path.Base(runtimeConfigFile) // update runtime args - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "RUNTIME_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName) + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeConfig, DefaultRuntimeConfigTargetDir+sourceConfigFileName) volMountConfigName := fmt.Sprintf("%s-config", runtime) volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir} @@ -1132,7 +1138,7 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n } sourceSocketFileName := path.Base(runtimeSocketFile) // update runtime args - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "RUNTIME_SOCKET", DefaultRuntimeSocketTargetDir+sourceSocketFileName) + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeSocket, DefaultRuntimeSocketTargetDir+sourceSocketFileName) volMountSocketName := fmt.Sprintf("%s-socket", runtime) volMountSocket := corev1.VolumeMount{Name: volMountSocketName, MountPath: DefaultRuntimeSocketTargetDir} From 648ec457d7bfc14cccc404995ac18043a2e8bbb4 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 26 Jun 2023 12:23:06 +0200 Subject: [PATCH 3/4] Address reviewers requested changes Signed-off-by: Carlos Eduardo Arango Gutierrez --- controllers/object_controls.go | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/controllers/object_controls.go b/controllers/object_controls.go index f406223ce..2da92f40e 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -135,12 +135,6 @@ const ( DeviceListStrategyEnvName = "DEVICE_LIST_STRATEGY" // CDIAnnotationPrefixEnvName is the name of the device-plugin envvar for configuring the CDI annotation prefix CDIAnnotationPrefixEnvName = "CDI_ANNOTATION_PREFIX" - // NvidiaCtrRuntimeSocket is the EnvVar name for the Container toolkit runtime socket - NvidiaCtrRuntimeSocket = "RUNTIME_SOCKET" - // NvidiaCtrRuntimeSocket is the EnvVar name for the Container toolkit runtime config - NvidiaCtrRuntimeConfig = "RUNTIME_CONFIG" - // NvidiaCtrRuntimeSocket is the EnvVar name for the Container toolkit runtime class name - NvidiaCtrRuntimeName = "NVIDIA_RUNTIME_NAME" ) // ContainerProbe defines container probe types @@ -1111,7 +1105,7 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n if runtime == gpuv1.Containerd.String() { // Set the runtime class name that is to be configured for containerd - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeName, getRuntimeClass(config)) + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_RUNTIME_CLASS", getRuntimeClass(config)) } // setup mounts for runtime config file @@ -1121,7 +1115,13 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n } sourceConfigFileName := path.Base(runtimeConfigFile) // update runtime args - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeConfig, DefaultRuntimeConfigTargetDir+sourceConfigFileName) + if runtime == gpuv1.Containerd.String() { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName) + } else if runtime == gpuv1.Docker.String() { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "DOCKER_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName) + } else if runtime == gpuv1.CRIO.String() { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CRIO_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName) + } volMountConfigName := fmt.Sprintf("%s-config", runtime) volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir} @@ -1138,8 +1138,15 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n } sourceSocketFileName := path.Base(runtimeSocketFile) // update runtime args - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeSocket, DefaultRuntimeSocketTargetDir+sourceSocketFileName) - + // update runtime args + if runtime == gpuv1.Containerd.String() { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_SOCKET", DefaultRuntimeSocketTargetDir+sourceSocketFileName) + } else if runtime == gpuv1.Docker.String() { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "DOCKER_SOCKET", DefaultRuntimeSocketTargetDir+sourceSocketFileName) + } else if runtime == gpuv1.CRIO.String() { + runtimeArgs := " --socket " + DefaultRuntimeSocketTargetDir + sourceSocketFileName + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "RUNTIME_ARGS", runtimeArgs) + } volMountSocketName := fmt.Sprintf("%s-socket", runtime) volMountSocket := corev1.VolumeMount{Name: volMountSocketName, MountPath: DefaultRuntimeSocketTargetDir} obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountSocket) From 6f2968f955ffb95ce8f06f40b3df9c41e8387df3 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 26 Jun 2023 14:20:46 +0200 Subject: [PATCH 4/4] Adopt changes sugested by ELezar to handle Socket mount Signed-off-by: Carlos Eduardo Arango Gutierrez --- controllers/object_controls.go | 35 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 2da92f40e..9b2f26893 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -1114,14 +1114,16 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n return fmt.Errorf("error getting path to runtime config file: %v", err) } sourceConfigFileName := path.Base(runtimeConfigFile) - // update runtime args + + var configEnvvarName string if runtime == gpuv1.Containerd.String() { - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName) + configEnvvarName = "CONTAINERD_CONFIG" } else if runtime == gpuv1.Docker.String() { - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "DOCKER_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName) + configEnvvarName = "DOCKER_CONFIG" } else if runtime == gpuv1.CRIO.String() { - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CRIO_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName) + configEnvvarName = "CRIO_CONFIG" } + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), configEnvvarName, DefaultRuntimeConfigTargetDir+sourceConfigFileName) volMountConfigName := fmt.Sprintf("%s-config", runtime) volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir} @@ -1131,22 +1133,21 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol) // setup mounts for runtime socket file - if runtime == gpuv1.Docker.String() || runtime == gpuv1.Containerd.String() { - runtimeSocketFile, err := getRuntimeSocketFile(&(obj.Spec.Template.Spec.Containers[0]), runtime) - if err != nil { - return fmt.Errorf("error getting path to runtime socket: %v", err) - } + runtimeSocketFile, err := getRuntimeSocketFile(&(obj.Spec.Template.Spec.Containers[0]), runtime) + if err != nil { + return fmt.Errorf("error getting path to runtime socket: %v", err) + } + if runtimeSocketFile != "" { sourceSocketFileName := path.Base(runtimeSocketFile) - // update runtime args - // update runtime args + // set envvar for runtime socket + var socketEnvvarName string if runtime == gpuv1.Containerd.String() { - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_SOCKET", DefaultRuntimeSocketTargetDir+sourceSocketFileName) + socketEnvvarName = "CONTAINERD_SOCKET" } else if runtime == gpuv1.Docker.String() { - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "DOCKER_SOCKET", DefaultRuntimeSocketTargetDir+sourceSocketFileName) - } else if runtime == gpuv1.CRIO.String() { - runtimeArgs := " --socket " + DefaultRuntimeSocketTargetDir + sourceSocketFileName - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "RUNTIME_ARGS", runtimeArgs) + socketEnvvarName = "DOCKER_SOCKET" } + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), socketEnvvarName, DefaultRuntimeSocketTargetDir+sourceSocketFileName) + volMountSocketName := fmt.Sprintf("%s-socket", runtime) volMountSocket := corev1.VolumeMount{Name: volMountSocketName, MountPath: DefaultRuntimeSocketTargetDir} obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountSocket) @@ -1951,6 +1952,8 @@ func getRuntimeSocketFile(c *corev1.Container, runtime string) (string, error) { if getContainerEnv(c, "CONTAINERD_SOCKET") != "" { runtimeSocketFile = getContainerEnv(c, "CONTAINERD_SOCKET") } + case gpuv1.CRIO.String(): + runtimeSocketFile = "" default: return "", fmt.Errorf("invalid runtime: %s", runtime) }