From cc12bb75d1ec7da8fc9eb6a93168930b0a6d9407 Mon Sep 17 00:00:00 2001 From: Douglas Wightman Date: Mon, 18 Nov 2024 12:16:20 -0700 Subject: [PATCH] DCGM-Exporter release 3.3.9-3.6.1 --- README.md | 2 +- dcgm-exporter.yaml | 12 ++++++------ deployment/Chart.yaml | 2 +- deployment/values.yaml | 2 +- docker/Dockerfile.ubi9 | 7 ++++--- docker/Dockerfile.ubuntu22.04 | 4 ++-- hack/VERSION | 8 ++++---- service-monitor.yaml | 4 ++-- tests/e2e/Makefile | 2 +- 9 files changed, 22 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 77c57435..7e7a0d78 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Official documentation for DCGM-Exporter can be found on [docs.nvidia.com](https To gather metrics on a GPU node, simply start the `dcgm-exporter` container: ```shell -docker run -d --gpus all --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:3.3.8-3.6.0-ubuntu22.04 +docker run -d --gpus all --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:3.3.9-3.6.1-ubuntu22.04 curl localhost:9400/metrics # HELP DCGM_FI_DEV_SM_CLOCK SM clock frequency (in MHz). # TYPE DCGM_FI_DEV_SM_CLOCK gauge diff --git a/dcgm-exporter.yaml b/dcgm-exporter.yaml index 0e9a7c39..d919dc83 100644 --- a/dcgm-exporter.yaml +++ b/dcgm-exporter.yaml @@ -18,23 +18,23 @@ metadata: name: "dcgm-exporter" labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.6.0" + app.kubernetes.io/version: "3.6.1" spec: updateStrategy: type: RollingUpdate selector: matchLabels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.6.0" + app.kubernetes.io/version: "3.6.1" template: metadata: labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.6.0" + app.kubernetes.io/version: "3.6.1" name: "dcgm-exporter" spec: containers: - - image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.8-3.6.0-ubuntu22.04" + - image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.9-3.6.1-ubuntu22.04" env: - name: "DCGM_EXPORTER_LISTEN" value: ":9400" @@ -66,11 +66,11 @@ metadata: name: "dcgm-exporter" labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.6.0" + app.kubernetes.io/version: "3.6.1" spec: selector: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.6.0" + app.kubernetes.io/version: "3.6.1" ports: - name: "metrics" port: 9400 diff --git a/deployment/Chart.yaml b/deployment/Chart.yaml index 81054f3f..a297d602 100644 --- a/deployment/Chart.yaml +++ b/deployment/Chart.yaml @@ -3,7 +3,7 @@ name: dcgm-exporter description: A Helm chart for DCGM exporter version: "3.7.0" kubeVersion: ">= 1.19.0-0" -appVersion: "3.6.0" +appVersion: "3.6.1" sources: - https://github.com/nvidia/dcgm-exporter home: https://github.com/nvidia/dcgm-exporter/ diff --git a/deployment/values.yaml b/deployment/values.yaml index 8a81d661..bf2d5be7 100644 --- a/deployment/values.yaml +++ b/deployment/values.yaml @@ -17,7 +17,7 @@ image: pullPolicy: IfNotPresent # Image tag defaults to AppVersion, but you can use the tag key # for the image tag, e.g: - tag: 3.3.8-3.6.0-ubuntu22.04 + tag: 3.3.9-3.6.1-ubuntu22.04 # Change the following reference to "/etc/dcgm-exporter/default-counters.csv" # to stop profiling metrics from DCGM diff --git a/docker/Dockerfile.ubi9 b/docker/Dockerfile.ubi9 index 0dfd4a98..e6154d21 100644 --- a/docker/Dockerfile.ubi9 +++ b/docker/Dockerfile.ubi9 @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.6.1-base-ubi9 AS builder +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9 AS builder ARG GOLANG_VERSION=1.22.4 WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter RUN set -eux; \ @@ -40,7 +40,7 @@ COPY . . RUN make binary check-format -FROM nvcr.io/nvidia/cuda:12.6.1-base-ubi9 +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9 ARG DCGM_VERSION ARG VERSION ARG DIST_DIR @@ -63,7 +63,8 @@ RUN dnf update --disablerepo=* --enablerepo=ubi-9-appstream-rpms --enablerepo=ub && rm -rf /usr/local/dcgm/scripts \ && rm -f /usr/include/*.h /usr/bin/DcgmProfTesterKernels.ptx /usr/bin/dcgmproftester* \ && rm -rf /var/lib/rpm/rpmdb.sqlite /var/cache/* /var/lib/dnf/history.* /var/log/* /tmp/* /var/tmp/* \ - && rm -rf /usr/share/doc && rm -rf /usr/share/man + && rm -rf /usr/share/doc && rm -rf /usr/share/man \ + && dnf remove openssl COPY ./LICENSE ./licenses/LICENSE COPY --from=builder /go/src/github.com/NVIDIA/dcgm-exporter/cmd/dcgm-exporter/dcgm-exporter /usr/bin/ diff --git a/docker/Dockerfile.ubuntu22.04 b/docker/Dockerfile.ubuntu22.04 index a2c6ac7e..1cd1c3cc 100644 --- a/docker/Dockerfile.ubuntu22.04 +++ b/docker/Dockerfile.ubuntu22.04 @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.6.1-base-ubuntu22.04 AS builder +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 AS builder ARG GOLANG_VERSION=1.22.4 WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter RUN set -eux; \ @@ -45,7 +45,7 @@ COPY . . RUN make binary check-format -FROM nvcr.io/nvidia/cuda:12.6.1-base-ubuntu22.04 +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 ARG VERSION ARG DCGM_VERSION diff --git a/hack/VERSION b/hack/VERSION index 1fba2cc0..17b0cffa 100644 --- a/hack/VERSION +++ b/hack/VERSION @@ -1,4 +1,4 @@ -OLD_DCGM_VERSION=3.3.7 -OLD_EXPORTER_VERSION=3.5.0 -NEW_DCGM_VERSION=3.3.8 -NEW_EXPORTER_VERSION=3.6.0 +OLD_DCGM_VERSION=3.3.8 +OLD_EXPORTER_VERSION=3.6.0 +NEW_DCGM_VERSION=3.3.9 +NEW_EXPORTER_VERSION=3.6.1 diff --git a/service-monitor.yaml b/service-monitor.yaml index 50b49bd8..57aaa800 100644 --- a/service-monitor.yaml +++ b/service-monitor.yaml @@ -18,12 +18,12 @@ metadata: name: "dcgm-exporter" labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.6.0" + app.kubernetes.io/version: "3.6.1" spec: selector: matchLabels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.6.0" + app.kubernetes.io/version: "3.6.1" endpoints: - port: "metrics" path: "/metrics" diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile index ed483d6d..ae9a6411 100644 --- a/tests/e2e/Makefile +++ b/tests/e2e/Makefile @@ -16,7 +16,7 @@ GO_CMD ?= go NAMESPACE ?= "dcgm-exporter" CHART ?= "./../../deployment/" IMAGE_REPOSITORY ?= "nvcr.io/nvidia/k8s/dcgm-exporter" -IMAGE_TAG ?= "3.3.8-3.6.0-ubuntu22.04" +IMAGE_TAG ?= "3.3.9-3.6.1-ubuntu22.04" KUBECONFIG ?= "~/.kube/config" define TEST_CMD