diff --git a/Makefile b/Makefile index d2dea11b..7ff143d3 100644 --- a/Makefile +++ b/Makefile @@ -15,9 +15,9 @@ MKDIR ?= mkdir REGISTRY ?= nvidia -DCGM_VERSION := 3.3.0 +DCGM_VERSION := 3.3.3 GOLANG_VERSION := 1.21.5 -VERSION := 3.2.0 +VERSION := 3.3.0 FULL_VERSION := $(DCGM_VERSION)-$(VERSION) OUTPUT := type=oci,dest=/tmp/dcgm-exporter.tar PLATFORMS := linux/amd64,linux/arm64 @@ -30,7 +30,7 @@ NON_TEST_FILES += cmd/dcgm-exporter/main.go MAIN_TEST_FILES := pkg/dcgmexporter/system_info_test.go .PHONY: all binary install check-format local -all: ubuntu20.04 ubi8 +all: ubuntu22.04 ubi9 binary: cd cmd/dcgm-exporter; go build -ldflags "-X main.BuildVersion=${DCGM_VERSION}-${VERSION}" @@ -48,8 +48,8 @@ check-format: test $$(gofmt -l cmd | tee /dev/stderr | wc -l) -eq 0 push: - $(MAKE) ubuntu20.04 OUTPUT=type=registry - $(MAKE) ubi8 OUTPUT=type=registry + $(MAKE) ubuntu22.04 OUTPUT=type=registry + $(MAKE) ubi9 OUTPUT=type=registry local: ifeq ($(shell uname -p),aarch64) @@ -58,24 +58,24 @@ else $(MAKE) PLATFORMS=linux/amd64 OUTPUT=type=docker DOCKERCMD='docker build' endif -ubuntu20.04: +ubuntu22.04: $(DOCKERCMD) --pull \ --output $(OUTPUT) \ --platform $(PLATFORMS) \ --build-arg "GOLANG_VERSION=$(GOLANG_VERSION)" \ --build-arg "DCGM_VERSION=$(DCGM_VERSION)" \ - --tag "$(REGISTRY)/dcgm-exporter:$(FULL_VERSION)-ubuntu20.04" \ - --file docker/Dockerfile.ubuntu20.04 . + --tag "$(REGISTRY)/dcgm-exporter:$(FULL_VERSION)-ubuntu22.04" \ + --file docker/Dockerfile.ubuntu22.04 . -ubi8: +ubi9: $(DOCKERCMD) --pull \ --output $(OUTPUT) \ --platform $(PLATFORMS) \ --build-arg "GOLANG_VERSION=$(GOLANG_VERSION)" \ --build-arg "DCGM_VERSION=$(DCGM_VERSION)" \ --build-arg "VERSION=$(FULL_VERSION)" \ - --tag "$(REGISTRY)/dcgm-exporter:$(FULL_VERSION)-ubi8" \ - --file docker/Dockerfile.ubi8 . + --tag "$(REGISTRY)/dcgm-exporter:$(FULL_VERSION)-ubi9" \ + --file docker/Dockerfile.ubi9 . .PHONY: integration test-integration: diff --git a/README.md b/README.md index f04a268e..a7c8f3d2 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Official documentation for DCGM-Exporter can be found on [docs.nvidia.com](https To gather metrics on a GPU node, simply start the `dcgm-exporter` container: ``` -$ docker run -d --gpus all --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:3.3.0-3.2.0-ubuntu22.04 +$ docker run -d --gpus all --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:3.3.3-3.3.0-ubuntu22.04 $ curl localhost:9400/metrics # HELP DCGM_FI_DEV_SM_CLOCK SM clock frequency (in MHz). # TYPE DCGM_FI_DEV_SM_CLOCK gauge @@ -157,7 +157,7 @@ make local Build the ubuntu image and export to 'docker images' ``` -make ubuntu20.04 PLATFORMS=linux/amd64 OUTPUT=type=docker +make ubuntu22.04 PLATFORMS=linux/amd64 OUTPUT=type=docker ``` Build and push the images to some other 'private_registry' diff --git a/RELEASE.md b/RELEASE.md index 1017f226..0bd49616 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -7,7 +7,7 @@ This documents the release process as well as the versioning strategy for the DC The DCGM container has three major components: - The DCGM Version (e.g: 1.17.3) - The Exporter Version (e.g: 2.0.0) -- The platform of the container (e.g: ubuntu20.04) +- The platform of the container (e.g: ubuntu22.04) The overall version of the DCGM container has three forms: - The long form: `${DCGM_VERSION}-${EXPORTER_VERSION}-${PLATFORM}` @@ -17,7 +17,7 @@ The overall version of the DCGM container has three forms: The long form is a unique tag that once pushed will always refer to the same container. This means that no updates will be made to that tag and it will always point to the same container. -The short form refers to the latest EXPORTER_VERSION with the platform fixed to ubuntu20.04. +The short form refers to the latest EXPORTER_VERSION with the platform fixed to ubuntu22.04. The latest tag refers to the latest short form (i.e: latest DCGM_VERSION and EXPORTER_VERSION). Note: We do not maintain multiple version branches. diff --git a/dcgm-exporter.yaml b/dcgm-exporter.yaml index 538e2cd0..ddfd5f84 100644 --- a/dcgm-exporter.yaml +++ b/dcgm-exporter.yaml @@ -18,23 +18,23 @@ metadata: name: "dcgm-exporter" labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.2.0" + app.kubernetes.io/version: "3.3.0" spec: updateStrategy: type: RollingUpdate selector: matchLabels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.2.0" + app.kubernetes.io/version: "3.3.0" template: metadata: labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.2.0" + app.kubernetes.io/version: "3.3.0" name: "dcgm-exporter" spec: containers: - - image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.0-3.2.0-ubuntu22.04" + - image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.3-3.3.0-ubuntu22.04" env: - name: "DCGM_EXPORTER_LISTEN" value: ":9400" @@ -64,11 +64,11 @@ metadata: name: "dcgm-exporter" labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.2.0" + app.kubernetes.io/version: "3.3.0" spec: selector: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.2.0" + app.kubernetes.io/version: "3.3.0" ports: - name: "metrics" port: 9400 diff --git a/deployment/Chart.yaml b/deployment/Chart.yaml index 8d6ed420..24fa4829 100644 --- a/deployment/Chart.yaml +++ b/deployment/Chart.yaml @@ -1,9 +1,9 @@ apiVersion: v2 name: dcgm-exporter description: A Helm chart for DCGM exporter -version: "3.2.0" +version: "3.3.0" kubeVersion: ">= 1.19.0-0" -appVersion: "3.2.0" +appVersion: "3.3.0" sources: - https://github.com/nvidia/dcgm-exporter home: https://github.com/nvidia/dcgm-exporter/ diff --git a/deployment/values.yaml b/deployment/values.yaml index d96c0f5f..2bd76ae7 100644 --- a/deployment/values.yaml +++ b/deployment/values.yaml @@ -17,7 +17,7 @@ image: pullPolicy: IfNotPresent # Image tag defaults to AppVersion, but you can use the tag key # for the image tag, e.g: - tag: 3.3.0-3.2.0-ubuntu22.04 + tag: 3.3.3-3.3.0-ubuntu22.04 # Change the following reference to "/etc/dcgm-exporter/default-counters.csv" # to stop profiling metrics from DCGM diff --git a/docker/Dockerfile.ubi8 b/docker/Dockerfile.ubi9 similarity index 91% rename from docker/Dockerfile.ubi8 rename to docker/Dockerfile.ubi9 index aa85efe9..0074585a 100644 --- a/docker/Dockerfile.ubi8 +++ b/docker/Dockerfile.ubi9 @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.2.2-base-ubi8 AS builder +FROM nvcr.io/nvidia/cuda:12.3.1-base-ubi9 AS builder ARG GOLANG_VERSION WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter RUN set -eux; \ @@ -40,12 +40,12 @@ COPY . . RUN make binary check-format -FROM nvcr.io/nvidia/cuda:12.2.2-base-ubi8 +FROM nvcr.io/nvidia/cuda:12.3.1-base-ubi9 LABEL io.k8s.display-name="NVIDIA DCGM Exporter" ARG DCGM_VERSION -RUN dnf update --disablerepo=* --enablerepo=ubi-8-appstream-rpms --enablerepo=ubi-8-baseos-rpms -y && rm -rf /var/cache/yum \ +RUN dnf update --disablerepo=* --enablerepo=ubi-9-appstream-rpms --enablerepo=ubi-9-baseos-rpms -y && rm -rf /var/cache/yum \ && dnf clean expire-cache && dnf install -y datacenter-gpu-manager-${DCGM_VERSION} libcap \ && rm -rfd /usr/local/dcgm/bindings /usr/local/dcgm/sdk_samples /usr/share/nvidia-validation-suite diff --git a/docker/Dockerfile.ubuntu20.04 b/docker/Dockerfile.ubuntu22.04 similarity index 95% rename from docker/Dockerfile.ubuntu20.04 rename to docker/Dockerfile.ubuntu22.04 index 8e22f4c0..ce1c6814 100644 --- a/docker/Dockerfile.ubuntu20.04 +++ b/docker/Dockerfile.ubuntu22.04 @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.2.2-base-ubuntu20.04 AS builder +FROM nvcr.io/nvidia/cuda:12.3.1-base-ubuntu22.04 AS builder ARG GOLANG_VERSION=1.21.5 WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter RUN set -eux; \ @@ -45,7 +45,7 @@ COPY . . RUN make binary check-format -FROM nvcr.io/nvidia/cuda:12.2.2-base-ubuntu20.04 +FROM nvcr.io/nvidia/cuda:12.3.1-base-ubuntu22.04 LABEL io.k8s.display-name="NVIDIA DCGM Exporter" COPY --from=builder /go/src/github.com/NVIDIA/dcgm-exporter/cmd/dcgm-exporter/dcgm-exporter /usr/bin/ diff --git a/service-monitor.yaml b/service-monitor.yaml index 8e6e9471..2234a0ca 100644 --- a/service-monitor.yaml +++ b/service-monitor.yaml @@ -18,12 +18,12 @@ metadata: name: "dcgm-exporter" labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.2.0" + app.kubernetes.io/version: "3.3.0" spec: selector: matchLabels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "3.2.0" + app.kubernetes.io/version: "3.3.0" endpoints: - port: "metrics" path: "/metrics"