From 5a950b6c4cd8aaf27817db5be4767e956671b82f Mon Sep 17 00:00:00 2001
From: Andrea Panattoni
Date: Wed, 22 May 2024 16:09:15 +0200
Subject: [PATCH] e2e: Verify metrics-exporter exposes netdevice metrics

Exposed metrics can be verified by scraping the prometheus
endpoint on the `sriov-network-metrics-exporter` pod.

Add a test that spawns an SR-IOV consuming pod and verifies
that its receive counters increase when the interface is pinged
from outside.

Signed-off-by: Andrea Panattoni
---
 go.mod                                        |   4 +-
 .../tests/test_exporter_metrics.go            | 212 ++++++++++++++++++
 test/util/namespaces/namespaces.go            |  29 +++
 3 files changed, 243 insertions(+), 2 deletions(-)
 create mode 100644 test/conformance/tests/test_exporter_metrics.go

diff --git a/go.mod b/go.mod
index fe9005b1b..0353c7ec1 100644
--- a/go.mod
+++ b/go.mod
@@ -30,6 +30,8 @@ require (
 	github.com/pkg/errors v0.9.1
 	github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0
 	github.com/prometheus-operator/prometheus-operator/pkg/client v0.68.0
+	github.com/prometheus/client_model v0.5.0
+	github.com/prometheus/common v0.45.0
 	github.com/safchain/ethtool v0.3.0
 	github.com/spf13/cobra v1.7.0
 	github.com/stretchr/testify v1.8.4
@@ -125,8 +127,6 @@ require (
 	github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/prometheus/client_golang v1.17.0 // indirect
-	github.com/prometheus/client_model v0.5.0 // indirect
-	github.com/prometheus/common v0.45.0 // indirect
 	github.com/prometheus/procfs v0.12.0 // indirect
 	github.com/robfig/cron v1.2.0 // indirect
 	github.com/rogpeppe/go-internal v1.10.0 // indirect
diff --git a/test/conformance/tests/test_exporter_metrics.go b/test/conformance/tests/test_exporter_metrics.go
new file mode 100644
index 000000000..e81f63067
--- /dev/null
+++ b/test/conformance/tests/test_exporter_metrics.go
@@ -0,0 +1,212 @@
+package tests
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster"
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery"
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces"
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/network"
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/pod"
+
+	dto "github.com/prometheus/client_model/go"
+	"github.com/prometheus/common/expfmt"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// This suite enables the metricsExporter feature flag and checks that the
+// exporter reports growing RX counters for a VF that receives traffic.
+var _ = Describe("[sriov] Metrics Exporter", Ordered, func() {
+
+	BeforeAll(func() {
+		if cluster.VirtualCluster() {
+			Skip("IGB driver does not support VF statistics")
+		}
+
+		err := namespaces.Create(namespaces.Test, clients)
+		Expect(err).ToNot(HaveOccurred())
+
+		err = namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled())
+		Expect(err).ToNot(HaveOccurred())
+
+		featureFlagInitialValue := isFeatureFlagEnabled("metricsExporter")
+		DeferCleanup(func() {
+			By("Restoring initial feature flag value")
+			setFeatureFlag("metricsExporter", featureFlagInitialValue)
+		})
+
+		By("Enabling `metricsExporter` feature flag")
+		setFeatureFlag("metricsExporter", true)
+
+		By("Adding monitoring label to " + operatorNamespace)
+		err = namespaces.AddLabel(clients, context.Background(), operatorNamespace, "openshift.io/cluster-monitoring", "true")
+		Expect(err).ToNot(HaveOccurred())
+
+		WaitForSRIOVStable()
+	})
+
+	It("collects metrics regarding receiving traffic via VF", func() {
+		sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace)
+		Expect(err).ToNot(HaveOccurred())
+
+		node, nic, err := sriovInfos.FindOneSriovNodeAndDevice()
+		Expect(err).ToNot(HaveOccurred())
+		By("Using device " + nic.Name + " on node " + node)
+
+		_, err = network.CreateSriovPolicy(clients, "test-me-policy-", operatorNamespace, nic.Name, node, 2, "metricsResource", "netdevice")
+		Expect(err).ToNot(HaveOccurred())
+
+		err = network.CreateSriovNetwork(clients, nic, "test-me-network", namespaces.Test, operatorNamespace, "metricsResource", ipamIpv4)
+		Expect(err).ToNot(HaveOccurred())
+		waitForNetAttachDef("test-me-network", namespaces.Test)
+
+		testPod := createTestPod(node, []string{"test-me-network"})
+
+		ips, err := network.GetSriovNicIPs(testPod, "net1")
+		Expect(err).ToNot(HaveOccurred())
+		Expect(ips).NotTo(BeNil(), "No sriov network interface found.")
+		Expect(ips).To(HaveLen(1))
+
+		initialMetrics := getMetricsForNode(node)
+		initialRxBytes := getCounterForPod(initialMetrics, testPod, "sriov_vf_rx_bytes")
+		initialRxPackets := getCounterForPod(initialMetrics, testPod, "sriov_vf_rx_packets")
+
+		// Ping the pod's SR-IOV interface so that its RX counters have to grow.
+		for _, ip := range ips {
+			pingPod(ip, node, "test-me-network")
+		}
+
+		finalMetrics := getMetricsForNode(node)
+		finalRxBytes := getCounterForPod(finalMetrics, testPod, "sriov_vf_rx_bytes")
+		finalRxPackets := getCounterForPod(finalMetrics, testPod, "sriov_vf_rx_packets")
+
+		Expect(finalRxBytes).Should(BeNumerically(">", initialRxBytes))
+		Expect(finalRxPackets).Should(BeNumerically(">", initialRxPackets))
+	})
+
+})
+
+// getMetricsForNode scrapes the Prometheus endpoint of the metrics exporter pod
+// scheduled on nodeName and returns the parsed metric families keyed by name.
+func getMetricsForNode(nodeName string) map[string]*dto.MetricFamily {
+	metricsExporterPods, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{
+		LabelSelector: "app=sriov-network-metrics-exporter",
+		FieldSelector: "spec.nodeName=" + nodeName,
+	})
+	ExpectWithOffset(1, err).ToNot(HaveOccurred())
+	ExpectWithOffset(1, metricsExporterPods.Items).ToNot(BeEmpty(), "At least one metrics exporter pod expected on node %s", nodeName)
+
+	metricsExporterPod := metricsExporterPods.Items[0]
+
+	command := []string{"curl", "http://127.0.0.1:9110/metrics"}
+	stdout, stderr, err := pod.ExecCommand(clients, &metricsExporterPod, command...)
+	ExpectWithOffset(1, err).ToNot(HaveOccurred(),
+		"pod: [%s/%s] command: [%v]\nstdout: %s\nstderr: %s", metricsExporterPod.Namespace, metricsExporterPod.Name, command, stdout, stderr)
+
+	// Clean the scraped output from carriage returns
+	stdout = strings.ReplaceAll(stdout, "\r", "")
+
+	var parser expfmt.TextParser
+	mf, err := parser.TextToMetricFamilies(strings.NewReader(stdout))
+	ExpectWithOffset(1, err).ToNot(HaveOccurred())
+
+	return mf
+}
+
+// getCounterForPod returns the value of the counter metricName for the PCI
+// device that the sriov_kubepoddevice metric associates with pod p.
+func getCounterForPod(mf map[string]*dto.MetricFamily, p *corev1.Pod, metricName string) float64 {
+	pciAddress := findPciAddressForPod(mf, p)
+	return findCounterForPciAddr(mf, pciAddress, metricName)
+}
+
+// findPciAddressForPod returns the pciAddr label of the sriov_kubepoddevice
+// metric for pod p, failing the test when the label is missing.
+func findPciAddressForPod(mf map[string]*dto.MetricFamily, p *corev1.Pod) string {
+	kubePodDeviceMetric := findKubePodDeviceMetric(mf, p)
+	for _, labelPair := range kubePodDeviceMetric.Label {
+		if labelPair.GetName() == "pciAddr" {
+			return labelPair.GetValue()
+		}
+	}
+
+	Fail(fmt.Sprintf("Can't find PCI Address for pod [%s/%s] in metrics %+v", p.Namespace, p.Name, mf))
+	return ""
+}
+
+// findKubePodDeviceMetric returns the sriov_kubepoddevice metric whose pod and
+// namespace labels match pod p, failing the test when there is none.
+func findKubePodDeviceMetric(mf map[string]*dto.MetricFamily, p *corev1.Pod) *dto.Metric {
+	metricFamily, ok := mf["sriov_kubepoddevice"]
+	Expect(ok).To(BeTrue(), "sriov_kubepoddevice metric not found: %+v", mf)
+
+	kubePodDeviceMetric := findMetricForPod(metricFamily.Metric, p)
+	Expect(kubePodDeviceMetric).ToNot(BeNil(), "sriov_kubepoddevice metric for pod [%s/%s] not found: %+v", p.Namespace, p.Name, mf)
+
+	return kubePodDeviceMetric
+}
+
+// findCounterForPciAddr returns the counter value of the metricName sample
+// labelled with the given pciAddr, failing the test when it cannot be found.
+func findCounterForPciAddr(mf map[string]*dto.MetricFamily, pciAddress string, metricName string) float64 {
+	metricFamily, ok := mf[metricName]
+	Expect(ok).To(BeTrue(), "metric %s not found: %+v", metricName, mf)
+
+	metric := findMetricFor(metricFamily.Metric, map[string]string{
+		"pciAddr": pciAddress,
+	})
+	Expect(metric).ToNot(BeNil(), "metric %s for pciAddr %s not found: %+v", metricName, pciAddress, mf)
+	Expect(metric.GetCounter()).ToNot(BeNil(), "metric %s for pciAddr %s is not a counter: %+v", metricName, pciAddress, metric)
+
+	return metric.GetCounter().GetValue()
+}
+
+// findMetricForPod returns the first metric whose pod and namespace labels
+// match pod p, or nil when none does.
+func findMetricForPod(metrics []*dto.Metric, p *corev1.Pod) *dto.Metric {
+	return findMetricFor(metrics, map[string]string{
+		"pod":       p.Name,
+		"namespace": p.Namespace,
+	})
+}
+
+// findMetricFor returns the first metric carrying every label in
+// labelsToMatch with the requested value, or nil when none does.
+func findMetricFor(metrics []*dto.Metric, labelsToMatch map[string]string) *dto.Metric {
+	for _, metric := range metrics {
+		if areLabelsMatching(metric.Label, labelsToMatch) {
+			return metric
+		}
+	}
+
+	return nil
+}
+
+// areLabelsMatching reports whether labels contains every name in
+// labelsToMatch with the requested value. Labels not listed in labelsToMatch
+// are ignored.
+func areLabelsMatching(labels []*dto.LabelPair, labelsToMatch map[string]string) bool {
+	matched := 0
+	for _, labelPair := range labels {
+		valueToMatch, ok := labelsToMatch[labelPair.GetName()]
+		if !ok {
+			continue
+		}
+
+		if labelPair.GetValue() != valueToMatch {
+			return false
+		}
+		matched++
+	}
+
+	// Every requested label must be present; a metric that lacks one of them
+	// is not a match. Assumes label names are unique within a metric.
+	return matched == len(labelsToMatch)
+}
diff --git a/test/util/namespaces/namespaces.go b/test/util/namespaces/namespaces.go
index 9e865b7d1..5ed106398 100644
--- a/test/util/namespaces/namespaces.go
+++ b/test/util/namespaces/namespaces.go
@@ -10,6 +10,7 @@ import (
 	k8serrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
+	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
 	"k8s.io/utils/pointer"
 	runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"
 
@@ -165,3 +166,31 @@ func Clean(operatorNamespace, namespace string, cs *testclient.ClientSet, discov
 	}
 	return nil
 }
+
+// AddLabel sets the label key=value on the namespace namespaceName. It returns
+// without issuing an update when the namespace already has that label value.
+// NOTE(review): ctx is conventionally the first parameter; the order is kept
+// so that existing callers keep compiling.
+func AddLabel(cs corev1client.NamespacesGetter, ctx context.Context, namespaceName, key, value string) error {
+	ns, err := cs.Namespaces().Get(ctx, namespaceName, metav1.GetOptions{})
+	if err != nil {
+		return fmt.Errorf("failed to get namespace [%s]: %w", namespaceName, err)
+	}
+
+	if ns.Labels == nil {
+		ns.Labels = make(map[string]string)
+	}
+
+	if ns.Labels[key] == value {
+		return nil
+	}
+
+	ns.Labels[key] = value
+
+	_, err = cs.Namespaces().Update(ctx, ns, metav1.UpdateOptions{})
+	if err != nil {
+		return fmt.Errorf("failed to update namespace [%s] with label [%s: %s]: %w", namespaceName, key, value, err)
+	}
+
+	return nil
+}