From 8cd432a8ebbb1627105f689debc5628718b0821e Mon Sep 17 00:00:00 2001 From: Vihang Mehta Date: Wed, 14 Jun 2023 13:10:33 -0700 Subject: [PATCH] Use pod dns addrs instead of IPs to check status (#1489) Summary: Using the pod advertised DNS addr instead of the IP address ensures that the SSL cert is valid and that we can scrape with TLS. Relevant Issues: Followup to a breakage caused by #1480 Type of change: /kind bug Test Plan: skaffold the operator to test. --------- Signed-off-by: Vihang Mehta --- src/operator/controllers/monitor.go | 5 +- src/operator/controllers/monitor_test.go | 26 +++++--- src/utils/shared/k8s/BUILD.bazel | 8 ++- src/utils/shared/k8s/dns_addr.go | 35 +++++++++++ src/utils/shared/k8s/dns_addr_test.go | 63 +++++++++++++++++++ .../cloud_connector/vzmetrics/scrape.go | 8 +-- 6 files changed, 126 insertions(+), 19 deletions(-) create mode 100644 src/utils/shared/k8s/dns_addr.go create mode 100644 src/utils/shared/k8s/dns_addr_test.go diff --git a/src/operator/controllers/monitor.go b/src/operator/controllers/monitor.go index a191b11e707..db727391bc7 100644 --- a/src/operator/controllers/monitor.go +++ b/src/operator/controllers/monitor.go @@ -328,7 +328,7 @@ func getNATSState(client HTTPClient, pods *concurrentPodMap) *vizierState { u := url.URL{ Scheme: "http", - Host: net.JoinHostPort(natsPod.pod.Status.PodIP, "8222"), + Host: net.JoinHostPort(k8s.GetPodAddr(*natsPod.pod), "8222"), } resp, err := client.Get(u.String()) @@ -780,7 +780,6 @@ func (m *VizierMonitor) runReconciler() { // queryPodStatusz returns a pod's self-reported status as served by its statusz endpoint. func queryPodStatusz(client HTTPClient, pod *v1.Pod) (bool, string) { - podIP := pod.Status.PodIP // Assume that the statusz endpoint is on the first port in the first container. var port int32 if len(pod.Spec.Containers) > 0 && len(pod.Spec.Containers[0].Ports) > 0 { @@ -789,7 +788,7 @@ func queryPodStatusz(client HTTPClient, pod *v1.Pod) (bool, string) { u := url.URL{ Scheme: "https", - Host: net.JoinHostPort(podIP, fmt.Sprintf("%d", port)), + Host: net.JoinHostPort(k8s.GetPodAddr(*pod), fmt.Sprintf("%d", port)), Path: "statusz", } resp, err := client.Get(u.String()) diff --git a/src/operator/controllers/monitor_test.go b/src/operator/controllers/monitor_test.go index e9b32a56f60..52dba094bfc 100644 --- a/src/operator/controllers/monitor_test.go +++ b/src/operator/controllers/monitor_test.go @@ -74,8 +74,8 @@ func (f *FakeHTTPClient) Get(url string) (*http.Response, error) { func TestMonitor_queryPodStatusz(t *testing.T) { httpClient := &FakeHTTPClient{ responses: map[string]string{ - "https://127.0.0.1:8080/statusz": "", - "https://127.0.0.3:50100/statusz": "CloudConnectFailed", + "https://127-0-0-1.pl.pod.cluster.local:8080/statusz": "", + "https://127-0-0-3.pl2.pod.cluster.local:50100/statusz": "CloudConnectFailed", }, } @@ -83,6 +83,7 @@ func TestMonitor_queryPodStatusz(t *testing.T) { name string podPort int32 podIP string + podNamespace string expectedStatus string expectedOK bool }{ @@ -90,6 +91,7 @@ func TestMonitor_queryPodStatusz(t *testing.T) { name: "OK", podPort: 8080, podIP: "127.0.0.1", + podNamespace: "pl", expectedStatus: "", expectedOK: true, }, @@ -104,6 +106,7 @@ func TestMonitor_queryPodStatusz(t *testing.T) { name: "unhealthy", podPort: 50100, podIP: "127.0.0.3", + podNamespace: "pl2", expectedStatus: "CloudConnectFailed", expectedOK: false, }, @@ -115,6 +118,9 @@ func TestMonitor_queryPodStatusz(t *testing.T) { Status: v1.PodStatus{ PodIP: test.podIP, }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: test.podNamespace, + }, Spec: v1.PodSpec{ Containers: []v1.Container{ { @@ -169,7 +175,7 @@ func TestMonitor_getCloudConnState(t *testing.T) { t.Run(test.name, func(t *testing.T) { httpClient := &FakeHTTPClient{ responses: map[string]string{ - "https://127.0.0.1:8080/statusz": test.cloudConnStatusz, + "https://127-0-0-1.pl.pod.cluster.local:8080/statusz": test.cloudConnStatusz, }, } @@ -183,6 +189,9 @@ func TestMonitor_getCloudConnState(t *testing.T) { PodIP: "127.0.0.1", Phase: test.cloudConnPhase, }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: "pl", + }, Spec: v1.PodSpec{ Containers: []v1.Container{ { @@ -500,9 +509,7 @@ func TestMonitor_repairVizier_PVC(t *testing.T) { func TestMonitor_getCloudConnState_SeveralCloudConns(t *testing.T) { httpClient := &FakeHTTPClient{ - responses: map[string]string{ - "https://127.0.0.1:8080/statusz": "", - }, + responses: map[string]string{}, } pods := &concurrentPodMap{unsafeMap: make(map[string]map[string]*podWrapper)} @@ -545,8 +552,8 @@ func TestMonitor_getCloudConnState_SeveralCloudConns(t *testing.T) { func TestMonitor_NATSPods(t *testing.T) { httpClient := &FakeHTTPClient{ responses: map[string]string{ - "http://127.0.0.1:8222": "", - "http://127.0.0.3:8222": "NATS Failed", + "http://127-0-0-1.pl.pod.cluster.local:8222": "", + "http://127-0-0-3.pl.pod.cluster.local:8222": "NATS Failed", }, } @@ -624,6 +631,9 @@ func TestMonitor_NATSPods(t *testing.T) { PodIP: test.natsIP, Phase: test.natsPhase, }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: "pl", + }, }, }, ) diff --git a/src/utils/shared/k8s/BUILD.bazel b/src/utils/shared/k8s/BUILD.bazel index 98e8555763e..a9dc8ef2fd6 100644 --- a/src/utils/shared/k8s/BUILD.bazel +++ b/src/utils/shared/k8s/BUILD.bazel @@ -23,6 +23,7 @@ go_library( "apply.go", "auth.go", "delete.go", + "dns_addr.go", "kubectl.go", "logs.go", "secrets.go", @@ -64,10 +65,15 @@ go_library( pl_go_test( name = "k8s_test", - srcs = ["apply_test.go"], + srcs = [ + "apply_test.go", + "dns_addr_test.go", + ], deps = [ ":k8s", "@com_github_stretchr_testify//assert", "@com_github_stretchr_testify//require", + "@io_k8s_api//core/v1:core", + "@io_k8s_apimachinery//pkg/apis/meta/v1:meta", ], ) diff --git a/src/utils/shared/k8s/dns_addr.go b/src/utils/shared/k8s/dns_addr.go new file mode 100644 index 00000000000..5029a290e47 --- /dev/null +++ b/src/utils/shared/k8s/dns_addr.go @@ -0,0 +1,35 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package k8s + +import ( + "fmt" + "strings" + + v1 "k8s.io/api/core/v1" +) + +func GetPodAddr(pod v1.Pod) string { + // IPv4 + podIP := strings.ReplaceAll(pod.Status.PodIP, ".", "-") + // IPv6 + podIP = strings.ReplaceAll(podIP, ":", "-") + + return fmt.Sprintf("%s.%s.pod.cluster.local", podIP, pod.Namespace) +} diff --git a/src/utils/shared/k8s/dns_addr_test.go b/src/utils/shared/k8s/dns_addr_test.go new file mode 100644 index 00000000000..06d72b0dba5 --- /dev/null +++ b/src/utils/shared/k8s/dns_addr_test.go @@ -0,0 +1,63 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package k8s_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "px.dev/pixie/src/utils/shared/k8s" +) + +func TestGetPodAddr(t *testing.T) { + keyValueStringToMapTests := []struct { + pod v1.Pod + expectedAddr string + }{ + { + pod: v1.Pod{ + Status: v1.PodStatus{ + PodIP: "1.2.3.4", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test-ns", + }, + }, + expectedAddr: "1-2-3-4.test-ns.pod.cluster.local", + }, + { + pod: v1.Pod{ + Status: v1.PodStatus{ + PodIP: "2001:0db8:85a3:0000:0000:8a2e:0370:7334", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test-ns", + }, + }, + expectedAddr: "2001-0db8-85a3-0000-0000-8a2e-0370-7334.test-ns.pod.cluster.local", + }, + } + + for _, tc := range keyValueStringToMapTests { + assert.Equal(t, tc.expectedAddr, k8s.GetPodAddr(tc.pod)) + } +} diff --git a/src/vizier/services/cloud_connector/vzmetrics/scrape.go b/src/vizier/services/cloud_connector/vzmetrics/scrape.go index e22c2722b2b..3e19fd77e5a 100644 --- a/src/vizier/services/cloud_connector/vzmetrics/scrape.go +++ b/src/vizier/services/cloud_connector/vzmetrics/scrape.go @@ -22,13 +22,11 @@ import ( "context" "crypto/tls" "crypto/x509" - "fmt" "io" "net" "net/http" "net/url" "os" - "strings" "time" log "github.com/sirupsen/logrus" @@ -124,12 +122,8 @@ func (s *scraperImpl) getEndpointsToScrape() ([]endpoint, error) { continue } - // IPv4 - podIP := strings.ReplaceAll(p.Status.PodIP, ".", "-") - // IPv6 - podIP = strings.ReplaceAll(podIP, ":", "-") // Use k8s pod DNS format instead of just the pod IP, so that the cert is valid. - host := fmt.Sprintf("%s.%s.pod.cluster.local", podIP, s.namespace) + host := k8s.GetPodAddr(p) u := url.URL{ Scheme: "https",