From 6af85c163c7e45c80e97273e73690438cc5257df Mon Sep 17 00:00:00 2001 From: vikastigera Date: Wed, 9 Oct 2024 15:25:30 -0700 Subject: [PATCH 1/4] Adding felix service metric port Changes done to add felix service metric port in calico-node service. This helps in removing the manual step required by the client to enable felix metric for BYO prometheus. --- .../installation/core_controller.go | 38 +++++++------ pkg/render/node.go | 43 ++++++++++----- pkg/render/node_test.go | 53 ++++++++++++++++--- 3 files changed, 99 insertions(+), 35 deletions(-) diff --git a/pkg/controller/installation/core_controller.go b/pkg/controller/installation/core_controller.go index 57a342cc85..9c6966d2ed 100644 --- a/pkg/controller/installation/core_controller.go +++ b/pkg/controller/installation/core_controller.go @@ -1349,21 +1349,22 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile // Build a configuration for rendering calico/node. nodeCfg := render.NodeConfiguration{ - K8sServiceEp: k8sapi.Endpoint, - Installation: &instance.Spec, - IPPools: crdPoolsToOperator(currentPools.Items), - LogCollector: logCollector, - BirdTemplates: birdTemplates, - TLS: typhaNodeTLS, - ClusterDomain: r.clusterDomain, - NodeReporterMetricsPort: nodeReporterMetricsPort, - BGPLayouts: bgpLayout, - NodeAppArmorProfile: nodeAppArmorProfile, - MigrateNamespaces: needNsMigration, - CanRemoveCNIFinalizer: canRemoveCNI, - PrometheusServerTLS: nodePrometheusTLS, - FelixHealthPort: *felixConfiguration.Spec.HealthPort, - BindMode: bgpConfiguration.Spec.BindMode, + K8sServiceEp: k8sapi.Endpoint, + Installation: &instance.Spec, + IPPools: crdPoolsToOperator(currentPools.Items), + LogCollector: logCollector, + BirdTemplates: birdTemplates, + TLS: typhaNodeTLS, + ClusterDomain: r.clusterDomain, + NodeReporterMetricsPort: nodeReporterMetricsPort, + BGPLayouts: bgpLayout, + NodeAppArmorProfile: nodeAppArmorProfile, + MigrateNamespaces: needNsMigration, + CanRemoveCNIFinalizer: canRemoveCNI, + PrometheusServerTLS: nodePrometheusTLS, + FelixHealthPort: *felixConfiguration.Spec.HealthPort, + BindMode: bgpConfiguration.Spec.BindMode, + FelixPrometheusMetricsEnabled: isFelixPrometheusMetricsEnabled(felixConfiguration), } components = append(components, render.Node(&nodeCfg)) @@ -1552,6 +1553,13 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile return reconcile.Result{}, nil } +func isFelixPrometheusMetricsEnabled(felixConfiguration *crdv1.FelixConfiguration) bool { + if felixConfiguration.Spec.PrometheusMetricsEnabled != nil { + return *felixConfiguration.Spec.PrometheusMetricsEnabled + } + return false +} + func readMTUFile() (int, error) { filename := "/var/lib/calico/mtu" data, err := os.ReadFile(filename) diff --git a/pkg/render/node.go b/pkg/render/node.go index 93860f2c5d..b6f7d342e3 100644 --- a/pkg/render/node.go +++ b/pkg/render/node.go @@ -74,6 +74,8 @@ var ( nodeBGPReporterPort int32 = 9900 NodeTLSSecretName = "node-certs" + + felixMetricsDefaultPort int32 = 9091 ) // TyphaNodeTLS holds configuration for Node and Typha to establish TLS. @@ -124,6 +126,8 @@ type NodeConfiguration struct { // The bindMode read from the default BGPConfiguration. Used to trigger rolling updates // should this value change. BindMode string + + FelixPrometheusMetricsEnabled bool } // Node creates the node daemonset and other resources for the daemonset to operate normally. @@ -1717,6 +1721,30 @@ func (c *nodeComponent) nodeLivenessReadinessProbes() (*corev1.Probe, *corev1.Pr // This service is used internally by Calico Enterprise and is separate from general // Prometheus metrics which are user-configurable. func (c *nodeComponent) nodeMetricsService() *corev1.Service { + ports := []corev1.ServicePort{ + { + Name: "calico-metrics-port", + Port: int32(c.cfg.NodeReporterMetricsPort), + TargetPort: intstr.FromInt(c.cfg.NodeReporterMetricsPort), + Protocol: corev1.ProtocolTCP, + }, + { + Name: "calico-bgp-metrics-port", + Port: nodeBGPReporterPort, + TargetPort: intstr.FromInt(int(nodeBGPReporterPort)), + Protocol: corev1.ProtocolTCP, + }, + } + + if c.cfg.FelixPrometheusMetricsEnabled == true { + ports = append(ports, corev1.ServicePort{ + Name: "felix-metrics-port", + Port: felixMetricsDefaultPort, + TargetPort: intstr.FromInt(int(felixMetricsDefaultPort)), + Protocol: corev1.ProtocolTCP, + }) + } + return &corev1.Service{ TypeMeta: metav1.TypeMeta{Kind: "Service", APIVersion: "v1"}, ObjectMeta: metav1.ObjectMeta{ @@ -1731,20 +1759,7 @@ func (c *nodeComponent) nodeMetricsService() *corev1.Service { // a huge set of iptables rules for this service since there's an instance // on every node. ClusterIP: "None", - Ports: []corev1.ServicePort{ - { - Name: "calico-metrics-port", - Port: int32(c.cfg.NodeReporterMetricsPort), - TargetPort: intstr.FromInt(c.cfg.NodeReporterMetricsPort), - Protocol: corev1.ProtocolTCP, - }, - { - Name: "calico-bgp-metrics-port", - Port: nodeBGPReporterPort, - TargetPort: intstr.FromInt(int(nodeBGPReporterPort)), - Protocol: corev1.ProtocolTCP, - }, - }, + Ports: ports, }, } } diff --git a/pkg/render/node_test.go b/pkg/render/node_test.go index dfda55d781..341b33cbca 100644 --- a/pkg/render/node_test.go +++ b/pkg/render/node_test.go @@ -133,12 +133,13 @@ var _ = Describe("Node rendering tests", func() { // Create a default configuration. cfg = render.NodeConfiguration{ - K8sServiceEp: k8sServiceEp, - Installation: defaultInstance, - TLS: typhaNodeTLS, - ClusterDomain: defaultClusterDomain, - FelixHealthPort: 9099, - IPPools: defaultInstance.CalicoNetwork.IPPools, + K8sServiceEp: k8sServiceEp, + Installation: defaultInstance, + TLS: typhaNodeTLS, + ClusterDomain: defaultClusterDomain, + FelixHealthPort: 9099, + IPPools: defaultInstance.CalicoNetwork.IPPools, + FelixPrometheusMetricsEnabled: false, } }) @@ -841,9 +842,49 @@ var _ = Describe("Node rendering tests", func() { Expect(ds.Spec.Template.Spec.Containers[0].Env).To(ConsistOf(expectedNodeEnv)) Expect(len(ds.Spec.Template.Spec.Containers[0].Env)).To(Equal(len(expectedNodeEnv))) + //Expect 2 Ports when FelixPrometheusMetricsEnabled is false + ms := rtest.GetResource(resources, "calico-node-metrics", "calico-system", "", "v1", "Service").(*corev1.Service) + Expect(len(ms.Spec.Ports)).To(Equal(2)) + verifyProbesAndLifecycle(ds, false, true) }) + It("should render felix service metric port when FelixPrometheusMetricsEnabled is true ", func() { + + defaultInstance.Variant = operatorv1.TigeraSecureEnterprise + cfg.NodeReporterMetricsPort = 9081 + cfg.FelixPrometheusMetricsEnabled = true + + component := render.Node(&cfg) + Expect(component.ResolveImages(nil)).To(BeNil()) + resources, _ := component.Objects() + + expectedServicePorts := []corev1.ServicePort{ + { + Name: "calico-metrics-port", + Port: int32(cfg.NodeReporterMetricsPort), + TargetPort: intstr.FromInt(cfg.NodeReporterMetricsPort), + Protocol: corev1.ProtocolTCP, + }, + { + Name: "calico-bgp-metrics-port", + Port: 9900, + TargetPort: intstr.FromInt(int(9900)), + Protocol: corev1.ProtocolTCP, + }, + { + Name: "felix-metrics-port", + Port: 9091, + TargetPort: intstr.FromInt(int(9091)), + Protocol: corev1.ProtocolTCP, + }, + } + + //Expect 3 Ports when FelixPrometheusMetricsEnabled is true + ms := rtest.GetResource(resources, "calico-node-metrics", "calico-system", "", "v1", "Service").(*corev1.Service) + Expect(ms.Spec.Ports).To(Equal(expectedServicePorts)) + }) + It("should render all resources with the appropriate permissions when running as non-privileged", func() { expectedResources := []struct { name string From 6ad5b40a63b4ccb8224dcf92fd8708bf408586cc Mon Sep 17 00:00:00 2001 From: vikastigera Date: Thu, 24 Oct 2024 11:43:07 -0700 Subject: [PATCH 2/4] Updating port selection logic --- .../installation/core_controller.go | 11 ++++- pkg/render/node.go | 18 ++++++-- pkg/render/node_test.go | 45 +++++++++++++++++-- 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/pkg/controller/installation/core_controller.go b/pkg/controller/installation/core_controller.go index 9c6966d2ed..964d6920e8 100644 --- a/pkg/controller/installation/core_controller.go +++ b/pkg/controller/installation/core_controller.go @@ -89,6 +89,8 @@ const ( // The default port used by calico/node to report Calico Enterprise internal metrics. // This is separate from the calico/node prometheus metrics port, which is user configurable. defaultNodeReporterPort = 9081 + + defaultFelixMetricsDefaultPort = 9091 ) const InstallationName string = "calico" @@ -1135,19 +1137,25 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile nodeReporterMetricsPort := defaultNodeReporterPort var nodePrometheusTLS certificatemanagement.KeyPairInterface calicoVersion := components.CalicoRelease + + felixPrometheusMetricsPort := defaultFelixMetricsDefaultPort + if instance.Spec.Variant == operator.TigeraSecureEnterprise { // Determine the port to use for nodeReporter metrics. if felixConfiguration.Spec.PrometheusReporterPort != nil { nodeReporterMetricsPort = *felixConfiguration.Spec.PrometheusReporterPort } - if nodeReporterMetricsPort == 0 { err := errors.New("felixConfiguration prometheusReporterPort=0 not supported") r.status.SetDegraded(operator.InvalidConfigurationError, "invalid metrics port", err, reqLogger) return reconcile.Result{}, err } + if felixConfiguration.Spec.PrometheusMetricsPort != nil { + felixPrometheusMetricsPort = *felixConfiguration.Spec.PrometheusMetricsPort + } + nodePrometheusTLS, err = certificateManager.GetOrCreateKeyPair(r.client, render.NodePrometheusTLSServerSecret, common.OperatorNamespace(), dns.GetServiceDNSNames(render.CalicoNodeMetricsService, common.CalicoNamespace, r.clusterDomain)) if err != nil { r.status.SetDegraded(operator.ResourceCreateError, "Error creating TLS certificate", err, reqLogger) @@ -1365,6 +1373,7 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile FelixHealthPort: *felixConfiguration.Spec.HealthPort, BindMode: bgpConfiguration.Spec.BindMode, FelixPrometheusMetricsEnabled: isFelixPrometheusMetricsEnabled(felixConfiguration), + FelixPrometheusMetricsPort: felixPrometheusMetricsPort, } components = append(components, render.Node(&nodeCfg)) diff --git a/pkg/render/node.go b/pkg/render/node.go index b6f7d342e3..f26626cea2 100644 --- a/pkg/render/node.go +++ b/pkg/render/node.go @@ -74,8 +74,6 @@ var ( nodeBGPReporterPort int32 = 9900 NodeTLSSecretName = "node-certs" - - felixMetricsDefaultPort int32 = 9091 ) // TyphaNodeTLS holds configuration for Node and Typha to establish TLS. @@ -128,6 +126,8 @@ type NodeConfiguration struct { BindMode string FelixPrometheusMetricsEnabled bool + + FelixPrometheusMetricsPort int } // Node creates the node daemonset and other resources for the daemonset to operate normally. @@ -1737,10 +1737,12 @@ func (c *nodeComponent) nodeMetricsService() *corev1.Service { } if c.cfg.FelixPrometheusMetricsEnabled == true { + felixMetricsPort := c.getFelixMetricsPort() + ports = append(ports, corev1.ServicePort{ Name: "felix-metrics-port", - Port: felixMetricsDefaultPort, - TargetPort: intstr.FromInt(int(felixMetricsDefaultPort)), + Port: felixMetricsPort, + TargetPort: intstr.FromInt(int(felixMetricsPort)), Protocol: corev1.ProtocolTCP, }) } @@ -1764,6 +1766,14 @@ func (c *nodeComponent) nodeMetricsService() *corev1.Service { } } +func (c *nodeComponent) getFelixMetricsPort() int32 { + if c.cfg.Installation.NodeMetricsPort != nil { + return *c.cfg.Installation.NodeMetricsPort + } + + return int32(c.cfg.FelixPrometheusMetricsPort) +} + // hostPathInitContainer creates an init container that changes the permissions on hostPath volumes // so that they can be written to by a non-root container. func (c *nodeComponent) hostPathInitContainer() corev1.Container { diff --git a/pkg/render/node_test.go b/pkg/render/node_test.go index 341b33cbca..673c1bd39d 100644 --- a/pkg/render/node_test.go +++ b/pkg/render/node_test.go @@ -140,6 +140,7 @@ var _ = Describe("Node rendering tests", func() { FelixHealthPort: 9099, IPPools: defaultInstance.CalicoNetwork.IPPools, FelixPrometheusMetricsEnabled: false, + FelixPrometheusMetricsPort: 9098, } }) @@ -849,7 +850,7 @@ var _ = Describe("Node rendering tests", func() { verifyProbesAndLifecycle(ds, false, true) }) - It("should render felix service metric port when FelixPrometheusMetricsEnabled is true ", func() { + It("should render felix service metric with FelixPrometheusMetricPort when FelixPrometheusMetricsEnabled is true and nodeMetricsPort is nil", func() { defaultInstance.Variant = operatorv1.TigeraSecureEnterprise cfg.NodeReporterMetricsPort = 9081 @@ -874,8 +875,46 @@ var _ = Describe("Node rendering tests", func() { }, { Name: "felix-metrics-port", - Port: 9091, - TargetPort: intstr.FromInt(int(9091)), + Port: 9098, + TargetPort: intstr.FromInt(int(9098)), + Protocol: corev1.ProtocolTCP, + }, + } + + //Expect 3 Ports when FelixPrometheusMetricsEnabled is true + ms := rtest.GetResource(resources, "calico-node-metrics", "calico-system", "", "v1", "Service").(*corev1.Service) + Expect(ms.Spec.Ports).To(Equal(expectedServicePorts)) + }) + + It("should render felix service metric with nodeMetricsPort when FelixPrometheusMetricsEnabled is true and nodeMetricsPort is not nil", func() { + + defaultInstance.Variant = operatorv1.TigeraSecureEnterprise + cfg.NodeReporterMetricsPort = 9081 + cfg.FelixPrometheusMetricsEnabled = true + var nodeMetricsPort int32 = 9099 + cfg.Installation.NodeMetricsPort = &nodeMetricsPort + + component := render.Node(&cfg) + Expect(component.ResolveImages(nil)).To(BeNil()) + resources, _ := component.Objects() + + expectedServicePorts := []corev1.ServicePort{ + { + Name: "calico-metrics-port", + Port: int32(cfg.NodeReporterMetricsPort), + TargetPort: intstr.FromInt(cfg.NodeReporterMetricsPort), + Protocol: corev1.ProtocolTCP, + }, + { + Name: "calico-bgp-metrics-port", + Port: 9900, + TargetPort: intstr.FromInt(int(9900)), + Protocol: corev1.ProtocolTCP, + }, + { + Name: "felix-metrics-port", + Port: nodeMetricsPort, + TargetPort: intstr.FromInt(int(nodeMetricsPort)), Protocol: corev1.ProtocolTCP, }, } From d07431870f87e97d1d60931dc387db215f35fc28 Mon Sep 17 00:00:00 2001 From: vikastigera Date: Thu, 24 Oct 2024 13:41:30 -0700 Subject: [PATCH 3/4] fixing static check --- pkg/render/node.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/render/node.go b/pkg/render/node.go index f26626cea2..9c18dfd6ac 100644 --- a/pkg/render/node.go +++ b/pkg/render/node.go @@ -1736,7 +1736,7 @@ func (c *nodeComponent) nodeMetricsService() *corev1.Service { }, } - if c.cfg.FelixPrometheusMetricsEnabled == true { + if c.cfg.FelixPrometheusMetricsEnabled { felixMetricsPort := c.getFelixMetricsPort() ports = append(ports, corev1.ServicePort{ From 56d6a4a64ee43a05a3480e74834804d68eefe444 Mon Sep 17 00:00:00 2001 From: vikastigera Date: Fri, 15 Nov 2024 17:35:08 -0800 Subject: [PATCH 4/4] Changing logic to use FelixPrometheusMetricsPort --- pkg/render/node.go | 10 +--------- pkg/render/node_test.go | 40 +--------------------------------------- 2 files changed, 2 insertions(+), 48 deletions(-) diff --git a/pkg/render/node.go b/pkg/render/node.go index 9c18dfd6ac..8016563ef8 100644 --- a/pkg/render/node.go +++ b/pkg/render/node.go @@ -1737,7 +1737,7 @@ func (c *nodeComponent) nodeMetricsService() *corev1.Service { } if c.cfg.FelixPrometheusMetricsEnabled { - felixMetricsPort := c.getFelixMetricsPort() + felixMetricsPort := int32(c.cfg.FelixPrometheusMetricsPort) ports = append(ports, corev1.ServicePort{ Name: "felix-metrics-port", @@ -1766,14 +1766,6 @@ func (c *nodeComponent) nodeMetricsService() *corev1.Service { } } -func (c *nodeComponent) getFelixMetricsPort() int32 { - if c.cfg.Installation.NodeMetricsPort != nil { - return *c.cfg.Installation.NodeMetricsPort - } - - return int32(c.cfg.FelixPrometheusMetricsPort) -} - // hostPathInitContainer creates an init container that changes the permissions on hostPath volumes // so that they can be written to by a non-root container. func (c *nodeComponent) hostPathInitContainer() corev1.Container { diff --git a/pkg/render/node_test.go b/pkg/render/node_test.go index 673c1bd39d..763469fc0d 100644 --- a/pkg/render/node_test.go +++ b/pkg/render/node_test.go @@ -850,7 +850,7 @@ var _ = Describe("Node rendering tests", func() { verifyProbesAndLifecycle(ds, false, true) }) - It("should render felix service metric with FelixPrometheusMetricPort when FelixPrometheusMetricsEnabled is true and nodeMetricsPort is nil", func() { + It("should render felix service metric with FelixPrometheusMetricPort when FelixPrometheusMetricsEnabled is true", func() { defaultInstance.Variant = operatorv1.TigeraSecureEnterprise cfg.NodeReporterMetricsPort = 9081 @@ -886,44 +886,6 @@ var _ = Describe("Node rendering tests", func() { Expect(ms.Spec.Ports).To(Equal(expectedServicePorts)) }) - It("should render felix service metric with nodeMetricsPort when FelixPrometheusMetricsEnabled is true and nodeMetricsPort is not nil", func() { - - defaultInstance.Variant = operatorv1.TigeraSecureEnterprise - cfg.NodeReporterMetricsPort = 9081 - cfg.FelixPrometheusMetricsEnabled = true - var nodeMetricsPort int32 = 9099 - cfg.Installation.NodeMetricsPort = &nodeMetricsPort - - component := render.Node(&cfg) - Expect(component.ResolveImages(nil)).To(BeNil()) - resources, _ := component.Objects() - - expectedServicePorts := []corev1.ServicePort{ - { - Name: "calico-metrics-port", - Port: int32(cfg.NodeReporterMetricsPort), - TargetPort: intstr.FromInt(cfg.NodeReporterMetricsPort), - Protocol: corev1.ProtocolTCP, - }, - { - Name: "calico-bgp-metrics-port", - Port: 9900, - TargetPort: intstr.FromInt(int(9900)), - Protocol: corev1.ProtocolTCP, - }, - { - Name: "felix-metrics-port", - Port: nodeMetricsPort, - TargetPort: intstr.FromInt(int(nodeMetricsPort)), - Protocol: corev1.ProtocolTCP, - }, - } - - //Expect 3 Ports when FelixPrometheusMetricsEnabled is true - ms := rtest.GetResource(resources, "calico-node-metrics", "calico-system", "", "v1", "Service").(*corev1.Service) - Expect(ms.Spec.Ports).To(Equal(expectedServicePorts)) - }) - It("should render all resources with the appropriate permissions when running as non-privileged", func() { expectedResources := []struct { name string