Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding felix service metric port #3534

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 33 additions & 16 deletions pkg/controller/installation/core_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ const (
// The default port used by calico/node to report Calico Enterprise internal metrics.
// This is separate from the calico/node prometheus metrics port, which is user configurable.
defaultNodeReporterPort = 9081

defaultFelixMetricsDefaultPort = 9091
)

const InstallationName string = "calico"
Expand Down Expand Up @@ -1135,19 +1137,25 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile
nodeReporterMetricsPort := defaultNodeReporterPort
var nodePrometheusTLS certificatemanagement.KeyPairInterface
calicoVersion := components.CalicoRelease

felixPrometheusMetricsPort := defaultFelixMetricsDefaultPort

if instance.Spec.Variant == operator.TigeraSecureEnterprise {

// Determine the port to use for nodeReporter metrics.
if felixConfiguration.Spec.PrometheusReporterPort != nil {
nodeReporterMetricsPort = *felixConfiguration.Spec.PrometheusReporterPort
}

if nodeReporterMetricsPort == 0 {
err := errors.New("felixConfiguration prometheusReporterPort=0 not supported")
r.status.SetDegraded(operator.InvalidConfigurationError, "invalid metrics port", err, reqLogger)
return reconcile.Result{}, err
}

if felixConfiguration.Spec.PrometheusMetricsPort != nil {
felixPrometheusMetricsPort = *felixConfiguration.Spec.PrometheusMetricsPort
}

nodePrometheusTLS, err = certificateManager.GetOrCreateKeyPair(r.client, render.NodePrometheusTLSServerSecret, common.OperatorNamespace(), dns.GetServiceDNSNames(render.CalicoNodeMetricsService, common.CalicoNamespace, r.clusterDomain))
if err != nil {
r.status.SetDegraded(operator.ResourceCreateError, "Error creating TLS certificate", err, reqLogger)
Expand Down Expand Up @@ -1349,21 +1357,23 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile

// Build a configuration for rendering calico/node.
nodeCfg := render.NodeConfiguration{
K8sServiceEp: k8sapi.Endpoint,
Installation: &instance.Spec,
IPPools: crdPoolsToOperator(currentPools.Items),
LogCollector: logCollector,
BirdTemplates: birdTemplates,
TLS: typhaNodeTLS,
ClusterDomain: r.clusterDomain,
NodeReporterMetricsPort: nodeReporterMetricsPort,
BGPLayouts: bgpLayout,
NodeAppArmorProfile: nodeAppArmorProfile,
MigrateNamespaces: needNsMigration,
CanRemoveCNIFinalizer: canRemoveCNI,
PrometheusServerTLS: nodePrometheusTLS,
FelixHealthPort: *felixConfiguration.Spec.HealthPort,
BindMode: bgpConfiguration.Spec.BindMode,
K8sServiceEp: k8sapi.Endpoint,
Installation: &instance.Spec,
IPPools: crdPoolsToOperator(currentPools.Items),
LogCollector: logCollector,
BirdTemplates: birdTemplates,
TLS: typhaNodeTLS,
ClusterDomain: r.clusterDomain,
NodeReporterMetricsPort: nodeReporterMetricsPort,
BGPLayouts: bgpLayout,
NodeAppArmorProfile: nodeAppArmorProfile,
MigrateNamespaces: needNsMigration,
CanRemoveCNIFinalizer: canRemoveCNI,
PrometheusServerTLS: nodePrometheusTLS,
FelixHealthPort: *felixConfiguration.Spec.HealthPort,
BindMode: bgpConfiguration.Spec.BindMode,
FelixPrometheusMetricsEnabled: isFelixPrometheusMetricsEnabled(felixConfiguration),
FelixPrometheusMetricsPort: felixPrometheusMetricsPort,
}
components = append(components, render.Node(&nodeCfg))

Expand Down Expand Up @@ -1552,6 +1562,13 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile
return reconcile.Result{}, nil
}

func isFelixPrometheusMetricsEnabled(felixConfiguration *crdv1.FelixConfiguration) bool {
if felixConfiguration.Spec.PrometheusMetricsEnabled != nil {
return *felixConfiguration.Spec.PrometheusMetricsEnabled
}
return false
}

func readMTUFile() (int, error) {
filename := "/var/lib/calico/mtu"
data, err := os.ReadFile(filename)
Expand Down
53 changes: 39 additions & 14 deletions pkg/render/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ type NodeConfiguration struct {
// The bindMode read from the default BGPConfiguration. Used to trigger rolling updates
// should this value change.
BindMode string

FelixPrometheusMetricsEnabled bool

FelixPrometheusMetricsPort int
}

// Node creates the node daemonset and other resources for the daemonset to operate normally.
Expand Down Expand Up @@ -1717,6 +1721,32 @@ func (c *nodeComponent) nodeLivenessReadinessProbes() (*corev1.Probe, *corev1.Pr
// This service is used internally by Calico Enterprise and is separate from general
// Prometheus metrics which are user-configurable.
func (c *nodeComponent) nodeMetricsService() *corev1.Service {
ports := []corev1.ServicePort{
{
Name: "calico-metrics-port",
Port: int32(c.cfg.NodeReporterMetricsPort),
TargetPort: intstr.FromInt(c.cfg.NodeReporterMetricsPort),
Protocol: corev1.ProtocolTCP,
},
{
Name: "calico-bgp-metrics-port",
Port: nodeBGPReporterPort,
TargetPort: intstr.FromInt(int(nodeBGPReporterPort)),
Protocol: corev1.ProtocolTCP,
},
}

if c.cfg.FelixPrometheusMetricsEnabled {
felixMetricsPort := c.getFelixMetricsPort()

ports = append(ports, corev1.ServicePort{
Name: "felix-metrics-port",
Port: felixMetricsPort,
TargetPort: intstr.FromInt(int(felixMetricsPort)),
Protocol: corev1.ProtocolTCP,
})
}

return &corev1.Service{
TypeMeta: metav1.TypeMeta{Kind: "Service", APIVersion: "v1"},
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -1731,24 +1761,19 @@ func (c *nodeComponent) nodeMetricsService() *corev1.Service {
// a huge set of iptables rules for this service since there's an instance
// on every node.
ClusterIP: "None",
Ports: []corev1.ServicePort{
{
Name: "calico-metrics-port",
Port: int32(c.cfg.NodeReporterMetricsPort),
TargetPort: intstr.FromInt(c.cfg.NodeReporterMetricsPort),
Protocol: corev1.ProtocolTCP,
},
{
Name: "calico-bgp-metrics-port",
Port: nodeBGPReporterPort,
TargetPort: intstr.FromInt(int(nodeBGPReporterPort)),
Protocol: corev1.ProtocolTCP,
},
},
Ports: ports,
},
}
}

func (c *nodeComponent) getFelixMetricsPort() int32 {
if c.cfg.Installation.NodeMetricsPort != nil {
return *c.cfg.Installation.NodeMetricsPort
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the two metrics ports are unrelated, other than providing some metrics information. I don't think there is any reason we should use NodeMetricsPort here.
If this is needed then I'd suggest this logic should be moved to the core_controller.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated to only use PrometheusMetricsPort


return int32(c.cfg.FelixPrometheusMetricsPort)
}

// hostPathInitContainer creates an init container that changes the permissions on hostPath volumes
// so that they can be written to by a non-root container.
func (c *nodeComponent) hostPathInitContainer() corev1.Container {
Expand Down
92 changes: 86 additions & 6 deletions pkg/render/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,14 @@ var _ = Describe("Node rendering tests", func() {

// Create a default configuration.
cfg = render.NodeConfiguration{
K8sServiceEp: k8sServiceEp,
Installation: defaultInstance,
TLS: typhaNodeTLS,
ClusterDomain: defaultClusterDomain,
FelixHealthPort: 9099,
IPPools: defaultInstance.CalicoNetwork.IPPools,
K8sServiceEp: k8sServiceEp,
Installation: defaultInstance,
TLS: typhaNodeTLS,
ClusterDomain: defaultClusterDomain,
FelixHealthPort: 9099,
IPPools: defaultInstance.CalicoNetwork.IPPools,
FelixPrometheusMetricsEnabled: false,
FelixPrometheusMetricsPort: 9098,
}
})

Expand Down Expand Up @@ -841,9 +843,87 @@ var _ = Describe("Node rendering tests", func() {
Expect(ds.Spec.Template.Spec.Containers[0].Env).To(ConsistOf(expectedNodeEnv))
Expect(len(ds.Spec.Template.Spec.Containers[0].Env)).To(Equal(len(expectedNodeEnv)))

//Expect 2 Ports when FelixPrometheusMetricsEnabled is false
ms := rtest.GetResource(resources, "calico-node-metrics", "calico-system", "", "v1", "Service").(*corev1.Service)
Expect(len(ms.Spec.Ports)).To(Equal(2))

verifyProbesAndLifecycle(ds, false, true)
})

It("should render felix service metric with FelixPrometheusMetricPort when FelixPrometheusMetricsEnabled is true and nodeMetricsPort is nil", func() {

defaultInstance.Variant = operatorv1.TigeraSecureEnterprise
cfg.NodeReporterMetricsPort = 9081
cfg.FelixPrometheusMetricsEnabled = true

component := render.Node(&cfg)
Expect(component.ResolveImages(nil)).To(BeNil())
resources, _ := component.Objects()

expectedServicePorts := []corev1.ServicePort{
{
Name: "calico-metrics-port",
Port: int32(cfg.NodeReporterMetricsPort),
TargetPort: intstr.FromInt(cfg.NodeReporterMetricsPort),
Protocol: corev1.ProtocolTCP,
},
{
Name: "calico-bgp-metrics-port",
Port: 9900,
TargetPort: intstr.FromInt(int(9900)),
Protocol: corev1.ProtocolTCP,
},
{
Name: "felix-metrics-port",
Port: 9098,
TargetPort: intstr.FromInt(int(9098)),
Protocol: corev1.ProtocolTCP,
},
}

//Expect 3 Ports when FelixPrometheusMetricsEnabled is true
ms := rtest.GetResource(resources, "calico-node-metrics", "calico-system", "", "v1", "Service").(*corev1.Service)
Expect(ms.Spec.Ports).To(Equal(expectedServicePorts))
})

It("should render felix service metric with nodeMetricsPort when FelixPrometheusMetricsEnabled is true and nodeMetricsPort is not nil", func() {

defaultInstance.Variant = operatorv1.TigeraSecureEnterprise
cfg.NodeReporterMetricsPort = 9081
cfg.FelixPrometheusMetricsEnabled = true
var nodeMetricsPort int32 = 9099
cfg.Installation.NodeMetricsPort = &nodeMetricsPort

component := render.Node(&cfg)
Expect(component.ResolveImages(nil)).To(BeNil())
resources, _ := component.Objects()

expectedServicePorts := []corev1.ServicePort{
{
Name: "calico-metrics-port",
Port: int32(cfg.NodeReporterMetricsPort),
TargetPort: intstr.FromInt(cfg.NodeReporterMetricsPort),
Protocol: corev1.ProtocolTCP,
},
{
Name: "calico-bgp-metrics-port",
Port: 9900,
TargetPort: intstr.FromInt(int(9900)),
Protocol: corev1.ProtocolTCP,
},
{
Name: "felix-metrics-port",
Port: nodeMetricsPort,
TargetPort: intstr.FromInt(int(nodeMetricsPort)),
Protocol: corev1.ProtocolTCP,
},
}

//Expect 3 Ports when FelixPrometheusMetricsEnabled is true
ms := rtest.GetResource(resources, "calico-node-metrics", "calico-system", "", "v1", "Service").(*corev1.Service)
Expect(ms.Spec.Ports).To(Equal(expectedServicePorts))
})

It("should render all resources with the appropriate permissions when running as non-privileged", func() {
expectedResources := []struct {
name string
Expand Down
Loading