From 2b63a0f49d9dd85ba8ada60b22e2f5af8a8f871a Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Mon, 4 Nov 2024 20:48:23 +0100 Subject: [PATCH 1/2] feat: add operator controller to add ServiceMonitor Signed-off-by: Jan Fajerski --- ...bility-operator.clusterserviceversion.yaml | 13 +- .../observability-operator-cluster-role.yaml | 11 ++ go.mod | 5 +- go.sum | 2 + pkg/controllers/operator/components.go | 111 +++++++++++++++ pkg/controllers/operator/controller.go | 127 ++++++++++++++++++ pkg/operator/operator.go | 17 +++ 7 files changed, 284 insertions(+), 2 deletions(-) create mode 100644 pkg/controllers/operator/components.go create mode 100644 pkg/controllers/operator/controller.go diff --git a/bundle/manifests/observability-operator.clusterserviceversion.yaml b/bundle/manifests/observability-operator.clusterserviceversion.yaml index 1509ae60..b275ffaa 100644 --- a/bundle/manifests/observability-operator.clusterserviceversion.yaml +++ b/bundle/manifests/observability-operator.clusterserviceversion.yaml @@ -42,7 +42,7 @@ metadata: categories: Monitoring certified: "false" containerImage: observability-operator:0.4.2 - createdAt: "2024-10-29T11:40:05Z" + createdAt: "2024-11-05T06:54:25Z" description: A Go based Kubernetes operator to setup and manage highly available Monitoring Stack using Prometheus, Alertmanager and Thanos Querier. 
operators.operatorframework.io/builder: operator-sdk-v1.37.0 @@ -403,6 +403,17 @@ spec: - create - get - update + - apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - create + - delete + - list + - patch + - update + - watch - apiGroups: - monitoring.rhobs resources: diff --git a/deploy/operator/observability-operator-cluster-role.yaml b/deploy/operator/observability-operator-cluster-role.yaml index 2933e19f..2cf69f90 100644 --- a/deploy/operator/observability-operator-cluster-role.yaml +++ b/deploy/operator/observability-operator-cluster-role.yaml @@ -130,6 +130,17 @@ rules: - create - get - update +- apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - create + - delete + - list + - patch + - update + - watch - apiGroups: - monitoring.rhobs resources: diff --git a/go.mod b/go.mod index 6983e499..8154f39f 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,15 @@ module github.com/rhobs/observability-operator -go 1.22.7 +go 1.22.0 + +toolchain go1.22.8 require ( github.com/go-logr/logr v1.4.2 github.com/google/go-cmp v0.6.0 github.com/openshift/api v0.0.0-20240404200104-96ed2d49b255 github.com/pkg/errors v0.9.1 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.0 github.com/prometheus/common v0.60.1 github.com/rhobs/obo-prometheus-operator v0.77.1-rhobs1 github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring v0.77.1-rhobs1 diff --git a/go.sum b/go.sum index 0f1d9bbe..f5ee42b7 100644 --- a/go.sum +++ b/go.sum @@ -202,6 +202,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus-community/prom-label-proxy v0.11.0 h1:IO02WiiFMfcIqvjhwMbCYnDJiTNcSHBrkCGRQ/7KDd0= github.com/prometheus-community/prom-label-proxy v0.11.0/go.mod h1:lfvrG70XqsxWDrSh1843QXBG0fSg8EbIXmAo8xGsvw8= 
+github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.0 h1:tRwEFYFg+To2TGnibGl8dHBCh8Z/BVNKnXj2O5Za/2M= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.0/go.mod h1:Rd8YnCqz+2FYsiGmE2DMlaLjQRB4v2jFNnzCt9YY4IM= github.com/prometheus/alertmanager v0.27.0 h1:V6nTa2J5V4s8TG4C4HtrBP/WNSebCCTYGGv4qecA/+I= github.com/prometheus/alertmanager v0.27.0/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= diff --git a/pkg/controllers/operator/components.go b/pkg/controllers/operator/components.go new file mode 100644 index 00000000..e15dd64f --- /dev/null +++ b/pkg/controllers/operator/components.go @@ -0,0 +1,111 @@ +package operator_controller + +import ( + "fmt" + + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + + "github.com/rhobs/observability-operator/pkg/reconciler" +) + +const ( + name = "observability-operato" +) + +func operatorComponentReconcilers(owner metav1.Object, namespace string) []reconciler.Reconciler { + return []reconciler.Reconciler{ + reconciler.NewUpdater(newServiceMonitor(namespace), owner), + reconciler.NewUpdater(newPrometheusRole(namespace), owner), + reconciler.NewUpdater(newRoleBindingForPrometheusRole(namespace), owner), + } +} + +func newServiceMonitor(namespace string) *monv1.ServiceMonitor { + return &monv1.ServiceMonitor{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monv1.SchemeGroupVersion.String(), + Kind: "ServiceMonitor", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: map[string]string{ + "app.kubernetes.io/component": "operator", + "app.kubernetes.io/name": name, + "app.kubernetes.io/part-of": name, + "openshift.io/user-monitoring": "true", + }, + }, + + Spec: monv1.ServiceMonitorSpec{ + 
Endpoints: []monv1.Endpoint{ + { + Port: "metrics", + Scheme: "https", + TLSConfig: &monv1.TLSConfig{ + CAFile: "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt", + CertFile: "/etc/prometheus/secrets/metrics-client-certs/tls.crt", + KeyFile: "/etc/prometheus/secrets/metrics-client-certs/tls.key", + SafeTLSConfig: monv1.SafeTLSConfig{ + ServerName: ptr.To(fmt.Sprintf("%s.%s.svc", name, namespace)), + InsecureSkipVerify: ptr.To(false), + }, + }, + }, + }, + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app.kubernetes.io/component": "operator", + "app.kubernetes.io/name": name, + }, + }, + }, + } +} + +func newPrometheusRole(namespace string) *rbacv1.Role { + return &rbacv1.Role{ + TypeMeta: metav1.TypeMeta{ + APIVersion: rbacv1.SchemeGroupVersion.String(), + Kind: "Role", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "prometheus-k8s", + Namespace: namespace, + }, + Rules: []rbacv1.PolicyRule{{ + APIGroups: []string{""}, + Resources: []string{"services", "endpoints", "pods"}, + Verbs: []string{"get", "list", "watch"}, + }}, + } +} + +func newRoleBindingForPrometheusRole(namespace string) *rbacv1.RoleBinding { + roleBinding := &rbacv1.RoleBinding{ + TypeMeta: metav1.TypeMeta{ + APIVersion: rbacv1.SchemeGroupVersion.String(), + Kind: "RoleBinding", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "prometheus-k8s", + Namespace: namespace, + }, + Subjects: []rbacv1.Subject{{ + APIGroup: corev1.SchemeGroupVersion.Group, + Kind: "ServiceAccount", + Name: "prometheus-k8s", + Namespace: namespace, + }}, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.SchemeGroupVersion.Group, + Kind: "Role", + Name: "prometheus-k8s", + }, + } + return roleBinding +} diff --git a/pkg/controllers/operator/controller.go b/pkg/controllers/operator/controller.go new file mode 100644 index 00000000..258a75cb --- /dev/null +++ b/pkg/controllers/operator/controller.go @@ -0,0 +1,127 @@ +/* +Copyright 2024. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package operator_controller + +import ( + "context" + "time" + + "github.com/go-logr/logr" + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + appsv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +type resourceManager struct { + k8sClient client.Client + scheme *runtime.Scheme + logger logr.Logger + controller controller.Controller + namespace string +} + +// RBAC for managing Prometheus Operator CRs +//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=list;watch;create;update;delete;patch +//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings,verbs=list;create;update;patch + +// RegisterWithManager registers the controller with Manager +func RegisterWithManager(mgr ctrl.Manager, namespace string) error { + + rm := &resourceManager{ + k8sClient: mgr.GetClient(), + scheme: mgr.GetScheme(), + logger: ctrl.Log.WithName("observability-operator"), + namespace: namespace, + } + // We only want to trigger a reconciliation when the generation + 
+ // of a child changes. Until we need to update the status for our own objects, + // we can save CPU cycles by avoiding reconciliations triggered by + // child status changes. + generationChanged := builder.WithPredicates(predicate.GenerationChangedPredicate{}) + + ctrl, err := ctrl.NewControllerManagedBy(mgr). + Owns(&monv1.ServiceMonitor{}, generationChanged). + Watches( + &appsv1.Deployment{}, + handler.EnqueueRequestsFromMapFunc(rm.operatorDeployment), + builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), + ). + Build(rm) + + if err != nil { + return err + } + rm.controller = ctrl + return nil +} + +func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := rm.logger.WithValues("operator", req.NamespacedName) + logger.Info("Reconciling operator resources") + + op := &appsv1.Deployment{} + err := rm.k8sClient.Get(ctx, req.NamespacedName, op) + if errors.IsNotFound(err) { + return ctrl.Result{}, nil + } + if err != nil { + return ctrl.Result{}, err + } + + reconcilers := operatorComponentReconcilers(op, rm.namespace) + for _, reconciler := range reconcilers { + err := reconciler.Reconcile(ctx, rm.k8sClient, rm.scheme) + // handle create / update errors that can happen due to a stale cache by + // retrying after some time. 
+ if errors.IsAlreadyExists(err) || errors.IsConflict(err) { + logger.V(3).Info("skipping reconcile error", "err", err) + return ctrl.Result{RequeueAfter: 2 * time.Second}, nil + } + if err != nil { + return ctrl.Result{}, err + } + } + + return ctrl.Result{}, nil +} +func (rm resourceManager) operatorDeployment(ctx context.Context, ms client.Object) []reconcile.Request { + var requests []reconcile.Request + op := &appsv1.Deployment{} + err := rm.k8sClient.Get(ctx, types.NamespacedName{Name: "observability-operator", Namespace: rm.namespace}, op) + if errors.IsNotFound(err) { + return requests + } + if err != nil { + return requests + } + requests = append(requests, reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: op.GetName(), + Namespace: op.GetNamespace(), + }, + }) + return requests +} diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 15f53635..5a77adb9 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -23,6 +23,7 @@ import ( stackctrl "github.com/rhobs/observability-operator/pkg/controllers/monitoring/monitoring-stack" tqctrl "github.com/rhobs/observability-operator/pkg/controllers/monitoring/thanos-querier" + opctrl "github.com/rhobs/observability-operator/pkg/controllers/operator" uictrl "github.com/rhobs/observability-operator/pkg/controllers/uiplugin" ) @@ -64,6 +65,7 @@ type OperatorConfiguration struct { ThanosQuerier tqctrl.ThanosConfiguration UIPlugins uictrl.UIPluginsConfiguration FeatureGates FeatureGates + Namespace string } func WithNamespace(ns string) func(*OperatorConfiguration) { @@ -121,6 +123,12 @@ func WithFeatureGates(featureGates FeatureGates) func(*OperatorConfiguration) { } } +func WithNamespace(ns string) func(*OperatorConfiguration) { + return func(oc *OperatorConfiguration) { + oc.Namespace = ns + } +} + func NewOperatorConfiguration(opts ...func(*OperatorConfiguration)) *OperatorConfiguration { cfg := &OperatorConfiguration{} for _, o := range opts { @@ -252,6 +260,15 @@ 
func New(ctx context.Context, cfg *OperatorConfiguration) (*Operator, error) { setupLog.Info("OpenShift feature gate is disabled, UIPlugins are not enabled") } + if cfg.FeatureGates.OpenShift.Enabled { + if err := opctrl.RegisterWithManager(mgr, cfg.Namespace); err != nil { + return nil, fmt.Errorf("unable to register operator controller: %w", err) + } + } else { + setupLog := ctrl.Log.WithName("setup") + setupLog.Info("OpenShift feature gate is disabled, Operator controller is not enabled") + } + if err := mgr.AddHealthzCheck("health probe", healthz.Ping); err != nil { return nil, fmt.Errorf("unable to add health probe: %w", err) } From 0869cae543d486e694ec269af70e8bf8fbe8d6f7 Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Tue, 5 Nov 2024 10:10:29 +0100 Subject: [PATCH 2/2] chore: address review comments Signed-off-by: Jan Fajerski --- pkg/controllers/operator/components.go | 9 ++++----- pkg/controllers/operator/controller.go | 14 +++++++------- pkg/operator/operator.go | 7 ------- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/pkg/controllers/operator/components.go b/pkg/controllers/operator/components.go index e15dd64f..2bc42a0a 100644 --- a/pkg/controllers/operator/components.go +++ b/pkg/controllers/operator/components.go @@ -1,4 +1,4 @@ -package operator_controller +package operator import ( "fmt" @@ -13,7 +13,7 @@ import ( ) const ( - name = "observability-operato" + name = "observability-operator" ) func operatorComponentReconcilers(owner metav1.Object, namespace string) []reconciler.Reconciler { @@ -37,7 +37,7 @@ func newServiceMonitor(namespace string) *monv1.ServiceMonitor { "app.kubernetes.io/component": "operator", "app.kubernetes.io/name": name, "app.kubernetes.io/part-of": name, - "openshift.io/user-monitoring": "true", + "openshift.io/user-monitoring": "false", }, }, @@ -51,8 +51,7 @@ func newServiceMonitor(namespace string) *monv1.ServiceMonitor { CertFile: "/etc/prometheus/secrets/metrics-client-certs/tls.crt", KeyFile: 
"/etc/prometheus/secrets/metrics-client-certs/tls.key", SafeTLSConfig: monv1.SafeTLSConfig{ - ServerName: ptr.To(fmt.Sprintf("%s.%s.svc", name, namespace)), - InsecureSkipVerify: ptr.To(false), + ServerName: ptr.To(fmt.Sprintf("%s.%s.svc", name, namespace)), }, }, }, diff --git a/pkg/controllers/operator/controller.go b/pkg/controllers/operator/controller.go index 258a75cb..2f0deb87 100644 --- a/pkg/controllers/operator/controller.go +++ b/pkg/controllers/operator/controller.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package operator_controller +package operator import ( "context" @@ -22,7 +22,7 @@ import ( "github.com/go-logr/logr" monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -65,8 +65,8 @@ func RegisterWithManager(mgr ctrl.Manager, namespace string) error { ctrl, err := ctrl.NewControllerManagedBy(mgr). Owns(&monv1.ServiceMonitor{}, generationChanged). Watches( - &appsv1.Deployment{}, - handler.EnqueueRequestsFromMapFunc(rm.operatorDeployment), + &corev1.Service{}, + handler.EnqueueRequestsFromMapFunc(rm.operatorService), builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), ). 
Build(rm) @@ -82,7 +82,7 @@ func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl logger := rm.logger.WithValues("operator", req.NamespacedName) logger.Info("Reconciling operator resources") - op := &appsv1.Deployment{} + op := &corev1.Service{} err := rm.k8sClient.Get(ctx, req.NamespacedName, op) if errors.IsNotFound(err) { return ctrl.Result{}, nil @@ -107,9 +107,9 @@ func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return ctrl.Result{}, nil } -func (rm resourceManager) operatorDeployment(ctx context.Context, ms client.Object) []reconcile.Request { +func (rm resourceManager) operatorService(ctx context.Context, _ client.Object) []reconcile.Request { var requests []reconcile.Request - op := &appsv1.Deployment{} + op := &corev1.Service{} err := rm.k8sClient.Get(ctx, types.NamespacedName{Name: "observability-operator", Namespace: rm.namespace}, op) if errors.IsNotFound(err) { return requests diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 5a77adb9..9f1af085 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -65,7 +65,6 @@ type OperatorConfiguration struct { ThanosQuerier tqctrl.ThanosConfiguration UIPlugins uictrl.UIPluginsConfiguration FeatureGates FeatureGates - Namespace string } func WithNamespace(ns string) func(*OperatorConfiguration) { @@ -123,12 +122,6 @@ func WithFeatureGates(featureGates FeatureGates) func(*OperatorConfiguration) { } } -func WithNamespace(ns string) func(*OperatorConfiguration) { - return func(oc *OperatorConfiguration) { - oc.Namespace = ns - } -} - func NewOperatorConfiguration(opts ...func(*OperatorConfiguration)) *OperatorConfiguration { cfg := &OperatorConfiguration{} for _, o := range opts {