Skip to content

Commit

Permalink
[ACM-14076]: Ensure CMO ConfigMap is reconciled on any event (#1610)
Browse files Browse the repository at this point in the history
* config: Expose platform monitoring constants across the project

Signed-off-by: Philip Gough <philip.p.gough@gmail.com>

* Ensure we don't keep stale config in the OCP update loop for the additional AlertmanagerConfigs

Signed-off-by: Philip Gough <philip.p.gough@gmail.com>

* Reconcile CMO config on all events

Signed-off-by: Philip Gough <philip.p.gough@gmail.com>

* test: Adds integration test for OCP CMO Config reconcile

Signed-off-by: Philip Gough <philip.p.gough@gmail.com>

* endpointmetrics: Adds CMO ConfigMap to the IBM filtered cache

Signed-off-by: Philip Gough <philip.p.gough@gmail.com>

---------

Signed-off-by: Philip Gough <philip.p.gough@gmail.com>
  • Loading branch information
philipgough authored Sep 10, 2024
1 parent 2c316bf commit 59654ff
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,17 @@ var (
const (
obAddonName = "observability-addon"
obsAddonFinalizer = "observability.open-cluster-management.io/addon-cleanup"
promSvcName = "prometheus-k8s"
promNamespace = "openshift-monitoring"
openShiftClusterMonitoringlabel = "openshift.io/cluster-monitoring"
mtlsCertName = "observability-controller-open-cluster-management.io-observability-signer-client-cert"
mtlsCaName = "observability-managed-cluster-certs"
metricsCollectorName = "metrics-collector-deployment"
uwlMetricsCollectorName = "uwl-metrics-collector-deployment"
uwlNamespace = "openshift-user-workload-monitoring"
)
const (
promSvcName = operatorconfig.OCPClusterMonitoringPrometheusService
promNamespace = operatorconfig.OCPClusterMonitoringNamespace
)

// ObservabilityAddonReconciler reconciles a ObservabilityAddon object.
type ObservabilityAddonReconciler struct {
Expand Down Expand Up @@ -555,7 +557,7 @@ func (r *ObservabilityAddonReconciler) SetupWithManager(mgr ctrl.Manager) error
Watches(
&corev1.ConfigMap{},
&handler.EnqueueRequestForObject{},
builder.WithPredicates(getPred(clusterMonitoringConfigName, promNamespace, true, true, false)),
builder.WithPredicates(getPred(clusterMonitoringConfigName, promNamespace, true, true, true)),
).
Watches(
&appsv1.Deployment{},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ import (
"testing"
"time"

yaml2 "github.com/ghodss/yaml"
ocinfrav1 "github.com/openshift/api/config/v1"
cmomanifests "github.com/openshift/cluster-monitoring-operator/pkg/manifests"
hyperv1 "github.com/openshift/hypershift/api/hypershift/v1alpha1"
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/hypershift"
Expand Down Expand Up @@ -47,6 +50,154 @@ var (
restCfgHub *rest.Config
)

// TestCMOConfigWatching exercises the reconcile loop for the OCP cluster
// monitoring (CMO) ConfigMap. It verifies that:
//   - the reconciler creates the cluster-monitoring-config ConfigMap with a
//     managed additional Alertmanager entry (scheme "https"), and
//   - a manual edit of the managed entry is reverted on a later reconcile,
//     while unmanaged user settings (retention) are preserved.
func TestCMOConfigWatching(t *testing.T) {
	namespace := "test-cmo-config"

	scheme := createBaseScheme()
	// Check the error instead of silently dropping it: a scheme failure here
	// would otherwise surface as confusing client errors later.
	if err := ocinfrav1.AddToScheme(scheme); err != nil {
		t.Fatalf("failed to add openshift config types to scheme: %v", err)
	}

	k8sClient, err := client.New(restCfgHub, client.Options{Scheme: scheme})
	if err != nil {
		t.Fatal(err)
	}

	defer tearDownCommonHubResources(t, k8sClient, namespace)

	// Create resources required for the cmo case.
	resourcesDeps := []client.Object{
		makeNamespace(promNamespace),
		makeNamespace(namespace),
		newImagesCM(namespace),
		newHubInfoSecret([]byte(`
endpoint: "http://test-endpoint"
alertmanager-endpoint: "http://test-alertamanger-endpoint"
alertmanager-router-ca: |
    -----BEGIN CERTIFICATE-----
    xxxxxxxxxxxxxxxxxxxxxxxxxxx
    -----END CERTIFICATE-----
`), namespace),
		&corev1.Service{
			ObjectMeta: metav1.ObjectMeta{
				Name:      promSvcName,
				Namespace: promNamespace,
			},
			Spec: corev1.ServiceSpec{
				Ports: []corev1.ServicePort{{Name: "metrics", Port: 9090}},
			},
			Status: corev1.ServiceStatus{},
		},
		&ocinfrav1.ClusterVersion{
			ObjectMeta: metav1.ObjectMeta{
				Name: "version",
			},
		},
		&corev1.Secret{
			ObjectMeta: metav1.ObjectMeta{
				Name:      hubAmAccessorSecretName,
				Namespace: namespace,
			},
			StringData: map[string]string{hubAmAccessorSecretKey: "lol"},
		},
		&oav1beta1.ObservabilityAddon{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "observability-addon",
				Namespace: namespace,
			},
		},
	}
	if err := createResources(k8sClient, resourcesDeps...); err != nil {
		t.Fatalf("Failed to create resources: %v", err)
	}

	mgr, err := ctrl.NewManager(testEnvHub.Config, ctrl.Options{
		Scheme:  k8sClient.Scheme(),
		Metrics: metricsserver.Options{BindAddress: "0"}, // Avoids port conflict with the default port 8080
	})
	assert.NoError(t, err)

	hubClientWithReload, err := util.NewReloadableHubClientWithReloadFunc(func() (client.Client, error) {
		return k8sClient, nil
	})
	assert.NoError(t, err)
	reconciler := ObservabilityAddonReconciler{
		Client:                k8sClient,
		HubClient:             hubClientWithReload,
		IsHubMetricsCollector: true,
		Scheme:                scheme,
		Namespace:             namespace,
		HubNamespace:          "local-cluster",
		ServiceAccountName:    "endpoint-monitoring-operator",
		InstallPrometheus:     false,
	}

	err = reconciler.SetupWithManager(mgr)
	assert.NoError(t, err)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Use a goroutine-local error: the original assigned to the outer err,
	// racing with the main goroutine's own assignments to err below.
	go func() {
		if startErr := mgr.Start(ctx); startErr != nil {
			t.Errorf("failed to start manager: %v", startErr)
		}
	}()

	// Wait for the reconciler to create the CMO ConfigMap.
	cm := &corev1.ConfigMap{}
	err = wait.Poll(1*time.Second, time.Minute, func() (bool, error) {
		getErr := k8sClient.Get(context.Background(), types.NamespacedName{Namespace: promNamespace, Name: clusterMonitoringConfigName}, cm)
		if errors.IsNotFound(getErr) {
			return false, nil
		}
		return getErr == nil, getErr
	})
	// Stop the test here on failure: continuing would dereference a nil
	// PrometheusK8sConfig below.
	if err != nil {
		t.Fatalf("CMO ConfigMap was never created: %v", err)
	}

	foundClusterMonitoringConfiguration := &cmomanifests.ClusterMonitoringConfiguration{}
	if err := yaml2.Unmarshal([]byte(cm.Data[clusterMonitoringConfigDataKey]), foundClusterMonitoringConfiguration); err != nil {
		t.Fatalf("failed to unmarshal CMO config: %v", err)
	}
	if foundClusterMonitoringConfiguration.PrometheusK8sConfig == nil {
		t.Fatal("expected PrometheusK8sConfig to be set in the CMO config")
	}
	assert.Len(t, foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs, 1)
	assert.Equal(t, "https", foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs[0].Scheme)

	// Tamper with the managed Alertmanager entry and set an unmanaged field.
	foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs[0].Scheme = "http"
	foundClusterMonitoringConfiguration.PrometheusK8sConfig.Retention = "infinity-and-beyond"

	b, err := yaml2.Marshal(foundClusterMonitoringConfiguration)
	assert.NoError(t, err)
	cm.Data[clusterMonitoringConfigDataKey] = string(b)
	err = k8sClient.Update(context.Background(), cm)
	assert.NoError(t, err)

	// Repeat the test and expect a partial revert: the managed scheme goes
	// back to "https" while the user-set retention is preserved.
	err = wait.Poll(1*time.Second, time.Minute, func() (bool, error) {
		updated := &corev1.ConfigMap{}
		getErr := k8sClient.Get(context.Background(), types.NamespacedName{Namespace: promNamespace, Name: clusterMonitoringConfigName}, updated)
		if errors.IsNotFound(getErr) {
			return false, nil
		}
		if getErr != nil {
			// The original fell through here and unmarshaled an empty
			// ConfigMap, which panics on the nil PrometheusK8sConfig below.
			return false, getErr
		}

		updatedConfig := &cmomanifests.ClusterMonitoringConfiguration{}
		if err := yaml2.Unmarshal([]byte(updated.Data[clusterMonitoringConfigDataKey]), updatedConfig); err != nil {
			return false, nil
		}
		// Guard against nil/empty structures while the reconciler has not
		// yet written the managed entry back.
		if updatedConfig.PrometheusK8sConfig == nil || len(updatedConfig.PrometheusK8sConfig.AlertmanagerConfigs) == 0 {
			return false, nil
		}
		if updatedConfig.PrometheusK8sConfig.AlertmanagerConfigs[0].Scheme != "https" {
			return false, nil
		}
		if updatedConfig.PrometheusK8sConfig.Retention != "infinity-and-beyond" {
			return false, nil
		}
		return true, nil
	})
	assert.NoError(t, err)
}

// TestIntegrationReconcileHypershift tests the reconcile function for hypershift CRDs.
func TestIntegrationReconcileHypershift(t *testing.T) {
testNamespace := "test-ns"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,16 +394,20 @@ func createOrUpdateClusterMonitoringConfig(
// check if alertmanagerConfigs exists
if foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs != nil {
additionalAlertmanagerConfigExists := false
for _, v := range foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs {
var atIndex int
for i, v := range foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs {
if isManaged(v) {
additionalAlertmanagerConfigExists = true
atIndex = i
break
}
}
if !additionalAlertmanagerConfigExists {
foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs = append(
foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs,
newAdditionalAlertmanagerConfig(hubInfo))
} else {
foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs[atIndex] = newAdditionalAlertmanagerConfig(hubInfo)
}
} else {
foundClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs = newAlertmanagerConfigs
Expand Down
2 changes: 2 additions & 0 deletions operators/endpointmetrics/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ func main() {
{FieldSelector: namespaceSelector},
{FieldSelector: fmt.Sprintf("metadata.name==%s,metadata.namespace!=%s",
operatorconfig.AllowlistCustomConfigMapName, "open-cluster-management-observability")},
{FieldSelector: fmt.Sprintf("metadata.name==%s,metadata.namespace==%s",
operatorconfig.OCPClusterMonitoringConfigMapName, operatorconfig.OCPClusterMonitoringNamespace)},
},
appsv1.SchemeGroupVersion.WithKind("Deployment"): {
{FieldSelector: namespaceSelector},
Expand Down
7 changes: 7 additions & 0 deletions operators/pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ const (
ClientCACertificateCN = "observability-client-ca-certificate"
)

// Constants identifying the OCP in-cluster (platform) monitoring resources
// managed by the OpenShift Cluster Monitoring Operator (CMO).
const (
	// OCPClusterMonitoringNamespace is the namespace hosting the OCP
	// platform monitoring stack.
	OCPClusterMonitoringNamespace = "openshift-monitoring"
	// OCPClusterMonitoringConfigMapName is the name of the CMO user
	// configuration ConfigMap.
	OCPClusterMonitoringConfigMapName = "cluster-monitoring-config"
	// OCPClusterMonitoringConfigMapKey is the data key in that ConfigMap
	// holding the CMO configuration payload.
	OCPClusterMonitoringConfigMapKey = "config.yaml"
	// OCPClusterMonitoringPrometheusService is the in-cluster Prometheus
	// service name.
	OCPClusterMonitoringPrometheusService = "prometheus-k8s"
)

const (
MetricsCollectorImgName = "metrics-collector"
MetricsCollectorKey = "metrics_collector"
Expand Down

0 comments on commit 59654ff

Please sign in to comment.