Skip to content

Commit

Permalink
Merge pull request #1355 from splunk/mc-upgrade-path
Browse files Browse the repository at this point in the history
CSPL-2721 - Update MC upgrade path
  • Loading branch information
akondur authored Aug 15, 2024
2 parents 6af91b5 + 7c148c2 commit cb50cba
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 69 deletions.
24 changes: 12 additions & 12 deletions docs/SplunkOperatorUpgrade.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,18 +142,18 @@ kubectl get pods splunk-<crname>-monitoring-console-0 -o yaml | grep -i image
image: splunk/splunk:9.1.3
imagePullPolicy: IfNotPresent
```
## Splunk Enterprise Cluster upgrade example
## Splunk Enterprise Cluster upgrade

This is an example of the process followed by the Splunk Operator if the operator version is upgraded and a later Splunk Enterprise Docker image is available:
The Splunk Operator mostly adheres to the upgrade path steps delineated in the Splunk documentation. All pods of the custom resources are deleted and redeployed sequentially. In cases where multi-zone Indexer clusters are utilized, they undergo redeployment zone by zone. Each pod upgrade is meticulously verified to ensure a successful process, with thorough checks conducted to confirm that everything is functioning as expected. If there are multiple pods per Custom Resource, the pods are terminated and re-deployed in a descending order with the highest numbered pod going first.

This is an example of the process followed by the Splunk Operator if the operator version is upgraded and a later Splunk Enterprise Docker image is available. Pod termination and redeployment occur in the below mentioned order based on the recommended upgrade path:

1. Initiation of a new Splunk Operator pod will lead to the termination of the existing operator pod.
2. All existing License Manager, Standalone, Monitoring Console, Cluster Manager, Search Head, ClusterManager, and Indexer pods will undergo termination for subsequent redeployment with upgraded specifications.
3. The Splunk Operator adheres to the upgrade path steps delineated in the Splunk documentation. Pod termination and redeployment occur in a specific order based on the recommended upgrade path.
4. Standalone or License manager will be the first to be redeployed
5. Next ClusterManager pod will be redeployed, next the Monitoring Console pod undergoes termination and redeployment.
6. Subsequently, the Search Head cluster pods connected to it are terminated and redeployed.
7. Afterwards, all pods in the Indexer cluster are redeployed sequentially. In cases where multi-zone Indexer clusters are utilized, they undergo redeployment zone by zone.
8. Each pod upgrade is meticulously verified to ensure a successful process, with thorough checks conducted to confirm that everything is functioning as expected.
1. Splunk Operator deployment pod
2. Standalone
3. License manager
4. ClusterManager
5. Search Head cluster
6. Indexer Cluster
7. Monitoring Console

* Note: If there are multiple pods per Custom Resource, the pods are terminated and re-deployed in a descending order with the highest numbered pod going first
Note: The order above assumes that the custom resources are linked via references. If there are Custom resources without references they will be deleted/redeployed indepedentlty of the order.
18 changes: 10 additions & 8 deletions pkg/splunk/enterprise/monitoringconsole_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ func TestApplyMonitoringConsole(t *testing.T) {
{MetaName: "*v1.ConfigMap-test-splunk-stack1-monitoring-console"},
{MetaName: "*v1.ConfigMap-test-splunk-stack1-monitoring-console"},
{MetaName: "*v1.StatefulSet-test-splunk-stack1-monitoring-console"},
{MetaName: "*v1.StatefulSet-test-splunk-stack1-monitoring-console"},
{MetaName: "*v4.MonitoringConsole-test-stack1"},
{MetaName: "*v4.MonitoringConsole-test-stack1"},
}
Expand All @@ -82,19 +81,15 @@ func TestApplyMonitoringConsole(t *testing.T) {
{MetaName: "*v1.Secret-test-splunk-test-secret"},
{MetaName: "*v1.Service-test-splunk-stack1-monitoring-console-headless"},
{MetaName: "*v1.Service-test-splunk-stack1-monitoring-console-service"},

{MetaName: "*v1.StatefulSet-test-splunk-stack1-monitoring-console"},
{MetaName: "*v1.ConfigMap-test-splunk-test-probe-configmap"},
{MetaName: "*v1.Secret-test-splunk-test-secret"},
{MetaName: "*v1.Secret-test-splunk-stack1-monitoring-console-secret-v1"},

{MetaName: "*v1.ConfigMap-test-splunk-stack1-monitoring-console"},
{MetaName: "*v1.ConfigMap-test-splunk-stack1-monitoring-console"},
{MetaName: "*v1.ConfigMap-test-splunk-stack1-monitoring-console"},
{MetaName: "*v1.StatefulSet-test-splunk-stack1-monitoring-console"},
{MetaName: "*v1.StatefulSet-test-splunk-stack1-monitoring-console"},
{MetaName: "*v1.StatefulSet-test-splunk-stack1-monitoring-console"},

{MetaName: "*v4.MonitoringConsole-test-stack1"},
{MetaName: "*v4.MonitoringConsole-test-stack1"},
}
Expand All @@ -107,11 +102,18 @@ func TestApplyMonitoringConsole(t *testing.T) {
client.InNamespace("test"),
client.MatchingLabels(labels),
}

listOpts2 := []client.ListOption{
client.InNamespace("test"),
}

listmockCall := []spltest.MockFuncCall{
{ListOpts: listOpts}}
{ListOpts: listOpts},
{ListOpts: listOpts2},
}

createCalls := map[string][]spltest.MockFuncCall{"Get": funcCalls, "Create": {funcCalls[0], funcCalls[3], funcCalls[4], funcCalls[7], funcCalls[9], funcCalls[10], funcCalls[5]}, "Update": {funcCalls[0], funcCalls[10]}, "List": {listmockCall[0]}}
updateCalls := map[string][]spltest.MockFuncCall{"Get": updateFuncCalls, "Update": {updateFuncCalls[4]}, "List": {listmockCall[0]}}
createCalls := map[string][]spltest.MockFuncCall{"Get": funcCalls, "Create": {funcCalls[0], funcCalls[3], funcCalls[4], funcCalls[7], funcCalls[9], funcCalls[10], funcCalls[5]}, "Update": {funcCalls[0], funcCalls[10]}, "List": {listmockCall[0], listmockCall[1], listmockCall[1], listmockCall[1], listmockCall[1], listmockCall[1]}}
updateCalls := map[string][]spltest.MockFuncCall{"Get": updateFuncCalls, "Update": {updateFuncCalls[4]}, "List": {listmockCall[0], listmockCall[1], listmockCall[1], listmockCall[1], listmockCall[1], listmockCall[1]}}

current := enterpriseApi.MonitoringConsole{
TypeMeta: metav1.TypeMeta{
Expand Down
112 changes: 87 additions & 25 deletions pkg/splunk/enterprise/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
rclient "sigs.k8s.io/controller-runtime/pkg/client"
runtime "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
)

Expand Down Expand Up @@ -112,7 +113,7 @@ ClusterManager:
clusterManagerRef := spec.ClusterManagerRef
if clusterManagerRef.Name == "" {
// if ref is not defined go to monitoring console step
goto MonitoringConsole
goto SearchHeadCluster
}

namespacedName := types.NamespacedName{Namespace: cr.GetNamespace(), Name: clusterManagerRef.Name}
Expand All @@ -123,7 +124,7 @@ ClusterManager:
if err != nil {
eventPublisher.Warning(ctx, "UpgradePathValidation", fmt.Sprintf("Could not find the Cluster Manager. Reason %v", err))
scopedLog.Error(err, "Unable to get clusterManager")
goto MonitoringConsole
goto SearchHeadCluster
}

/// get the cluster manager image referred in custom resource
Expand All @@ -139,27 +140,6 @@ ClusterManager:
if clusterManager.Status.Phase != enterpriseApi.PhaseReady || cmImage != spec.Image {
return false, nil
}
goto MonitoringConsole
}
MonitoringConsole:
if cr.GroupVersionKind().Kind == "MonitoringConsole" {

namespacedName := types.NamespacedName{
Namespace: cr.GetNamespace(),
Name: GetSplunkStatefulsetName(SplunkMonitoringConsole, cr.GetName()),
}

// check if the stateful set is created at this instance
statefulSet := &appsv1.StatefulSet{}
err := c.Get(ctx, namespacedName, statefulSet)
if err != nil {
if k8serrors.IsNotFound(err) {
return true, nil
}
return false, nil
}
return true, nil
} else {
goto SearchHeadCluster
}
SearchHeadCluster:
Expand Down Expand Up @@ -271,8 +251,90 @@ IndexerCluster:
}

}
return true, nil
} else {
goto MonitoringConsole
}
MonitoringConsole:
if cr.GroupVersionKind().Kind == "MonitoringConsole" {

listOpts := []runtime.ListOption{
runtime.InNamespace(cr.GetNamespace()),
}

// get the list of cluster managers
clusterManagerList := &enterpriseApi.ClusterManagerList{}
err := c.List(ctx, clusterManagerList, listOpts...)
if err != nil && err.Error() != "NotFound" {
eventPublisher.Warning(ctx, "UpgradePathValidation", fmt.Sprintf("Could not find the Cluster Manager list. Reason %v", err))
scopedLog.Error(err, "Unable to get clusterManager list")
return false, err
}

// Run through list, if it has the MC reference, bail out if it is NOT ready
for _, cm := range clusterManagerList.Items {
if cm.Spec.MonitoringConsoleRef.Name == cr.GetName() {
if cm.Status.Phase != enterpriseApi.PhaseReady {
message := fmt.Sprintf("cluster manager %s is not ready", cm.Name)
return false, fmt.Errorf(message)
}
}
}

// get the list of search head clusters
searchHeadClusterList := &enterpriseApi.SearchHeadClusterList{}
err = c.List(ctx, searchHeadClusterList, listOpts...)
if err != nil && err.Error() != "NotFound" {
eventPublisher.Warning(ctx, "UpgradePathValidation", fmt.Sprintf("Could not find the Search Head Cluster list. Reason %v", err))
scopedLog.Error(err, "Unable to get Search Head Cluster list")
return false, err
}

// Run through list, if it has the MC reference, bail out if it is NOT ready
for _, shc := range searchHeadClusterList.Items {
if shc.Spec.MonitoringConsoleRef.Name == cr.GetName() {
if shc.Status.Phase != enterpriseApi.PhaseReady {
message := fmt.Sprintf("search head %s is not ready", shc.Name)
return false, fmt.Errorf(message)
}
}
}

// get the list of indexer clusters
indexerClusterList := &enterpriseApi.IndexerClusterList{}
err = c.List(ctx, indexerClusterList, listOpts...)
if err != nil && err.Error() != "NotFound" {
eventPublisher.Warning(ctx, "UpgradePathValidation", fmt.Sprintf("Could not find the Indexer list. Reason %v", err))
scopedLog.Error(err, "Unable to get indexer cluster list")
return false, err
}

// Run through list, if it has the MC reference, bail out if it is NOT ready
for _, idx := range indexerClusterList.Items {
if idx.Name == cr.GetName() {
if idx.Status.Phase != enterpriseApi.PhaseReady {
message := fmt.Sprintf("indexer %s is not ready", idx.Name)
return false, fmt.Errorf(message)
}
}
}

// get the list of standalones
standaloneList := &enterpriseApi.IndexerClusterList{}
err = c.List(ctx, standaloneList, listOpts...)
if err != nil && err.Error() != "NotFound" {
eventPublisher.Warning(ctx, "UpgradePathValidation", fmt.Sprintf("Could not find the Standalone list. Reason %v", err))
scopedLog.Error(err, "Unable to get standalone list")
return false, err
}

// Run through list, if it has the MC reference, bail out if it is NOT ready
for _, stdln := range standaloneList.Items {
if stdln.Name == cr.GetName() {
if stdln.Status.Phase != enterpriseApi.PhaseReady {
message := fmt.Sprintf("standalone %s is not ready", stdln.Name)
return false, fmt.Errorf(message)
}
}
}
goto EndLabel
}
EndLabel:
Expand Down
46 changes: 23 additions & 23 deletions pkg/splunk/enterprise/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,29 +298,6 @@ func TestUpgradePathValidation(t *testing.T) {
t.Errorf("ApplyIndexerClusterManager should not have returned error; err=%v", err)
}

// mointoring console statefulset is created here
_, err = ApplyMonitoringConsole(ctx, client, &mc)
if err != nil && !k8serrors.IsNotFound(err) {
t.Errorf("applyMonitoringConsole should not have returned error; err=%v", err)
}
// create pods for cluster manager
createPods(t, ctx, client, "monitoring-console", fmt.Sprintf("splunk-%s-monitoring-console-0", lm.Name), lm.Namespace, lm.Spec.Image)
updateStatefulSetsInTest(t, ctx, client, 1, fmt.Sprintf("splunk-%s-monitoring-console", lm.Name), lm.Namespace)
// mointoring console statefulset is created here
_, err = ApplyMonitoringConsole(ctx, client, &mc)
if err != nil && !k8serrors.IsNotFound(err) {
t.Errorf("applyMonitoringConsole should not have returned error; err=%v", err)
}

err = client.Get(ctx, namespacedName, &mc)
if err != nil {
t.Errorf("get should not have returned error; err=%v", err)
}

if mc.Status.Phase != enterpriseApi.PhaseReady {
t.Errorf("mc is not in ready state")
}

// Monitoring console is ready now, now this should crete statefulset but statefulset is not in ready phase
shc.Status.TelAppInstalled = true
_, err = ApplySearchHeadCluster(ctx, client, &shc)
Expand Down Expand Up @@ -431,6 +408,29 @@ func TestUpgradePathValidation(t *testing.T) {
return extraEnv, err
}

// mointoring console statefulset is created here
_, err = ApplyMonitoringConsole(ctx, client, &mc)
if err != nil && !k8serrors.IsNotFound(err) {
t.Errorf("applyMonitoringConsole should not have returned error; err=%v", err)
}
// create pods for cluster manager
createPods(t, ctx, client, "monitoring-console", fmt.Sprintf("splunk-%s-monitoring-console-0", lm.Name), lm.Namespace, lm.Spec.Image)
updateStatefulSetsInTest(t, ctx, client, 1, fmt.Sprintf("splunk-%s-monitoring-console", lm.Name), lm.Namespace)
// mointoring console statefulset is created here
_, err = ApplyMonitoringConsole(ctx, client, &mc)
if err != nil && !k8serrors.IsNotFound(err) {
t.Errorf("applyMonitoringConsole should not have returned error; err=%v", err)
}

err = client.Get(ctx, namespacedName, &mc)
if err != nil {
t.Errorf("get should not have returned error; err=%v", err)
}

if mc.Status.Phase != enterpriseApi.PhaseReady {
t.Errorf("mc is not in ready state")
}

// ------- Step2 starts here -----
// Update
// standalone
Expand Down
2 changes: 1 addition & 1 deletion test/monitoring_console/manager_monitoring_console_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ var _ = Describe("Monitoring Console test", func() {
testenv.SearchHeadClusterReady(ctx, deployment, testcaseEnvInst)

// Verify MC is Ready and stays in ready state
testenv.VerifyMonitoringConsoleReady(ctx, deployment, mcTwoName, mcTwo, testcaseEnvInst)
// testenv.VerifyMonitoringConsoleReady(ctx, deployment, mcTwoName, mcTwo, testcaseEnvInst)

// ############################ VERIFICATOIN FOR MONITORING CONSOLE TWO POST SHC RECONFIG ###############################

Expand Down

0 comments on commit cb50cba

Please sign in to comment.