diff --git a/chaoslib/litmus/vira/node-restart/lib/node-restart.go b/chaoslib/litmus/vira/node-restart/lib/node-restart.go index b892265..4fb029b 100644 --- a/chaoslib/litmus/vira/node-restart/lib/node-restart.go +++ b/chaoslib/litmus/vira/node-restart/lib/node-restart.go @@ -1,7 +1,6 @@ package lib import ( - "context" "fmt" "io/ioutil" "os" @@ -9,9 +8,11 @@ import ( "os/signal" "syscall" "time" + "strconv" "github.com/litmuschaos/litmus-go/pkg/cerrors" "github.com/palantir/stacktrace" + "github.com/sirupsen/logrus" clients "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -21,8 +22,6 @@ import ( "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" - "github.com/litmuschaos/litmus-go/pkg/utils/retry" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) var ( @@ -49,14 +48,17 @@ func PrepareNodeRestart(experimentsDetails *experimentTypes.ExperimentDetails, c common.WaitForDuration(experimentsDetails.RampTime) } - if experimentsDetails.TargetNode == "" { - //Select node for kubelet-service-kill - experimentsDetails.TargetNode, err = common.getNodesByLabels(experimentsDetaiAppNS, ls.experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) - if err != nil { - return stacktrace.Propagate(err, "could not get node name") - } + nodesAffectedPerc, _ := strconv.Atoi(experimentsDetails.NodesAffectedPerc) + targetNodeList, err := common.GetNodeList(experimentsDetails.TargetNode, experimentsDetails.NodeLabel, nodesAffectedPerc, clients) + if err != nil { + return stacktrace.Propagate(err, "could not get node list") } + log.InfoWithValues("[Info]: Details of Nodes under chaos injection", logrus.Fields{ + "No. Of Nodes": len(targetNodeList), + "Node Names": targetNodeList, + }) + if experimentsDetails.EngineName != "" { msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNode + " node" types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) @@ -74,7 +76,7 @@ func PrepareNodeRestart(experimentsDetails *experimentTypes.ExperimentDetails, c go abortWatcher(experimentsDetails, clients, resultDetails, chaosDetails, eventsDetails) // Restart the application node - if err := restartNode(experimentsDetails, clients, chaosDetails); err != nil { + if err := restartNode(targetNodeList, experimentsDetails, clients, chaosDetails); err != nil { log.Info("[Revert]: Reverting chaos because error during restart of node") return stacktrace.Propagate(err, "could not restart node") } @@ -110,18 +112,13 @@ func PrepareNodeRestart(experimentsDetails *experimentTypes.ExperimentDetails, c } // restartNode restart the target node -func restartNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { +func restartNode(targetNodeList []string, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { select { case <-inject: // stopping the chaos execution, if abort signal received os.Exit(0) default: - targetNodeList, err := common.getNodesByLabels(experimentsDetails.NodeLabel, clients) - if err != nil { - return err - } - token, err := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token") if err != nil { return err @@ -148,14 +145,14 @@ func restartNode(experimentsDetails *experimentTypes.ExperimentDetails, clients if err := common.RunCLICommands(useContextCmd, "", "", "failed to use context", cerrors.ErrorTypeHelper); err != nil { return err } - for _, node := range targetNodeList.Items { - log.Infof("[Inject]: Restarting the %v node", node.Name) - command := exec.Command("kubectl", "node_shell", node.Name, "--", "shutdown", "-r", "+1") - if err := common.RunCLICommands(command, "", fmt.Sprintf("{node: %s}", node.Name), "failed to restart the target node", cerrors.ErrorTypeChaosInject); err != nil { + for _, appNode := range targetNodeList { + log.Infof("[Inject]: Restarting the %v node", appNode) + command := exec.Command("kubectl", "node_shell", appNode, "--", "shutdown", "-r", "+1") + if err := common.RunCLICommands(command, "", fmt.Sprintf("{node: %s}", appNode), "failed to restart the target node", cerrors.ErrorTypeChaosInject); err != nil { return err } - common.SetTargets(node.Name, "injected", "node", chaosDetails) + common.SetTargets(appNode, "injected", "node", chaosDetails) } diff --git a/pkg/generic/node-restart/environment/environment.go b/pkg/generic/node-restart/environment/environment.go index 54fb422..02d84b4 100644 --- a/pkg/generic/node-restart/environment/environment.go +++ b/pkg/generic/node-restart/environment/environment.go @@ -18,6 +18,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.ChaosUID = clientTypes.UID(types.Getenv("CHAOS_UID", "")) experimentDetails.InstanceID = types.Getenv("INSTANCE_ID", "") experimentDetails.ChaosPodName = types.Getenv("POD_NAME", "") + experimentDetails.NodesAffectedPerc = types.Getenv("NODES_AFFECTED_PERC", "0") experimentDetails.LIBImage = types.Getenv("LIB_IMAGE", "litmuschaos/go-runner:latest") experimentDetails.LIBImagePullPolicy = types.Getenv("LIB_IMAGE_PULL_POLICY", "Always") experimentDetails.AuxiliaryAppInfo = types.Getenv("AUXILIARY_APPINFO", "") diff --git a/pkg/generic/node-restart/types/types.go b/pkg/generic/node-restart/types/types.go index 82c8e2d..1e917ec 100644 --- a/pkg/generic/node-restart/types/types.go +++ b/pkg/generic/node-restart/types/types.go @@ -17,6 +17,7 @@ type ExperimentDetails struct { TerminationGracePeriodSeconds int InstanceID string ChaosNamespace string + NodesAffectedPerc string ChaosPodName string RunID string LIBImage string diff --git a/pkg/kubernetes/node-restart/environment/environment.go b/pkg/kubernetes/node-restart/environment/environment.go index e2a6d19..3d5fe02 100644 --- a/pkg/kubernetes/node-restart/environment/environment.go +++ b/pkg/kubernetes/node-restart/environment/environment.go @@ -19,6 +19,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.ChaosUID = clientTypes.UID(types.Getenv("CHAOS_UID", "")) experimentDetails.InstanceID = types.Getenv("INSTANCE_ID", "") experimentDetails.ChaosPodName = types.Getenv("POD_NAME", "") + experimentDetails.NodesAffectedPerc = types.Getenv("NODES_AFFECTED_PERC", "0") experimentDetails.AuxiliaryAppInfo = types.Getenv("AUXILIARY_APPINFO", "") experimentDetails.TargetNode = types.Getenv("TARGET_NODE", "") experimentDetails.Delay, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_DELAY", "2")) diff --git a/pkg/kubernetes/node-restart/types/types.go b/pkg/kubernetes/node-restart/types/types.go index dc90498..ad16d5b 100644 --- a/pkg/kubernetes/node-restart/types/types.go +++ b/pkg/kubernetes/node-restart/types/types.go @@ -17,6 +17,7 @@ type ExperimentDetails struct { InstanceID string ChaosNamespace string ChaosPodName string + NodesAffectedPerc string TargetNode string AuxiliaryAppInfo string Timeout int