Skip to content

Commit

Permalink
fix parallel node
Browse files Browse the repository at this point in the history
  • Loading branch information
topahadzi committed Dec 1, 2023
1 parent d028eac commit ac64c11
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 21 deletions.
39 changes: 18 additions & 21 deletions chaoslib/litmus/vira/node-restart/lib/node-restart.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
package lib

import (
"context"
"fmt"
"io/ioutil"
"os"
"os/exec"
"os/signal"
"syscall"
"time"
"strconv"

"github.com/litmuschaos/litmus-go/pkg/cerrors"
"github.com/palantir/stacktrace"
"github.com/sirupsen/logrus"

clients "github.com/litmuschaos/litmus-go/pkg/clients"
"github.com/litmuschaos/litmus-go/pkg/events"
Expand All @@ -21,8 +22,6 @@ import (
"github.com/litmuschaos/litmus-go/pkg/status"
"github.com/litmuschaos/litmus-go/pkg/types"
"github.com/litmuschaos/litmus-go/pkg/utils/common"
"github.com/litmuschaos/litmus-go/pkg/utils/retry"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var (
Expand All @@ -49,14 +48,17 @@ func PrepareNodeRestart(experimentsDetails *experimentTypes.ExperimentDetails, c
common.WaitForDuration(experimentsDetails.RampTime)
}

if experimentsDetails.TargetNode == "" {
//Select node for kubelet-service-kill
experimentsDetails.TargetNode, err = common.getNodesByLabels(experimentsDetaiAppNS, ls.experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients)
if err != nil {
return stacktrace.Propagate(err, "could not get node name")
}
nodesAffectedPerc, _ := strconv.Atoi(experimentsDetails.NodesAffectedPerc)
targetNodeList, err := common.GetNodeList(experimentsDetails.TargetNode, experimentsDetails.NodeLabel, nodesAffectedPerc, clients)
if err != nil {
return stacktrace.Propagate(err, "could not get node list")
}

log.InfoWithValues("[Info]: Details of Nodes under chaos injection", logrus.Fields{
"No. Of Nodes": len(targetNodeList),
"Node Names": targetNodeList,
})

if experimentsDetails.EngineName != "" {
msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNode + " node"
types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails)
Expand All @@ -74,7 +76,7 @@ func PrepareNodeRestart(experimentsDetails *experimentTypes.ExperimentDetails, c
go abortWatcher(experimentsDetails, clients, resultDetails, chaosDetails, eventsDetails)

// Restart the application node
if err := restartNode(experimentsDetails, clients, chaosDetails); err != nil {
if err := restartNode(targetNodeList, experimentsDetails, clients, chaosDetails); err != nil {
log.Info("[Revert]: Reverting chaos because error during restart of node")
return stacktrace.Propagate(err, "could not restart node")
}
Expand Down Expand Up @@ -110,18 +112,13 @@ func PrepareNodeRestart(experimentsDetails *experimentTypes.ExperimentDetails, c
}

// restartNode restarts the target node
func restartNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error {
func restartNode(targetNodeList []string, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error {

select {
case <-inject:
// stopping the chaos execution, if abort signal received
os.Exit(0)
default:
targetNodeList, err := common.getNodesByLabels(experimentsDetails.NodeLabel, clients)
if err != nil {
return err
}

token, err := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token")
if err != nil {
return err
Expand All @@ -148,14 +145,14 @@ func restartNode(experimentsDetails *experimentTypes.ExperimentDetails, clients
if err := common.RunCLICommands(useContextCmd, "", "", "failed to use context", cerrors.ErrorTypeHelper); err != nil {
return err
}
for _, node := range targetNodeList.Items {
log.Infof("[Inject]: Restarting the %v node", node.Name)
command := exec.Command("kubectl", "node_shell", node.Name, "--", "shutdown", "-r", "+1")
if err := common.RunCLICommands(command, "", fmt.Sprintf("{node: %s}", node.Name), "failed to restart the target node", cerrors.ErrorTypeChaosInject); err != nil {
for _, appNode := range targetNodeList {
log.Infof("[Inject]: Restarting the %v node", appNode)
command := exec.Command("kubectl", "node_shell", appNode, "--", "shutdown", "-r", "+1")
if err := common.RunCLICommands(command, "", fmt.Sprintf("{node: %s}", appNode), "failed to restart the target node", cerrors.ErrorTypeChaosInject); err != nil {
return err
}

common.SetTargets(node.Name, "injected", "node", chaosDetails)
common.SetTargets(appNode, "injected", "node", chaosDetails)

}

Expand Down
1 change: 1 addition & 0 deletions pkg/generic/node-restart/environment/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) {
experimentDetails.ChaosUID = clientTypes.UID(types.Getenv("CHAOS_UID", ""))
experimentDetails.InstanceID = types.Getenv("INSTANCE_ID", "")
experimentDetails.ChaosPodName = types.Getenv("POD_NAME", "")
experimentDetails.NodesAffectedPerc = types.Getenv("NODES_AFFECTED_PERC", "0")
experimentDetails.LIBImage = types.Getenv("LIB_IMAGE", "litmuschaos/go-runner:latest")
experimentDetails.LIBImagePullPolicy = types.Getenv("LIB_IMAGE_PULL_POLICY", "Always")
experimentDetails.AuxiliaryAppInfo = types.Getenv("AUXILIARY_APPINFO", "")
Expand Down
1 change: 1 addition & 0 deletions pkg/generic/node-restart/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type ExperimentDetails struct {
TerminationGracePeriodSeconds int
InstanceID string
ChaosNamespace string
NodesAffectedPerc string
ChaosPodName string
RunID string
LIBImage string
Expand Down
1 change: 1 addition & 0 deletions pkg/kubernetes/node-restart/environment/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) {
experimentDetails.ChaosUID = clientTypes.UID(types.Getenv("CHAOS_UID", ""))
experimentDetails.InstanceID = types.Getenv("INSTANCE_ID", "")
experimentDetails.ChaosPodName = types.Getenv("POD_NAME", "")
experimentDetails.NodesAffectedPerc = types.Getenv("NODES_AFFECTED_PERC", "0")
experimentDetails.AuxiliaryAppInfo = types.Getenv("AUXILIARY_APPINFO", "")
experimentDetails.TargetNode = types.Getenv("TARGET_NODE", "")
experimentDetails.Delay, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_DELAY", "2"))
Expand Down
1 change: 1 addition & 0 deletions pkg/kubernetes/node-restart/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type ExperimentDetails struct {
InstanceID string
ChaosNamespace string
ChaosPodName string
NodesAffectedPerc string
TargetNode string
AuxiliaryAppInfo string
Timeout int
Expand Down

0 comments on commit ac64c11

Please sign in to comment.