-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathn-version-fault-injection.sh
executable file
·122 lines (95 loc) · 3.3 KB
/
n-version-fault-injection.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/bin/bash
# Echo every command before it runs (same as `set -x`).
set -o xtrace
# Route INT, QUIT and TERM to the forced_shutdown handler.
trap 'forced_shutdown' INT QUIT TERM
# Signal handler registered through `trap`: ends the script right away.
# `exit` is given no argument, so the script's exit status is the status
# of the last command that ran.
function forced_shutdown { exit; }
# TODO:
# exclude sudo if in container

# Work out whether privileged commands need a `sudo` prefix.
# SUDO is either "sudo" or empty so it can be placed unquoted in front
# of a command.
USER=$(whoami)
SUDO=""
if [[ "$USER" != "root" ]]; then
  SUDO="sudo"
else
  # HACK: root if inside container -> mount debugfs and tracefs
  mount -t debugfs debugfs /sys/kernel/debug
  mount -t tracefs tracefs /sys/kernel/tracing
  # END HACK :)
fi

# Positional arguments:
#   $1 - target client name (mandatory); used as the config section prefix
#   $2 - URL the error models are downloaded from
# The test is quoted so that an empty or whitespace-containing argument
# is evaluated correctly instead of breaking `test`.
if [[ -z "${1:-}" ]]; then
  echo "target client undefined" >&2
  exit 1
fi

CONFIG_FILE="$(pwd)/config.toml"
TARGET=$1
ERROR_MODEL_URL="${2:-}"
#######################################
# Read one value from the TOML configuration via `stoml`.
# Globals:   CONFIG_FILE (read) - path of the TOML file
# Arguments: $1 - key to look up (e.g. "output_dir", "$TARGET.exec_cmd")
# Outputs:   whatever `stoml` prints for that key
# Returns:   exit status of `stoml`
#######################################
get_config() {
  # Both arguments are quoted so a config path containing spaces (the
  # path is derived from the current directory) stays a single argument.
  stoml "$CONFIG_FILE" "$1"
}
# Resolve the directories used by the run and fetch the error models.
WORKING_DIR=$HOME
OUTPUT_DIR=$(get_config "output_dir")
CHAOS_ETH_DIR=$(get_config "chaos_eth_dir")
ERROR_MODELS="$WORKING_DIR/error_models.json"

# Download the error models. A failed download is fatal: continuing would
# only make the injector start against an unusable model file on every
# loop iteration. Without a URL nothing is downloaded and whatever file is
# already at $ERROR_MODELS is used.
if [[ -n "$ERROR_MODEL_URL" ]]; then
  if ! wget -O "$ERROR_MODELS" "$ERROR_MODEL_URL"; then
    echo "failed to download error models from '$ERROR_MODEL_URL'" >&2
    exit 1
  fi
else
  echo "no error model URL given; using existing $ERROR_MODELS" >&2
fi

PRE_SYNC_CMD="$(pwd)/synchronize-stop.sh"
# spawn + sync wait
# { $PRE_SYNC_CMD $TARGET; }
echo "START" > ipc.dat
#######################################
# Print the PID of every process whose `ps` line matches a pattern.
# Arguments: $1 - grep basic-regex pattern, matched against "pid ppid cmd"
# Outputs:   first column (pid) of each matching line, one per line
#######################################
find_pids() {
  ps axo pid,ppid,cmd | grep -- "$1" | awk '{print $1}'
}

#######################################
# Check whether any process line matches a pattern.
# Arguments: $1 - grep basic-regex pattern, matched against "pid ppid cmd"
# Returns:   0 if at least one line matches, non-zero otherwise
#######################################
is_running() {
  ps axo pid,ppid,cmd | grep -- "$1" > /dev/null
}

# Supervise loop: start the target client, teku and the syscall injector,
# watch them, and as soon as any one of them disappears stop the other
# two and start the whole set again.
while true; do
  # --- start target ---
  TARGET_LOG="$OUTPUT_DIR/$TARGET-sync-$(date -Iseconds).log"
  TARGET_CMD=$(get_config "$TARGET.exec_cmd")
  DATA_DIR_PARAM="$(get_config "$TARGET.datadir_flag")=$WORKING_DIR/$(get_config "$TARGET.datadir")"
  JWT_FLAG=$(get_config "$TARGET.jwt_flag")
  TARGET_JWT_FILE="$WORKING_DIR/$(get_config "$TARGET.jwt_path")"
  # Reset on every iteration so no stale or inherited value is passed on
  # when the target has no JWT flag configured.
  JWT_PARAM=""
  if [[ -n "$JWT_FLAG" ]]; then
    JWT_PARAM="$JWT_FLAG=$TARGET_JWT_FILE"
  fi
  # TARGET_CMD is deliberately unquoted: it holds a command line that must
  # be split into words. JWT_PARAM disappears entirely when it is empty.
  { $TARGET_CMD ${JWT_PARAM:+"$JWT_PARAM"} "$DATA_DIR_PARAM" &> "$TARGET_LOG"; } &
  TARGET_PPID=$!
  # Processes are identified by "<pid of the background group>.*<pattern>"
  # in the `ps` listing, i.e. a child of the group we just spawned.
  TARGET_GREP_STR="${TARGET_PPID}.*$(get_config "$TARGET.grep_str")"
  sleep 60

  # --- start teku ---
  TEKU_LOG="$OUTPUT_DIR/teku-sync-$(date -Iseconds).log"
  { teku --ee-endpoint=http://localhost:8551 "--ee-jwt-secret-file=$TARGET_JWT_FILE" "--data-beacon-path=$WORKING_DIR/nvme/teku-data-dir/" &> "$TEKU_LOG"; } &
  TEKU_PPID=$!
  sleep 2
  TEKU_GREP_STR="${TEKU_PPID}.*teku\\.home"

  # --- attach error injection ---
  # delay two minutes to catch up
  sleep 120
  # Look the target PID up only now, right before it is needed. Doing it
  # immediately after `&` races with the child's exec and can yield an
  # empty PID.
  TARGET_PID=$(find_pids "$TARGET_GREP_STR")
  # The [s] bracket keeps the grep process from matching its own command line.
  CHAOS_ETH_GREP_STR="[s]yscall_injector.py"
  { $SUDO python "$CHAOS_ETH_DIR/syscall_injector.py" --config "$ERROR_MODELS" -p $TARGET_PID &> "$OUTPUT_DIR/chaos-$(date -Iseconds).log"; } &
  CHAOS_ETH_PPID=$!
  CHAOS_ETH_GREP_STR="${CHAOS_ETH_PPID}.*${CHAOS_ETH_GREP_STR}"
  sleep 3

  # --- check that everything is still running ---
  while is_running "$TARGET_GREP_STR" \
    && is_running "$TEKU_GREP_STR" \
    && is_running "$CHAOS_ETH_GREP_STR"; do
    sleep 10
  done

  # --- if one crashed restart all ---
  # PIDs are resolved at shutdown time so that a process which was slow to
  # appear is still found. An empty result means the process is already
  # gone and no signal is sent. The PID variables stay unquoted because a
  # pattern may match more than one line.
  TARGET_PID=$(find_pids "$TARGET_GREP_STR")
  TEKU_PID=$(find_pids "$TEKU_GREP_STR")
  CHAOS_ETH_PID=$(find_pids "$CHAOS_ETH_GREP_STR")
  if [[ -n "$TARGET_PID" ]]; then
    kill -2 $TARGET_PID
  fi
  if [[ -n "$TEKU_PID" ]]; then
    kill -2 $TEKU_PID
  fi
  if [[ -n "$CHAOS_ETH_PID" ]]; then
    $SUDO kill -2 $CHAOS_ETH_PID
  fi

  # Wait until all three are gone before starting the next round.
  while is_running "$TARGET_GREP_STR" \
    || is_running "$TEKU_GREP_STR" \
    || is_running "$CHAOS_ETH_GREP_STR"; do
    sleep 1
  done
done