forked from COSIMA/1deg_jra55_ryf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
resub.sh
executable file
·60 lines (51 loc) · 1.36 KB
/
resub.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/bash
#
# resub.sh - resubmit a payu run after a failure that looks recoverable.
#
# Scans ${outfile} for a known set of error messages. When one is present and
# the resubmission budget is not exhausted, runs `payu sweep` followed by
# `payu run -n ${PAYU_N_RUNS}`, then stores the decremented budget in
# ${counterfile}. Every step is appended to ${logfile}. All three files are
# resolved relative to the current working directory.
#
# Environment (read here, never set here):
#   PAYU_PATH    directory that contains the `payu` executable
#   PAYU_N_RUNS  value handed to `payu run -n`
# NOTE(review): presumably both are exported by payu when it invokes this
# script as a user script -- confirm against the payu configuration.

logfile='resubmit.log'
counterfile='resubmit.count'
outfile='access-om2.err'
MAX_RESUBMISSIONS=2

# Define errors from which a resubmit is appropriate.
# Entries are matched as literal substrings of lines in ${outfile}.
declare -a errors=(
  "Segmentation fault: address not mapped to object"
  "Segmentation fault: invalid permissions for mapped object"
  "Transport retry count exceeded"
  "atmosphere/input.nml"
)

# Append the arguments, as one line, to ${logfile}.
log_msg() {
  printf '%s\n' "$*" >> "${logfile}"
}

# Probe ${outfile} for each entry of errors[], logging every probe.
# Returns: 0 at the first entry found, 1 when none is found. A missing or
# unreadable ${outfile} makes grep fail, which is logged as "not found".
has_resubmittable_error() {
  local error
  for error in "${errors[@]}"; do
    # -F: fixed-string match, so the '.' in "atmosphere/input.nml" is not a
    # regex wildcard; '--' protects any pattern that starts with '-'.
    if grep -qF -- "${error}" "${outfile}"; then
      log_msg "Error found: ${error}"
      return 0
    fi
    log_msg "Error not found: ${error}"
  done
  return 1
}

# Print the number of resubmissions still allowed.
# Reads ${counterfile} when it exists, otherwise starts a fresh budget of
# ${MAX_RESUBMISSIONS}.
load_resub_counter() {
  local count
  if [[ -f "${counterfile}" ]]; then
    count=$(< "${counterfile}")
    # The value is later used in an arithmetic comparison, which would
    # evaluate arbitrary expressions (including command substitutions in
    # array subscripts). Accept plain decimal digits only; anything else is
    # treated as an exhausted budget.
    if [[ "${count}" =~ ^[0-9]+$ ]]; then
      count=$(( 10#${count} ))  # force base 10 so "08" is not parsed as octal
    else
      log_msg "Invalid resubmission counter '${count}', treating as 0"
      count=0
    fi
  else
    log_msg "Reset resubmission counter"
    count=${MAX_RESUBMISSIONS}
  fi
  printf '%s\n' "${count}"
}

# Entry point: decide whether to resubmit and act on the decision.
# Returns 0 on every path, matching the exit status of the original script.
main() {
  date >> "${logfile}"

  if ! has_resubmittable_error; then
    log_msg "Error not eligible for resubmission"
    return 0
  fi

  PAYU_N_RESUB=$(load_resub_counter)
  log_msg "Resubmission counter: ${PAYU_N_RESUB}"

  if (( PAYU_N_RESUB <= 0 )); then
    log_msg "Resubmit limit reached ... "
    # -f: the counter file may legitimately be absent (MAX_RESUBMISSIONS=0).
    rm -f -- "${counterfile}"
  elif [[ -z "${PAYU_PATH:-}" || -z "${PAYU_N_RUNS:-}" ]]; then
    # Without these nothing can be submitted; do not spend an attempt on it.
    log_msg "PAYU_PATH or PAYU_N_RUNS is not set; cannot resubmit"
  else
    # Sweep and re-run
    "${PAYU_PATH}/payu" sweep >> "${logfile}"
    "${PAYU_PATH}/payu" run -n "${PAYU_N_RUNS}" >> "${logfile}"
    # Decrement resub counter and save to counter file
    PAYU_N_RESUB=$(( PAYU_N_RESUB - 1 ))
    printf '%s\n' "${PAYU_N_RESUB}" > "${counterfile}"
  fi

  log_msg ""
}

main "$@"