Skip to content

Commit

Permalink
Merge pull request #207 from JeffersonLab/aaust_launch_merge
Browse files Browse the repository at this point in the history
Fix typo in the swif2 output field name (slum_exitcode -> slurm_exitcode); add automatic retry of failed jobs
  • Loading branch information
aaust authored Nov 11, 2022
2 parents dd9b19d + 4c8ceba commit 5af682b
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
2 changes: 1 addition & 1 deletion launch_scripts/merge_trees/jobs_merge.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ SCRIPTFILE /home/gxproj1/monitoring/merge_trees/script.sh
CACHE_PIN_DAYS 60 # max is 60; set to 0 or comment out for none

# FILE INPUT, OUTPUT BASE DIRECTORIES
INDATA_TOPDIR /cache/halld/RunPeriod-[RUNPERIOD]/analysis/ver[VERSION]/
INDATA_TOPDIR /volatile/halld/analysis/RunPeriod-[RUNPERIOD]/ver[VERSION]/

OUTDIR_LARGE /cache/halld/RunPeriod-[RUNPERIOD]/analysis/ver[VERSION]/
OUTDIR_SMALL /work/halld2/analysis/RunPeriod-[RUNPERIOD]/ver[VERSION]/ # log files
9 changes: 8 additions & 1 deletion launch_scripts/merge_trees/merge_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def build_launch_dictionary(WORKFLOW):
job_dictionary[run_string] = 1


elif (field == "slum_exitcode"):
elif (field == "slurm_exitcode"):
job_result = line.split()[2]
if (job_result == "0"):
run_done = 1
Expand Down Expand Up @@ -376,6 +376,13 @@ def main(argv):

print "New runs complete and submitted:" + str(n_submit)

# RETRY FAILED JOBS
command = "swif2 retry-jobs -workflow " + LAUNCH_WORKFLOW + " -problems SLURM_FAILED SLURM_CANCELLED SLURM_TIMEOUT SLURM_NODE_FAIL"
if VERBOSE > 1:
print command
try_command(command)


# Script entry point: when this file is executed directly (not imported),
# call main() with the command-line arguments, excluding the program name.
if __name__ == "__main__":
main(sys.argv[1:])

Expand Down

0 comments on commit 5af682b

Please sign in to comment.