Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix sidecar index out of range issues #115

Merged
merged 2 commits into from
Aug 1, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions {{cookiecutter.profile_name}}/slurm-sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ def get_state(self, jobid):
"""Return the job state for the given jobid."""
jobid = str(jobid)
if jobid not in self.states:
self.states[jobid] = self._get_state_sacct(jobid)
try:
self.states[jobid] = self._get_state_sacct(jobid)
except:
return "__not_seen_yet__"
return self.states.get(jobid, "__not_seen_yet__")

def register_job(self, jobid):
Expand All @@ -122,17 +125,22 @@ def _get_state_sacct(self, jobid):
try:
logger.debug("Calling %s (try %d)", cmd, try_num)
output = subprocess.check_output(cmd, timeout=self.squeue_timeout, text=True)
break
except subprocess.TimeoutExpired as e:
logger.debug("Call to %s timed out (try %d of %d)", cmd, try_num, self.max_tries)
logger.warning("Call to %s timed out (try %d of %d)", cmd, try_num, self.max_tries)
continue
except subprocess.CalledProcessError as e:
logger.debug("Call to %s failed (try %d of %d)", cmd, try_num, self.max_tries)
if try_num >= self.max_tries:
raise Exception("Problem with call to %s" % cmd)
else:
parsed = {x.split("|")[0]: x.split("|")[1] for x in output.strip().split("\n")}
logger.debug("Returning state of %s as %s", jobid, parsed[jobid])
return parsed[jobid]
logger.warning("Call to %s failed (try %d of %d)", cmd, try_num, self.max_tries)
continue
try:
parsed = {x.split("|")[0]: x.split("|")[1] for x in output.strip().split("\n")}
logger.debug("Returning state of %s as %s", jobid, parsed[jobid])
return parsed[jobid]
except IndexError:
logger.warning("Could not parse %s (try %d of %d)", repr(output), try_num, self.max_tries)
secs = try_num / 2.0
logger.info("Sleeping %f seconds", secs)
time.sleep(secs)
raise Exception("Problem with call to %s" % cmd)

def stop(self):
"""Flag thread to stop execution"""
Expand Down Expand Up @@ -209,6 +217,10 @@ def do_GET(self):
return
# Otherwise, query job ID status
job_id = self.path[len("/job/status/") :]
try:
job_id=job_id.split("%20")[3]
except IndexError:
pass
logger.debug("Querying for job ID %s" % repr(job_id))
status = self.server.poll_thread.get_state(job_id)
logger.debug("Status: %s" % status)
Expand Down