Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

shell/oom: log more detail when tasks are killed #6289

Merged
merged 1 commit into from
Sep 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions src/shell/oom.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "src/common/libutil/strstrip.h"
#include "src/common/libutil/errno_safe.h"
#include "src/common/libutil/errprintf.h"
#include "src/common/libutil/parse_size.h"
#include "ccan/str/str.h"

#include "builtins.h"
Expand Down Expand Up @@ -104,6 +105,35 @@
return NULL;
}

/* Read the cgroup file 'name' for the current process (located with
 * get_cgroup_path() using R_OK) and copy its contents, after strstrip(),
 * into 'buf', which has room for 'len' bytes.
 *
 * Returns 0 on success.  Returns -1 if get_cgroup_path() or read_file()
 * returns NULL, if snprintf() reports an error, or if the value does not
 * fit in 'buf' (in that case 'buf' may hold a truncated copy).
 */
static int get_cgroup_value (const char *name, char *buf, size_t len)
{
    char *path;
    char *s = NULL;
    int n;
    int rc = -1;

    if (!(path = get_cgroup_path (getpid (), name, R_OK))
        || !(s = read_file (path)))
        goto out;
    /* snprintf() returns an int.  Check for a negative (error) result
     * explicitly before comparing against the unsigned buffer size, rather
     * than relying on the implicit signed-to-unsigned conversion.
     */
    n = snprintf (buf, len, "%s", strstrip (s));
    if (n < 0 || (size_t)n >= len)
        goto out;
    rc = 0;
out:
    /* free (NULL) is a no-op, so both pointers can be released on any path */
    free (s);
    free (path);
    return rc;
}

/* Fetch the cgroup value 'name' via get_cgroup_value(), convert it with
 * parse_size(), and return the string produced by encode_size().
 * If the value cannot be read or parsed, the literal "unknown" is
 * returned instead.
 *
 * NOTE(review): on success the returned pointer is whatever encode_size()
 * hands back; confirm its lifetime before holding on to it.
 */
static const char *get_cgroup_size (const char *name)
{
    char text[32];
    uint64_t bytes;

    if (get_cgroup_value (name, text, sizeof (text)) >= 0
        && parse_size (text, &bytes) >= 0)
        return encode_size (bytes);
    return "unknown";
}

/* Parse 'name' from memory.events file. Example content:
* low 0
* high 0
Expand Down Expand Up @@ -170,9 +200,14 @@
/* If any new oom events have been recorded, log them.
*/
if (oom->oom_kill < count) {
shell_log_error ("Memory cgroup out of memory: killed %lu task%s.",
shell_log_error ("Memory cgroup out of memory: "

Check warning on line 203 in src/shell/oom.c

View check run for this annotation

Codecov / codecov/patch

src/shell/oom.c#L203

Added line #L203 was not covered by tests
"killed %lu task%s on %s.",
count - oom->oom_kill,
count - oom->oom_kill > 1 ? "s" : "");
count - oom->oom_kill > 1 ? "s" : "",
oom->shell->hostname);

shell_log_error ("memory.peak = %s", get_cgroup_size ("memory.peak"));

Check warning on line 209 in src/shell/oom.c

View check run for this annotation

Codecov / codecov/patch

src/shell/oom.c#L209

Added line #L209 was not covered by tests

oom->oom_kill = count;
}
out:
Expand Down
Loading