Skip to content

Commit

Permalink
Add events to kubectl.wait() and kubectl.rollout() errors
Browse files Browse the repository at this point in the history
Most errors are timeouts, and the error from kubectl is not helpful:

    error: timed out waiting for the condition on deployments/nginx

Try to attach events from the relevant resource to the error, to help
debug the issue.

Here is an example from running a self test in a loop and hitting the
Docker Hub rate limit:

    drenv.commands.Error: Command failed:
       command: ('kubectl', 'wait', '--context', 'dr1', 'deploy/nginx', '--for=condition=Available',
                 '--namespace=ns1', '--timeout=120s')
       exitcode: 1
       error:
          error: timed out waiting for the condition on deployments/nginx
       events:
          LAST SEEN              TYPE      REASON    OBJECT                       MESSAGE
          36m (x27 over 151m)    Warning   Failed    Pod/nginx-7f456874f4-kqljw   Failed to pull
          image "nginx": rpc error: code = Unknown desc = failed to pull and unpack image
              "docker.io/library/nginx:latest": ...: 429 Too Many Requests - Server message:
              toomanyrequests: You have reached your pull rate limit. You may increase the limit by
              authenticating and upgrading: https://www.docker.com/increase-rate-limit
          26m (x29 over 151m)    Normal    Pulling   Pod/nginx-7f456874f4-kqljw   Pulling image "nginx"
          85s (x648 over 151m)   Normal    BackOff   Pod/nginx-7f456874f4-kqljw   Back-off pulling image "nginx"

Signed-off-by: Nir Soffer <nsoffer@redhat.com>
  • Loading branch information
nirs committed Jul 12, 2023
1 parent f9c7292 commit fad97f6
Showing 1 changed file with 22 additions and 2 deletions.
24 changes: 22 additions & 2 deletions test/drenv/kubectl.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,13 @@ def rollout(action, resource, timeout=300, namespace=None, context=None, log=pri
args = [action, resource, f"--timeout={timeout}s"]
if namespace:
args.append(f"--namespace={namespace}")
_watch("rollout", *args, context=context, log=log)
try:
_watch("rollout", *args, context=context, log=log)
except commands.Error as e:
# Most failures are timeouts, events may help to debug.
if action == "status":
e.events = _try_events(resource, namespace=namespace, context=context)
raise


def wait(
Expand Down Expand Up @@ -123,7 +129,21 @@ def wait(
args.append(f"--for={condition}")
if namespace:
args.append(f"--namespace={namespace}")
_watch("wait", *args, context=context, log=log)

try:
_watch("wait", *args, context=context, log=log)
except commands.Error as e:
# Most failures are timeouts, events may help to debug.
if resource and not (all or selector):
e.events = _try_events(resource, namespace=namespace, context=context)
raise


def _try_events(resource, namespace=None, context=None):
    """
    Best-effort lookup of events for resource.

    Returns whatever events() returns for the given resource, namespace and
    context. Any Exception raised by events() is not propagated; instead a
    short placeholder string describing the failure is returned, so callers
    that are already handling another error are not interrupted by a second
    one.
    """
    try:
        output = events(resource, namespace=namespace, context=context)
    except Exception as err:
        # Fetching events is only a debugging aid; report the failure inline
        # instead of raising.
        output = f"(error getting events: {err})"
    return output


def _run(cmd, *args, context=None):
Expand Down

0 comments on commit fad97f6

Please sign in to comment.