Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry on 503 #5938

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 28 additions & 22 deletions cloudinit/sources/DataSourceEc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import time
import uuid
from contextlib import suppress
from typing import Dict, List
from typing import Dict, List, Literal

from cloudinit import dmi, net, sources
from cloudinit import url_helper as uhelp
Expand Down Expand Up @@ -310,7 +310,7 @@ def _maybe_fetch_api_token(self, mdurls):
timeout=url_params.timeout_seconds,
status_cb=LOG.warning,
headers_cb=self._get_headers,
exception_cb=self._imds_exception_cb,
exception_cb=self._token_exception_cb,
request_method=request_method,
headers_redact=self.imdsv2_token_redact,
connect_synchronously=False,
Expand Down Expand Up @@ -622,25 +622,27 @@ def _refresh_api_token(self, seconds=None):
return None
return response.contents

def _skip_or_refresh_stale_aws_token_cb(self, msg, exception):
def _skip_or_refresh_stale_aws_token_cb(
self, exception: uhelp.UrlError
) -> bool:
"""Callback will not retry on SKIP_USERDATA_CODES or if no token
is available."""
retry = ec2.skip_retry_on_codes(
ec2.SKIP_USERDATA_CODES, msg, exception
)
retry = ec2.skip_retry_on_codes(ec2.SKIP_USERDATA_CODES, exception)
if not retry:
return False # False raises exception
return self._refresh_stale_aws_token_cb(msg, exception)
return self._refresh_stale_aws_token_cb(exception)

def _refresh_stale_aws_token_cb(self, msg, exception):
def _refresh_stale_aws_token_cb(
self, exception: uhelp.UrlError
) -> Literal[True]:
"""Exception handler for Ec2 to refresh token if token is stale."""
if isinstance(exception, uhelp.UrlError) and exception.code == 401:
if exception.code == 401:
# With _api_token as None, _get_headers will _refresh_api_token.
LOG.debug("Clearing cached Ec2 API token due to expiry")
self._api_token = None
return True # always retry

def _imds_exception_cb(self, msg, exception=None):
def _token_exception_cb(self, exception: uhelp.UrlError) -> bool:
"""Fail quickly on proper AWS if IMDSv2 rejects API token request

Guidance from Amazon is that if IMDSv2 had disabled token requests
Expand All @@ -653,19 +655,23 @@ def _imds_exception_cb(self, msg, exception=None):
temporarily unroutable or unavailable will still retry due to the
callsite wait_for_url.
"""
if isinstance(exception, uhelp.UrlError):
if exception.code:
# requests.ConnectionError will have exception.code == None
if exception.code and exception.code >= 400:
if exception.code == 403:
LOG.warning(
"Ec2 IMDS endpoint returned a 403 error. "
"HTTP endpoint is disabled. Aborting."
)
else:
LOG.warning(
"Fatal error while requesting Ec2 IMDSv2 API tokens"
)
raise exception
if exception.code == 403:
LOG.warning(
"Ec2 IMDS endpoint returned a 403 error. "
"HTTP endpoint is disabled. Aborting."
)
return False
elif exception.code == 503:
# Let the global handler deal with it
return False
elif exception.code >= 400:
LOG.warning(
"Fatal error while requesting Ec2 IMDSv2 API tokens"
)
return False
return True

def _get_headers(self, url=""):
"""Return a dict of headers for accessing a url.
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/sources/DataSourceScaleway.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def query_data_api_once(api_address, timeout, requests_session):
session=requests_session,
# If the error is a HTTP/404 or a ConnectionError, go into raise
# block below and don't bother retrying.
exception_cb=lambda _, exc: exc.code != 404
exception_cb=lambda exc: exc.code != 404
and (
not isinstance(exc.cause, requests.exceptions.ConnectionError)
),
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/sources/azure/imds.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __init__(
self._request_count = 0
self._last_error: Union[None, Type, int] = None

def exception_callback(self, req_args, exception) -> bool:
def exception_callback(self, exception) -> bool:
self._request_count += 1
if not isinstance(exception, UrlError):
report_diagnostic_event(
Expand Down
3 changes: 2 additions & 1 deletion cloudinit/sources/helpers/ec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,15 @@ def _materialize(self, blob, base_url):
return joined


def skip_retry_on_codes(status_codes, _request_args, cause):
def skip_retry_on_codes(status_codes, cause):
"""Returns False if cause.code is in status_codes."""
return cause.code not in status_codes


def get_instance_userdata(
api_version="latest",
metadata_address="http://169.254.169.254",
*,
ssl_details=None,
timeout=5,
retries=5,
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/sources/helpers/openstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def _fetch_available_versions(self):
return self._versions

def _path_read(self, path, decode=False):
def should_retry_cb(_request_args, cause):
def should_retry_cb(cause):
try:
code = int(cause.code)
if code >= 400:
Expand Down
Loading
Loading