Skip to content

Commit

Permalink
Merge pull request #71 from zytedata/conservative-retrying
Browse files Browse the repository at this point in the history
Add conservative_retrying
  • Loading branch information
kmike authored May 29, 2024
2 parents 51e59ea + d87c0e6 commit 221eb5c
Show file tree
Hide file tree
Showing 8 changed files with 713 additions and 220 deletions.
5 changes: 5 additions & 0 deletions docs/ref/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,13 @@ Retries
.. autodata:: zyte_api_retrying
:no-value:

.. autodata:: aggressive_retrying
:no-value:

.. autoclass:: RetryFactory

.. autoclass:: AggressiveRetryFactory


Errors
======
Expand Down
37 changes: 30 additions & 7 deletions docs/use/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,20 +152,43 @@ following:

- Retries :ref:`rate-limiting responses <zyte-api-rate-limit>` forever.

- Retries :ref:`unsuccessful responses <zyte-api-unsuccessful-responses>` up
to 3 times.
- Retries :ref:`temporary download errors
<zyte-api-temporary-download-errors>` up to 3 times.

- Retries network errors for up to 15 minutes.
- Retries network errors until they have happened for 15 minutes straight.

All retries are done with an exponential backoff algorithm.

To customize the retry policy, create your own :class:`~tenacity.AsyncRetrying`
object, e.g. using a custom subclass of :data:`~zyte_api.RetryFactory`, and
pass it when creating your client object:
.. _aggressive-retry-policy:

If some :ref:`unsuccessful responses <zyte-api-unsuccessful-responses>` exceed
maximum retries with the default retry policy, try using
:data:`~zyte_api.aggressive_retrying` instead, which modifies the default retry
policy as follows:

- Temporary download error are retried 7 times. :ref:`Permanent download
errors <zyte-api-permanent-download-errors>` also count towards this retry
limit.

- Retries permanent download errors up to 3 times.

- Retries error responses with an HTTP status code in the 500-599 range (503,
520 and 521 excluded) up to 3 times.

Alternatively, the reference documentation of :class:`~zyte_api.RetryFactory`
and :class:`~zyte_api.AggressiveRetryFactory` features some examples of custom
retry policies, and you can always build your own
:class:`~tenacity.AsyncRetrying` object from scratch.

To use :data:`~zyte_api.aggressive_retrying` or a custom retry policy, pass an
instance of your :class:`~tenacity.AsyncRetrying` subclass when creating your
client object:

.. code-block:: python
client = ZyteAPI(retrying=custom_retry_policy)
from zyte_api import ZyteAPI, aggressive_retrying
client = ZyteAPI(retrying=aggressive_retrying)
When retries are exceeded for a given request, an exception is raised. Except
for the :meth:`~ZyteAPI.iter` method of the :ref:`sync API <sync>`, which
Expand Down
3 changes: 3 additions & 0 deletions tests/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ def render_POST(self, request):
request.setResponseCode(429)
response_data = {"status": 429, "type": "/limits/over-user-limit"}
return json.dumps(response_data).encode()
if domain == "e500.example":
request.setResponseCode(500)
return ""
if domain == "e520.example":
request.setResponseCode(520)
response_data = {"status": 520, "type": "/download/temporary-error"}
Expand Down
179 changes: 8 additions & 171 deletions tests/test_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,13 @@
from unittest.mock import AsyncMock

import pytest
from tenacity import AsyncRetrying

from zyte_api import AsyncZyteAPI, RequestError
from zyte_api._retry import RetryFactory
from zyte_api import AggressiveRetryFactory, AsyncZyteAPI, RequestError
from zyte_api.aio.client import AsyncClient
from zyte_api.apikey import NoApiKey
from zyte_api.errors import ParsedError
from zyte_api.utils import USER_AGENT

from .mockserver import DropResource, MockServer


@pytest.mark.parametrize(
("client_cls",),
Expand Down Expand Up @@ -72,46 +68,6 @@ async def test_get(client_cls, get_method, mockserver):
assert actual_result == expected_result


UNSET = object()


class OutlierException(RuntimeError):
pass


@pytest.mark.parametrize(
("client_cls", "get_method"),
(
(AsyncZyteAPI, "get"),
(AsyncClient, "request_raw"),
),
)
@pytest.mark.parametrize(
("value", "exception"),
(
(UNSET, OutlierException),
(True, OutlierException),
(False, RequestError),
),
)
@pytest.mark.asyncio
async def test_get_handle_retries(client_cls, get_method, value, exception, mockserver):
kwargs = {}
if value is not UNSET:
kwargs["handle_retries"] = value

def broken_stop(_):
raise OutlierException

retrying = AsyncRetrying(stop=broken_stop)
client = client_cls(api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying)
with pytest.raises(exception):
await getattr(client, get_method)(
{"url": "https://exception.example", "browserHtml": True},
**kwargs,
)


@pytest.mark.parametrize(
("client_cls", "get_method"),
(
Expand Down Expand Up @@ -234,132 +190,6 @@ async def test_iter(client_cls, iter_method, mockserver):
assert actual_result in expected_results


@pytest.mark.parametrize(
("client_cls", "get_method"),
(
(AsyncZyteAPI, "get"),
(AsyncClient, "request_raw"),
),
)
@pytest.mark.parametrize(
("subdomain", "waiter"),
(
("e429", "throttling"),
("e520", "temporary_download_error"),
),
)
@pytest.mark.asyncio
async def test_retry_wait(client_cls, get_method, subdomain, waiter, mockserver):
def broken_wait(self, retry_state):
raise OutlierException

class CustomRetryFactory(RetryFactory):
pass

setattr(CustomRetryFactory, f"{waiter}_wait", broken_wait)

retrying = CustomRetryFactory().build()
client = client_cls(api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying)
with pytest.raises(OutlierException):
await getattr(client, get_method)(
{"url": f"https://{subdomain}.example", "browserHtml": True},
)


@pytest.mark.parametrize(
("client_cls", "get_method"),
(
(AsyncZyteAPI, "get"),
(AsyncClient, "request_raw"),
),
)
@pytest.mark.asyncio
async def test_retry_wait_network_error(client_cls, get_method):
waiter = "network_error"

def broken_wait(self, retry_state):
raise OutlierException

class CustomRetryFactory(RetryFactory):
pass

setattr(CustomRetryFactory, f"{waiter}_wait", broken_wait)

retrying = CustomRetryFactory().build()
with MockServer(resource=DropResource) as mockserver:
client = client_cls(
api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying
)
with pytest.raises(OutlierException):
await getattr(client, get_method)(
{"url": "https://example.com", "browserHtml": True},
)


@pytest.mark.parametrize(
("client_cls", "get_method"),
(
(AsyncZyteAPI, "get"),
(AsyncClient, "request_raw"),
),
)
@pytest.mark.parametrize(
("subdomain", "stopper"),
(
("e429", "throttling"),
("e520", "temporary_download_error"),
),
)
@pytest.mark.asyncio
async def test_retry_stop(client_cls, get_method, subdomain, stopper, mockserver):
def broken_stop(self, retry_state):
raise OutlierException

class CustomRetryFactory(RetryFactory):
def wait(self, retry_state):
return None

setattr(CustomRetryFactory, f"{stopper}_stop", broken_stop)

retrying = CustomRetryFactory().build()
client = client_cls(api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying)
with pytest.raises(OutlierException):
await getattr(client, get_method)(
{"url": f"https://{subdomain}.example", "browserHtml": True},
)


@pytest.mark.parametrize(
("client_cls", "get_method"),
(
(AsyncZyteAPI, "get"),
(AsyncClient, "request_raw"),
),
)
@pytest.mark.asyncio
async def test_retry_stop_network_error(client_cls, get_method):
stopper = "network_error"

def broken_stop(self, retry_state):
raise OutlierException

class CustomRetryFactory(RetryFactory):
def wait(self, retry_state):
return None

setattr(CustomRetryFactory, f"{stopper}_stop", broken_stop)

retrying = CustomRetryFactory().build()
with MockServer(resource=DropResource) as mockserver:
client = client_cls(
api_key="a", api_url=mockserver.urljoin("/"), retrying=retrying
)
with pytest.raises(OutlierException):
await getattr(client, get_method)(
{"url": "https://example.com", "browserHtml": True},
)


@pytest.mark.parametrize(
("client_cls", "get_method", "iter_method"),
(
Expand Down Expand Up @@ -482,3 +312,10 @@ async def test_session_no_context_manager(mockserver):
assert Exception in expected_results
else:
assert actual_result in expected_results


def test_retrying_class():
"""A descriptive exception is raised when creating a client with an
AsyncRetrying subclass or similar instead of an instance of it."""
with pytest.raises(ValueError):
AsyncZyteAPI(api_key="foo", retrying=AggressiveRetryFactory)
Loading

0 comments on commit 221eb5c

Please sign in to comment.