From 1e736473ee090a94de2e05e903c536e2cccef559 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Fri, 9 Jun 2023 16:57:33 +0200 Subject: [PATCH 01/14] Add configuration of the backoff algorithm for the federation client --- .../configuration/config_documentation.md | 8 +++++ synapse/config/federation.py | 14 +++++++++ synapse/util/retryutils.py | 29 ++++++++++--------- tests/storage/test_transactions.py | 6 ++-- tests/util/test_retryutils.py | 22 +++++++------- 5 files changed, 53 insertions(+), 26 deletions(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 8426de04179b..5c511d09956b 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1212,6 +1212,14 @@ like sending a federation transaction. * `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts. * `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts. +The following options are related to configuring the backoff parameters used fo a specific destination. +Unlike previous configuration those values applies across all requests, +and the state of the backoff is stored on DB. + +* `destination_min_retry_interval`: the initial backoff, after the first request fails, in seconds. Default to 10mn. +* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Default to 2. +* `destination_max_retry_interval`: a cap on the backoff. Default to one day. + Example configuration: ```yaml federation: diff --git a/synapse/config/federation.py b/synapse/config/federation.py index d21f7fd02a5e..b2dbec9d32c0 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -59,5 +59,19 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.max_long_retries = federation_config.get("max_long_retries", 10) self.max_short_retries = federation_config.get("max_short_retries", 3) + # Allow for the configuration of the backoff algorithm used + # when trying to reach an unavailable destination. + # Unlike previous configuration those values applies across + # multiple requests and the state of the backoff is stored on DB. + self.destination_min_retry_interval = federation_config.get( + "destination_min_retry_interval", 10 * 60 + ) + self.destination_retry_multiplier = federation_config.get( + "destination_retry_multiplier", 2 + ) + self.destination_max_retry_interval = federation_config.get( + "destination_max_retry_interval", 24 * 60 * 60 + ) + _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}} diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index dcc037b9822e..aceda7bf133e 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -27,15 +27,6 @@ logger = logging.getLogger(__name__) -# the initial backoff, after the first transaction fails -MIN_RETRY_INTERVAL = 10 * 60 * 1000 - -# how much we multiply the backoff by after each subsequent fail -RETRY_MULTIPLIER = 5 - -# a cap on the backoff. (Essentially none) -MAX_RETRY_INTERVAL = 2**62 - class NotRetryingDestination(Exception): def __init__(self, retry_last_ts: int, retry_interval: int, destination: str): @@ -169,6 +160,16 @@ def __init__( self.notifier = notifier self.replication_client = replication_client + self.destination_min_retry_interval = ( + self.store.hs.config.federation.destination_min_retry_interval * 1000 + ) + self.destination_retry_multiplier = ( + self.store.hs.config.federation.destination_retry_multiplier + ) + self.destination_max_retry_interval = ( + self.store.hs.config.federation.destination_max_retry_interval * 1000 + ) + def __enter__(self) -> None: pass @@ -220,13 +221,15 @@ def __exit__( # We couldn't connect. if self.retry_interval: self.retry_interval = int( - self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4) + self.retry_interval + * self.destination_retry_multiplier + * random.uniform(0.8, 1.4) ) - if self.retry_interval >= MAX_RETRY_INTERVAL: - self.retry_interval = MAX_RETRY_INTERVAL + if self.retry_interval >= self.destination_max_retry_interval: + self.retry_interval = self.destination_max_retry_interval else: - self.retry_interval = MIN_RETRY_INTERVAL + self.retry_interval = self.destination_min_retry_interval logger.info( "Connection to %s was unsuccessful (%s(%s)); backoff now %i", diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py index 2fab84a52939..52d5bda31aca 100644 --- a/tests/storage/test_transactions.py +++ b/tests/storage/test_transactions.py @@ -17,7 +17,6 @@ from synapse.server import HomeServer from synapse.storage.databases.main.transactions import DestinationRetryTimings from synapse.util import Clock -from synapse.util.retryutils import MAX_RETRY_INTERVAL from tests.unittest import HomeserverTestCase @@ -57,8 +56,11 @@ def test_initial_set_transactions(self) -> None: self.get_success(d) def test_large_destination_retry(self) -> None: + max_retry_interval = ( + self.hs.config.federation.destination_max_retry_interval * 1000 + ) d = self.store.set_destination_retry_timings( - "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL + "example.com", max_retry_interval, max_retry_interval, max_retry_interval ) self.get_success(d) diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py index 5f8f4e76b544..4e252d1a616d 100644 --- a/tests/util/test_retryutils.py +++ b/tests/util/test_retryutils.py @@ -11,12 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from synapse.util.retryutils import ( - MIN_RETRY_INTERVAL, - RETRY_MULTIPLIER, - NotRetryingDestination, - get_retry_limiter, -) +from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter from tests.unittest import HomeserverTestCase @@ -42,6 +37,11 @@ def test_limiter(self) -> None: limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) + min_retry_interval = ( + self.hs.config.federation.destination_min_retry_interval * 1000 + ) + retry_multiplier = self.hs.config.federation.destination_retry_multiplier + self.pump(1) try: with limiter: @@ -57,7 +57,7 @@ def test_limiter(self) -> None: assert new_timings is not None self.assertEqual(new_timings.failure_ts, failure_ts) self.assertEqual(new_timings.retry_last_ts, failure_ts) - self.assertEqual(new_timings.retry_interval, MIN_RETRY_INTERVAL) + self.assertEqual(new_timings.retry_interval, min_retry_interval) # now if we try again we should get a failure self.get_failure( @@ -68,7 +68,7 @@ def test_limiter(self) -> None: # advance the clock and try again # - self.pump(MIN_RETRY_INTERVAL) + self.pump(min_retry_interval) limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) self.pump(1) @@ -87,16 +87,16 @@ def test_limiter(self) -> None: self.assertEqual(new_timings.failure_ts, failure_ts) self.assertEqual(new_timings.retry_last_ts, retry_ts) self.assertGreaterEqual( - new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5 + new_timings.retry_interval, min_retry_interval * retry_multiplier * 0.5 ) self.assertLessEqual( - new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0 + new_timings.retry_interval, min_retry_interval * retry_multiplier * 2.0 ) # # one more go, with success # - self.reactor.advance(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0) + self.reactor.advance(min_retry_interval * retry_multiplier * 2.0) limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) self.pump(1) From 2f09c03fd970ebdf1d3de0fdccccd242d2559c8d Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Fri, 9 Jun 2023 17:01:29 +0200 Subject: [PATCH 02/14] Add changelog --- changelog.d/15754.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/15754.misc diff --git a/changelog.d/15754.misc b/changelog.d/15754.misc new file mode 100644 index 000000000000..4314d415a372 --- /dev/null +++ b/changelog.d/15754.misc @@ -0,0 +1 @@ +Allow for the configuration of the backoff algorithm for federation destinations. From 35fbd934ce407245d0f5a7221bbcd6a22d8bd738 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Tue, 13 Jun 2023 18:11:11 +0200 Subject: [PATCH 03/14] Apply suggestions from code review Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- docs/usage/configuration/config_documentation.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 5c511d09956b..2feb11016e12 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1212,13 +1212,13 @@ like sending a federation transaction. * `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts. * `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts. -The following options are related to configuring the backoff parameters used fo a specific destination. -Unlike previous configuration those values applies across all requests, -and the state of the backoff is stored on DB. +The following options are related to configuring the backoff parameters used for a specific destination. +Unlike previous configuration options, these values apply across all requests +and the state of the backoff is stored in the database. -* `destination_min_retry_interval`: the initial backoff, after the first request fails, in seconds. Default to 10mn. -* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Default to 2. -* `destination_max_retry_interval`: a cap on the backoff. Default to one day. +* `destination_min_retry_interval`: the initial backoff, after the first request fails, in seconds. Defaults to 10m. +* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2. +* `destination_max_retry_interval`: a cap on the backoff. Defaults to one day. Example configuration: ```yaml From 404a471e37e03c7e8313086fea5669af4167d90e Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Tue, 13 Jun 2023 23:33:45 +0200 Subject: [PATCH 04/14] Comments --- docs/usage/configuration/config_documentation.md | 5 ++++- synapse/config/federation.py | 8 ++++---- synapse/util/retryutils.py | 4 ++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 2feb11016e12..93b0ece3e97a 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1216,7 +1216,7 @@ The following options are related to configuring the backoff parameters used for Unlike previous configuration options, these values apply across all requests and the state of the backoff is stored in the database. -* `destination_min_retry_interval`: the initial backoff, after the first request fails, in seconds. Defaults to 10m. +* `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m. * `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2. * `destination_max_retry_interval`: a cap on the backoff. Defaults to one day. @@ -1228,6 +1228,9 @@ federation: max_long_retry_delay: 100 max_short_retries: 5 max_long_retries: 20 + destination_min_retry_interval: 30s + destination_retry_multiplier: 5 + destination_max_retry_interval: 12h ``` --- ## Caching diff --git a/synapse/config/federation.py b/synapse/config/federation.py index b2dbec9d32c0..b46ebf30c224 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -63,14 +63,14 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # when trying to reach an unavailable destination. # Unlike previous configuration those values applies across # multiple requests and the state of the backoff is stored on DB. - self.destination_min_retry_interval = federation_config.get( - "destination_min_retry_interval", 10 * 60 + self.destination_min_retry_interval = Config.parse_duration( + federation_config.get("destination_min_retry_interval", "10m") ) self.destination_retry_multiplier = federation_config.get( "destination_retry_multiplier", 2 ) - self.destination_max_retry_interval = federation_config.get( - "destination_max_retry_interval", 24 * 60 * 60 + self.destination_max_retry_interval = Config.parse_duration( + federation_config.get("destination_max_retry_interval", "1d") ) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index aceda7bf133e..fefa31988e35 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -161,13 +161,13 @@ def __init__( self.replication_client = replication_client self.destination_min_retry_interval = ( - self.store.hs.config.federation.destination_min_retry_interval * 1000 + self.store.hs.config.federation.destination_min_retry_interval ) self.destination_retry_multiplier = ( self.store.hs.config.federation.destination_retry_multiplier ) self.destination_max_retry_interval = ( - self.store.hs.config.federation.destination_max_retry_interval * 1000 + self.store.hs.config.federation.destination_max_retry_interval ) def __enter__(self) -> None: From 5ec6e48c368bfd1c2ebbc8a4708869bc0ce63a19 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Tue, 13 Jun 2023 23:58:53 +0200 Subject: [PATCH 05/14] Fix test --- tests/util/test_retryutils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py index 4e252d1a616d..3174ac5e4364 100644 --- a/tests/util/test_retryutils.py +++ b/tests/util/test_retryutils.py @@ -37,9 +37,7 @@ def test_limiter(self) -> None: limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) - min_retry_interval = ( - self.hs.config.federation.destination_min_retry_interval * 1000 - ) + min_retry_interval = self.hs.config.federation.destination_min_retry_interval retry_multiplier = self.hs.config.federation.destination_retry_multiplier self.pump(1) From 702bc7dd1dbfe378e45e95b9d6d1d91416192c42 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Thu, 15 Jun 2023 15:06:22 +0200 Subject: [PATCH 06/14] Add unit --- synapse/config/federation.py | 4 ++-- synapse/util/retryutils.py | 4 ++-- tests/util/test_retryutils.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/config/federation.py b/synapse/config/federation.py index b46ebf30c224..e88f479c9b1e 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -63,13 +63,13 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # when trying to reach an unavailable destination. # Unlike previous configuration those values applies across # multiple requests and the state of the backoff is stored on DB. - self.destination_min_retry_interval = Config.parse_duration( + self.destination_min_retry_interval_ms = Config.parse_duration( federation_config.get("destination_min_retry_interval", "10m") ) self.destination_retry_multiplier = federation_config.get( "destination_retry_multiplier", 2 ) - self.destination_max_retry_interval = Config.parse_duration( + self.destination_max_retry_interval_ms = Config.parse_duration( federation_config.get("destination_max_retry_interval", "1d") ) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index fefa31988e35..99a7257e74cd 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -161,13 +161,13 @@ def __init__( self.replication_client = replication_client self.destination_min_retry_interval = ( - self.store.hs.config.federation.destination_min_retry_interval + self.store.hs.config.federation.destination_min_retry_interval_ms ) self.destination_retry_multiplier = ( self.store.hs.config.federation.destination_retry_multiplier ) self.destination_max_retry_interval = ( - self.store.hs.config.federation.destination_max_retry_interval + self.store.hs.config.federation.destination_max_retry_interval_ms ) def __enter__(self) -> None: diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py index 3174ac5e4364..cf95d77c8c76 100644 --- a/tests/util/test_retryutils.py +++ b/tests/util/test_retryutils.py @@ -37,7 +37,7 @@ def test_limiter(self) -> None: limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) - min_retry_interval = self.hs.config.federation.destination_min_retry_interval + min_retry_interval = self.hs.config.federation.destination_min_retry_interval_ms retry_multiplier = self.hs.config.federation.destination_retry_multiplier self.pump(1) From 1079dca4ec35233ffa8b716db37f972018943bca Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Wed, 21 Jun 2023 14:25:03 +0200 Subject: [PATCH 07/14] Add unit to var names --- synapse/util/retryutils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 99a7257e74cd..4e03e4f2044f 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -160,13 +160,13 @@ def __init__( self.notifier = notifier self.replication_client = replication_client - self.destination_min_retry_interval = ( + self.destination_min_retry_interval_ms = ( self.store.hs.config.federation.destination_min_retry_interval_ms ) self.destination_retry_multiplier = ( self.store.hs.config.federation.destination_retry_multiplier ) - self.destination_max_retry_interval = ( + self.destination_max_retry_interval_ms = ( self.store.hs.config.federation.destination_max_retry_interval_ms ) @@ -226,10 +226,10 @@ def __exit__( * random.uniform(0.8, 1.4) ) - if self.retry_interval >= self.destination_max_retry_interval: - self.retry_interval = self.destination_max_retry_interval + if self.retry_interval >= self.destination_max_retry_interva_ms: + self.retry_interval = self.destination_max_retry_interva_ms else: - self.retry_interval = self.destination_min_retry_interval + self.retry_interval = self.destination_min_retry_interval_ms logger.info( "Connection to %s was unsuccessful (%s(%s)); backoff now %i", From ab4ccfc9e747370b72c737e10457d61aae7216f3 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Wed, 21 Jun 2023 16:00:42 +0200 Subject: [PATCH 08/14] typos --- pyproject.toml | 2 +- synapse/util/retryutils.py | 4 ++-- tests/storage/test_transactions.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 90812de019e8..752f333b8a98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -369,7 +369,7 @@ furo = ">=2022.12.7,<2024.0.0" # system changes. # We are happy to raise these upper bounds upon request, # provided we check that it's safe to do so (i.e. that CI passes). -requires = ["poetry-core>=1.1.0,<=1.6.0", "setuptools_rust>=1.3,<=1.6.0"] +requires = ["poetry-core>=1.1.0,<=1.6.0", "setuptools_rust>=1.3,<=1.6.0", "wheel"] build-backend = "poetry.core.masonry.api" diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 4e03e4f2044f..27e9fc976c10 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -226,8 +226,8 @@ def __exit__( * random.uniform(0.8, 1.4) ) - if self.retry_interval >= self.destination_max_retry_interva_ms: - self.retry_interval = self.destination_max_retry_interva_ms + if self.retry_interval >= self.destination_max_retry_interval_ms: + self.retry_interval = self.destination_max_retry_interval_ms else: self.retry_interval = self.destination_min_retry_interval_ms diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py index 52d5bda31aca..0e903e1dca77 100644 --- a/tests/storage/test_transactions.py +++ b/tests/storage/test_transactions.py @@ -57,7 +57,7 @@ def test_initial_set_transactions(self) -> None: def test_large_destination_retry(self) -> None: max_retry_interval = ( - self.hs.config.federation.destination_max_retry_interval * 1000 + self.hs.config.federation.destination_max_retry_interval_ms * 1000 ) d = self.store.set_destination_retry_timings( "example.com", max_retry_interval, max_retry_interval, max_retry_interval From 68ca3a0e735107af27bb7e48b5925a2911a53e07 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Mon, 26 Jun 2023 16:00:10 +0200 Subject: [PATCH 09/14] Update docs/usage/configuration/config_documentation.md Co-authored-by: Eric Eastwood --- docs/usage/configuration/config_documentation.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 78edeb812368..f1e3813dce8e 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1212,9 +1212,9 @@ like sending a federation transaction. * `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts. * `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts. -The following options are related to configuring the backoff parameters used for a specific destination. -Unlike previous configuration options, these values apply across all requests -and the state of the backoff is stored in the database. +The following options control the retry logic when communicating with a specific homeserver destination. +Unlike the previous configuration options, these values apply across all requests +for a given destination and the state of the backoff is stored in the database. * `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m. * `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2. From e476f3c3f65a9af9f846335c22b370da56b04773 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Mon, 10 Jul 2023 16:48:50 +0200 Subject: [PATCH 10/14] Remove unneeded added dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3799fee15f45..fc1b8c0dadd2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -367,7 +367,7 @@ furo = ">=2022.12.7,<2024.0.0" # system changes. # We are happy to raise these upper bounds upon request, # provided we check that it's safe to do so (i.e. that CI passes). -requires = ["poetry-core>=1.1.0,<=1.6.0", "setuptools_rust>=1.3,<=1.6.0", "wheel"] +requires = ["poetry-core>=1.1.0,<=1.6.0", "setuptools_rust>=1.3,<=1.6.0"] build-backend = "poetry.core.masonry.api" From 7f7a5e8c7af6c9e3da6a228a47c2194443ef23a9 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Tue, 11 Jul 2023 10:53:05 +0200 Subject: [PATCH 11/14] Change destination_max_retry_interval default to a week --- docs/usage/configuration/config_documentation.md | 2 +- synapse/config/federation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index f5972d1dc348..7fc960297478 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1218,7 +1218,7 @@ for a given destination and the state of the backoff is stored in the database. * `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m. * `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2. -* `destination_max_retry_interval`: a cap on the backoff. Defaults to one day. +* `destination_max_retry_interval`: a cap on the backoff. Defaults to a week. Example configuration: ```yaml diff --git a/synapse/config/federation.py b/synapse/config/federation.py index 30397f5ef842..eb6f0b46a320 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -76,7 +76,7 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: "destination_retry_multiplier", 2 ) self.destination_max_retry_interval_ms = Config.parse_duration( - federation_config.get("destination_max_retry_interval", "1d") + federation_config.get("destination_max_retry_interval", "7d") ) From a58ef664566f82b116db83e376ac65c4748fde36 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Thu, 3 Aug 2023 15:03:31 +0200 Subject: [PATCH 12/14] Clarify units --- tests/storage/test_transactions.py | 9 ++++++--- tests/util/test_retryutils.py | 14 ++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py index 0e903e1dca77..ef06b50dbb7b 100644 --- a/tests/storage/test_transactions.py +++ b/tests/storage/test_transactions.py @@ -56,11 +56,14 @@ def test_initial_set_transactions(self) -> None: self.get_success(d) def test_large_destination_retry(self) -> None: - max_retry_interval = ( - self.hs.config.federation.destination_max_retry_interval_ms * 1000 + max_retry_interval_ms = ( + self.hs.config.federation.destination_max_retry_interval_ms ) d = self.store.set_destination_retry_timings( - "example.com", max_retry_interval, max_retry_interval, max_retry_interval + "example.com", + max_retry_interval_ms, + max_retry_interval_ms, + max_retry_interval_ms, ) self.get_success(d) diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py index cf95d77c8c76..1277e1a865ff 100644 --- a/tests/util/test_retryutils.py +++ b/tests/util/test_retryutils.py @@ -37,7 +37,9 @@ def test_limiter(self) -> None: limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) - min_retry_interval = self.hs.config.federation.destination_min_retry_interval_ms + min_retry_interval_ms = ( + self.hs.config.federation.destination_min_retry_interval_ms + ) retry_multiplier = self.hs.config.federation.destination_retry_multiplier self.pump(1) @@ -55,7 +57,7 @@ def test_limiter(self) -> None: assert new_timings is not None self.assertEqual(new_timings.failure_ts, failure_ts) self.assertEqual(new_timings.retry_last_ts, failure_ts) - self.assertEqual(new_timings.retry_interval, min_retry_interval) + self.assertEqual(new_timings.retry_interval, min_retry_interval_ms) # now if we try again we should get a failure self.get_failure( @@ -66,7 +68,7 @@ def test_limiter(self) -> None: # advance the clock and try again # - self.pump(min_retry_interval) + self.pump(min_retry_interval_ms) limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) self.pump(1) @@ -85,16 +87,16 @@ def test_limiter(self) -> None: self.assertEqual(new_timings.failure_ts, failure_ts) self.assertEqual(new_timings.retry_last_ts, retry_ts) self.assertGreaterEqual( - new_timings.retry_interval, min_retry_interval * retry_multiplier * 0.5 + new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 0.5 ) self.assertLessEqual( - new_timings.retry_interval, min_retry_interval * retry_multiplier * 2.0 + new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 2.0 ) # # one more go, with success # - self.reactor.advance(min_retry_interval * retry_multiplier * 2.0) + self.reactor.advance(min_retry_interval_ms * retry_multiplier * 2.0) limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) self.pump(1) From 033e59f54a2a6ca2ab57b1a737db6e3f6b1785ad Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Thu, 3 Aug 2023 15:38:12 +0200 Subject: [PATCH 13/14] Limit retry interval values to avoid overflowing the DB --- synapse/config/federation.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/synapse/config/federation.py b/synapse/config/federation.py index eb6f0b46a320..4c83b6565afb 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -69,14 +69,20 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # when trying to reach an unavailable destination. # Unlike previous configuration those values applies across # multiple requests and the state of the backoff is stored on DB. - self.destination_min_retry_interval_ms = Config.parse_duration( - federation_config.get("destination_min_retry_interval", "10m") + self.destination_min_retry_interval_ms = min( + Config.parse_duration( + federation_config.get("destination_min_retry_interval", "10m") + ), + 2**62, ) self.destination_retry_multiplier = federation_config.get( "destination_retry_multiplier", 2 ) - self.destination_max_retry_interval_ms = Config.parse_duration( - federation_config.get("destination_max_retry_interval", "7d") + self.destination_max_retry_interval_ms = min( + Config.parse_duration( + federation_config.get("destination_max_retry_interval", "7d") + ), + 2**62, ) From 2e633c3f907dcc2ff0bcf47a02838a34fcf6b0be Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 3 Aug 2023 12:32:57 -0400 Subject: [PATCH 14/14] Review comments --- synapse/config/federation.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/synapse/config/federation.py b/synapse/config/federation.py index 4c83b6565afb..97636039b8ad 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -69,11 +69,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # when trying to reach an unavailable destination. # Unlike previous configuration those values applies across # multiple requests and the state of the backoff is stored on DB. - self.destination_min_retry_interval_ms = min( - Config.parse_duration( - federation_config.get("destination_min_retry_interval", "10m") - ), - 2**62, + self.destination_min_retry_interval_ms = Config.parse_duration( + federation_config.get("destination_min_retry_interval", "10m") ) self.destination_retry_multiplier = federation_config.get( "destination_retry_multiplier", 2 @@ -82,6 +79,7 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: Config.parse_duration( federation_config.get("destination_max_retry_interval", "7d") ), + # Set a hard-limit to not overflow the database column. 2**62, )