From 05b5a59572c33da123ae9eeec02a692b6b3d4357 Mon Sep 17 00:00:00 2001
From: Jianquan Ye
Date: Wed, 15 Jan 2025 12:40:18 +1000
Subject: [PATCH] [Chassis] Reduce wait/timeout time for chassis (#16077)

Description of PR
Cisco chassis were unstable several months ago, so the wait time for sshd readiness was raised to 600s and the wait time for service readiness was raised to 900s.
Now that the image has become stable, we can reduce the wait times to:
sshd ready: 420s
service ready: 600s
We will keep monitoring to see whether the wait times can be reduced further to align with t0/t1 in the future.

Approach
What is the motivation for this PR?
Cisco chassis were unstable several months ago, so the wait time for sshd readiness was raised to 600s and the wait time for service readiness was raised to 900s.
Now that the image has become stable, we can reduce the wait times to:
sshd ready: 420s
service ready: 600s
We will keep monitoring to see whether the wait times can be reduced further to align with t0/t1 in the future.

How did you do it?
How did you verify/test it?
The configuration works well in the nightly tests.

Any platform specific information?
Supported testbed topology if it's a new test case?
Documentation co-authorized by: jianquanye@microsoft.com --- tests/common/config_reload.py | 2 +- tests/common/plugins/sanity_check/checks.py | 4 ++-- tests/common/reboot.py | 4 ++-- tests/tacacs/test_ro_disk.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/common/config_reload.py b/tests/common/config_reload.py index 5916a63b2bf..b522a343b9e 100644 --- a/tests/common/config_reload.py +++ b/tests/common/config_reload.py @@ -192,7 +192,7 @@ def _config_reload_cmd_wrapper(cmd, executable): sonic_host.shell(cmd, executable="/bin/bash") modular_chassis = sonic_host.get_facts().get("modular_chassis") - wait = max(wait, 900) if modular_chassis else wait + wait = max(wait, 600) if modular_chassis else wait if safe_reload: # The wait time passed in might not be guaranteed to cover the actual diff --git a/tests/common/plugins/sanity_check/checks.py b/tests/common/plugins/sanity_check/checks.py index 0c786811839..83299323bc9 100644 --- a/tests/common/plugins/sanity_check/checks.py +++ b/tests/common/plugins/sanity_check/checks.py @@ -110,7 +110,7 @@ def _check_interfaces_on_dut(*args, **kwargs): networking_uptime = dut.get_networking_uptime().seconds timeout = max((SYSTEM_STABILIZE_MAX_TIME - networking_uptime), 0) if dut.get_facts().get("modular_chassis"): - timeout = max(timeout, 900) + timeout = max(timeout, 600) interval = 20 logger.info("networking_uptime=%d seconds, timeout=%d seconds, interval=%d seconds" % (networking_uptime, timeout, interval)) @@ -259,7 +259,7 @@ def _check_bgp_status_helper(): else: max_timeout = SYSTEM_STABILIZE_MAX_TIME - networking_uptime + 480 if dut.get_facts().get("modular_chassis"): - max_timeout = max(max_timeout, 900) + max_timeout = max(max_timeout, 600) timeout = max(max_timeout, 1) interval = 20 wait_until(timeout, interval, 0, _check_bgp_status_helper) diff --git a/tests/common/reboot.py b/tests/common/reboot.py index 7115869827f..aabc9bf4280 100644 --- a/tests/common/reboot.py +++ 
b/tests/common/reboot.py @@ -258,8 +258,8 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10, if warmboot_finalizer_timeout == 0 and 'warmboot_finalizer_timeout' in reboot_ctrl: warmboot_finalizer_timeout = reboot_ctrl['warmboot_finalizer_timeout'] if duthost.get_facts().get("modular_chassis") and safe_reboot: - wait = max(wait, 900) - timeout = max(timeout, 600) + wait = max(wait, 600) + timeout = max(timeout, 420) except KeyError: raise ValueError('invalid reboot type: "{} for {}"'.format(reboot_type, hostname)) logger.info('Reboot {}: wait[{}], timeout[{}]'.format(hostname, wait, timeout)) diff --git a/tests/tacacs/test_ro_disk.py b/tests/tacacs/test_ro_disk.py index 65dc5ee0bc7..d6d1c23cbb2 100644 --- a/tests/tacacs/test_ro_disk.py +++ b/tests/tacacs/test_ro_disk.py @@ -104,8 +104,8 @@ def do_reboot(duthost, localhost, duthosts): def post_reboot_healthcheck(duthost, localhost, duthosts, wait_time): timeout = 300 if duthost.get_facts().get("modular_chassis"): - wait_time = max(wait_time, 900) - timeout = max(timeout, 600) + wait_time = max(wait_time, 600) + timeout = max(timeout, 420) localhost.wait_for(host=duthost.mgmt_ip, port=22, state="started", delay=10, timeout=timeout) else: localhost.wait_for(host=duthost.mgmt_ip, port=22, state="started", delay=10, timeout=timeout)