Skip to content

Commit

Permalink
[Chassis] Reduce wait/timeout time for chassis (#16077)
Browse files Browse the repository at this point in the history
Description of PR
The Cisco chassis was unstable a few months ago, so the wait time for sshd to become ready was raised to 600s and the wait time for services to become ready was raised to 900s.
Now that the image has become stable, we can reduce these wait times to:
sshd ready: 420s
service ready: 600s

We will keep monitoring to see whether the wait times can be reduced further to align with t0/t1 in the future.

Approach
What is the motivation for this PR?
The Cisco chassis was unstable a few months ago, so the wait time for sshd to become ready was raised to 600s and the wait time for services to become ready was raised to 900s.
Now that the image has become stable, we can reduce these wait times to:
sshd ready: 420s
service ready: 600s

We will keep monitoring to see whether the wait times can be reduced further to align with t0/t1 in the future.

How did you do it?
How did you verify/test it?
In the nightly test, the configuration works well.

Any platform specific information?
Supported testbed topology if it's a new test case?
Documentation

Co-authored-by: jianquanye@microsoft.com
  • Loading branch information
yejianquan authored Jan 15, 2025
1 parent c4d6a7c commit b8645da
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion tests/common/config_reload.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def _config_reload_cmd_wrapper(cmd, executable):
sonic_host.shell(cmd, executable="/bin/bash")

modular_chassis = sonic_host.get_facts().get("modular_chassis")
wait = max(wait, 900) if modular_chassis else wait
wait = max(wait, 600) if modular_chassis else wait

if safe_reload:
# The wait time passed in might not be guaranteed to cover the actual
Expand Down
4 changes: 2 additions & 2 deletions tests/common/plugins/sanity_check/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _check_interfaces_on_dut(*args, **kwargs):
networking_uptime = dut.get_networking_uptime().seconds
timeout = max((SYSTEM_STABILIZE_MAX_TIME - networking_uptime), 0)
if dut.get_facts().get("modular_chassis"):
timeout = max(timeout, 900)
timeout = max(timeout, 600)
interval = 20
logger.info("networking_uptime=%d seconds, timeout=%d seconds, interval=%d seconds" %
(networking_uptime, timeout, interval))
Expand Down Expand Up @@ -259,7 +259,7 @@ def _check_bgp_status_helper():
else:
max_timeout = SYSTEM_STABILIZE_MAX_TIME - networking_uptime + 480
if dut.get_facts().get("modular_chassis"):
max_timeout = max(max_timeout, 900)
max_timeout = max(max_timeout, 600)
timeout = max(max_timeout, 1)
interval = 20
wait_until(timeout, interval, 0, _check_bgp_status_helper)
Expand Down
4 changes: 2 additions & 2 deletions tests/common/reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,8 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10,
if warmboot_finalizer_timeout == 0 and 'warmboot_finalizer_timeout' in reboot_ctrl:
warmboot_finalizer_timeout = reboot_ctrl['warmboot_finalizer_timeout']
if duthost.get_facts().get("modular_chassis") and safe_reboot:
wait = max(wait, 900)
timeout = max(timeout, 600)
wait = max(wait, 600)
timeout = max(timeout, 420)
except KeyError:
raise ValueError('invalid reboot type: "{} for {}"'.format(reboot_type, hostname))
logger.info('Reboot {}: wait[{}], timeout[{}]'.format(hostname, wait, timeout))
Expand Down
4 changes: 2 additions & 2 deletions tests/tacacs/test_ro_disk.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ def do_reboot(duthost, localhost, duthosts):
def post_reboot_healthcheck(duthost, localhost, duthosts, wait_time):
timeout = 300
if duthost.get_facts().get("modular_chassis"):
wait_time = max(wait_time, 900)
timeout = max(timeout, 600)
wait_time = max(wait_time, 600)
timeout = max(timeout, 420)
localhost.wait_for(host=duthost.mgmt_ip, port=22, state="started", delay=10, timeout=timeout)
else:
localhost.wait_for(host=duthost.mgmt_ip, port=22, state="started", delay=10, timeout=timeout)
Expand Down

0 comments on commit b8645da

Please sign in to comment.