Skip to content

Commit

Permalink
Merge branch 'sonic-net:master' into ecn_xoff_quanta_variance
Browse files Browse the repository at this point in the history
  • Loading branch information
sreejithsreekumaran authored Dec 18, 2024
2 parents f04dc50 + 5b619d4 commit 46cf60c
Show file tree
Hide file tree
Showing 121 changed files with 23,402 additions and 357 deletions.
10 changes: 5 additions & 5 deletions .azure-pipelines/pr_test_scripts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ t0:
- fdb/test_fdb_mac_learning.py
- ip/test_mgmt_ipv6_only.py
- zmq/test_gnmi_zmq.py
- bgp/test_bgp_route_neigh_learning.py
- l2/test_l2_configure.py
- srv6/test_srv6_basic_sanity.py

t0-2vlans:
- dhcp_relay/test_dhcp_relay.py
Expand Down Expand Up @@ -434,6 +437,8 @@ t1-lag:
- vxlan/test_vxlan_route_advertisement.py
- lldp/test_lldp_syncd.py
- ipfwd/test_nhop_group.py
- restapi/test_restapi_vxlan_ecmp.py
- srv6/test_srv6_basic_sanity.py

multi-asic-t1-lag:
- bgp/test_bgp_bbr.py
Expand Down Expand Up @@ -475,15 +480,10 @@ onboarding_t0:
- lldp/test_lldp_syncd.py
# Flaky, we will triage and fix it later, move to onboarding to unblock pr check
- dhcp_relay/test_dhcp_relay_stress.py
- bgp/test_bgp_route_neigh_learning.py
- l2/test_l2_configure.py
- pc/test_lag_member_forwarding.py
- srv6/test_srv6_basic_sanity.py

onboarding_t1:
- pc/test_lag_member_forwarding.py
- restapi/test_restapi_vxlan_ecmp.py
- srv6/test_srv6_basic_sanity.py
- pfcwd/test_pfcwd_all_port_storm.py
- pfcwd/test_pfcwd_function.py
- pfcwd/test_pfcwd_timer_accuracy.py
Expand Down
2 changes: 2 additions & 0 deletions .azure-pipelines/pr_test_skip_scripts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ tgen:
- snappi_tests/multidut/bgp/test_bgp_outbound_uplink_po_member_flap.py
- snappi_tests/multidut/bgp/test_bgp_outbound_uplink_process_crash.py
- snappi_tests/multidut/ecn/test_multidut_dequeue_ecn_with_snappi.py
- snappi_tests/multidut/ecn/test_multidut_ecn_marking_with_snappi.py
- snappi_tests/multidut/ecn/test_multidut_red_accuracy_with_snappi.py
- snappi_tests/multidut/pfc/test_lossless_response_to_external_pause_storms.py
- snappi_tests/multidut/pfc/test_lossless_response_to_throttling_pause_storms.py
Expand All @@ -242,6 +243,7 @@ tgen:
- snappi_tests/multidut/pfc/test_multidut_global_pause_with_snappi.py
- snappi_tests/multidut/pfc/test_multidut_pfc_pause_lossless_with_snappi.py
- snappi_tests/multidut/pfc/test_multidut_pfc_pause_lossy_with_snappi.py
- snappi_tests/multidut/pfc/test_tx_drop_counter_with_snappi.py
- snappi_tests/multidut/pfcwd/test_multidut_pfcwd_a2a_with_snappi.py
- snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py
- snappi_tests/multidut/pfcwd/test_multidut_pfcwd_burst_storm_with_snappi.py
Expand Down
10 changes: 10 additions & 0 deletions .azure-pipelines/run-test-elastictest-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,18 @@ parameters:
type: string
default: ""

# Enable parallel run for test cases that support parallel run
- name: ENABLE_PARALLEL_RUN
type: string
default: ""

# Specify the file that contains the parallel mode for test cases that need to run in parallel when
# ENABLE_PARALLEL_RUN is set to True. Default value is the test_parallel_modes/default.json file in this repo.
# This field will be ignored if ENABLE_PARALLEL_RUN is set to False.
- name: PARALLEL_MODES_FILE
type: string
default: ""

# The number of retries when the script fails. Global retry if retry_cases_include and retry_cases_exclude are both empty, otherwise specific retry
- name: RETRY_TIMES
type: string
Expand Down Expand Up @@ -257,6 +265,8 @@ steps:
--repo-name ${{ parameters.REPO_NAME }} \
--mgmt-branch ${{ parameters.MGMT_BRANCH }} \
--stop-on-failure ${{ parameters.STOP_ON_FAILURE }} \
--enable-parallel-run ${{ parameters.ENABLE_PARALLEL_RUN }} \
--parallel-modes-file ${{ parameters.PARALLEL_MODES_FILE }} \
--retry-times ${{ parameters.RETRY_TIMES }} \
--retry-cases-include ${{ parameters.RETRY_CASES_INCLUDE }} \
--retry-cases-exclude ${{ parameters.RETRY_CASES_EXCLUDE }} \
Expand Down
12 changes: 12 additions & 0 deletions .azure-pipelines/test_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ def create(self, topology, test_plan_name="my_test_plan", deploy_mg_extra_params
"test_option": {
"stop_on_failure": kwargs.get("stop_on_failure", True),
"enable_parallel_run": kwargs.get("enable_parallel_run", False),
"parallel_modes_file": kwargs.get("parallel_modes_file", "default.json"),
"retry_times": kwargs.get("retry_times", 2),
"retry_cases_include": retry_cases_include,
"retry_cases_exclude": retry_cases_exclude,
Expand Down Expand Up @@ -831,6 +832,16 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte
choices=[True, False],
help="Enable parallel run or not."
)
parser_create.add_argument(
"--parallel-modes-file",
type=str,
dest="parallel_modes_file",
nargs='?',
const='default.json',
default='default.json',
required=False,
help="Which parallel modes file to use when parallel run is enabled."
)
parser_create.add_argument(
"--retry-times",
type=int,
Expand Down Expand Up @@ -1034,6 +1045,7 @@ def poll(self, test_plan_id, interval=60, timeout=-1, expected_state="", expecte
platform=args.platform,
stop_on_failure=args.stop_on_failure,
enable_parallel_run=args.enable_parallel_run,
parallel_modes_file=args.parallel_modes_file,
retry_times=args.retry_times,
retry_cases_include=args.retry_cases_include,
retry_cases_exclude=args.retry_cases_exclude,
Expand Down
146 changes: 112 additions & 34 deletions .azure-pipelines/testbed_health_check.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ def __init__(self, inventory, testbed_name, testbed_file, log_verbosity, output_

self.localhost = None
self.sonichosts = None
self.duts_basic_facts = None
self.is_multi_asic = False
self.is_chassis = False

self.inventory = inventory
self.testbed_name = testbed_name
Expand All @@ -111,6 +114,13 @@ def init_hosts(self):
if not self.sonichosts:
raise HostInitFailed("sonichosts is None. Please check testbed name/file/inventory.")

self.duts_basic_facts = self.sonichosts.dut_basic_facts()
self.is_multi_asic = self.duts_basic_facts[self.sonichosts[0].hostname][
"ansible_facts"]["dut_basic_facts"]["is_multi_asic"]

self.is_chassis = self.duts_basic_facts[self.sonichosts[0].hostname][
"ansible_facts"]["dut_basic_facts"]["is_chassis"]

logger.info("======================= init_hosts ends =======================")

def pre_check(self):
Expand Down Expand Up @@ -208,20 +218,13 @@ def pre_check(self):
if len(ipv4_not_exists_hosts) > 0:
raise HostsUnreachable(self.check_result.errmsg)

# TODO: Refactor the following code to specify a "leader" T2 Testbed and skip the check on "followers"
# Retrieve the basic facts of the DUTs
duts_basic_facts = self.sonichosts.dut_basic_facts()

for dut_name, single_dut_basic_facts in duts_basic_facts.items():

# Get the basic facts of one DUT
dut_basic_facts = single_dut_basic_facts["ansible_facts"]["dut_basic_facts"]

# TODO: Skip checks on multi-asic DUTs for now because retrieving the ASIC object is not supported yet
if dut_basic_facts["is_multi_asic"]:
errmsg = "Not support to perform checks on multi-asic DUT now."
logger.info(errmsg)
if self.is_multi_asic:
errmsg = "Not support to perform checks on multi-asic DUT now."
logger.info(errmsg)

raise SkipCurrentTestbed(errmsg)
raise SkipCurrentTestbed(errmsg)

logger.info("======================= pre_check ends =======================")

Expand Down Expand Up @@ -297,28 +300,47 @@ def check_bgp_session_state(self, state="established"):
state: str. The target state to compare the BGP session state against. Defaults to "established".
"""

def find_unexpected_bgp_neighbors(neigh_bgp_facts, expected_state, unexpected_neighbors):
    """Collect BGP neighbors whose session state differs from the expected one.

    Args:
        neigh_bgp_facts: dict with a 'bgp_neighbors' mapping of neighbor to a
            dict that contains at least a 'state' entry.
        expected_state: state string each neighbor's 'state' is compared with.
        unexpected_neighbors: list extended in place with one
            "<neighbor>, <state>" string per mismatching neighbor.

    Returns:
        None. Results are reported through ``unexpected_neighbors``.
    """
    # The mapping is only read here, so iterate the items view directly
    # instead of copying it into a temporary list first.
    unexpected_neighbors.extend(
        f"{neighbor}, {facts['state']}"
        for neighbor, facts in neigh_bgp_facts['bgp_neighbors'].items()
        if facts['state'] != expected_state
    )

failed = False
bgp_facts_on_hosts = {}

logger.info("======================= check_bgp_session_state starts =======================")

for sonichost in self.sonichosts:
if (self.is_chassis and
self.duts_basic_facts[sonichost.hostname]["ansible_facts"]["dut_basic_facts"]["is_supervisor"]):
logger.info("Skip check_bgp_session_state on Supervisor.")
continue

hostname = sonichost.hostname

logger.info("----------------------- check_bgp_session_state on [{}] -----------------------".format(
hostname))

# Retrieve BGP facts for the Sonic host
bgp_facts = sonichost.bgp_facts()['ansible_facts']
bgp_facts = {}
if self.is_multi_asic:
host_asics_list = self.duts_basic_facts[sonichost.hostname][
"ansible_facts"]["dut_basic_facts"]["asic_index_list"]

for instance_id in host_asics_list:
bgp_facts[instance_id] = sonichost.bgp_facts(instance_id=instance_id)['ansible_facts']
else:
bgp_facts = sonichost.bgp_facts()['ansible_facts']

bgp_facts_on_hosts[hostname] = bgp_facts

# Check BGP session state for each neighbor
neigh_not_ok = []
for k, v in list(bgp_facts['bgp_neighbors'].items()):
if v['state'] != state:
neigh_not_ok.append(f"{k}, {v['state']}")
if self.is_multi_asic:
for instance_id, facts in bgp_facts.items():
find_unexpected_bgp_neighbors(facts, state, neigh_not_ok)
else:
find_unexpected_bgp_neighbors(bgp_facts, state, neigh_not_ok)

errlog = "BGP neighbors that not established on {}: {}".format(hostname, neigh_not_ok)

Expand Down Expand Up @@ -349,36 +371,74 @@ def check_interface_status_of_up_ports(self):
logger.info("======================= check_interface_status_of_up_ports starts =======================")

for sonichost in self.sonichosts:
if (self.is_chassis and
self.duts_basic_facts[sonichost.hostname]["ansible_facts"]["dut_basic_facts"]["is_supervisor"]):
logger.info("Skip check_interface_status_of_up_ports on Supervisor.")
continue

hostname = sonichost.hostname
logger.info(
"----------------------- check_interface_status_of_up_ports on [{}] -----------------------".format(
hostname))

# Retrieve the configuration facts for the DUT
cfg_facts = sonichost.config_facts(host=hostname, source='running')['ansible_facts']
# 1. Retrieve the configuration facts for the DUT
# 2. Get a list of up ports from the configuration facts
# 3. Retrieve the interface facts for the up ports
if self.is_multi_asic:
host_asics_list = self.duts_basic_facts[sonichost.hostname][
"ansible_facts"]["dut_basic_facts"]["asic_index_list"]

# Get a list of up ports from the configuration facts
up_ports = [p for p, v in list(cfg_facts['PORT'].items()) if v.get('admin_status', None) == 'up']
interface_facts = {}
for asic_id in host_asics_list:
cfg_facts_of_asic = sonichost.config_facts(
host=hostname, source='running', namespace='asic{}'.format(asic_id)
)['ansible_facts']

logger.info('up_ports: {}'.format(up_ports))
up_ports = [
p for p, v in list(cfg_facts_of_asic['PORT'].items())
if v.get('admin_status', None) == 'up'
]

# Retrieve the interface facts for the up ports
interface_facts = sonichost.interface_facts(up_ports=up_ports)['ansible_facts']
logger.info('up_ports: {}'.format(up_ports))
interface_facts_of_asic = sonichost.interface_facts(
up_ports=up_ports, namespace='asic{}'.format(asic_id)
)['ansible_facts']

interface_facts_on_hosts[hostname] = interface_facts
interface_facts[asic_id] = interface_facts_of_asic
if hostname not in interface_facts_on_hosts:
interface_facts_on_hosts[hostname] = {}

errlog = 'ansible_interface_link_down_ports on {}: {}'.format(
hostname, interface_facts['ansible_interface_link_down_ports'])
interface_facts_on_hosts[hostname][asic_id] = interface_facts

logger.info(errlog)
errlog = 'ansible_interface_link_down_ports on asic{} of {}: {}'.format(
asic_id, hostname, interface_facts[asic_id]['ansible_interface_link_down_ports'])

# Check if there are any link down ports in the interface facts
if len(interface_facts['ansible_interface_link_down_ports']) > 0:
# Set failed to True if any admin-up port has its link down
failed = True
# Add errlog to check result errmsg
self.check_result.errmsg.append(errlog)
logger.info(errlog)

# Check if there are any link down ports in the interface facts
if len(interface_facts[asic_id]['ansible_interface_link_down_ports']) > 0:
# Set failed to True if any admin-up port on this asic has its link down
failed = True
# Add errlog to check result errmsg
self.check_result.errmsg.append(errlog)

else:
cfg_facts = sonichost.config_facts(host=hostname, source='running')['ansible_facts']
up_ports = [p for p, v in list(cfg_facts['PORT'].items()) if v.get('admin_status', None) == 'up']
logger.info('up_ports: {}'.format(up_ports))
interface_facts = sonichost.interface_facts(up_ports=up_ports)['ansible_facts']
interface_facts_on_hosts[hostname] = interface_facts
errlog = 'ansible_interface_link_down_ports on {}: {}'.format(
hostname, interface_facts['ansible_interface_link_down_ports'])

logger.info(errlog)

# Check if there are any link down ports in the interface facts
if len(interface_facts['ansible_interface_link_down_ports']) > 0:
# Set failed to True if any admin-up port has its link down
failed = True
# Add errlog to check result errmsg
self.check_result.errmsg.append(errlog)

# Set the check result
self.check_result.data["interface_facts_on_hosts"] = interface_facts_on_hosts
Expand All @@ -405,6 +465,10 @@ def check_critical_containers_running(self, critical_containers: list = None):
logger.info("======================= check_critical_containers_running starts =======================")

for sonichost in self.sonichosts:
host_asics_list = []
if self.is_multi_asic:
host_asics_list = self.duts_basic_facts[sonichost.hostname][
"ansible_facts"]["dut_basic_facts"]["asic_index_list"]

hostname = sonichost.hostname
logger.info(
Expand All @@ -415,7 +479,21 @@ def check_critical_containers_running(self, critical_containers: list = None):
running_containers = sonichost.shell(r"docker ps -f 'status=running' --format \{\{.Names\}\}")[
'stdout_lines']

for critical_container in critical_containers:
containers_to_check = critical_containers
if self.is_multi_asic:
if (self.is_chassis and
self.duts_basic_facts[sonichost.hostname]["ansible_facts"]["dut_basic_facts"]["is_supervisor"]):
containers_to_check = [
"{}{}".format(container, asic)
for asic in host_asics_list for container in critical_containers if container != "bgp"
]
else:
containers_to_check = [
"{}{}".format(container, asic)
for asic in host_asics_list for container in critical_containers
]

for critical_container in containers_to_check:

# If the critical container is not running, add an error log
if critical_container not in running_containers:
Expand Down
2 changes: 1 addition & 1 deletion ansible/group_vars/sonic/variables
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ broadcom_jr2_hwskus: ['Arista-7800R3-48CQ2-C48', 'Arista-7800R3-48CQM2-C48']

mellanox_spc1_hwskus: [ 'ACS-MSN2700', 'ACS-MSN2740', 'ACS-MSN2100', 'ACS-MSN2410', 'ACS-MSN2010', 'Mellanox-SN2700', 'Mellanox-SN2700-A1', 'Mellanox-SN2700-D48C8','Mellanox-SN2700-D40C8S8', 'Mellanox-SN2700-A1-D48C8']
mellanox_spc2_hwskus: [ 'ACS-MSN3700', 'ACS-MSN3700C', 'ACS-MSN3800', 'Mellanox-SN3800-D112C8' , 'ACS-MSN3420']
mellanox_spc3_hwskus: [ 'ACS-MSN4700', 'Mellanox-SN4700-O28', 'ACS-MSN4600', 'ACS-MSN4600C', 'ACS-MSN4410', 'Mellanox-SN4600C-D112C8', 'Mellanox-SN4600C-C64', 'Mellanox-SN4700-O8C48', 'Mellanox-SN4700-O8V48', 'ACS-SN4280', 'Mellanox-SN4700-V64', 'Mellanox-SN4700-O32']
mellanox_spc3_hwskus: [ 'ACS-MSN4700', 'Mellanox-SN4700-O28', 'ACS-MSN4600', 'ACS-MSN4600C', 'ACS-MSN4410', 'Mellanox-SN4600C-D112C8', 'Mellanox-SN4600C-C64', 'Mellanox-SN4700-O8C48', 'Mellanox-SN4700-O8V48', 'ACS-SN4280', 'Mellanox-SN4280-O28', 'Mellanox-SN4700-V64', 'Mellanox-SN4700-O32']
mellanox_spc4_hwskus: [ 'ACS-SN5600' , 'Mellanox-SN5600-V256', 'Mellanox-SN5600-C256S1', 'Mellanox-SN5600-C224O8']
mellanox_hwskus: "{{ mellanox_spc1_hwskus + mellanox_spc2_hwskus + mellanox_spc3_hwskus + mellanox_spc4_hwskus }}"
mellanox_dualtor_hwskus: [ 'Mellanox-SN4600C-C64' ]
Expand Down
13 changes: 12 additions & 1 deletion ansible/library/dut_basic_facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from module_utils.parse_utils import parse_tabular_output

from ansible.module_utils.basic import AnsibleModule
from sonic_py_common import device_info
from sonic_py_common import device_info, multi_asic

DOCUMENTATION = '''
---
Expand Down Expand Up @@ -47,6 +47,17 @@ def main():
if hasattr(device_info, 'is_supervisor'):
results['is_supervisor'] = device_info.is_supervisor()

results['is_chassis'] = False
if hasattr(device_info, 'is_chassis'):
results['is_chassis'] = device_info.is_chassis()

if results['is_multi_asic']:
results['asic_index_list'] = []
if results['is_chassis']:
results['asic_index_list'] = multi_asic.get_asic_presence_list()
else:
results['asic_index_list'] = [ns.replace('asic', '') for ns in multi_asic.get_namespace_list()]

# In case a image does not have /etc/sonic/sonic_release, guess release from 'build_version'
if 'release' not in results or not results['release'] or results['release'] == 'none':
if 'build_version' in results:
Expand Down
2 changes: 1 addition & 1 deletion ansible/library/generate_golden_config_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def generate_mx_golden_config_db(self):
# Generate FEATURE table from init_cfg.ini
ori_config_db = json.loads(out)
if "FEATURE" not in ori_config_db or "dhcp_server" not in ori_config_db["FEATURE"]:
return "{}"
return {}

ori_config_db["FEATURE"]["dhcp_server"]["state"] = "enabled"
gold_config_db = {
Expand Down
2 changes: 1 addition & 1 deletion ansible/module_utils/port_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def get_port_alias_to_name_map(hwsku, asic_name=None):
idx += 4
port_alias_to_name_map["etp%db" % i] = "Ethernet%d" % idx
idx += 4
elif hwsku in ["Mellanox-SN4700-O28"]:
elif hwsku in ["Mellanox-SN4700-O28", 'Mellanox-SN4280-O28']:
idx = 0
for i in range(1, 33):
port_alias_to_name_map["etp%d" % i] = "Ethernet%d" % idx
Expand Down
1 change: 0 additions & 1 deletion ansible/roles/eos/t1-64-lag-tor.j2

This file was deleted.

1 change: 0 additions & 1 deletion ansible/roles/eos/t1-lag-spine.j2

This file was deleted.

Loading

0 comments on commit 46cf60c

Please sign in to comment.