From 79d2fb766b162bfa2a9bc74931f27dfeb068ef74 Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox Date: Wed, 11 Oct 2023 09:31:05 +0300 Subject: [PATCH] [Mellanox] implement platform wait in python code --- .../x86_64-mlnx_msn2700-r0/platform_wait | 82 ++++--------------- .../sonic_platform/device_data.py | 36 +++++++- .../mlnx-platform-api/sonic_platform/sfp.py | 16 ++++ .../mlnx-platform-api/sonic_platform/utils.py | 59 +++++++++++-- .../tests/test_device_data.py | 29 +++++++ .../mlnx-platform-api/tests/test_sfp.py | 25 ++++++ .../mlnx-platform-api/tests/test_utils.py | 10 +++ 7 files changed, 183 insertions(+), 74 deletions(-) diff --git a/device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait b/device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait index 0809748687b6..7d92cb8f4455 100755 --- a/device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait +++ b/device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait @@ -1,68 +1,14 @@ -#!/bin/bash - -declare -r SYSLOG_LOGGER="/usr/bin/logger" -declare -r SYSLOG_IDENTIFIER="platform_wait" -declare -r SYSLOG_ERROR="error" -declare -r SYSLOG_NOTICE="notice" -declare -r SYSLOG_INFO="info" - -declare -r HW_MGMT_CONFIG="/var/run/hw-management/config" - -declare -r ASIC_INIT_DONE="${HW_MGMT_CONFIG}/asics_init_done" -declare -r NUM_ASICS="${HW_MGMT_CONFIG}/asic_num" -declare -r ASIC_CHIPUP_COMPLETED="${HW_MGMT_CONFIG}/asic_chipup_completed" - -declare -r EXIT_SUCCESS="0" -declare -r EXIT_TIMEOUT="1" - -function log_error() { - eval "${SYSLOG_LOGGER} -t ${SYSLOG_IDENTIFIER} -p ${SYSLOG_ERROR} $@" -} - -function log_notice() { - eval "${SYSLOG_LOGGER} -t ${SYSLOG_IDENTIFIER} -p ${SYSLOG_NOTICE} $@" -} - -function log_info() { - eval "${SYSLOG_LOGGER} -t ${SYSLOG_IDENTIFIER} -p ${SYSLOG_INFO} $@" -} - -function wait_for_asic_chipup() { - - local _ASIC_INIT="0" - local _ASIC_COUNT="0" - local _ASICS_CHIPUP="0" - - local -i _WDOG_CNT="1" - local -ir _WDOG_MAX="300" - - local -r _TIMEOUT="1s" - - while [[ "${_WDOG_CNT}" -le "${_WDOG_MAX}" ]]; do - _ASIC_INIT="$(cat ${ASIC_INIT_DONE} 2>&1)" - _ASIC_COUNT="$(cat ${NUM_ASICS} 2>&1)" - _ASICS_CHIPUP="$(cat ${ASIC_CHIPUP_COMPLETED} 2>&1)" - - if [[ "${_ASIC_INIT}" -eq 1 && "${_ASIC_COUNT}" -eq "${_ASICS_CHIPUP}" ]]; then - return "${EXIT_SUCCESS}" - fi - - let "_WDOG_CNT++" - sleep "${_TIMEOUT}" - done - - log_error "Mellanox ASIC is not ready: INIT: ${_ASIC_INIT}, NUM_ASIC: ${_ASIC_COUNT}, CHIPUP: ${_ASICS_CHIPUP} timeout...." - return "${EXIT_TIMEOUT}" -} - -log_info "Wait for Mellanox ASIC to be ready" - -wait_for_asic_chipup -EXIT_CODE="$?" -if [[ "${EXIT_CODE}" != "${EXIT_SUCCESS}" ]]; then - exit "${EXIT_CODE}" -fi - -log_notice "Mellanox ASIC is ready" - -exit "${EXIT_SUCCESS}" +#!/usr/bin/python3 +import sys +from sonic_platform.device_data import DeviceDataManager +from sonic_py_common.logger import Logger + + +logger = Logger(log_identifier='platform_wait') +logger.log_notice('Nvidia: Wait for PMON dependencies to be ready') +if DeviceDataManager.wait_platform_ready(): + logger.log_notice('Nvidia: PMON dependencies are ready') + sys.exit(0) +else: + logger.log_error('Nvidia: PMON dependencies are not ready: timeout') + sys.exit(-1) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py index 84d1706e137c..aeceb15d1983 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py @@ -17,6 +17,7 @@ import glob import os +import time from . import utils @@ -167,7 +168,11 @@ def is_psu_hotswapable(cls): @classmethod @utils.read_only_cache() def get_sfp_count(cls): - return utils.read_int_from_file('/run/hw-management/config/module_counter') + from sonic_py_common import device_info + platform_path = device_info.get_path_to_platform_dir() + platform_json_path = os.path.join(platform_path, 'platform.json') + platform_data = utils.load_json_file(platform_json_path) + return len(platform_data['chassis']['sfps']) @classmethod def get_linecard_sfp_count(cls, lc_index): @@ -234,3 +239,32 @@ def get_cpld_component_list(cls): # Currently, only fetching BIOS version is supported return ComponentCPLDSN2201.get_component_list() return ComponentCPLD.get_component_list() + + @classmethod + @utils.read_only_cache() + def is_independent_mode(cls): + from sonic_py_common import device_info + _, hwsku_dir = device_info.get_paths_to_platform_and_hwsku_dirs() + sai_profile_file = os.path.join(hwsku_dir, 'sai.profile') + data = utils.read_key_value_file(sai_profile_file, delimeter='=') + return data.get('SAI_INDEPENDENT_MODULE_MODE') == '1' + + @classmethod + def wait_platform_ready(cls): + """ + Wait for Nvidia platform related services(SDK, hw-management) ready + Returns: + bool: True if wait success else timeout + """ + conditions = [] + sysfs_nodes = ['power_mode', 'power_mode_policy', 'present', 'reset', 'status', 'statuserror'] + if cls.is_independent_mode(): + sysfs_nodes.extend(['control', 'frequency', 'frequency_support', 'hw_present', 'hw_reset', + 'power_good', 'power_limit', 'power_on', 'temperature/input']) + else: + conditions.append(lambda: utils.read_int_from_file('/var/run/hw-management/config/asics_init_done') == 1) + sfp_count = cls.get_sfp_count() + for sfp_index in range(sfp_count): + for sysfs_node in sysfs_nodes: + conditions.append(lambda: os.path.exists(f'/sys/module/sx_core/asic0/module{sfp_index}/{sysfs_node}')) + return utils.wait_until_conditions(conditions, 300, 1) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py index a4dd7a8fc842..66682cd78e7b 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py @@ -139,6 +139,7 @@ SFP_SYSFS_STATUS_ERROR = 'statuserror' SFP_SYSFS_PRESENT = 'present' SFP_SYSFS_RESET = 'reset' +SFP_SYSFS_HWRESET = 'hw_reset' SFP_SYSFS_POWER_MODE = 'power_mode' SFP_SYSFS_POWER_MODE_POLICY = 'power_mode_policy' POWER_MODE_POLICY_HIGH = 1 @@ -792,6 +793,21 @@ def get_xcvr_api(self): self._xcvr_api.get_tx_fault = self.get_tx_fault return self._xcvr_api + def is_sw_control(self): + if not DeviceDataManager.is_independent_mode(): + return False + + db = utils.DbUtils.get_db_instance('STATE_DB') + control_type = db.get('STATE_DB', f'TRANSCEIVER_MODULES_MGMT|{self.sdk_index}', 'control_type') + control_file_value = utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/control') + + if control_type == 'SW_CONTROL' and control_file_value == 1: + return True + elif control_type == 'FW_CONTROL' and control_file_value == 0: + return False + else: + raise Exception(f'Module {self.sdk_index} is in initialization, please retry later') + class RJ45Port(NvidiaSFPCommon): """class derived from SFP, representing RJ45 ports""" diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py index 83063b5c368e..b8652d0c656f 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2021 NVIDIA CORPORATION & AFFILIATES. +# Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. # Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,6 +19,7 @@ import subprocess import json import sys +import threading import time import os from sonic_py_common import device_info @@ -100,15 +101,15 @@ def read_float_from_file(file_path, default=0.0, raise_exception=False, log_func return read_from_file(file_path=file_path, target_type=float, default=default, raise_exception=raise_exception, log_func=log_func) -def _key_value_converter(content): +def _key_value_converter(content, delimeter): ret = {} for line in content.splitlines(): - k,v = line.split(':') + k,v = line.split(delimeter) ret[k.strip()] = v.strip() return ret -def read_key_value_file(file_path, default={}, raise_exception=False, log_func=logger.log_error): +def read_key_value_file(file_path, default={}, raise_exception=False, log_func=logger.log_error, delimeter=':'): """Read file content and parse the content to a dict. The file content should like: key1:value1 key2:value2 @@ -119,7 +120,8 @@ def read_key_value_file(file_path, default={}, raise_exception=False, log_func=l raise_exception (bool, optional): If exception should be raised or hiden. Defaults to False. log_func (optional): logger function.. Defaults to logger.log_error. """ - return read_from_file(file_path=file_path, target_type=_key_value_converter, default=default, raise_exception=raise_exception, log_func=log_func) + converter = lambda content: _key_value_converter(content, delimeter) + return read_from_file(file_path=file_path, target_type=converter, default=default, raise_exception=raise_exception, log_func=log_func) def write_file(file_path, content, raise_exception=False, log_func=logger.log_error): @@ -285,3 +287,50 @@ def wait_until(predict, timeout, interval=1, *args, **kwargs): time.sleep(interval) timeout -= interval return False + + +def wait_until_conditions(conditions, timeout, interval=1): + """ + Wait until all the conditions become true + Args: + conditions (list): a list of callable which generate True|False + timeout (int): wait time in seconds + interval (int, optional): interval to check the predict. Defaults to 1. + + Returns: + bool: True if wait success else False + """ + while timeout > 0: + pending_conditions = [] + for condition in conditions: + if not condition(): + pending_conditions.append(condition) + if not pending_conditions: + return True + conditions = pending_conditions + time.sleep(interval) + timeout -= interval + return False + + +class DbUtils: + lock = threading.Lock() + db_instances = threading.local() + + @classmethod + def get_db_instance(cls, db_name, **kargs): + try: + if not hasattr(cls.db_instances, 'data'): + with cls.lock: + if not hasattr(cls.db_instances, 'data'): + cls.db_instances.data = {} + + if db_name not in cls.db_instances.data: + from swsscommon.swsscommon import SonicV2Connector + db = SonicV2Connector(use_unix_socket_path=True) + db.connect(db_name) + cls.db_instances.data[db_name] = db + return cls.db_instances.data[db_name] + except Exception as e: + logger.log_error(f'Failed to get DB instance for DB {db_name} - {e}') + raise e diff --git a/platform/mellanox/mlnx-platform-api/tests/test_device_data.py b/platform/mellanox/mlnx-platform-api/tests/test_device_data.py index d99591d513c8..c172b82a30b7 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_device_data.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_device_data.py @@ -52,5 +52,34 @@ def test_get_linecard_max_port_count(self): def test_get_bios_component(self): assert DeviceDataManager.get_bios_component() is not None + @mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=('', '/tmp'))) + @mock.patch('sonic_platform.device_data.utils.read_key_value_file') + def test_is_independent_mode(self, mock_read): + mock_read.return_value = {} + assert not DeviceDataManager.is_independent_mode() + mock_read.return_value = {'SAI_INDEPENDENT_MODULE_MODE': '1'} + assert DeviceDataManager.is_independent_mode() + @mock.patch('sonic_py_common.device_info.get_path_to_platform_dir', mock.MagicMock(return_value='/tmp')) + @mock.patch('sonic_platform.device_data.utils.load_json_file') + def test_get_sfp_count(self, mock_load_json): + mock_load_json.return_value = { + 'chassis': { + 'sfps': [1,2,3] + } + } + assert DeviceDataManager.get_sfp_count() == 3 + @mock.patch('sonic_platform.device_data.time.sleep', mock.MagicMock()) + @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=3)) + @mock.patch('sonic_platform.device_data.utils.read_int_from_file', mock.MagicMock(return_value=1)) + @mock.patch('sonic_platform.device_data.os.path.exists') + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode') + def test_wait_platform_ready(self, mock_is_indep, mock_exists): + mock_exists.return_value = True + mock_is_indep.return_value = True + assert DeviceDataManager.wait_platform_ready() + mock_is_indep.return_value = False + assert DeviceDataManager.wait_platform_ready() + mock_exists.return_value = False + assert not DeviceDataManager.wait_platform_ready() diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py index b9edc6838998..ba0d4ade63d0 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py @@ -299,3 +299,28 @@ def test_set_lpmode(self): assert not sfp.set_lpmode(True) assert sfp.set_lpmode(False) assert not sfp.set_lpmode(False) + + @mock.patch('sonic_platform.utils.read_int_from_file') + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode') + @mock.patch('sonic_platform.utils.DbUtils.get_db_instance') + def test_is_sw_control(self, mock_get_db, mock_mode, mock_read): + sfp = SFP(0) + mock_mode.return_value = False + assert not sfp.is_sw_control() + mock_mode.return_value = True + + mock_db = mock.MagicMock() + mock_get_db.return_value = mock_db + mock_db.get = mock.MagicMock(return_value=None) + with pytest.raises(Exception): + sfp.is_sw_control() + + mock_read.return_value = 0 + mock_db.get.return_value = 'FW_CONTROL' + assert not sfp.is_sw_control() + mock_read.return_value = 1 + mock_db.get.return_value = 'SW_CONTROL' + assert sfp.is_sw_control() + mock_read.return_value = 0 + with pytest.raises(Exception): + sfp.is_sw_control() diff --git a/platform/mellanox/mlnx-platform-api/tests/test_utils.py b/platform/mellanox/mlnx-platform-api/tests/test_utils.py index ad474433bfe8..709a7af4f2ab 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_utils.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_utils.py @@ -191,3 +191,13 @@ def test_read_key_value_file(self): mock_os_open = mock.mock_open(read_data='a:b') with mock.patch('sonic_platform.utils.open', mock_os_open): assert utils.read_key_value_file('some_file') == {'a':'b'} + mock_os_open = mock.mock_open(read_data='a=b') + with mock.patch('sonic_platform.utils.open', mock_os_open): + assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'} + + @mock.patch('sonic_platform.utils.time.sleep', mock.MagicMock()) + def test_wait_until_conditions(self): + conditions = [lambda: True] + assert utils.wait_until_conditions(conditions, 1) + conditions = [lambda: False] + assert not utils.wait_until_conditions(conditions, 1)