Skip to content

Commit

Permalink
[Mellanox] implement platform wait in python code
Browse files Browse the repository at this point in the history
  • Loading branch information
Junchao-Mellanox committed Nov 29, 2023
1 parent 67e414f commit 79d2fb7
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 74 deletions.
82 changes: 14 additions & 68 deletions device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait
Original file line number Diff line number Diff line change
@@ -1,68 +1,14 @@
#!/bin/bash

# Syslog settings used by the log_* helpers below: the logger binary, the
# tag attached to every message, and the priority names passed via -p.
declare -r SYSLOG_LOGGER="/usr/bin/logger"
declare -r SYSLOG_IDENTIFIER="platform_wait"
declare -r SYSLOG_ERROR="error"
declare -r SYSLOG_NOTICE="notice"
declare -r SYSLOG_INFO="info"

# Directory holding the hw-management state files polled by this script.
declare -r HW_MGMT_CONFIG="/var/run/hw-management/config"

# State files read by wait_for_asic_chipup: the init-done flag, the number of
# ASICs, and the number of ASICs whose chipup has completed.
declare -r ASIC_INIT_DONE="${HW_MGMT_CONFIG}/asics_init_done"
declare -r NUM_ASICS="${HW_MGMT_CONFIG}/asic_num"
declare -r ASIC_CHIPUP_COMPLETED="${HW_MGMT_CONFIG}/asic_chipup_completed"

# Exit/return codes: success, and timeout while waiting for the ASIC.
declare -r EXIT_SUCCESS="0"
declare -r EXIT_TIMEOUT="1"

function log_error() {
    # Log the given message to syslog at "error" priority, tagged with
    # SYSLOG_IDENTIFIER.
    #
    # The message is handed to logger as one literal argument. It is not run
    # through eval, so shell metacharacters in the message (for example text
    # captured from a failed `cat`) are logged verbatim rather than
    # re-interpreted by the shell.
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_ERROR}" -- "$*"
}

function log_notice() {
    # Log the given message to syslog at "notice" priority, tagged with
    # SYSLOG_IDENTIFIER.
    #
    # The message is handed to logger as one literal argument (no eval), so
    # shell metacharacters in it are logged verbatim.
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_NOTICE}" -- "$*"
}

function log_info() {
    # Log the given message to syslog at "info" priority, tagged with
    # SYSLOG_IDENTIFIER.
    #
    # The message is handed to logger as one literal argument (no eval), so
    # shell metacharacters in it are logged verbatim.
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_INFO}" -- "$*"
}

function wait_for_asic_chipup() {
    # Poll the hw-management state files until the ASIC init flag is 1 and the
    # chipup-completed count equals the ASIC count.
    #
    # Makes at most 300 attempts, sleeping 1s after each failed attempt.
    # Returns EXIT_SUCCESS as soon as the condition holds; otherwise logs an
    # error with the last values read and returns EXIT_TIMEOUT.

    local init_done="0"
    local asic_total="0"
    local chipup_done="0"

    local -ir max_attempts="300"
    local -r poll_interval="1s"
    local -i attempt

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        # stderr is captured too, so a read failure shows up in the timeout log.
        init_done="$(cat ${ASIC_INIT_DONE} 2>&1)"
        asic_total="$(cat ${NUM_ASICS} 2>&1)"
        chipup_done="$(cat ${ASIC_CHIPUP_COMPLETED} 2>&1)"

        if [[ "${init_done}" -eq 1 && "${asic_total}" -eq "${chipup_done}" ]]; then
            return "${EXIT_SUCCESS}"
        fi

        sleep "${poll_interval}"
    done

    log_error "Mellanox ASIC is not ready: INIT: ${init_done}, NUM_ASIC: ${asic_total}, CHIPUP: ${chipup_done} timeout...."
    return "${EXIT_TIMEOUT}"
}

# Script entry point: wait for the ASIC, then report readiness.
log_info "Wait for Mellanox ASIC to be ready"

# A non-zero status from the wait becomes this script's exit code.
wait_for_asic_chipup || exit "$?"

log_notice "Mellanox ASIC is ready"

exit "${EXIT_SUCCESS}"
#!/usr/bin/python3
import sys
from sonic_platform.device_data import DeviceDataManager
from sonic_py_common.logger import Logger


# Block until DeviceDataManager reports the platform as ready, logging the
# outcome to syslog. Exits 0 on success and -1 on timeout.
logger = Logger(log_identifier='platform_wait')
logger.log_notice('Nvidia: Wait for PMON dependencies to be ready')
if not DeviceDataManager.wait_platform_ready():
    logger.log_error('Nvidia: PMON dependencies are not ready: timeout')
    sys.exit(-1)
logger.log_notice('Nvidia: PMON dependencies are ready')
sys.exit(0)
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import glob
import os
import time

from . import utils

Expand Down Expand Up @@ -167,7 +168,11 @@ def is_psu_hotswapable(cls):
@classmethod
@utils.read_only_cache()
def get_sfp_count(cls):
    """Return the number of SFP ports declared for this platform.

    The count is the length of the ``chassis.sfps`` list in ``platform.json``
    under the platform directory. It is read from that static file rather
    than from ``/run/hw-management/config/module_counter``.

    Returns:
        int: Number of entries under ``chassis`` -> ``sfps``.
    """
    from sonic_py_common import device_info
    platform_path = device_info.get_path_to_platform_dir()
    platform_json_path = os.path.join(platform_path, 'platform.json')
    platform_data = utils.load_json_file(platform_json_path)
    return len(platform_data['chassis']['sfps'])

@classmethod
def get_linecard_sfp_count(cls, lc_index):
Expand Down Expand Up @@ -234,3 +239,32 @@ def get_cpld_component_list(cls):
# Currently, only fetching BIOS version is supported
return ComponentCPLDSN2201.get_component_list()
return ComponentCPLD.get_component_list()

@classmethod
@utils.read_only_cache()
def is_independent_mode(cls):
    """Tell whether sai.profile enables independent module mode.

    Returns:
        bool: True if the hwsku's ``sai.profile`` sets
            ``SAI_INDEPENDENT_MODULE_MODE`` to ``1``, else False.
    """
    from sonic_py_common import device_info
    _platform_dir, hwsku_path = device_info.get_paths_to_platform_and_hwsku_dirs()
    profile = utils.read_key_value_file(os.path.join(hwsku_path, 'sai.profile'), delimeter='=')
    mode = profile.get('SAI_INDEPENDENT_MODULE_MODE')
    return mode == '1'

@classmethod
def wait_platform_ready(cls):
    """
    Wait for Nvidia platform related services(SDK, hw-management) ready

    Builds one readiness condition per expected sysfs node of every SFP
    module. When not in independent mode, it also waits for the
    hw-management ``asics_init_done`` flag to read 1. It then polls all
    conditions once per second for up to 300 seconds.

    Returns:
        bool: True if all conditions became true in time, False on timeout
    """
    conditions = []
    sysfs_nodes = ['power_mode', 'power_mode_policy', 'present', 'reset', 'status', 'statuserror']
    if cls.is_independent_mode():
        sysfs_nodes.extend(['control', 'frequency', 'frequency_support', 'hw_present', 'hw_reset',
                            'power_good', 'power_limit', 'power_on', 'temperature/input'])
    else:
        conditions.append(lambda: utils.read_int_from_file('/var/run/hw-management/config/asics_init_done') == 1)
    sfp_count = cls.get_sfp_count()
    for sfp_index in range(sfp_count):
        for sysfs_node in sysfs_nodes:
            sysfs_path = f'/sys/module/sx_core/asic0/module{sfp_index}/{sysfs_node}'
            # Bind the path as a default argument. A plain closure would look
            # up the loop variables when called, so every condition would
            # check only the last module's last node.
            conditions.append(lambda path=sysfs_path: os.path.exists(path))
    return utils.wait_until_conditions(conditions, 300, 1)
16 changes: 16 additions & 0 deletions platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@
SFP_SYSFS_STATUS_ERROR = 'statuserror'
SFP_SYSFS_PRESENT = 'present'
SFP_SYSFS_RESET = 'reset'
SFP_SYSFS_HWRESET = 'hw_reset'
SFP_SYSFS_POWER_MODE = 'power_mode'
SFP_SYSFS_POWER_MODE_POLICY = 'power_mode_policy'
POWER_MODE_POLICY_HIGH = 1
Expand Down Expand Up @@ -792,6 +793,21 @@ def get_xcvr_api(self):
self._xcvr_api.get_tx_fault = self.get_tx_fault
return self._xcvr_api

def is_sw_control(self):
    """Report whether this module is under software control.

    Returns False immediately when the platform is not in independent mode.
    Otherwise the ``control_type`` stored in STATE_DB is cross-checked
    against the module's sysfs ``control`` file.

    Returns:
        bool: True for SW_CONTROL with control file value 1, False for
            FW_CONTROL with control file value 0.

    Raises:
        Exception: If the DB value and the sysfs value do not form one of
            the two consistent pairs above.
    """
    if not DeviceDataManager.is_independent_mode():
        return False

    state_db = utils.DbUtils.get_db_instance('STATE_DB')
    control_type = state_db.get('STATE_DB', f'TRANSCEIVER_MODULES_MGMT|{self.sdk_index}', 'control_type')
    control_file_value = utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/control')

    observed = (control_type, control_file_value)
    if observed == ('SW_CONTROL', 1):
        return True
    if observed == ('FW_CONTROL', 0):
        return False
    raise Exception(f'Module {self.sdk_index} is in initialization, please retry later')


class RJ45Port(NvidiaSFPCommon):
"""class derived from SFP, representing RJ45 ports"""
Expand Down
59 changes: 54 additions & 5 deletions platform/mellanox/mlnx-platform-api/sonic_platform/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2021 NVIDIA CORPORATION & AFFILIATES.
# Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES.
# Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -19,6 +19,7 @@
import subprocess
import json
import sys
import threading
import time
import os
from sonic_py_common import device_info
Expand Down Expand Up @@ -100,15 +101,15 @@ def read_float_from_file(file_path, default=0.0, raise_exception=False, log_func
return read_from_file(file_path=file_path, target_type=float, default=default, raise_exception=raise_exception, log_func=log_func)


def _key_value_converter(content):
def _key_value_converter(content, delimeter):
ret = {}
for line in content.splitlines():
k,v = line.split(':')
k,v = line.split(delimeter)
ret[k.strip()] = v.strip()
return ret


def read_key_value_file(file_path, default={}, raise_exception=False, log_func=logger.log_error):
def read_key_value_file(file_path, default={}, raise_exception=False, log_func=logger.log_error, delimeter=':'):
"""Read file content and parse the content to a dict. The file content should like:
key1:value1
key2:value2
Expand All @@ -119,7 +120,8 @@ def read_key_value_file(file_path, default={}, raise_exception=False, log_func=l
raise_exception (bool, optional): If exception should be raised or hiden. Defaults to False.
log_func (optional): logger function.. Defaults to logger.log_error.
"""
return read_from_file(file_path=file_path, target_type=_key_value_converter, default=default, raise_exception=raise_exception, log_func=log_func)
converter = lambda content: _key_value_converter(content, delimeter)
return read_from_file(file_path=file_path, target_type=converter, default=default, raise_exception=raise_exception, log_func=log_func)


def write_file(file_path, content, raise_exception=False, log_func=logger.log_error):
Expand Down Expand Up @@ -285,3 +287,50 @@ def wait_until(predict, timeout, interval=1, *args, **kwargs):
time.sleep(interval)
timeout -= interval
return False


def wait_until_conditions(conditions, timeout, interval=1):
    """
    Wait until all the conditions become true

    Conditions are always evaluated at least once, even when timeout is 0,
    and once more after the final sleep. A condition that has returned a
    true value is not evaluated again.

    Args:
        conditions (list): a list of callable which generate True|False
        timeout (int): wait time in seconds
        interval (int, optional): interval to check the predict. Defaults to 1.
    Returns:
        bool: True if wait success else False
    """
    # Work on a copy so the caller's list is never modified.
    pending_conditions = list(conditions)
    while True:
        pending_conditions = [condition for condition in pending_conditions if not condition()]
        if not pending_conditions:
            return True
        if timeout <= 0:
            return False
        time.sleep(interval)
        timeout -= interval


class DbUtils:
    """Cache of connected SonicV2Connector objects, keyed by DB name.

    The cache dict is stored on a ``threading.local`` object.
    """
    lock = threading.Lock()
    db_instances = threading.local()

    @classmethod
    def get_db_instance(cls, db_name, **kargs):
        """Return a connector for ``db_name``, creating it on first use.

        Args:
            db_name (str): Name of the DB to connect to, e.g. 'STATE_DB'.
            **kargs: Accepted but not used.

        Returns:
            The cached (or newly connected) SonicV2Connector instance.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            local_store = cls.db_instances
            if not hasattr(local_store, 'data'):
                with cls.lock:
                    if not hasattr(local_store, 'data'):
                        local_store.data = {}

            cache = local_store.data
            if db_name in cache:
                return cache[db_name]

            from swsscommon.swsscommon import SonicV2Connector
            connector = SonicV2Connector(use_unix_socket_path=True)
            connector.connect(db_name)
            cache[db_name] = connector
            return connector
        except Exception as e:
            logger.log_error(f'Failed to get DB instance for DB {db_name} - {e}')
            raise e
29 changes: 29 additions & 0 deletions platform/mellanox/mlnx-platform-api/tests/test_device_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,34 @@ def test_get_linecard_max_port_count(self):
def test_get_bios_component(self):
assert DeviceDataManager.get_bios_component() is not None

@mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=('', '/tmp')))
@mock.patch('sonic_platform.device_data.utils.read_key_value_file')
def test_is_independent_mode(self, mock_read):
    """Independent mode is reported only when sai.profile sets the flag to '1'."""
    cases = (
        ({}, False),
        ({'SAI_INDEPENDENT_MODULE_MODE': '1'}, True),
    )
    for profile, expected in cases:
        mock_read.return_value = profile
        assert bool(DeviceDataManager.is_independent_mode()) is expected

@mock.patch('sonic_py_common.device_info.get_path_to_platform_dir', mock.MagicMock(return_value='/tmp'))
@mock.patch('sonic_platform.device_data.utils.load_json_file')
def test_get_sfp_count(self, mock_load_json):
    """get_sfp_count() equals the number of entries under chassis -> sfps."""
    fake_platform_json = {'chassis': {'sfps': [1, 2, 3]}}
    mock_load_json.return_value = fake_platform_json
    assert DeviceDataManager.get_sfp_count() == 3

@mock.patch('sonic_platform.device_data.time.sleep', mock.MagicMock())
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=3))
@mock.patch('sonic_platform.device_data.utils.read_int_from_file', mock.MagicMock(return_value=1))
@mock.patch('sonic_platform.device_data.os.path.exists')
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode')
def test_wait_platform_ready(self, mock_is_indep, mock_exists):
    """The wait succeeds when all sysfs nodes exist and times out otherwise."""
    # Every sysfs node present: ready in both independent and dependent mode.
    mock_exists.return_value = True
    for independent in (True, False):
        mock_is_indep.return_value = independent
        assert DeviceDataManager.wait_platform_ready()

    # No sysfs node present (still dependent mode): the wait must time out.
    mock_exists.return_value = False
    assert not DeviceDataManager.wait_platform_ready()
25 changes: 25 additions & 0 deletions platform/mellanox/mlnx-platform-api/tests/test_sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,28 @@ def test_set_lpmode(self):
assert not sfp.set_lpmode(True)
assert sfp.set_lpmode(False)
assert not sfp.set_lpmode(False)

@mock.patch('sonic_platform.utils.read_int_from_file')
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode')
@mock.patch('sonic_platform.utils.DbUtils.get_db_instance')
def test_is_sw_control(self, mock_get_db, mock_mode, mock_read):
    """Cover is_sw_control() for dependent mode and each DB/sysfs combination."""
    sfp = SFP(0)

    # Not in independent mode: always firmware control.
    mock_mode.return_value = False
    assert not sfp.is_sw_control()
    mock_mode.return_value = True

    # No control_type in the DB yet: the module is still initializing.
    fake_db = mock.MagicMock()
    fake_db.get = mock.MagicMock(return_value=None)
    mock_get_db.return_value = fake_db
    with pytest.raises(Exception):
        sfp.is_sw_control()

    # Consistent DB/sysfs pairs map to a definite answer.
    for file_value, db_value, expected in ((0, 'FW_CONTROL', False), (1, 'SW_CONTROL', True)):
        mock_read.return_value = file_value
        fake_db.get.return_value = db_value
        assert bool(sfp.is_sw_control()) is expected

    # DB still says SW_CONTROL but sysfs reads 0: inconsistent, so it raises.
    mock_read.return_value = 0
    with pytest.raises(Exception):
        sfp.is_sw_control()
10 changes: 10 additions & 0 deletions platform/mellanox/mlnx-platform-api/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,13 @@ def test_read_key_value_file(self):
mock_os_open = mock.mock_open(read_data='a:b')
with mock.patch('sonic_platform.utils.open', mock_os_open):
assert utils.read_key_value_file('some_file') == {'a':'b'}
mock_os_open = mock.mock_open(read_data='a=b')
with mock.patch('sonic_platform.utils.open', mock_os_open):
assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'}

@mock.patch('sonic_platform.utils.time.sleep', mock.MagicMock())
def test_wait_until_conditions(self):
    """A satisfied condition returns True; a never-satisfied one times out."""
    always_true = [lambda: True]
    assert utils.wait_until_conditions(always_true, 1)

    always_false = [lambda: False]
    assert not utils.wait_until_conditions(always_false, 1)

0 comments on commit 79d2fb7

Please sign in to comment.