From 7c08edee240a191822678af874bff0ba2c26d11e Mon Sep 17 00:00:00 2001 From: Wataru Ishida Date: Fri, 14 Jun 2024 01:28:06 +0000 Subject: [PATCH 1/3] refactor(xcvrd): extract CMIS SM handling into a separate function Signed-off-by: Wataru Ishida --- sonic-xcvrd/xcvrd/xcvrd.py | 633 +++++++++++++++++++------------------ 1 file changed, 317 insertions(+), 316 deletions(-) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index 31a11d120..f5cb0b3cd 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -1303,6 +1303,322 @@ def wait_for_port_config_done(self, namespace): if key in ["PortConfigDone", "PortInitDone"]: break + def handle_cmis_state_machine(self, lport, info, is_fast_reboot): + state = get_cmis_state_from_state_db(lport, self.xcvr_table_helper.get_status_tbl(self.port_mapping.get_asic_id_for_logical_port(lport))) + if state in CMIS_TERMINAL_STATES or state == CMIS_STATE_UNKNOWN: + if state != CMIS_STATE_READY: + self.port_dict[lport]['appl'] = 0 + self.port_dict[lport]['host_lanes_mask'] = 0 + return + + # Handle the case when Xcvrd was NOT running when 'host_tx_ready' or 'admin_status' + # was updated or this is the first run so reconcile the above two attributes + if 'host_tx_ready' not in self.port_dict[lport]: + self.port_dict[lport]['host_tx_ready'] = self.get_host_tx_status(lport) + + if 'admin_status' not in self.port_dict[lport]: + self.port_dict[lport]['admin_status'] = self.get_port_admin_status(lport) + + pport = int(info.get('index', "-1")) + speed = int(info.get('speed', "0")) + lanes = info.get('lanes', "").strip() + subport = info.get('subport', 0) + if pport < 0 or speed == 0 or len(lanes) < 1 or subport < 0: + return + + # Desired port speed on the host side + host_speed = speed + host_lane_count = len(lanes.split(',')) + + # double-check the HW presence before moving forward + sfp = platform_chassis.get_sfp(pport) + if not sfp.get_presence(): + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_REMOVED) + return + + try: + # Skip if XcvrApi is not supported + api = sfp.get_xcvr_api() + if api is None: + self.log_error("{}: skipping CMIS state machine since no xcvr api!!!".format(lport)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) + return + + # Skip if it's not a paged memory device + if api.is_flat_memory(): + self.log_notice("{}: skipping CMIS state machine for flat memory xcvr".format(lport)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) + return + + # Skip if it's not a CMIS module + type = api.get_module_type_abbreviation() + if (type is None) or (type not in self.CMIS_MODULE_TYPES): + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) + return + + if api.is_coherent_module(): + if 'tx_power' not in self.port_dict[lport]: + self.port_dict[lport]['tx_power'] = self.get_configured_tx_power_from_db(lport) + if 'laser_freq' not in self.port_dict[lport]: + self.port_dict[lport]['laser_freq'] = self.get_configured_laser_freq_from_db(lport) + except AttributeError: + # Skip if these essential routines are not available + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) + return + except Exception as e: + self.log_error("{}: Exception in xcvr api: {}".format(lport, e)) + log_exception_traceback() + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + return + + # CMIS expiration and retries + # + # A retry should always start over at INSETRTED state, while the + # expiration will reset the state to INSETRTED and advance the + # retry counter + now = datetime.datetime.now() + expired = self.port_dict[lport].get('cmis_expired') + retries = self.port_dict[lport].get('cmis_retries', 0) + host_lanes_mask = self.port_dict[lport].get('host_lanes_mask', 0) + appl = self.port_dict[lport].get('appl', 0) + if state != CMIS_STATE_INSERTED and (host_lanes_mask <= 0 or appl < 1): + self.log_error("{}: Unexpected value for host_lanes_mask {} or appl {} in " + "{} state".format(lport, host_lanes_mask, appl, state)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + return + + self.log_notice("{}: {}G, lanemask=0x{:x}, state={}, appl {} host_lane_count {} " + "retries={}".format(lport, int(speed/1000), host_lanes_mask, + state, appl, host_lane_count, retries)) + if retries > self.CMIS_MAX_RETRIES: + self.log_error("{}: FAILED".format(lport)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + return + + try: + # CMIS state transitions + if state == CMIS_STATE_INSERTED: + self.port_dict[lport]['appl'] = get_cmis_application_desired(api, host_lane_count, host_speed) + if self.port_dict[lport]['appl'] is None: + self.log_error("{}: no suitable app for the port appl {} host_lane_count {} " + "host_speed {}".format(lport, appl, host_lane_count, host_speed)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + return + appl = self.port_dict[lport]['appl'] + self.log_notice("{}: Setting appl={}".format(lport, appl)) + + self.port_dict[lport]['host_lanes_mask'] = self.get_cmis_host_lanes_mask(api, + appl, host_lane_count, subport) + if self.port_dict[lport]['host_lanes_mask'] <= 0: + self.log_error("{}: Invalid lane mask received - host_lane_count {} subport {} " + "appl {}!".format(lport, host_lane_count, subport, appl)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + return + host_lanes_mask = self.port_dict[lport]['host_lanes_mask'] + self.log_notice("{}: Setting host_lanemask=0x{:x}".format(lport, host_lanes_mask)) + + self.port_dict[lport]['media_lane_count'] = int(api.get_media_lane_count(appl)) + self.port_dict[lport]['media_lane_assignment_options'] = int(api.get_media_lane_assignment_option(appl)) + media_lane_count = self.port_dict[lport]['media_lane_count'] + media_lane_assignment_options = self.port_dict[lport]['media_lane_assignment_options'] + self.port_dict[lport]['media_lanes_mask'] = self.get_cmis_media_lanes_mask(api, + appl, lport, subport) + if self.port_dict[lport]['media_lanes_mask'] <= 0: + self.log_error("{}: Invalid media lane mask received - media_lane_count {} " + "media_lane_assignment_options {} subport {}" + " appl {}!".format(lport, media_lane_count, media_lane_assignment_options, subport, appl)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + return + media_lanes_mask = self.port_dict[lport]['media_lanes_mask'] + self.log_notice("{}: Setting media_lanemask=0x{:x}".format(lport, media_lanes_mask)) + + if self.port_dict[lport]['host_tx_ready'] != 'true' or \ + self.port_dict[lport]['admin_status'] != 'up': + if is_fast_reboot and self.check_datapath_state(api, host_lanes_mask, ['DataPathActivated']): + self.log_notice("{} Skip datapath re-init in fast-reboot".format(lport)) + else: + self.log_notice("{} Forcing Tx laser OFF".format(lport)) + # Force DataPath re-init + api.tx_disable_channel(media_lanes_mask, True) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) + return + # Configure the target output power if ZR module + if api.is_coherent_module(): + tx_power = self.port_dict[lport]['tx_power'] + # Prevent configuring same tx power multiple times + if 0 != tx_power and tx_power != api.get_tx_config_power(): + if 1 != self.configure_tx_output_power(api, lport, tx_power): + self.log_error("{} failed to configure Tx power = {}".format(lport, tx_power)) + else: + self.log_notice("{} Successfully configured Tx power = {}".format(lport, tx_power)) + + # Set all the DP lanes AppSel to unused(0) when non default app code needs to be configured + if True == self.is_appl_reconfigure_required(api, appl): + self.log_notice("{}: Decommissioning all lanes/datapaths to default AppSel=0".format(lport)) + if True != api.decommission_all_datapaths(): + self.log_notice("{}: Failed to default to AppSel=0".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + return + + need_update = self.is_cmis_application_update_required(api, appl, host_lanes_mask) + + # For ZR module, Datapath needes to be re-initlialized on new channel selection + if api.is_coherent_module(): + freq = self.port_dict[lport]['laser_freq'] + # If user requested frequency is NOT the same as configured on the module + # force datapath re-initialization + if 0 != freq and freq != api.get_laser_config_freq(): + if self.validate_frequency_and_grid(api, lport, freq) == True: + need_update = True + else: + # clear setting of invalid frequency config + self.port_dict[lport]['laser_freq'] = 0 + + if not need_update: + # No application updates + self.log_notice("{}: no CMIS application update required...READY".format(lport)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) + return + self.log_notice("{}: force Datapath reinit".format(lport)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_DEINIT) + elif state == CMIS_STATE_DP_DEINIT: + # D.2.2 Software Deinitialization + api.set_datapath_deinit(host_lanes_mask) + + # D.1.3 Software Configuration and Initialization + media_lanes_mask = self.port_dict[lport]['media_lanes_mask'] + if not api.tx_disable_channel(media_lanes_mask, True): + self.log_notice("{}: unable to turn off tx power with host_lanes_mask {}".format(lport, host_lanes_mask)) + self.port_dict[lport]['cmis_retries'] = retries + 1 + return + + #Sets module to high power mode and doesn't impact datapath if module is already in high power mode + api.set_lpmode(False) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_AP_CONF) + dpDeinitDuration = self.get_cmis_dp_deinit_duration_secs(api) + modulePwrUpDuration = self.get_cmis_module_power_up_duration_secs(api) + self.log_notice("{}: DpDeinit duration {} secs, modulePwrUp duration {} secs".format(lport, dpDeinitDuration, modulePwrUpDuration)) + self.port_dict[lport]['cmis_expired'] = now + datetime.timedelta(seconds = max(modulePwrUpDuration, dpDeinitDuration)) + + elif state == CMIS_STATE_AP_CONF: + # Explicit control bit to apply custom Host SI settings. + # It will be set to 1 and applied via set_application if + # custom SI settings is applicable + ec = 0 + + # TODO: Use fine grained time when the CMIS memory map is available + if not self.check_module_state(api, ['ModuleReady']): + if (expired is not None) and (expired <= now): + self.log_notice("{}: timeout for 'ModuleReady'".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + return + + if not self.check_datapath_state(api, host_lanes_mask, ['DataPathDeactivated']): + if (expired is not None) and (expired <= now): + self.log_notice("{}: timeout for 'DataPathDeactivated state'".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + return + + if api.is_coherent_module(): + # For ZR module, configure the laser frequency when Datapath is in Deactivated state + freq = self.port_dict[lport]['laser_freq'] + if 0 != freq: + if 1 != self.configure_laser_frequency(api, lport, freq): + self.log_error("{} failed to configure laser frequency {} GHz".format(lport, freq)) + else: + self.log_notice("{} configured laser frequency {} GHz".format(lport, freq)) + + # Stage custom SI settings + if optics_si_parser.optics_si_present(): + optics_si_dict = {} + # Apply module SI settings if applicable + lane_speed = int(speed/1000)//host_lane_count + optics_si_dict = optics_si_parser.fetch_optics_si_setting(pport, lane_speed, sfp) + + self.log_debug("Read SI parameters for port {} from optics_si_settings.json vendor file:".format(lport)) + for key, sub_dict in optics_si_dict.items(): + self.log_debug("{}".format(key)) + for sub_key, value in sub_dict.items(): + self.log_debug("{}: {}".format(sub_key, str(value))) + + if optics_si_dict: + self.log_notice("{}: Apply Optics SI found for Vendor: {} PN: {} lane speed: {}G". + format(lport, api.get_manufacturer(), api.get_model(), lane_speed)) + if not api.stage_custom_si_settings(host_lanes_mask, optics_si_dict): + self.log_notice("{}: unable to stage custom SI settings ".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + return + + # Set Explicit control bit to apply Custom Host SI settings + ec = 1 + + # D.1.3 Software Configuration and Initialization + api.set_application(host_lanes_mask, appl, ec) + if not api.scs_apply_datapath_init(host_lanes_mask): + self.log_notice("{}: unable to set application and stage DP init".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + return + + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_INIT) + elif state == CMIS_STATE_DP_INIT: + if not self.check_config_error(api, host_lanes_mask, ['ConfigSuccess']): + if (expired is not None) and (expired <= now): + self.log_notice("{}: timeout for 'ConfigSuccess'".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + return + + if hasattr(api, 'get_cmis_rev'): + # Check datapath init pending on module that supports CMIS 5.x + majorRev = int(api.get_cmis_rev().split('.')[0]) + if majorRev >= 5 and not self.check_datapath_init_pending(api, host_lanes_mask): + self.log_notice("{}: datapath init not pending".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + return + + # Ensure the Datapath is NOT Activated unless the host Tx siganl is good. + # NOTE: Some CMIS compliant modules may have 'auto-squelch' feature where + # the module won't take datapaths to Activated state if host tries to enable + # the datapaths while there is no good Tx signal from the host-side. + if self.port_dict[lport]['admin_status'] != 'up' or \ + self.port_dict[lport]['host_tx_ready'] != 'true': + self.log_notice("{} waiting for host tx ready...".format(lport)) + return + + # D.1.3 Software Configuration and Initialization + api.set_datapath_init(host_lanes_mask) + dpInitDuration = self.get_cmis_dp_init_duration_secs(api) + self.log_notice("{}: DpInit duration {} secs".format(lport, dpInitDuration)) + self.port_dict[lport]['cmis_expired'] = now + datetime.timedelta(seconds=dpInitDuration) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_TXON) + elif state == CMIS_STATE_DP_TXON: + if not self.check_datapath_state(api, host_lanes_mask, ['DataPathInitialized']): + if (expired is not None) and (expired <= now): + self.log_notice("{}: timeout for 'DataPathInitialized'".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + return + + # Turn ON the laser + media_lanes_mask = self.port_dict[lport]['media_lanes_mask'] + api.tx_disable_channel(media_lanes_mask, False) + self.log_notice("{}: Turning ON tx power".format(lport)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_ACTIVATE) + elif state == CMIS_STATE_DP_ACTIVATE: + if not self.check_datapath_state(api, host_lanes_mask, ['DataPathActivated']): + if (expired is not None) and (expired <= now): + self.log_notice("{}: timeout for 'DataPathActivated'".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + return + + self.log_notice("{}: READY".format(lport)) + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) + self.post_port_active_apsel_to_db(api, lport, host_lanes_mask) + + except Exception as e: + self.log_error("{}: internal errors due to {}".format(lport, e)) + log_exception_traceback() + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + + def task_worker(self): self.xcvr_table_helper = XcvrTableHelper(self.namespaces) @@ -1329,322 +1645,7 @@ def task_worker(self): if self.task_stopping_event.is_set(): break - if lport not in self.port_dict: - continue - - state = get_cmis_state_from_state_db(lport, self.xcvr_table_helper.get_status_tbl(self.port_mapping.get_asic_id_for_logical_port(lport))) - if state in CMIS_TERMINAL_STATES or state == CMIS_STATE_UNKNOWN: - if state != CMIS_STATE_READY: - self.port_dict[lport]['appl'] = 0 - self.port_dict[lport]['host_lanes_mask'] = 0 - continue - - # Handle the case when Xcvrd was NOT running when 'host_tx_ready' or 'admin_status' - # was updated or this is the first run so reconcile the above two attributes - if 'host_tx_ready' not in self.port_dict[lport]: - self.port_dict[lport]['host_tx_ready'] = self.get_host_tx_status(lport) - - if 'admin_status' not in self.port_dict[lport]: - self.port_dict[lport]['admin_status'] = self.get_port_admin_status(lport) - - pport = int(info.get('index', "-1")) - speed = int(info.get('speed', "0")) - lanes = info.get('lanes', "").strip() - subport = info.get('subport', 0) - if pport < 0 or speed == 0 or len(lanes) < 1 or subport < 0: - continue - - # Desired port speed on the host side - host_speed = speed - host_lane_count = len(lanes.split(',')) - - # double-check the HW presence before moving forward - sfp = platform_chassis.get_sfp(pport) - if not sfp.get_presence(): - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_REMOVED) - continue - - try: - # Skip if XcvrApi is not supported - api = sfp.get_xcvr_api() - if api is None: - self.log_error("{}: skipping CMIS state machine since no xcvr api!!!".format(lport)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) - continue - - # Skip if it's not a paged memory device - if api.is_flat_memory(): - self.log_notice("{}: skipping CMIS state machine for flat memory xcvr".format(lport)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) - continue - - # Skip if it's not a CMIS module - type = api.get_module_type_abbreviation() - if (type is None) or (type not in self.CMIS_MODULE_TYPES): - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) - continue - - if api.is_coherent_module(): - if 'tx_power' not in self.port_dict[lport]: - self.port_dict[lport]['tx_power'] = self.get_configured_tx_power_from_db(lport) - if 'laser_freq' not in self.port_dict[lport]: - self.port_dict[lport]['laser_freq'] = self.get_configured_laser_freq_from_db(lport) - except AttributeError: - # Skip if these essential routines are not available - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) - continue - except Exception as e: - self.log_error("{}: Exception in xcvr api: {}".format(lport, e)) - log_exception_traceback() - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) - continue - - # CMIS expiration and retries - # - # A retry should always start over at INSETRTED state, while the - # expiration will reset the state to INSETRTED and advance the - # retry counter - now = datetime.datetime.now() - expired = self.port_dict[lport].get('cmis_expired') - retries = self.port_dict[lport].get('cmis_retries', 0) - host_lanes_mask = self.port_dict[lport].get('host_lanes_mask', 0) - appl = self.port_dict[lport].get('appl', 0) - if state != CMIS_STATE_INSERTED and (host_lanes_mask <= 0 or appl < 1): - self.log_error("{}: Unexpected value for host_lanes_mask {} or appl {} in " - "{} state".format(lport, host_lanes_mask, appl, state)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) - continue - - self.log_notice("{}: {}G, lanemask=0x{:x}, state={}, appl {} host_lane_count {} " - "retries={}".format(lport, int(speed/1000), host_lanes_mask, - state, appl, host_lane_count, retries)) - if retries > self.CMIS_MAX_RETRIES: - self.log_error("{}: FAILED".format(lport)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) - continue - - try: - # CMIS state transitions - if state == CMIS_STATE_INSERTED: - self.port_dict[lport]['appl'] = get_cmis_application_desired(api, host_lane_count, host_speed) - if self.port_dict[lport]['appl'] is None: - self.log_error("{}: no suitable app for the port appl {} host_lane_count {} " - "host_speed {}".format(lport, appl, host_lane_count, host_speed)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) - continue - appl = self.port_dict[lport]['appl'] - self.log_notice("{}: Setting appl={}".format(lport, appl)) - - self.port_dict[lport]['host_lanes_mask'] = self.get_cmis_host_lanes_mask(api, - appl, host_lane_count, subport) - if self.port_dict[lport]['host_lanes_mask'] <= 0: - self.log_error("{}: Invalid lane mask received - host_lane_count {} subport {} " - "appl {}!".format(lport, host_lane_count, subport, appl)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) - continue - host_lanes_mask = self.port_dict[lport]['host_lanes_mask'] - self.log_notice("{}: Setting host_lanemask=0x{:x}".format(lport, host_lanes_mask)) - - self.port_dict[lport]['media_lane_count'] = int(api.get_media_lane_count(appl)) - self.port_dict[lport]['media_lane_assignment_options'] = int(api.get_media_lane_assignment_option(appl)) - media_lane_count = self.port_dict[lport]['media_lane_count'] - media_lane_assignment_options = self.port_dict[lport]['media_lane_assignment_options'] - self.port_dict[lport]['media_lanes_mask'] = self.get_cmis_media_lanes_mask(api, - appl, lport, subport) - if self.port_dict[lport]['media_lanes_mask'] <= 0: - self.log_error("{}: Invalid media lane mask received - media_lane_count {} " - "media_lane_assignment_options {} subport {}" - " appl {}!".format(lport, media_lane_count, media_lane_assignment_options, subport, appl)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) - continue - media_lanes_mask = self.port_dict[lport]['media_lanes_mask'] - self.log_notice("{}: Setting media_lanemask=0x{:x}".format(lport, media_lanes_mask)) - - if self.port_dict[lport]['host_tx_ready'] != 'true' or \ - self.port_dict[lport]['admin_status'] != 'up': - if is_fast_reboot and self.check_datapath_state(api, host_lanes_mask, ['DataPathActivated']): - self.log_notice("{} Skip datapath re-init in fast-reboot".format(lport)) - else: - self.log_notice("{} Forcing Tx laser OFF".format(lport)) - # Force DataPath re-init - api.tx_disable_channel(media_lanes_mask, True) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) - continue - # Configure the target output power if ZR module - if api.is_coherent_module(): - tx_power = self.port_dict[lport]['tx_power'] - # Prevent configuring same tx power multiple times - if 0 != tx_power and tx_power != api.get_tx_config_power(): - if 1 != self.configure_tx_output_power(api, lport, tx_power): - self.log_error("{} failed to configure Tx power = {}".format(lport, tx_power)) - else: - self.log_notice("{} Successfully configured Tx power = {}".format(lport, tx_power)) - - # Set all the DP lanes AppSel to unused(0) when non default app code needs to be configured - if True == self.is_appl_reconfigure_required(api, appl): - self.log_notice("{}: Decommissioning all lanes/datapaths to default AppSel=0".format(lport)) - if True != api.decommission_all_datapaths(): - self.log_notice("{}: Failed to default to AppSel=0".format(lport)) - self.force_cmis_reinit(lport, retries + 1) - continue - - need_update = self.is_cmis_application_update_required(api, appl, host_lanes_mask) - - # For ZR module, Datapath needes to be re-initlialized on new channel selection - if api.is_coherent_module(): - freq = self.port_dict[lport]['laser_freq'] - # If user requested frequency is NOT the same as configured on the module - # force datapath re-initialization - if 0 != freq and freq != api.get_laser_config_freq(): - if self.validate_frequency_and_grid(api, lport, freq) == True: - need_update = True - else: - # clear setting of invalid frequency config - self.port_dict[lport]['laser_freq'] = 0 - - if not need_update: - # No application updates - self.log_notice("{}: no CMIS application update required...READY".format(lport)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) - continue - self.log_notice("{}: force Datapath reinit".format(lport)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_DEINIT) - elif state == CMIS_STATE_DP_DEINIT: - # D.2.2 Software Deinitialization - api.set_datapath_deinit(host_lanes_mask) - - # D.1.3 Software Configuration and Initialization - media_lanes_mask = self.port_dict[lport]['media_lanes_mask'] - if not api.tx_disable_channel(media_lanes_mask, True): - self.log_notice("{}: unable to turn off tx power with host_lanes_mask {}".format(lport, host_lanes_mask)) - self.port_dict[lport]['cmis_retries'] = retries + 1 - continue - - #Sets module to high power mode and doesn't impact datapath if module is already in high power mode - api.set_lpmode(False) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_AP_CONF) - dpDeinitDuration = self.get_cmis_dp_deinit_duration_secs(api) - modulePwrUpDuration = self.get_cmis_module_power_up_duration_secs(api) - self.log_notice("{}: DpDeinit duration {} secs, modulePwrUp duration {} secs".format(lport, dpDeinitDuration, modulePwrUpDuration)) - self.port_dict[lport]['cmis_expired'] = now + datetime.timedelta(seconds = max(modulePwrUpDuration, dpDeinitDuration)) - - elif state == CMIS_STATE_AP_CONF: - # Explicit control bit to apply custom Host SI settings. - # It will be set to 1 and applied via set_application if - # custom SI settings is applicable - ec = 0 - - # TODO: Use fine grained time when the CMIS memory map is available - if not self.check_module_state(api, ['ModuleReady']): - if (expired is not None) and (expired <= now): - self.log_notice("{}: timeout for 'ModuleReady'".format(lport)) - self.force_cmis_reinit(lport, retries + 1) - continue - - if not self.check_datapath_state(api, host_lanes_mask, ['DataPathDeactivated']): - if (expired is not None) and (expired <= now): - self.log_notice("{}: timeout for 'DataPathDeactivated state'".format(lport)) - self.force_cmis_reinit(lport, retries + 1) - continue - - if api.is_coherent_module(): - # For ZR module, configure the laser frequency when Datapath is in Deactivated state - freq = self.port_dict[lport]['laser_freq'] - if 0 != freq: - if 1 != self.configure_laser_frequency(api, lport, freq): - self.log_error("{} failed to configure laser frequency {} GHz".format(lport, freq)) - else: - self.log_notice("{} configured laser frequency {} GHz".format(lport, freq)) - - # Stage custom SI settings - if optics_si_parser.optics_si_present(): - optics_si_dict = {} - # Apply module SI settings if applicable - lane_speed = int(speed/1000)//host_lane_count - optics_si_dict = optics_si_parser.fetch_optics_si_setting(pport, lane_speed, sfp) - - self.log_debug("Read SI parameters for port {} from optics_si_settings.json vendor file:".format(lport)) - for key, sub_dict in optics_si_dict.items(): - self.log_debug("{}".format(key)) - for sub_key, value in sub_dict.items(): - self.log_debug("{}: {}".format(sub_key, str(value))) - - if optics_si_dict: - self.log_notice("{}: Apply Optics SI found for Vendor: {} PN: {} lane speed: {}G". - format(lport, api.get_manufacturer(), api.get_model(), lane_speed)) - if not api.stage_custom_si_settings(host_lanes_mask, optics_si_dict): - self.log_notice("{}: unable to stage custom SI settings ".format(lport)) - self.force_cmis_reinit(lport, retries + 1) - continue - - # Set Explicit control bit to apply Custom Host SI settings - ec = 1 - - # D.1.3 Software Configuration and Initialization - api.set_application(host_lanes_mask, appl, ec) - if not api.scs_apply_datapath_init(host_lanes_mask): - self.log_notice("{}: unable to set application and stage DP init".format(lport)) - self.force_cmis_reinit(lport, retries + 1) - continue - - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_INIT) - elif state == CMIS_STATE_DP_INIT: - if not self.check_config_error(api, host_lanes_mask, ['ConfigSuccess']): - if (expired is not None) and (expired <= now): - self.log_notice("{}: timeout for 'ConfigSuccess'".format(lport)) - self.force_cmis_reinit(lport, retries + 1) - continue - - if hasattr(api, 'get_cmis_rev'): - # Check datapath init pending on module that supports CMIS 5.x - majorRev = int(api.get_cmis_rev().split('.')[0]) - if majorRev >= 5 and not self.check_datapath_init_pending(api, host_lanes_mask): - self.log_notice("{}: datapath init not pending".format(lport)) - self.force_cmis_reinit(lport, retries + 1) - continue - - # Ensure the Datapath is NOT Activated unless the host Tx siganl is good. - # NOTE: Some CMIS compliant modules may have 'auto-squelch' feature where - # the module won't take datapaths to Activated state if host tries to enable - # the datapaths while there is no good Tx signal from the host-side. - if self.port_dict[lport]['admin_status'] != 'up' or \ - self.port_dict[lport]['host_tx_ready'] != 'true': - self.log_notice("{} waiting for host tx ready...".format(lport)) - continue - - # D.1.3 Software Configuration and Initialization - api.set_datapath_init(host_lanes_mask) - dpInitDuration = self.get_cmis_dp_init_duration_secs(api) - self.log_notice("{}: DpInit duration {} secs".format(lport, dpInitDuration)) - self.port_dict[lport]['cmis_expired'] = now + datetime.timedelta(seconds=dpInitDuration) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_TXON) - elif state == CMIS_STATE_DP_TXON: - if not self.check_datapath_state(api, host_lanes_mask, ['DataPathInitialized']): - if (expired is not None) and (expired <= now): - self.log_notice("{}: timeout for 'DataPathInitialized'".format(lport)) - self.force_cmis_reinit(lport, retries + 1) - continue - - # Turn ON the laser - media_lanes_mask = self.port_dict[lport]['media_lanes_mask'] - api.tx_disable_channel(media_lanes_mask, False) - self.log_notice("{}: Turning ON tx power".format(lport)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_ACTIVATE) - elif state == CMIS_STATE_DP_ACTIVATE: - if not self.check_datapath_state(api, host_lanes_mask, ['DataPathActivated']): - if (expired is not None) and (expired <= now): - self.log_notice("{}: timeout for 'DataPathActivated'".format(lport)) - self.force_cmis_reinit(lport, retries + 1) - continue - - self.log_notice("{}: READY".format(lport)) - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) - self.post_port_active_apsel_to_db(api, lport, host_lanes_mask) - - except Exception as e: - self.log_error("{}: internal errors due to {}".format(lport, e)) - log_exception_traceback() - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + self.handle_cmis_state_machine(lport, info, is_fast_reboot) self.log_notice("Stopped") From a5dd3e83ea9fedf5c758bbdea816d1bb8d30ceeb Mon Sep 17 00:00:00 2001 From: Wataru Ishida Date: Fri, 14 Jun 2024 02:01:24 +0000 Subject: [PATCH 2/3] fix(xcvrd): handle port add/remove events in CmisManagerTask This commit fixes DPB support with CMIS transceivers. CmisManagerTask's `port_dict` must be updated according to the port add/remove events. This commit removes the `port_mapping` field from CmisManagerTask as `port_mapping` was mostly used just for storing `asic_id` information and that can be simply done by `port_dict` instead. Added a helper method `get_asic_id()` method to CmisManagerTask for getting `asic_id` from `logical_port`. fixes https://github.com/sonic-net/sonic-buildimage/issues/18893 Signed-off-by: Wataru Ishida --- sonic-xcvrd/tests/test_xcvrd.py | 359 ++++++++++++++++++++++++++++---- sonic-xcvrd/xcvrd/xcvrd.py | 67 +++--- 2 files changed, 351 insertions(+), 75 deletions(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 11b7f4e89..46a37948b 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -182,6 +182,7 @@ def test_CmisManagerTask_task_run_with_exception(self): @patch('xcvrd.xcvrd.CmisManagerTask.wait_for_port_config_done', MagicMock()) @patch('xcvrd.xcvrd.is_fast_reboot_enabled', MagicMock(return_value=(False))) @patch('xcvrd.xcvrd.get_cmis_application_desired', MagicMock(side_effect=KeyError)) + @patch('xcvrd.xcvrd.is_cmis_api', MagicMock(return_value=True)) @patch('xcvrd.xcvrd.log_exception_traceback') @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') @patch('xcvrd.xcvrd.platform_chassis') @@ -204,9 +205,12 @@ def test_CmisManagerTask_get_xcvr_api_exception(self, mock_platform_chassis, moc # Case 1: get_xcvr_api() raises an exception task.on_port_update_event(port_change_event) mock_sfp.get_xcvr_api = MagicMock(side_effect=NotImplementedError) - task.task_worker() + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert mock_log_exception_traceback.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_FAILED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_FAILED # Case 2: is_flat_memory() raises AttributeError. In this case, CMIS SM should transition to READY state mock_xcvr_api = MagicMock() @@ -214,21 +218,70 @@ def test_CmisManagerTask_get_xcvr_api_exception(self, mock_platform_chassis, moc mock_xcvr_api.is_flat_memory = MagicMock(side_effect=AttributeError) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.on_port_update_event(port_change_event) - task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_READY + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY # Case 3: get_cmis_application_desired() raises an exception mock_xcvr_api.is_flat_memory = MagicMock(return_value=False) mock_xcvr_api.is_coherent_module = MagicMock(return_value=False) mock_xcvr_api.get_module_type_abbreviation = MagicMock(return_value='QSFP-DD') - task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.on_port_update_event(port_change_event) task.get_cmis_host_lanes_mask = MagicMock() - task.task_worker() + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) assert mock_log_exception_traceback.call_count == 2 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_FAILED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_FAILED assert task.get_cmis_host_lanes_mask.call_count == 0 + # Case 4: get_xcvr_api() returns None + task.on_port_update_event(port_change_event) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + mock_sfp.get_xcvr_api.return_value = None + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY + + # Case 5: is_flat_memory() returns True. In this case, CMIS SM should transition to READY state + task.on_port_update_event(port_change_event) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + mock_xcvr_api = MagicMock() + mock_xcvr_api.is_flat_memory.return_value = True + mock_sfp.get_xcvr_api.return_value = mock_xcvr_api + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY + + # Case 6: get_module_type_abbreviation() reutrns non-cmis module type + task.on_port_update_event(port_change_event) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + mock_xcvr_api = MagicMock() + mock_xcvr_api.is_flat_memory.return_value = False + mock_xcvr_api.get_module_type_abbreviation.return_value = 'SFP' + mock_sfp.get_xcvr_api.return_value = mock_xcvr_api + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY + + # Case 7: too many retries + task.on_port_update_event(port_change_event) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + mock_xcvr_api = MagicMock() + mock_xcvr_api.is_flat_memory.return_value = False + mock_xcvr_api.get_module_type_abbreviation.return_value = 'QSFP-DD' + mock_xcvr_api.is_coherent_module.return_value = False + mock_sfp.get_xcvr_api.return_value = mock_xcvr_api + assert len(task.port_dict) == 1 + for lport, info in task.port_dict.items(): + info["cmis_retries"] = task.CMIS_MAX_RETRIES + 1 + task.handle_cmis_state_machine(lport, info, False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_FAILED + @patch('xcvrd.xcvrd_utilities.port_event_helper.subscribe_port_config_change', MagicMock(side_effect = NotImplementedError)) def test_DomInfoUpdateTask_task_run_with_exception(self): port_mapping = PortMapping() @@ -289,7 +342,7 @@ def test_SfpStateUpdateTask_task_run_with_exception(self): @patch('xcvrd.xcvrd.SffManagerTask.join') def test_DaemonXcvrd_run_with_exception(self, mock_task_join_sff, mock_task_join_sfp, mock_task_join_dom, mock_init, mock_os_kill): - mock_init.return_value = (PortMapping(), set()) + mock_init.return_value = PortMapping() xcvrd = DaemonXcvrd(SYSLOG_IDENTIFIER) xcvrd.enable_sff_mgr = True xcvrd.load_feature_flags = MagicMock() @@ -1188,7 +1241,7 @@ def test_DaemonXcvrd_wait_for_port_config_done(self, mock_select, mock_sub_table @patch('xcvrd.xcvrd.DomInfoUpdateTask.join') @patch('xcvrd.xcvrd.SfpStateUpdateTask.join') def test_DaemonXcvrd_run(self, mock_task_stop1, mock_task_stop2, mock_task_run1, mock_task_run2, mock_deinit, mock_init): - mock_init.return_value = (PortMapping(), set()) + mock_init.return_value = PortMapping() xcvrd = DaemonXcvrd(SYSLOG_IDENTIFIER) xcvrd.load_feature_flags = MagicMock() xcvrd.stop_event.wait = MagicMock() @@ -1470,22 +1523,97 @@ def test_CmisManagerTask_handle_port_change_event(self): task.on_port_update_event(port_change_event) assert task.isPortConfigDone + assert len(task.port_dict) == 0 + assert len(task.deleted_ports) == 0 + # CmisManagerTask doesn't handle PORT_ADD event. + # No change on port_dict port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 0 + assert len(task.deleted_ports) == 0 + # CmisManagerTask doesn't handle PORT_REMOVE event. + # No change on port_dict port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_REMOVE) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 0 + assert len(task.deleted_ports) == 0 + # PORT_DEL event before handling PORT_SET + # No change on port_dict port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_DEL) task.on_port_update_event(port_change_event) - assert len(task.port_dict) == 1 + assert len(task.port_dict) == 0 + assert len(task.deleted_ports) == 0 port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 + assert len(task.deleted_ports) == 0 + + # PORT_DEL event after handling PORT_SET + # No change on port_dict + # deleted_ports should have been updated + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_DEL) + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 1 + assert len(task.deleted_ports) == 1 + + @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_state_port_tbl') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_cfg_port_tbl') + @patch('xcvrd.xcvrd.platform_chassis') + def test_CmisManagerTask_do_task(self, mock_chassis, mock_get_cfg_port_tbl, mock_get_state_port_tbl, mock_get_status_tbl): + mock_get_status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE) + mock_get_state_port_tbl = Table("STATE_DB", "PORT_TABLE") + mock_get_cfg_port_tbl.get.return_value = (True, {"admin_status": "up"}) + mock_chassis.get_sfp.return_value.get_presence.return_value = False # sfp not present + + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl + task.xcvr_table_helper.get_state_port_tbl.return_value = mock_get_state_port_tbl + task.xcvr_table_helper.get_cfg_port_tbl.return_value = mock_get_cfg_port_tbl + + assert not task.isPortConfigDone + port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) + task.on_port_update_event(port_change_event) + assert task.isPortConfigDone + assert len(task.port_dict) == 0 + + # empty lanes + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, {"lanes": ""}) + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 1 + + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + task.do_task(False) + + # CMIS state doesn't change + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, {"lanes": "1", "speed": "1000000"}) + task.on_port_update_event(port_change_event) + task.do_task(False) + + # CMIS state goes to REMOVED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_REMOVED + mock_chassis.get_sfp.return_value.get_presence.return_value = True # sfp present + + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, {"lanes": "1", "speed": "1000000"}) + task.on_port_update_event(port_change_event) + task.do_task(False) + + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY + port_change_event = PortChangeEvent('Ethernet0', -1, 0, PortChangeEvent.PORT_DEL) + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 1 + assert len(task.deleted_ports) == 1 + task.do_task(False) + assert len(task.deleted_ports) == 0 + assert len(task.port_dict) == 0 @patch('xcvrd.xcvrd.XcvrTableHelper') def test_CmisManagerTask_get_configured_freq(self, mock_table_helper): @@ -1509,6 +1637,51 @@ def test_CmisManagerTask_get_configured_tx_power_from_db(self, mock_table_helper task.xcvr_table_helper.get_cfg_port_tbl = mock_table_helper.get_cfg_port_tbl assert task.get_configured_tx_power_from_db('Ethernet0') == -10 + @patch('xcvrd.xcvrd.XcvrTableHelper') + def test_CmisManagerTask_get_host_tx_status(self, mock_table_helper): + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + state_port_tbl = MagicMock() + state_port_tbl.get = MagicMock(return_value=(False, None)) + mock_table_helper.get_state_port_tbl = MagicMock(return_value=state_port_tbl) + task.xcvr_table_helper.get_state_port_tbl = mock_table_helper.get_state_port_tbl + assert task.get_host_tx_status('Ethernet0') == 'false' + state_port_tbl = MagicMock() + state_port_tbl.get = MagicMock(return_value=(True, (('host_tx_ready', 'true'),))) + mock_table_helper.get_state_port_tbl = MagicMock(return_value=state_port_tbl) + task.xcvr_table_helper.get_state_port_tbl = mock_table_helper.get_state_port_tbl + assert task.get_host_tx_status('Ethernet0') == 'true' + + @patch('xcvrd.xcvrd.XcvrTableHelper') + def test_CmisManagerTask_get_port_admin_status(self, mock_table_helper): + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + cfg_port_tbl = MagicMock() + cfg_port_tbl.get = MagicMock(return_value=(False, None)) + mock_table_helper.get_cfg_port_tbl = MagicMock(return_value=cfg_port_tbl) + task.xcvr_table_helper.get_cfg_port_tbl = mock_table_helper.get_cfg_port_tbl + assert task.get_port_admin_status('Ethernet0') == 'down' + cfg_port_tbl = MagicMock() + cfg_port_tbl.get = MagicMock(return_value=(True, (('admin_status', 'up'),))) + mock_table_helper.get_cfg_port_tbl = MagicMock(return_value=cfg_port_tbl) + task.xcvr_table_helper.get_cfg_port_tbl = mock_table_helper.get_cfg_port_tbl + assert task.get_port_admin_status('Ethernet0') == 'up' + + def test_CmisManagerTask_configre_tx_output_power(self): + mock_xcvr_api = MagicMock() + mock_xcvr_api.get_supported_power_config = MagicMock(return_value=(-10, 4)) + mock_xcvr_api.set_tx_power = MagicMock(return_value=1) + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + assert task.configure_tx_output_power(mock_xcvr_api, 'Ethernet0', 0) == 1 + # setting power out of range is currently not an error + assert task.configure_tx_output_power(mock_xcvr_api, 'Ethernet0', -20) == 1 + # setting power out of range is currently not an error + assert task.configure_tx_output_power(mock_xcvr_api, 'Ethernet0', 10) == 1 + @patch('xcvrd.xcvrd.platform_chassis') @patch('xcvrd.xcvrd.is_fast_reboot_enabled', MagicMock(return_value=(False))) @patch('xcvrd.xcvrd.PortChangeObserver', MagicMock(handle_port_update_event=MagicMock())) @@ -1856,24 +2029,33 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl): mock_chassis.get_sfp = MagicMock(return_value=mock_sfp) port_mapping = PortMapping() + port_mapping.handle_port_change_event(PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD)) stop_event = threading.Event() task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) - task.port_mapping.logical_port_list = ['Ethernet0'] task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_UNKNOWN + assert len(task.port_dict) == 1 + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_UNKNOWN - task.port_mapping.logical_port_list = MagicMock() port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert task.isPortConfigDone + port_change_event = PortChangeEvent('PortInitDone', -1, 0, PortChangeEvent.PORT_SET) + task.on_port_update_event(port_change_event) + assert task.isPortInitDone + + # non-physical port is silently ignored + port_change_event = PortChangeEvent('PortChannel1', -1, 0, PortChangeEvent.PORT_SET) + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 1 + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8'}) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_INSERTED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED task.get_host_tx_status = MagicMock(return_value='true') task.get_port_admin_status = MagicMock(return_value='up') @@ -1886,51 +2068,50 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl): task.is_appl_reconfigure_required = MagicMock(return_value=True) mock_xcvr_api.decommission_all_datapaths = MagicMock(return_value=True) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_DP_DEINIT + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_DEINIT task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.set_datapath_deinit.call_count == 1 assert mock_xcvr_api.tx_disable_channel.call_count == 1 assert mock_xcvr_api.set_lpmode.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_AP_CONF + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_AP_CONF # Case 2: DP_DEINIT --> AP Configured task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.set_application.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_DP_INIT + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_INIT # Case 3: AP Configured --> DP_INIT task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.set_datapath_init.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_DP_TXON + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_TXON # Case 4: DP_INIT --> DP_TXON task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.tx_disable_channel.call_count == 2 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_DP_ACTIVATE + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_ACTIVATE # Case 5: DP_TXON --> DP_ACTIVATION - task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.post_port_active_apsel_to_db = MagicMock() - task.task_worker() + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) assert task.post_port_active_apsel_to_db.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_READY + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY # Fail test coverage - Module Inserted state failing to reach DP_DEINIT port_mapping = PortMapping() + port_mapping.handle_port_change_event(PortChangeEvent('Ethernet1', 1, 0, PortChangeEvent.PORT_ADD)) stop_event = threading.Event() task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) - task.port_mapping.logical_port_list = ['Ethernet1'] task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.task_worker() - assert get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet1'))) == CMIS_STATE_UNKNOWN + assert get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet1'))) == CMIS_STATE_UNKNOWN - task.port_mapping.logical_port_list = MagicMock() port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert task.isPortConfigDone @@ -1939,7 +2120,7 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl): {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8'}) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 - assert get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet1'))) == CMIS_STATE_INSERTED + assert get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet1'))) == CMIS_STATE_INSERTED task.get_host_tx_status = MagicMock(return_value='true') task.get_port_admin_status = MagicMock(return_value='up') @@ -1952,8 +2133,98 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl): task.is_appl_reconfigure_required = MagicMock(return_value=True) mock_xcvr_api.decommission_all_datapaths = MagicMock(return_value=False) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() - assert not get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet1'))) == CMIS_STATE_DP_DEINIT + task.do_task(False) + assert not get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet1'))) == CMIS_STATE_DP_DEINIT + + @patch('xcvrd.xcvrd_utilities.optics_si_parser.g_optics_si_dict', optics_si_settings_dict) + @patch('xcvrd.xcvrd.platform_chassis') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_cfg_port_tbl') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_state_port_tbl') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') + def test_CmisManagerTask_do_task_with_optics_si(self, mock_get_status_tbl, mock_get_state_port_tbl, mock_get_cfg_port_tbl, mock_chassis): + mock_get_status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE) + mock_get_state_port_tbl = Table("STATE_DB", "PORT_TABLE") + mock_get_cfg_port_tbl.get.return_value = (True, {"admin_status": "up"}) + mock_sfp = MagicMock() + mock_sfp.get_presence.return_value = True # sfp present + mock_api = MagicMock() + mock_api.is_flat_memory.return_value = False + mock_api.get_module_type_abbreviation.return_value = 'QSFP-DD' + mock_api.is_coherent_module.return_value = False + + mock_sfp.get_xcvr_api.return_value = mock_api + mock_chassis.get_sfp.return_value = mock_sfp + + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl + task.xcvr_table_helper.get_state_port_tbl.return_value = mock_get_state_port_tbl + task.xcvr_table_helper.get_cfg_port_tbl.return_value = mock_get_cfg_port_tbl + + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, + {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8', 'host_tx_ready': 'true'}) + task.on_port_update_event(port_change_event) + # forcing the state to be CMIS_STATE_DEINIT + task.update_port_transceiver_status_table_sw_cmis_state('Ethernet0', CMIS_STATE_DP_DEINIT) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_DEINIT + task.port_dict['Ethernet0']['cmis_retries'] = 0 + task.port_dict['Ethernet0']['host_lanes_mask'] = 0xff + task.port_dict['Ethernet0']['media_lanes_mask'] = 0xff + task.port_dict['Ethernet0']['appl'] = 1 + + mock_api.tx_disable_channel.return_value = False + task.do_task(False) + assert task.port_dict['Ethernet0'].get('cmis_retries') == 1 + assert task.port_dict['Ethernet0'].get('cmis_expired') == None + + mock_api.tx_disable_channel.return_value = True + mock_api.get_datapath_deinit_duration.return_value = 1000 + mock_api.get_module_pwr_up_duration.return_value = 1000 + task.do_task(False) + assert task.port_dict['Ethernet0'].get('cmis_retries') == 1 + assert task.port_dict['Ethernet0'].get('cmis_expired') != None + + mock_api.get_module_state.return_value = 'Test' + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_AP_CONF + + # set expired time explicitly + task.port_dict['Ethernet0']['cmis_expired'] = datetime.datetime.now() + + task.do_task(False) + + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + + # forcing the state to be CMIS_STATE_AP_CONF + task.update_port_transceiver_status_table_sw_cmis_state('Ethernet0', CMIS_STATE_AP_CONF) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_AP_CONF + mock_api.get_module_state.return_value = 'ModuleReady' + mock_api.get_datapath_state.return_value = {f"DP{i}State": 'Test' for i in range(1, task.CMIS_MAX_HOST_LANES + 1)} + + assert task.check_datapath_state(mock_api, 0xff, ['DataPathDeactivated']) == False + + # set expired time explicitly + task.port_dict['Ethernet0']['cmis_expired'] = datetime.datetime.now() + + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + + # forcing the state to be CMIS_STATE_AP_CONF + task.update_port_transceiver_status_table_sw_cmis_state('Ethernet0', CMIS_STATE_AP_CONF) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_AP_CONF + mock_api.get_datapath_state.return_value = {f"DP{i}State": 'DataPathDeactivated' for i in range(1, task.CMIS_MAX_HOST_LANES + 1)} + + assert task.check_datapath_state(mock_api, 0xff, ['DataPathDeactivated']) + task.do_task(False) + + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_INIT + + mock_api.get_config_datapath_hostlane_status.return_value = {f"ConfigStatusLane{i}": 'ConfigSuccess' for i in range(1, task.CMIS_MAX_HOST_LANES + 1)} + assert task.check_config_error(mock_api, 0xff, ['ConfigSuccess']) + mock_api.get_datapath_init_duration.return_value = 1000 + + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_TXON @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') @patch('xcvrd.xcvrd.platform_chassis') @@ -2058,15 +2329,14 @@ def test_CmisManagerTask_task_worker_fastboot(self, mock_chassis, mock_get_statu mock_chassis.get_sfp = MagicMock(return_value=mock_sfp) port_mapping = PortMapping() + port_mapping.handle_port_change_event(PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD)) stop_event = threading.Event() task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) - task.port_mapping.logical_port_list = ['Ethernet0'] task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_UNKNOWN + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_UNKNOWN - task.port_mapping.logical_port_list = MagicMock() port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert task.isPortConfigDone @@ -2075,7 +2345,7 @@ def test_CmisManagerTask_task_worker_fastboot(self, mock_chassis, mock_get_statu {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8'}) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_INSERTED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED task.get_host_tx_status = MagicMock(return_value='false') task.get_port_admin_status = MagicMock(return_value='up') @@ -2085,9 +2355,9 @@ def test_CmisManagerTask_task_worker_fastboot(self, mock_chassis, mock_get_statu task.configure_laser_frequency = MagicMock(return_value=1) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_READY + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') @@ -2193,15 +2463,14 @@ def test_CmisManagerTask_task_worker_host_tx_ready_false(self, mock_chassis, moc mock_chassis.get_sfp = MagicMock(return_value=mock_sfp) port_mapping = PortMapping() + port_mapping.handle_port_change_event(PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD)) stop_event = threading.Event() task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) - task.port_mapping.logical_port_list = ['Ethernet0'] task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_UNKNOWN + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_UNKNOWN - task.port_mapping.logical_port_list = MagicMock() port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert task.isPortConfigDone @@ -2210,7 +2479,7 @@ def test_CmisManagerTask_task_worker_host_tx_ready_false(self, mock_chassis, moc {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8'}) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_INSERTED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED task.get_host_tx_status = MagicMock(return_value='false') task.get_port_admin_status = MagicMock(return_value='up') @@ -2220,10 +2489,10 @@ def test_CmisManagerTask_task_worker_host_tx_ready_false(self, mock_chassis, moc task.configure_laser_frequency = MagicMock(return_value=1) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.tx_disable_channel.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_READY + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY @pytest.mark.parametrize("lport, expected_dom_polling", [ ('Ethernet0', 'disabled'), diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index f5cb0b3cd..09fdcab76 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -827,8 +827,8 @@ def __init__(self, namespaces, port_mapping, main_thread_stop_event, skip_cmis_m self.exc = None self.task_stopping_event = threading.Event() self.main_thread_stop_event = main_thread_stop_event - self.port_dict = {} - self.port_mapping = copy.deepcopy(port_mapping) + self.port_dict = {k: {"asic_id": v} for k, v in port_mapping.logical_to_asic.items()} + self.deleted_ports = {} self.xcvr_table_helper = XcvrTableHelper(namespaces) self.isPortInitDone = False self.isPortConfigDone = False @@ -844,9 +844,11 @@ def log_notice(self, message): def log_error(self, message): helper_logger.log_error("CMIS: {}".format(message)) + def get_asic_id(self, lport): + return self.port_dict.get(lport, {}).get("asic_id", -1) + def update_port_transceiver_status_table_sw_cmis_state(self, lport, cmis_state_to_set): - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - status_table = self.xcvr_table_helper.get_status_tbl(asic_index) + status_table = self.xcvr_table_helper.get_status_tbl(self.get_asic_id(lport)) if status_table is None: helper_logger.log_error("status_table is None while updating " "sw CMIS state for lport {}".format(lport)) @@ -860,6 +862,7 @@ def on_port_update_event(self, port_change_event): return lport = port_change_event.port_name + # 'index' can be -1 if STATE_DB|PORT_TABLE pport = port_change_event.port_index if lport in ['PortInitDone']: @@ -878,15 +881,13 @@ def on_port_update_event(self, port_change_event): if pport is None: return - # Skip if the port/cage type is not a CMIS - # 'index' can be -1 if STATE_DB|PORT_TABLE - if lport not in self.port_dict: - self.port_dict[lport] = {} + if port_change_event.event_type == port_change_event.PORT_SET: + if lport not in self.port_dict: + self.port_dict[lport] = {"asic_id": port_change_event.asic_id} - if port_change_event.port_dict is None: - return + if port_change_event.port_dict is None: + return - if port_change_event.event_type == port_change_event.PORT_SET: if pport >= 0: self.port_dict[lport]['index'] = pport if 'speed' in port_change_event.port_dict and port_change_event.port_dict['speed'] != 'N/A': @@ -906,7 +907,13 @@ def on_port_update_event(self, port_change_event): self.force_cmis_reinit(lport, 0) else: + # PORT_DEL event for the same lport happens 3 times because + # we are subscribing to CONFIG_DB, STATE_DB|TRANSCEIVER_INFO, and STATE_DB|PORT_TABLE. + # We only handle the first one and ignore the rest. + if lport in self.deleted_ports or lport not in self.port_dict: + return self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_REMOVED) + self.deleted_ports[lport] = port_change_event def get_cmis_dp_init_duration_secs(self, api): return api.get_datapath_init_duration()/1000 @@ -1170,8 +1177,7 @@ def get_configured_laser_freq_from_db(self, lport): Return the Tx power configured by user in CONFIG_DB's PORT table """ freq = 0 - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(asic_index) + port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(self.get_asic_id(lport)) found, port_info = port_tbl.get(lport) if found and 'laser_freq' in dict(port_info): @@ -1183,8 +1189,7 @@ def get_configured_tx_power_from_db(self, lport): Return the Tx power configured by user in CONFIG_DB's PORT table """ power = 0 - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(asic_index) + port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(self.get_asic_id(lport)) found, port_info = port_tbl.get(lport) if found and 'tx_power' in dict(port_info): @@ -1194,8 +1199,7 @@ def get_configured_tx_power_from_db(self, lport): def get_host_tx_status(self, lport): host_tx_ready = 'false' - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - state_port_tbl = self.xcvr_table_helper.get_state_port_tbl(asic_index) + state_port_tbl = self.xcvr_table_helper.get_state_port_tbl(self.get_asic_id(lport)) found, port_info = state_port_tbl.get(lport) if found and 'host_tx_ready' in dict(port_info): @@ -1205,8 +1209,7 @@ def get_host_tx_status(self, lport): def get_port_admin_status(self, lport): admin_status = 'down' - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - cfg_port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(asic_index) + cfg_port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(self.get_asic_id(lport)) found, port_info = cfg_port_tbl.get(lport) if found: @@ -1276,8 +1279,7 @@ def post_port_active_apsel_to_db(self, api, lport, host_lanes_mask): media_lane_count = appl_advt_act.get('media_lane_count', 'N/A') if appl_advt_act else 'N/A' tuple_list.append(('media_lane_count', str(media_lane_count))) - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - intf_tbl = self.xcvr_table_helper.get_intf_tbl(asic_index) + intf_tbl = self.xcvr_table_helper.get_intf_tbl(self.get_asic_id(lport)) fvs = swsscommon.FieldValuePairs(tuple_list) intf_tbl.set(lport, fvs) self.log_notice("{}: updated TRANSCEIVER_INFO_TABLE {}".format(lport, tuple_list)) @@ -1304,7 +1306,7 @@ def wait_for_port_config_done(self, namespace): break def handle_cmis_state_machine(self, lport, info, is_fast_reboot): - state = get_cmis_state_from_state_db(lport, self.xcvr_table_helper.get_status_tbl(self.port_mapping.get_asic_id_for_logical_port(lport))) + state = get_cmis_state_from_state_db(lport, self.xcvr_table_helper.get_status_tbl(self.get_asic_id(lport))) if state in CMIS_TERMINAL_STATES or state == CMIS_STATE_UNKNOWN: if state != CMIS_STATE_READY: self.port_dict[lport]['appl'] = 0 @@ -1618,6 +1620,17 @@ def handle_cmis_state_machine(self, lport, info, is_fast_reboot): log_exception_traceback() self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + def do_task(self, is_fast_reboot): + for lport, info in self.port_dict.items(): + if self.task_stopping_event.is_set(): + break + + self.handle_cmis_state_machine(lport, info, is_fast_reboot) + + for event in self.deleted_ports.values(): + self.port_dict.pop(event.port_name) + + self.deleted_ports = {} def task_worker(self): self.xcvr_table_helper = XcvrTableHelper(self.namespaces) @@ -1626,8 +1639,7 @@ def task_worker(self): for namespace in self.namespaces: self.wait_for_port_config_done(namespace) - logical_port_list = self.port_mapping.logical_port_list - for lport in logical_port_list: + for lport in self.port_dict.keys(): self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_UNKNOWN) is_fast_reboot = is_fast_reboot_enabled() @@ -1640,12 +1652,7 @@ def task_worker(self): while not self.task_stopping_event.is_set(): # Handle port change event from main thread port_change_observer.handle_port_update_event() - - for lport, info in self.port_dict.items(): - if self.task_stopping_event.is_set(): - break - - self.handle_cmis_state_machine(lport, info, is_fast_reboot) + self.do_task(is_fast_reboot) self.log_notice("Stopped") From 4253a46e0f24a74a9660379849cf778ea3f017f4 Mon Sep 17 00:00:00 2001 From: Wataru Ishida Date: Wed, 26 Jun 2024 02:32:41 +0000 Subject: [PATCH 3/3] fix(xcvrd): allow selecting appl whose host lane count is less than the actual value Signed-off-by: Wataru Ishida --- sonic-xcvrd/tests/test_xcvrd.py | 38 +++++++++++++++++++++++++++++++++ sonic-xcvrd/xcvrd/xcvrd.py | 14 ++++++++---- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 46a37948b..449106da3 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -2226,6 +2226,44 @@ def test_CmisManagerTask_do_task_with_optics_si(self, mock_get_status_tbl, mock_ task.do_task(False) assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_TXON + # test https://github.com/sonic-net/sonic-buildimage/issues/19336 is fixed + @patch('xcvrd.xcvrd.is_cmis_api', MagicMock(return_value=True)) + def test_CmisManagerTask_get_cmis_application_desired(self): + mock_api = MagicMock() + mock_api.get_application_advertisement.return_value = { + 1: { + 'host_electrical_interface_id': '100G', + 'module_media_interface_id': '100G', + 'media_lane_count': 2, + 'host_lane_count': 2, + 'host_lane_assignment_options': 0b01010101, + 'media_lane_assignment_options': 0b01010101, + }, + } + + assert get_cmis_application_desired(mock_api, 8, 100000) == 1 + + mock_api.get_application_advertisement.return_value = { + 1: { + 'host_electrical_interface_id': '100G', + 'module_media_interface_id': '100G', + 'media_lane_count': 2, + 'host_lane_count': 2, + 'host_lane_assignment_options': 0b01010101, + 'media_lane_assignment_options': 0b01010101, + }, + 2: { + 'host_electrical_interface_id': '100G', + 'module_media_interface_id': '100G', + 'media_lane_count': 1, + 'host_lane_count': 1, + 'host_lane_assignment_options': 0b11111111, + 'media_lane_assignment_options': 0b11111111, + }, + } + + assert get_cmis_application_desired(mock_api, 8, 100000) == 2 + @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') @patch('xcvrd.xcvrd.platform_chassis') @patch('xcvrd.xcvrd.is_fast_reboot_enabled', MagicMock(return_value=(True))) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index 09fdcab76..4f58b6cb6 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -155,10 +155,16 @@ def get_cmis_application_desired(api, host_lane_count, speed): return None appl_dict = api.get_application_advertisement() - for index, app_info in appl_dict.items(): - if (app_info.get('host_lane_count') == host_lane_count and - get_interface_speed(app_info.get('host_electrical_interface_id')) == speed): - return (index & 0xf) + current_lane_count = 0xffff # big enough value + index = -1 + for i, app_info in appl_dict.items(): + app_speed = get_interface_speed(app_info.get('host_electrical_interface_id')) + app_lane_count = app_info.get('host_lane_count') + if app_speed == speed and app_lane_count <= host_lane_count and app_lane_count < current_lane_count: + index = i + + if index != -1: + return index & 0xf helper_logger.log_notice(f'No application found from {appl_dict} with host_lane_count={host_lane_count} speed={speed}') return None