From d2e7e879efe6ead0e4730dc7f3becd6ed8d2ff14 Mon Sep 17 00:00:00 2001 From: Patrick MacArthur Date: Thu, 12 Dec 2024 17:01:33 -0500 Subject: [PATCH] thermalctld: Ignore exception when deleting chassisdb entry fails After its tests complete, sonic-mgmt runs a config consistency check and, if that check fails, it runs recover_chassis(). That function runs config_reload on the supervisor and all linecards in parallel. This means that the supervisor networking can be in the process of restarting when thermalctld exits and goes through the cleanup process of deleting entries from chassisdb. In that case the chassisdb delete can fail with an exception; catch it and drop the chassisdb handle so that the rest of the cleanup can continue. --- sonic-thermalctld/scripts/thermalctld | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index ce4972339..2739964ac 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -559,8 +559,14 @@ class TemperatureUpdater(logger.Logger): table_keys = self.table.getKeys() for tk in table_keys: self.table._del(tk) - if self.is_chassis_upd_required and self.chassis_table is not None: - self.chassis_table._del(tk) + try: + if self.is_chassis_upd_required and self.chassis_table is not None: + self.chassis_table._del(tk) + except Exception as e: + # On a chassis system it is possible we may lose connection + # to the supervisor and chassisdb. If this happens then we + # should simply remove our handle to chassisdb. + self.chassis_table = None if self.phy_entity_table: phy_entity_keys = self.phy_entity_table.getKeys() for pek in phy_entity_keys: