From 2cf510b09f13396e4e1f72e2983120860b68adcd Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 18 Dec 2023 18:48:49 -0800 Subject: [PATCH] =?UTF-8?q?[202305][active-standby]=20Fix=20`show=20mux=20?= =?UTF-8?q?status`=20inconsistency=20introduced=20by=20or=E2=80=A6=20(#228?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …chagent rollback (#225) Approach What is the motivation for this PR? This is to fix the show mux status inconsistency introduced by orchagent roll back. In mux port state machine design, linkmgrd honors hardware state for active-standby ports, and never intends to trigger a secondary toggle when everything is healthy. But after we introduce orchagent rollback, show mux status can return unmatched APP_DB and STATE_DB entries for this, which blocks upgrade. Hence, submitting this PR as a workaround. sign-off: Jing Zhang zhangjing@microsoft.com Work item tracking Microsoft ADO (number only): 26136887 How did you do it? How did you verify/test it? --- azure-pipelines.yml | 12 ++++---- .../LinkManagerStateMachineActiveStandby.cpp | 7 +++-- .../LinkManagerStateMachineActiveStandby.h | 2 ++ test/LinkManagerStateMachineTest.cpp | 28 +++++++++++++++++++ test/MuxManagerTest.cpp | 2 +- 5 files changed, 42 insertions(+), 9 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 469081d1..9beaef8e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -82,8 +82,8 @@ jobs: path: $(Build.ArtifactStagingDirectory)/download artifact: common-lib patterns: | - target/debs/buster/libyang-*.deb - target/debs/buster/libyang_*.deb + target/debs/bullseye/libyang-*.deb + target/debs/bullseye/libyang_*.deb displayName: "Download libyang from common lib" - script: | set -ex @@ -179,8 +179,8 @@ jobs: path: $(Build.ArtifactStagingDirectory)/download artifact: common-lib.arm64 patterns: | - target/debs/buster/libyang-*.deb - target/debs/buster/libyang_*.deb + target/debs/bullseye/libyang-*.deb + target/debs/bullseye/libyang_*.deb displayName: "Download libyang from common lib" - script: | set -ex @@ -271,8 +271,8 @@ jobs: path: $(Build.ArtifactStagingDirectory)/download artifact: common-lib.armhf patterns: | - target/debs/buster/libyang-*.deb - target/debs/buster/libyang_*.deb + target/debs/bullseye/libyang-*.deb + target/debs/bullseye/libyang_*.deb displayName: "Download libyang from common lib" - script: | set -ex diff --git a/src/link_manager/LinkManagerStateMachineActiveStandby.cpp b/src/link_manager/LinkManagerStateMachineActiveStandby.cpp index 3ca96bef..e60b5952 100644 --- a/src/link_manager/LinkManagerStateMachineActiveStandby.cpp +++ b/src/link_manager/LinkManagerStateMachineActiveStandby.cpp @@ -317,6 +317,8 @@ void ActiveStandbyStateMachine::switchMuxState( mWaitActiveUpCount = 0; } + mLastSetMuxState = label; + enterMuxState(nextState, mux_state::MuxState::Label::Wait); mMuxStateMachine.setWaitStateCause(mux_state::WaitState::WaitStateCause::SwssUpdate); mMuxPortPtr->postMetricsEvent(Metrics::SwitchingStart, label); @@ -614,10 +616,11 @@ void ActiveStandbyStateMachine::handleGetMuxStateNotification(mux_state::MuxStat { MUXLOGINFO(boost::format("%s: state db mux state: %s") % mMuxPortConfig.getPortName() % mMuxStateName[label]); - if (mComponentInitState.all() && ms(mCompositeState) != label && + if (mComponentInitState.all() && ms(mCompositeState) != mux_state::MuxState::Wait && ms(mCompositeState) != mux_state::MuxState::Error && - ms(mCompositeState) != mux_state::MuxState::Unknown) { + ms(mCompositeState) != mux_state::MuxState::Unknown && + (ms(mCompositeState) != label || mLastSetMuxState != label)) { // notify swss of mux state change MUXLOGWARNING(boost::format("%s: Switching MUX state from '%s' to '%s' to match linkmgrd/xcvrd state") % mMuxPortConfig.getPortName() % diff --git a/src/link_manager/LinkManagerStateMachineActiveStandby.h b/src/link_manager/LinkManagerStateMachineActiveStandby.h index b8b289fd..41b12579 100644 --- a/src/link_manager/LinkManagerStateMachineActiveStandby.h +++ b/src/link_manager/LinkManagerStateMachineActiveStandby.h @@ -849,6 +849,8 @@ class ActiveStandbyStateMachine: public LinkManagerStateMachineBase, bool mPendingMuxModeChange = false; common::MuxPortConfig::Mode mTargetMuxMode = common::MuxPortConfig::Mode::Auto; + mux_state::MuxState::Label mLastSetMuxState = mux_state::MuxState::Label::Wait; + bool mContinuousLinkProberUnknownEvent = false; // When posting unknown_end event, we want to make sure the previous state is unknown. link_manager::ActiveStandbyStateMachine::SwitchCause mSendSwitchActiveCommandCause; diff --git a/test/LinkManagerStateMachineTest.cpp b/test/LinkManagerStateMachineTest.cpp index 01fa839b..4851165f 100644 --- a/test/LinkManagerStateMachineTest.cpp +++ b/test/LinkManagerStateMachineTest.cpp @@ -1435,5 +1435,33 @@ TEST_F(LinkManagerStateMachineTest, MuxStandbyLinkProberStandbyLinkDownLinkUpRes VALIDATE_STATE(Standby, Standby, Up); } +TEST_F(LinkManagerStateMachineTest, OrchagentRollback) +{ + setMuxStandby(); + + EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0); + + handleMuxConfig("active", 4); + VALIDATE_STATE(Wait, Wait, Up); + EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 1); + + // orchagent rollback, toggle is not successful. + postLinkProberEvent(link_prober::LinkProberState::Standby, 3); + VALIDATE_STATE(Standby, Wait, Up); + handleMuxState("standby", 3); + VALIDATE_STATE(Standby, Standby, Up); + + // extra toggle to make app_db entry in sync with state_db + handleGetMuxState("standby", 2); + EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 2); + VALIDATE_STATE(Standby, Wait, Up); + + handleMuxState("standby", 3); + handleGetMuxState("standby", 2); + VALIDATE_STATE(Standby, Standby, Up); + + // no 3rd toggle + EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 2); +} } /* namespace test */ diff --git a/test/MuxManagerTest.cpp b/test/MuxManagerTest.cpp index 87b8da1b..461b8ca1 100644 --- a/test/MuxManagerTest.cpp +++ b/test/MuxManagerTest.cpp @@ -924,7 +924,7 @@ INSTANTIATE_TEST_CASE_P( MuxState, GetMuxStateTest, ::testing::Values( - std::make_tuple("active", mux_state::MuxState::Label::Active), + std::make_tuple("active", mux_state::MuxState::Label::Wait), std::make_tuple("standby", mux_state::MuxState::Label::Wait), std::make_tuple("unknown", mux_state::MuxState::Label::Wait), std::make_tuple("error", mux_state::MuxState::Label::Wait)