From 45c857c46a333dc97f7a49d74e7807ef30f880cd Mon Sep 17 00:00:00 2001 From: Tony Zhou <66936782+morningboata@users.noreply.github.com> Date: Fri, 3 Nov 2023 11:59:58 +0800 Subject: [PATCH] [thread] refine `RetrieveTelemetryData` to populate metrics with best effort (#2076) Best effort means, for a given telemetry, if its retrieval has error, it is left unpopulated and the process continues to retrieve the remaining telemetries instead of the immediately return. The error code OT_ERRROR_FAILED will be returned if there is one or more error(s) happened in the process. This change helps to retrieve telemetries with best effort when needed. --- src/dbus/server/dbus_thread_object.cpp | 5 +- src/utils/thread_helper.cpp | 158 ++++++++++++++++--------- src/utils/thread_helper.hpp | 8 +- 3 files changed, 112 insertions(+), 59 deletions(-) diff --git a/src/dbus/server/dbus_thread_object.cpp b/src/dbus/server/dbus_thread_object.cpp index b183c650320..e178e24531c 100644 --- a/src/dbus/server/dbus_thread_object.cpp +++ b/src/dbus/server/dbus_thread_object.cpp @@ -1412,7 +1412,10 @@ otError DBusThreadObject::GetTelemetryDataHandler(DBusMessageIter &aIter) threadnetwork::TelemetryData telemetryData; auto threadHelper = mNcp->GetThreadHelper(); - VerifyOrExit(threadHelper->RetrieveTelemetryData(mPublisher, telemetryData) == OT_ERROR_NONE); + if (threadHelper->RetrieveTelemetryData(mPublisher, telemetryData) != OT_ERROR_NONE) + { + otbrLogWarning("Some metrics were not populated in RetrieveTelemetryData"); + } { const std::string telemetryDataBytes = telemetryData.SerializeAsString(); diff --git a/src/utils/thread_helper.cpp b/src/utils/thread_helper.cpp index 9a7b2d124dc..b51c5509f73 100644 --- a/src/utils/thread_helper.cpp +++ b/src/utils/thread_helper.cpp @@ -917,8 +917,14 @@ otError ThreadHelper::RetrieveTelemetryData(Mdns::Publisher *aPublisher, threadn { int8_t radioTxPower; - SuccessOrExit(error = otPlatRadioGetTransmitPower(mInstance, &radioTxPower)); - wpanStats->set_radio_tx_power(radioTxPower); + if (otPlatRadioGetTransmitPower(mInstance, &radioTxPower) == OT_ERROR_NONE) + { + wpanStats->set_radio_tx_power(radioTxPower); + } + else + { + error = OT_ERROR_FAILED; + } } { @@ -972,10 +978,18 @@ otError ThreadHelper::RetrieveTelemetryData(Mdns::Publisher *aPublisher, threadn wpanTopoFull->set_rloc16(rloc16); - otRouterInfo info; + { + otRouterInfo info; - VerifyOrExit(otThreadGetRouterInfo(mInstance, rloc16, &info) == OT_ERROR_NONE, error = OT_ERROR_INVALID_STATE); - wpanTopoFull->set_router_id(info.mRouterId); + if (otThreadGetRouterInfo(mInstance, rloc16, &info) == OT_ERROR_NONE) + { + wpanTopoFull->set_router_id(info.mRouterId); + } + else + { + error = OT_ERROR_FAILED; + } + } otNeighborInfoIterator iter = OT_NEIGHBOR_INFO_ITERATOR_INIT; otNeighborInfo neighborInfo; @@ -997,13 +1011,21 @@ otError ThreadHelper::RetrieveTelemetryData(Mdns::Publisher *aPublisher, threadn } wpanTopoFull->set_child_table_size(childTable.size()); - struct otLeaderData leaderData; + { + struct otLeaderData leaderData; - SuccessOrExit(error = otThreadGetLeaderData(mInstance, &leaderData)); - wpanTopoFull->set_leader_router_id(leaderData.mLeaderRouterId); - wpanTopoFull->set_leader_weight(leaderData.mWeighting); - wpanTopoFull->set_network_data_version(leaderData.mDataVersion); - wpanTopoFull->set_stable_network_data_version(leaderData.mStableDataVersion); + if (otThreadGetLeaderData(mInstance, &leaderData) == OT_ERROR_NONE) + { + wpanTopoFull->set_leader_router_id(leaderData.mLeaderRouterId); + wpanTopoFull->set_leader_weight(leaderData.mWeighting); + wpanTopoFull->set_network_data_version(leaderData.mDataVersion); + wpanTopoFull->set_stable_network_data_version(leaderData.mStableDataVersion); + } + else + { + error = OT_ERROR_FAILED; + } + } uint8_t weight = otThreadGetLocalLeaderWeight(mInstance); @@ -1019,9 +1041,15 @@ otError ThreadHelper::RetrieveTelemetryData(Mdns::Publisher *aPublisher, threadn uint8_t len = sizeof(data); std::vector networkData; - SuccessOrExit(error = otNetDataGet(mInstance, /*stable=*/false, data, &len)); - networkData = std::vector(&data[0], &data[len]); - wpanTopoFull->set_network_data(std::string(networkData.begin(), networkData.end())); + if (otNetDataGet(mInstance, /*stable=*/false, data, &len) == OT_ERROR_NONE) + { + networkData = std::vector(&data[0], &data[len]); + wpanTopoFull->set_network_data(std::string(networkData.begin(), networkData.end())); + } + else + { + error = OT_ERROR_FAILED; + } } { @@ -1029,9 +1057,15 @@ otError ThreadHelper::RetrieveTelemetryData(Mdns::Publisher *aPublisher, threadn uint8_t len = sizeof(data); std::vector networkData; - SuccessOrExit(error = otNetDataGet(mInstance, /*stable=*/true, data, &len)); - networkData = std::vector(&data[0], &data[len]); - wpanTopoFull->set_stable_network_data(std::string(networkData.begin(), networkData.end())); + if (otNetDataGet(mInstance, /*stable=*/true, data, &len) == OT_ERROR_NONE) + { + networkData = std::vector(&data[0], &data[len]); + wpanTopoFull->set_stable_network_data(std::string(networkData.begin(), networkData.end())); + } + else + { + error = OT_ERROR_FAILED; + } } int8_t rssi = otPlatRadioGetRssi(mInstance); @@ -1331,31 +1365,38 @@ otError ThreadHelper::RetrieveTelemetryData(Mdns::Publisher *aPublisher, threadn // Start of WpanRcp section. { - auto wpanRcp = telemetryData.mutable_wpan_rcp(); - auto rcpStabilityStatistics = wpanRcp->mutable_rcp_stability_statistics(); - otRadioSpinelMetrics otRadioSpinelMetrics = *otSysGetRadioSpinelMetrics(); + auto wpanRcp = telemetryData.mutable_wpan_rcp(); + const otRadioSpinelMetrics *otRadioSpinelMetrics = otSysGetRadioSpinelMetrics(); + auto rcpStabilityStatistics = wpanRcp->mutable_rcp_stability_statistics(); - rcpStabilityStatistics->set_rcp_timeout_count(otRadioSpinelMetrics.mRcpTimeoutCount); - rcpStabilityStatistics->set_rcp_reset_count(otRadioSpinelMetrics.mRcpUnexpectedResetCount); - rcpStabilityStatistics->set_rcp_restoration_count(otRadioSpinelMetrics.mRcpRestorationCount); - rcpStabilityStatistics->set_spinel_parse_error_count(otRadioSpinelMetrics.mSpinelParseErrorCount); + if (otRadioSpinelMetrics != nullptr) + { + rcpStabilityStatistics->set_rcp_timeout_count(otRadioSpinelMetrics->mRcpTimeoutCount); + rcpStabilityStatistics->set_rcp_reset_count(otRadioSpinelMetrics->mRcpUnexpectedResetCount); + rcpStabilityStatistics->set_rcp_restoration_count(otRadioSpinelMetrics->mRcpRestorationCount); + rcpStabilityStatistics->set_spinel_parse_error_count(otRadioSpinelMetrics->mSpinelParseErrorCount); + } // TODO: provide rcp_firmware_update_count info. rcpStabilityStatistics->set_thread_stack_uptime(otInstanceGetUptime(mInstance)); - auto rcpInterfaceStatistics = wpanRcp->mutable_rcp_interface_statistics(); - otRcpInterfaceMetrics otRcpInterfaceMetrics = *otSysGetRcpInterfaceMetrics(); - - rcpInterfaceStatistics->set_rcp_interface_type(otRcpInterfaceMetrics.mRcpInterfaceType); - rcpInterfaceStatistics->set_transferred_frames_count(otRcpInterfaceMetrics.mTransferredFrameCount); - rcpInterfaceStatistics->set_transferred_valid_frames_count( - otRcpInterfaceMetrics.mTransferredValidFrameCount); - rcpInterfaceStatistics->set_transferred_garbage_frames_count( - otRcpInterfaceMetrics.mTransferredGarbageFrameCount); - rcpInterfaceStatistics->set_rx_frames_count(otRcpInterfaceMetrics.mRxFrameCount); - rcpInterfaceStatistics->set_rx_bytes_count(otRcpInterfaceMetrics.mRxFrameByteCount); - rcpInterfaceStatistics->set_tx_frames_count(otRcpInterfaceMetrics.mTxFrameCount); - rcpInterfaceStatistics->set_tx_bytes_count(otRcpInterfaceMetrics.mTxFrameByteCount); + const otRcpInterfaceMetrics *otRcpInterfaceMetrics = otSysGetRcpInterfaceMetrics(); + + if (otRcpInterfaceMetrics != nullptr) + { + auto rcpInterfaceStatistics = wpanRcp->mutable_rcp_interface_statistics(); + + rcpInterfaceStatistics->set_rcp_interface_type(otRcpInterfaceMetrics->mRcpInterfaceType); + rcpInterfaceStatistics->set_transferred_frames_count(otRcpInterfaceMetrics->mTransferredFrameCount); + rcpInterfaceStatistics->set_transferred_valid_frames_count( + otRcpInterfaceMetrics->mTransferredValidFrameCount); + rcpInterfaceStatistics->set_transferred_garbage_frames_count( + otRcpInterfaceMetrics->mTransferredGarbageFrameCount); + rcpInterfaceStatistics->set_rx_frames_count(otRcpInterfaceMetrics->mRxFrameCount); + rcpInterfaceStatistics->set_rx_bytes_count(otRcpInterfaceMetrics->mRxFrameByteCount); + rcpInterfaceStatistics->set_tx_frames_count(otRcpInterfaceMetrics->mTxFrameCount); + rcpInterfaceStatistics->set_tx_bytes_count(otRcpInterfaceMetrics->mTxFrameByteCount); + } } // End of WpanRcp section. @@ -1364,24 +1405,30 @@ otError ThreadHelper::RetrieveTelemetryData(Mdns::Publisher *aPublisher, threadn auto coexMetrics = telemetryData.mutable_coex_metrics(); otRadioCoexMetrics otRadioCoexMetrics; - SuccessOrExit(error = otPlatRadioGetCoexMetrics(mInstance, &otRadioCoexMetrics)); - coexMetrics->set_count_tx_request(otRadioCoexMetrics.mNumTxRequest); - coexMetrics->set_count_tx_grant_immediate(otRadioCoexMetrics.mNumTxGrantImmediate); - coexMetrics->set_count_tx_grant_wait(otRadioCoexMetrics.mNumTxGrantWait); - coexMetrics->set_count_tx_grant_wait_activated(otRadioCoexMetrics.mNumTxGrantWaitActivated); - coexMetrics->set_count_tx_grant_wait_timeout(otRadioCoexMetrics.mNumTxGrantWaitTimeout); - coexMetrics->set_count_tx_grant_deactivated_during_request( - otRadioCoexMetrics.mNumTxGrantDeactivatedDuringRequest); - coexMetrics->set_tx_average_request_to_grant_time_us(otRadioCoexMetrics.mAvgTxRequestToGrantTime); - coexMetrics->set_count_rx_request(otRadioCoexMetrics.mNumRxRequest); - coexMetrics->set_count_rx_grant_immediate(otRadioCoexMetrics.mNumRxGrantImmediate); - coexMetrics->set_count_rx_grant_wait(otRadioCoexMetrics.mNumRxGrantWait); - coexMetrics->set_count_rx_grant_wait_activated(otRadioCoexMetrics.mNumRxGrantWaitActivated); - coexMetrics->set_count_rx_grant_wait_timeout(otRadioCoexMetrics.mNumRxGrantWaitTimeout); - coexMetrics->set_count_rx_grant_deactivated_during_request( - otRadioCoexMetrics.mNumRxGrantDeactivatedDuringRequest); - coexMetrics->set_count_rx_grant_none(otRadioCoexMetrics.mNumRxGrantNone); - coexMetrics->set_rx_average_request_to_grant_time_us(otRadioCoexMetrics.mAvgRxRequestToGrantTime); + if (otPlatRadioGetCoexMetrics(mInstance, &otRadioCoexMetrics) == OT_ERROR_NONE) + { + coexMetrics->set_count_tx_request(otRadioCoexMetrics.mNumTxRequest); + coexMetrics->set_count_tx_grant_immediate(otRadioCoexMetrics.mNumTxGrantImmediate); + coexMetrics->set_count_tx_grant_wait(otRadioCoexMetrics.mNumTxGrantWait); + coexMetrics->set_count_tx_grant_wait_activated(otRadioCoexMetrics.mNumTxGrantWaitActivated); + coexMetrics->set_count_tx_grant_wait_timeout(otRadioCoexMetrics.mNumTxGrantWaitTimeout); + coexMetrics->set_count_tx_grant_deactivated_during_request( + otRadioCoexMetrics.mNumTxGrantDeactivatedDuringRequest); + coexMetrics->set_tx_average_request_to_grant_time_us(otRadioCoexMetrics.mAvgTxRequestToGrantTime); + coexMetrics->set_count_rx_request(otRadioCoexMetrics.mNumRxRequest); + coexMetrics->set_count_rx_grant_immediate(otRadioCoexMetrics.mNumRxGrantImmediate); + coexMetrics->set_count_rx_grant_wait(otRadioCoexMetrics.mNumRxGrantWait); + coexMetrics->set_count_rx_grant_wait_activated(otRadioCoexMetrics.mNumRxGrantWaitActivated); + coexMetrics->set_count_rx_grant_wait_timeout(otRadioCoexMetrics.mNumRxGrantWaitTimeout); + coexMetrics->set_count_rx_grant_deactivated_during_request( + otRadioCoexMetrics.mNumRxGrantDeactivatedDuringRequest); + coexMetrics->set_count_rx_grant_none(otRadioCoexMetrics.mNumRxGrantNone); + coexMetrics->set_rx_average_request_to_grant_time_us(otRadioCoexMetrics.mAvgRxRequestToGrantTime); + } + else + { + error = OT_ERROR_FAILED; + } } // End of CoexMetrics section. } @@ -1408,7 +1455,6 @@ otError ThreadHelper::RetrieveTelemetryData(Mdns::Publisher *aPublisher, threadn } #endif // OTBR_ENABLE_LINK_METRICS_TELEMETRY -exit: return error; } #endif // OTBR_ENABLE_TELEMETRY_DATA_API diff --git a/src/utils/thread_helper.hpp b/src/utils/thread_helper.hpp index 10d38770a8b..b566e6632e8 100644 --- a/src/utils/thread_helper.hpp +++ b/src/utils/thread_helper.hpp @@ -258,12 +258,16 @@ class ThreadHelper #if OTBR_ENABLE_TELEMETRY_DATA_API /** - * This method populates the telemetry data and returns the error code if error happens. + * This method populates the telemetry data with best effort. The best effort means, for a given + * telemetry, if its retrieval has error, it is left unpopulated and the process continues to + * retrieve the remaining telemetries instead of the immediately return. The error code + * OT_ERRROR_FAILED will be returned if there is one or more error(s) happened in the process. * * @param[in] aPublisher The Mdns::Publisher to provide MDNS telemetry if it is not `nullptr`. * @param[in] telemetryData The telemetry data to be populated. * - * @returns The error code if error happens during the population of the telemetry data. + * @retval OTBR_ERROR_NONE There is no error happened in the process. + * @retval OT_ERRROR_FAILED There is one or more error(s) happened in the process. */ otError RetrieveTelemetryData(Mdns::Publisher *aPublisher, threadnetwork::TelemetryData &telemetryData); #endif // OTBR_ENABLE_TELEMETRY_DATA_API