From 1beaf5311c2a6814962a948d97c8c2456437a9c8 Mon Sep 17 00:00:00 2001 From: Zachary Jones Date: Mon, 7 Oct 2024 10:57:22 -0700 Subject: [PATCH] Collect and report Neoverse V2 core metrics (#309) Summary: Support some CPU core metrics * Add L1i, L1d, L2, and L3 cache misses * Add HP, SP, DP, and total FLOP Differential Revision: D63650169 --- hbt/src/perf_event/ArmEvents.cpp | 20 ++++ hbt/src/perf_event/BuiltinMetrics.cpp | 114 ++++++++++++++++++++- hbt/src/perf_event/tests/ArmEventsTest.cpp | 2 +- 3 files changed, 133 insertions(+), 3 deletions(-) diff --git a/hbt/src/perf_event/ArmEvents.cpp b/hbt/src/perf_event/ArmEvents.cpp index 05874a17..160fc43c 100644 --- a/hbt/src/perf_event/ArmEvents.cpp +++ b/hbt/src/perf_event/ArmEvents.cpp @@ -83,6 +83,26 @@ void addEvents(PmuDeviceManager& pmu_manager) { scanPmu(pmu_manager, PmuType::nvidia_nvlink_c2c0_pmu); scanPmu(pmu_manager, PmuType::nvidia_nvlink_c2c1_pmu); scanPmu(pmu_manager, PmuType::nvidia_pcie_pmu); + + // Add Neoverse v2 PMU events not found in sysfs + pmu_manager.addEvent(std::make_shared( + PmuType::armv8_pmuv3, + "FP_HP_SPEC", + EventDef::Encoding{.code = 0x8014}, + "Floating-point operation speculatively executed, half precision", + "Counts speculatively executed half precision floating point operations.")); + pmu_manager.addEvent(std::make_shared( + PmuType::armv8_pmuv3, + "FP_SP_SPEC", + EventDef::Encoding{.code = 0x8018}, + "Floating-point operation speculatively executed, single precision", + "Counts speculatively executed single precision floating point operations.")); + pmu_manager.addEvent(std::make_shared( + PmuType::armv8_pmuv3, + "FP_DP_SPEC", + EventDef::Encoding{.code = 0x801C}, + "Floating-point operation speculatively executed, double precision", + "Counts speculatively executed double precision floating point operations.")); } } // namespace neoverse_v2 diff --git a/hbt/src/perf_event/BuiltinMetrics.cpp b/hbt/src/perf_event/BuiltinMetrics.cpp index 7dce64c4..e85a04ef 100644 --- 
a/hbt/src/perf_event/BuiltinMetrics.cpp +++ b/hbt/src/perf_event/BuiltinMetrics.cpp @@ -1465,8 +1465,118 @@ std::shared_ptr makeAvailableMetrics() { return metrics; } -void addArmCoreMetrics(std::shared_ptr& /*metrics*/) { - // TODO +void addArmCoreMetrics(std::shared_ptr& metrics) { + metrics->add(std::make_shared( + "HW_CORE_L1_ICACHE_REFILL", + "L1 instruction cache refill", + "Counts any instruction fetch which misses in the cache.", + std::map{ + {CpuArch::NEOVERSE_V2, + EventRefs{EventRef{ + "l1i_cache_refill", + PmuType::armv8_pmuv3, + "l1i_cache_refill", + EventExtraAttr{}, + {}}}}}, + 100'000'000, + System::Permissions{}, + std::vector{})); + + metrics->add(std::make_shared( + "HW_CORE_L1_DCACHE_REFILL", + "L1 data cache refill", + "Counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.", + std::map{ + {CpuArch::NEOVERSE_V2, + EventRefs{EventRef{ + "l1d_cache_refill", + PmuType::armv8_pmuv3, + "l1d_cache_refill", + EventExtraAttr{}, + {}}}}}, + 100'000'000, + System::Permissions{}, + std::vector{})); + + metrics->add(std::make_shared( + "HW_CORE_L2_CACHE_REFILL", + "L2 cache refill", + "Counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted.", + std::map{ + {CpuArch::NEOVERSE_V2, + EventRefs{EventRef{ + "l2d_cache_refill", + PmuType::armv8_pmuv3, + "l2d_cache_refill", + EventExtraAttr{}, + {}}}}}, + 100'000'000, + System::Permissions{}, + std::vector{})); + + metrics->add(std::make_shared( + "HW_CORE_L3_CACHE_REFILL", + "L3 cache refill", + "Counts for any cacheable read transaction returning data from the SCU for which the data source was outside the cluster. 
Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.", + std::map{ + {CpuArch::NEOVERSE_V2, + EventRefs{EventRef{ + "l3d_cache_refill", + PmuType::armv8_pmuv3, + "l3d_cache_refill", + EventExtraAttr{}, + {}}}}}, + 100'000'000, + System::Permissions{}, + std::vector{})); + + metrics->add(std::make_shared( + "HW_CORE_FP_HP_SPEC", + "Floating-point operation speculatively executed, half precision", + "Counts speculatively executed half precision floating point operations.", + std::map{ + {CpuArch::NEOVERSE_V2, + EventRefs{EventRef{ + "FP_HP_SPEC", + PmuType::armv8_pmuv3, + "FP_HP_SPEC", + EventExtraAttr{}, + {}}}}}, + 100'000'000, + System::Permissions{}, + std::vector{})); + + metrics->add(std::make_shared( + "HW_CORE_FP_SP_SPEC", + "Floating-point operation speculatively executed, single precision", + "Counts speculatively executed single precision floating point operations.", + std::map{ + {CpuArch::NEOVERSE_V2, + EventRefs{EventRef{ + "FP_SP_SPEC", + PmuType::armv8_pmuv3, + "FP_SP_SPEC", + EventExtraAttr{}, + {}}}}}, + 100'000'000, + System::Permissions{}, + std::vector{})); + + metrics->add(std::make_shared( + "HW_CORE_FP_DP_SPEC", + "Floating-point operation speculatively executed, double precision", + "Counts speculatively executed double precision floating point operations.", + std::map{ + {CpuArch::NEOVERSE_V2, + EventRefs{EventRef{ + "FP_DP_SPEC", + PmuType::armv8_pmuv3, + "FP_DP_SPEC", + EventExtraAttr{}, + {}}}}}, + 100'000'000, + System::Permissions{}, + std::vector{})); } void addIntelCoreMetrics(std::shared_ptr& metrics) { diff --git a/hbt/src/perf_event/tests/ArmEventsTest.cpp b/hbt/src/perf_event/tests/ArmEventsTest.cpp index ca42d421..7b12fcd1 100644 --- a/hbt/src/perf_event/tests/ArmEventsTest.cpp +++ b/hbt/src/perf_event/tests/ArmEventsTest.cpp @@ -45,7 +45,7 @@ TEST(ArmEventsTest, ScanPmu) { addArmEvents(cpu_info, pmu_manager); auto pmu = 
pmu_manager.getPmuGroups().at(PmuType::armv8_pmuv3); - EXPECT_EQ(pmu.at(kDeviceEnum)->getEventDefs().size(), 1); + EXPECT_EQ(pmu.at(kDeviceEnum)->getEventDefs().size(), 4); auto event1 = pmu_manager.findEventDef("cpu_cycles"); EXPECT_TRUE(event1 != nullptr);