Skip to content

Commit

Permalink
Collect and report Neoverse V2 core metrics (#309)
Browse files Browse the repository at this point in the history
Summary:

Support some CPU core metrics
* Add L1i, L1d, L2, and L3 cache misses
* Add HP, SP, DP, and total FLOP

Differential Revision: D63650169
  • Loading branch information
bigzachattack authored and facebook-github-bot committed Oct 8, 2024
1 parent 82269df commit be6d5f3
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 3 deletions.
20 changes: 20 additions & 0 deletions hbt/src/perf_event/ArmEvents.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,26 @@ void addEvents(PmuDeviceManager& pmu_manager) {
scanPmu(pmu_manager, PmuType::nvidia_nvlink_c2c0_pmu);
scanPmu(pmu_manager, PmuType::nvidia_nvlink_c2c1_pmu);
scanPmu(pmu_manager, PmuType::nvidia_pcie_pmu);

// Add Neoverse v2 PMU events not found in sysfs
pmu_manager.addEvent(std::make_shared<EventDef>(
PmuType::armv8_pmuv3,
"FP_HP_SPEC",
EventDef::Encoding{.code = 0x8014},
"Floating-point operation speculatively executed, half precision",
"Counts speculatively executed half precision floating point operations."));
pmu_manager.addEvent(std::make_shared<EventDef>(
PmuType::armv8_pmuv3,
"FP_SP_SPEC",
EventDef::Encoding{.code = 0x8018},
"Floating-point operation speculatively executed, single precision",
"Counts speculatively executed single precision floating point operations."));
pmu_manager.addEvent(std::make_shared<EventDef>(
PmuType::armv8_pmuv3,
"FP_DP_SPEC",
EventDef::Encoding{.code = 0x801C},
"Floating-point operation speculatively executed, double precision",
"Counts speculatively executed double precision floating point operations."));
}

} // namespace neoverse_v2
Expand Down
114 changes: 112 additions & 2 deletions hbt/src/perf_event/BuiltinMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1465,8 +1465,118 @@ std::shared_ptr<Metrics> makeAvailableMetrics() {
return metrics;
}

void addArmCoreMetrics(std::shared_ptr<Metrics>& /*metrics*/) {
// TODO
/// Registers the Arm core metrics into `metrics`.
///
/// Adds seven metrics, each mapped for CpuArch::NEOVERSE_V2 to a single
/// PmuType::armv8_pmuv3 event:
///   - L1 instruction, L1 data, L2 and L3 cache refills
///   - speculatively executed half, single and double precision
///     floating-point operations
///
/// @param metrics Collection that receives the new metric descriptions.
///                It is dereferenced unconditionally, so it must not be null.
void addArmCoreMetrics(std::shared_ptr<Metrics>& metrics) {
  // Every metric below differs only in its id, its two descriptions and the
  // name of the backing event, so the shared MetricDesc construction lives in
  // one place instead of being repeated per metric.
  const auto addNeoverseV2Metric = [&metrics](
                                       const char* metric_id,
                                       const char* brief_desc,
                                       const char* full_desc,
                                       const char* event_name) {
    metrics->add(std::make_shared<MetricDesc>(
        metric_id,
        brief_desc,
        full_desc,
        std::map<TOptCpuArch, EventRefs>{
            {CpuArch::NEOVERSE_V2,
             EventRefs{EventRef{
                 // The same string is used for both string fields of the
                 // EventRef, exactly as every original call site did.
                 event_name,
                 PmuType::armv8_pmuv3,
                 event_name,
                 EventExtraAttr{},
                 {}}}}},
        // NOTE(review): presumably the default sampling period shared by all
        // of these metrics -- confirm against MetricDesc's constructor.
        100'000'000,
        System::Permissions{},
        std::vector<std::string>{}));
  };

  // Cache refill (miss) metrics.
  addNeoverseV2Metric(
      "HW_CORE_L1_ICACHE_REFILL",
      "L1 instruction cache refill",
      "Counts any instruction fetch which misses in the cache.",
      "l1i_cache_refill");

  addNeoverseV2Metric(
      "HW_CORE_L1_DCACHE_REFILL",
      "L1 data cache refill",
      "Counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.",
      "l1d_cache_refill");

  addNeoverseV2Metric(
      "HW_CORE_L2_CACHE_REFILL",
      "L2 cache refill",
      "Counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted.",
      "l2d_cache_refill");

  addNeoverseV2Metric(
      "HW_CORE_L3_CACHE_REFILL",
      "L3 cache refill",
      "Counts for any cacheable read transaction returning data from the SCU for which the data source was outside the cluster. Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.",
      "l3d_cache_refill");

  // Floating-point operation metrics. Unlike the cache events above, these
  // event names are upper-case.
  addNeoverseV2Metric(
      "HW_CORE_FP_HP_SPEC",
      "Floating-point operation speculatively executed, half precision",
      "Counts speculatively executed half precision floating point operations.",
      "FP_HP_SPEC");

  addNeoverseV2Metric(
      "HW_CORE_FP_SP_SPEC",
      "Floating-point operation speculatively executed, single precision",
      "Counts speculatively executed single precision floating point operations.",
      "FP_SP_SPEC");

  addNeoverseV2Metric(
      "HW_CORE_FP_DP_SPEC",
      "Floating-point operation speculatively executed, double precision",
      "Counts speculatively executed double precision floating point operations.",
      "FP_DP_SPEC");
}

void addIntelCoreMetrics(std::shared_ptr<Metrics>& metrics) {
Expand Down
2 changes: 1 addition & 1 deletion hbt/src/perf_event/tests/ArmEventsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ TEST(ArmEventsTest, ScanPmu) {
addArmEvents(cpu_info, pmu_manager);

auto pmu = pmu_manager.getPmuGroups().at(PmuType::armv8_pmuv3);
EXPECT_EQ(pmu.at(kDeviceEnum)->getEventDefs().size(), 1);
EXPECT_EQ(pmu.at(kDeviceEnum)->getEventDefs().size(), 4);

auto event1 = pmu_manager.findEventDef("cpu_cycles");
EXPECT_TRUE(event1 != nullptr);
Expand Down

0 comments on commit be6d5f3

Please sign in to comment.