From df29a50d1af8b8a5cc3f4ce58892d7a1620b4a1b Mon Sep 17 00:00:00 2001
From: Treece Burgess
Date: Mon, 23 Sep 2024 08:46:56 -0700
Subject: [PATCH] Update libpfm4

Current with commit c89a379175c00a20bbc660ad9b444e8ecc16cd28

Author: Stephane Eranian
Date: Sat Sep 21 21:11:10 2024 -0700

    add ARM Cortex A76 core PMU support

    Adds ARM Cortex A76 core PMU support.
    Based on: https://github.com/ARM-software/data/blob/master/pmu/cortex-a76.json

    Signed-off-by: Stephane Eranian

commit 6195cbb4686dbeeee7a237ab8a133ef6c2209476
Author: Stephane Eranian
Date: Sat Sep 21 20:50:17 2024 -0700

    fix detection of ARM Cortex A55

    Was using code not yet released.
    Bug introduced by: c40b6eb0640a ("add ARM Cortex A55 core PMU support")

    Signed-off-by: Stephane Eranian

commit 1e8734203f74f0ec6974a860c0b18cb95cce1371
Author: Sachin Monga
Date: Thu Aug 15 12:54:51 2024 -0400

    Update IBM Power10 core PMU support

    Added additional events for IBM Power 10 core PMU.

    Signed-off-by: Sachin Monga

commit c40b6eb0640a649b2c3fdf472c1d6499a8e819c0
Author: Stephane Eranian
Date: Sat Sep 21 19:47:08 2024 -0700

    add ARM Cortex A55 core PMU support

    Add support for ARM Cortex A55 core PMU events.
    Based on: https://github.com/ARM-software/data/blob/master/pmu/cortex-a55.json

    Signed-off-by: Stephane Eranian

commit f91ea4f1a76fdd5886fd9b6fe8eaa6f585a5bac4
Author: Stephane Eranian
Date: Thu Sep 19 21:23:59 2024 -0700

    fix ARM thunderX2 and HiSilicon support to compile on non Linux

    The perf_events encoding routines were mixed with generic encodings and
    event tables. This patch cleans all of this up to separate generic code
    from Linux-specific code.

commit 892c5fc89ed5fc0e4f0b4a4a290fac57613f23da
Author: Stephane Eranian
Date: Thu Sep 19 00:38:14 2024 -0700

    Add ARM Neoverse V3 core PMU support

    Based on: https://github.com/ARM-software/data/blob/master/pmu/neoverse-v3.json

    Signed-off-by: Stephane Eranian

Note: The commits above are grouped below, from top to bottom, with notes on what
could and could not be tested.

- Commit ID with last 6 of 16cd28: unable to test due to no access to a machine
  with an ARM Cortex A76.
- Commit ID with last 6 of 209476: unable to test the update due to no access to
  a machine with an ARM Cortex A55.
- Commit ID with last 6 of ce1371: unable to test as it stands; requesting access
  at Oregon to a machine with Power 10 and will update once testing is completed.
- Commit ID with last 6 of e819c0: unable to test the update due to no access to
  a machine with an ARM Cortex A55.
- Commit IDs with last 6 of a5bac4 and 3f23da: unable to test the updates due to
  no access to a machine with ARM Neoverse V3, ARM ThunderX2, or HiSilicon
  hardware.
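Once this update is in, the new PMU models are reachable through the regular
libpfm4 API. The snippet below is a minimal sketch (not part of the patch) that
checks whether one of the newly added PMUs is detected on the host and encodes
one of its events with the user-level (u) modifier documented in the new man
pages. It assumes a Linux host with perf_events support; the choice of the
Cortex A76 PMU (arm_ac76) and its CPU_CYCLES event is purely illustrative.

    #include <stdio.h>
    #include <string.h>
    #include <perfmon/pfmlib.h>
    #include <perfmon/pfmlib_perf_event.h>

    int main(void)
    {
        pfm_pmu_info_t pinfo;
        pfm_perf_encode_arg_t arg;
        struct perf_event_attr attr;
        int ret;

        ret = pfm_initialize();
        if (ret != PFM_SUCCESS) {
            fprintf(stderr, "pfm_initialize: %s\n", pfm_strerror(ret));
            return 1;
        }

        /* PFM_PMU_ARM_CORTEX_A76 is one of the enum values added to pfmlib.h
         * by this update; is_present tells whether the host has that PMU. */
        memset(&pinfo, 0, sizeof(pinfo));
        pinfo.size = sizeof(pinfo);
        ret = pfm_get_pmu_info(PFM_PMU_ARM_CORTEX_A76, &pinfo);
        if (ret == PFM_SUCCESS)
            printf("%s (%s): %s on this host\n", pinfo.name, pinfo.desc,
                   pinfo.is_present ? "present" : "not present");

        /* Encode a core event, restricted to user level via the :u modifier. */
        memset(&arg, 0, sizeof(arg));
        memset(&attr, 0, sizeof(attr));
        arg.attr = &attr;
        arg.size = sizeof(arg);
        ret = pfm_get_os_event_encoding("arm_ac76::CPU_CYCLES:u", PFM_PLM3,
                                        PFM_OS_PERF_EVENT, &arg);
        if (ret == PFM_SUCCESS)
            printf("attr.type=%u attr.config=0x%llx\n", attr.type,
                   (unsigned long long)attr.config);
        else
            fprintf(stderr, "encoding failed: %s\n", pfm_strerror(ret));

        pfm_terminate();
        return 0;
    }

Built against the updated library with something like "cc check_a76.c -lpfm"
(the file name is hypothetical), the same pattern applies to the other PMUs
added here (arm_ac55 and arm_v3) by swapping the PMU enum and event string.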
--- src/libpfm4/README | 3 +- src/libpfm4/docs/Makefile | 16 +- src/libpfm4/docs/man3/libpfm_arm_ac55.3 | 36 + src/libpfm4/docs/man3/libpfm_arm_ac76.3 | 36 + .../docs/man3/libpfm_arm_neoverse_v3.3 | 36 + src/libpfm4/include/perfmon/pfmlib.h | 3 + src/libpfm4/lib/Makefile | 22 +- .../lib/events/arm_cortex_a55_events.h | 591 +++++++++ .../lib/events/arm_cortex_a76_events.h | 564 ++++++++ .../events/arm_hisilicon_kunpeng_unc_events.h | 6 - .../lib/events/arm_marvell_tx2_unc_events.h | 5 - .../lib/events/arm_neoverse_v3_events.h | 1170 +++++++++++++++++ src/libpfm4/lib/events/power10_events.h | 109 +- src/libpfm4/lib/pfmlib_arm_armv8.c | 307 ++--- .../lib/pfmlib_arm_armv8_kunpeng_unc.c | 223 ++++ ...pfmlib_arm_armv8_kunpeng_unc_perf_event.c} | 42 +- .../lib/pfmlib_arm_armv8_thunderx2_unc.c | 156 +++ ...mlib_arm_armv8_thunderx2_unc_perf_event.c} | 46 +- src/libpfm4/lib/pfmlib_arm_armv8_unc.c | 308 +++++ src/libpfm4/lib/pfmlib_arm_armv8_unc_priv.h | 23 + src/libpfm4/lib/pfmlib_arm_armv9.c | 41 + src/libpfm4/lib/pfmlib_common.c | 6 + src/libpfm4/lib/pfmlib_priv.h | 3 + src/libpfm4/tests/validate_arm.c | 35 + src/libpfm4/tests/validate_arm64.c | 23 +- 25 files changed, 3481 insertions(+), 329 deletions(-) create mode 100644 src/libpfm4/docs/man3/libpfm_arm_ac55.3 create mode 100644 src/libpfm4/docs/man3/libpfm_arm_ac76.3 create mode 100644 src/libpfm4/docs/man3/libpfm_arm_neoverse_v3.3 create mode 100644 src/libpfm4/lib/events/arm_cortex_a55_events.h create mode 100644 src/libpfm4/lib/events/arm_cortex_a76_events.h create mode 100644 src/libpfm4/lib/events/arm_neoverse_v3_events.h create mode 100644 src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc.c rename src/libpfm4/lib/{pfmlib_kunpeng_unc_perf_event.c => pfmlib_arm_armv8_kunpeng_unc_perf_event.c} (72%) create mode 100644 src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc.c rename src/libpfm4/lib/{pfmlib_tx2_unc_perf_event.c => pfmlib_arm_armv8_thunderx2_unc_perf_event.c} (57%) create mode 100644 src/libpfm4/lib/pfmlib_arm_armv8_unc.c create mode 100644 src/libpfm4/lib/pfmlib_arm_armv8_unc_priv.h diff --git a/src/libpfm4/README b/src/libpfm4/README index d761d21f8..58925a5f1 100644 --- a/src/libpfm4/README +++ b/src/libpfm4/README @@ -78,12 +78,13 @@ The library supports many PMUs. 
The current version can handle: ARMV7 Cortex A8 ARMV7 Cortex A9 ARMV7 Cortex A15 - ARMV8 Cortex A57, A53, A72 + ARMV8 Cortex A57, A53, A55, A72, A76 Applied Micro X-Gene Qualcomm Krait Fujitsu A64FX Arm Neoverse V1 Arm Neoverse V2 + Arm Neoverse V3 Huawei HiSilicon Kunpeng 920 - For SPARC diff --git a/src/libpfm4/docs/Makefile b/src/libpfm4/docs/Makefile index badf544fd..498ab3816 100644 --- a/src/libpfm4/docs/Makefile +++ b/src/libpfm4/docs/Makefile @@ -146,27 +146,33 @@ ARCH_MAN += libpfm_arm_xgene.3 \ libpfm_arm_ac7.3 \ libpfm_arm_ac57.3 \ libpfm_arm_ac53.3 \ + libpfm_arm_ac55.3 \ libpfm_arm_ac72.3 \ + libpfm_arm_ac76.3 \ libpfm_arm_ac15.3 \ libpfm_arm_ac8.3 \ libpfm_arm_ac9.3 \ libpfm_arm_qcom_krait.3 \ libpfm_arm_neoverse_n1.3 \ libpfm_arm_neoverse_n2.3 \ - libpfm_arm_neoverse_v1.3 \ - libpfm_arm_neoverse_v2.3 + libpfm_arm_neoverse_v1.3 \ + libpfm_arm_neoverse_v2.3 \ + libpfm_arm_neoverse_v3.3 endif ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y) ARCH_MAN += libpfm_arm_xgene.3 \ libpfm_arm_ac57.3 \ libpfm_arm_ac53.3 \ + libpfm_arm_ac55.3 \ libpfm_arm_ac72.3 \ + libpfm_arm_ac76.3 \ libpfm_arm_a64fx.3 \ - libpfm_arm_neoverse_n1.3 \ + libpfm_arm_neoverse_n1.3 \ libpfm_arm_neoverse_n2.3 \ - libpfm_arm_neoverse_v1.3 \ - libpfm_arm_neoverse_v2.3 + libpfm_arm_neoverse_v1.3 \ + libpfm_arm_neoverse_v2.3 \ + libpfm_arm_neoverse_v3.3 endif ifeq ($(CONFIG_PFMLIB_ARCH_MIPS),y) diff --git a/src/libpfm4/docs/man3/libpfm_arm_ac55.3 b/src/libpfm4/docs/man3/libpfm_arm_ac55.3 new file mode 100644 index 000000000..1e64d0c7f --- /dev/null +++ b/src/libpfm4/docs/man3/libpfm_arm_ac55.3 @@ -0,0 +1,36 @@ +.TH LIBPFM 3 "September, 2024" "" "Linux Programmer's Manual" +.SH NAME +libpfm_arm_ac55 - support for ARM Cortex A55 PMU +.SH SYNOPSIS +.nf +.B #include +.sp +.B PMU name: arm_ac55 +.B PMU desc: ARM Cortex A55 +.sp +.SH DESCRIPTION +The library supports the ARM Cortex A55 core PMU. + +This PMU supports 6 counters and privilege levels filtering. +It can operate in both 32 and 64 bit modes. + +.SH MODIFIERS +The following modifiers are supported on ARM Cortex A55: +.TP +.B u +Measure at the user level. This corresponds to \fBPFM_PLM3\fR. +This is a boolean modifier. +.TP +.B k +Measure at the kernel level. This corresponds to \fBPFM_PLM0\fR. +This is a boolean modifier. +.TP +.B hv +Measure at the hypervisor level. This corresponds to \fBPFM_PLMH\fR. +This is a boolean modifier. + +.SH AUTHORS +.nf +Stephane Eranian +.if +.PP diff --git a/src/libpfm4/docs/man3/libpfm_arm_ac76.3 b/src/libpfm4/docs/man3/libpfm_arm_ac76.3 new file mode 100644 index 000000000..24ce29f12 --- /dev/null +++ b/src/libpfm4/docs/man3/libpfm_arm_ac76.3 @@ -0,0 +1,36 @@ +.TH LIBPFM 3 "September, 2024" "" "Linux Programmer's Manual" +.SH NAME +libpfm_arm_ac76 - support for Arm Cortex A76 PMU +.SH SYNOPSIS +.nf +.B #include +.sp +.B PMU name: arm_ac76 +.B PMU desc: ARM Cortex A76 +.sp +.SH DESCRIPTION +The library supports the ARM Cortex A76 core PMU. + +This PMU supports 6 counters and privilege levels filtering. +It can operate in both 32 and 64 bit modes. + +.SH MODIFIERS +The following modifiers are supported on ARM Cortex A76: +.TP +.B u +Measure at the user level. This corresponds to \fBPFM_PLM3\fR. +This is a boolean modifier. +.TP +.B k +Measure at the kernel level. This corresponds to \fBPFM_PLM0\fR. +This is a boolean modifier. +.TP +.B hv +Measure at the hypervisor level. This corresponds to \fBPFM_PLMH\fR. +This is a boolean modifier. 
+ +.SH AUTHORS +.nf +Stephane Eranian +.if +.PP diff --git a/src/libpfm4/docs/man3/libpfm_arm_neoverse_v3.3 b/src/libpfm4/docs/man3/libpfm_arm_neoverse_v3.3 new file mode 100644 index 000000000..d3da07f67 --- /dev/null +++ b/src/libpfm4/docs/man3/libpfm_arm_neoverse_v3.3 @@ -0,0 +1,36 @@ +.TH LIBPFM 3 "September, 2024" "" "Linux Programmer's Manual" +.SH NAME +libpfm_arm_neoverse_v3 - support for Arm Neoverse V3 core PMU +.SH SYNOPSIS +.nf +.B #include +.sp +.B PMU name: arm_v3 +.B PMU desc: Arm Neoverse V3 +.sp +.SH DESCRIPTION +The library supports the Arm Neoverse V3 core PMU. + +This PMU supports 6 counters and privilege levels filtering. +It can operate in both 32 and 64 bit modes. + +.SH MODIFIERS +The following modifiers are supported on Arm Neoverse V3: +.TP +.B u +Measure at the user level. This corresponds to \fBPFM_PLM3\fR. +This is a boolean modifier. +.TP +.B k +Measure at the kernel level. This corresponds to \fBPFM_PLM0\fR. +This is a boolean modifier. +.TP +.B hv +Measure at the hypervisor level. This corresponds to \fBPFM_PLMH\fR. +This is a boolean modifier. + +.SH AUTHORS +.nf +Stephane Eranian +.if +.PP diff --git a/src/libpfm4/include/perfmon/pfmlib.h b/src/libpfm4/include/perfmon/pfmlib.h index 553e5ee75..dc7e15de0 100644 --- a/src/libpfm4/include/perfmon/pfmlib.h +++ b/src/libpfm4/include/perfmon/pfmlib.h @@ -816,6 +816,9 @@ typedef enum { PFM_PMU_AMD64_FAM1AH_ZEN5_L3, /* AMD64 Fam1Ah Zen5 L3 */ PFM_PMU_ARM_CORTEX_A72, /* ARM Cortex A72 (ARMv8) */ + PFM_PMU_ARM_V3, /* Arm Neoverse V3 (ARMv9) */ + PFM_PMU_ARM_CORTEX_A55, /* ARM Cortex A55 (ARMv8) */ + PFM_PMU_ARM_CORTEX_A76, /* ARM Cortex A76 (ARMv8) */ /* MUST ADD NEW PMU MODELS HERE */ PFM_PMU_MAX /* end marker */ diff --git a/src/libpfm4/lib/Makefile b/src/libpfm4/lib/Makefile index 60bfc9df8..d39f7891c 100644 --- a/src/libpfm4/lib/Makefile +++ b/src/libpfm4/lib/Makefile @@ -200,22 +200,34 @@ endif ifeq ($(CONFIG_PFMLIB_ARCH_ARM),y) ifeq ($(SYS),Linux) -SRCS += pfmlib_arm_perf_event.c +SRCS += pfmlib_arm_perf_event.c pfmlib_arm_armv8_thunderx2_unc_perf_event.c pfmlib_arm_armv8_kunpeng_unc_perf_event.c endif INCARCH = $(INC_ARM) -SRCS += pfmlib_arm.c pfmlib_arm_armv7_pmuv1.c pfmlib_arm_armv6.c pfmlib_arm_armv8.c pfmlib_arm_armv9.c pfmlib_tx2_unc_perf_event.c pfmlib_kunpeng_unc_perf_event.c +SRCS += pfmlib_arm.c \ + pfmlib_arm_armv7_pmuv1.c \ + pfmlib_arm_armv6.c \ + pfmlib_arm_armv8.c \ + pfmlib_arm_armv9.c \ + pfmlib_arm_armv8_thunderx2_unc.c \ + pfmlib_arm_armv8_kunpeng_unc.c + CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM endif ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y) ifeq ($(SYS),Linux) -SRCS += pfmlib_arm_perf_event.c +SRCS += pfmlib_arm_perf_event.c pfmlib_arm_armv8_thunderx2_unc_perf_event.c pfmlib_arm_armv8_kunpeng_unc_perf_event.c endif INCARCH = $(INC_ARM64) -SRCS += pfmlib_arm.c pfmlib_arm_armv8.c pfmlib_arm_armv9.c pfmlib_tx2_unc_perf_event.c pfmlib_kunpeng_unc_perf_event.c +SRCS += pfmlib_arm.c \ + pfmlib_arm_armv8.c \ + pfmlib_arm_armv9.c \ + pfmlib_arm_armv8_thunderx2_unc.c \ + pfmlib_arm_armv8_kunpeng_unc.c + CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM64 endif @@ -415,6 +427,7 @@ INC_ARM=pfmlib_arm_priv.h \ events/arm_neoverse_n2_events.h \ events/arm_neoverse_v1_events.h \ events/arm_neoverse_v2_events.h \ + events/arm_neoverse_v3_events.h \ events/arm_hisilicon_kunpeng_events.h \ events/arm_hisilicon_kunpeng_unc_events.h @@ -429,6 +442,7 @@ INC_ARM64=pfmlib_arm_priv.h \ events/arm_neoverse_n2_events.h \ events/arm_neoverse_v1_events.h \ events/arm_neoverse_v2_events.h \ + events/arm_neoverse_v3_events.h \ 
events/arm_hisilicon_kunpeng_events.h \ events/arm_hisilicon_kunpeng_unc_events.h diff --git a/src/libpfm4/lib/events/arm_cortex_a55_events.h b/src/libpfm4/lib/events/arm_cortex_a55_events.h new file mode 100644 index 000000000..a59bffe27 --- /dev/null +++ b/src/libpfm4/lib/events/arm_cortex_a55_events.h @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2024 Google, Inc + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * ARM Cortex A55 + * References: + * - Arm Cortex A55 TRM: https://developer.arm.com/documentation/100442/0100/debug-descriptions/pmu/pmu-events + * - https://github.com/ARM-software/data/blob/master/pmu/cortex-a55.json + */ +static const arm_entry_t arm_cortex_a55_pe[]={ + {.name = "SW_INCR", + .modmsk = ARMV8_ATTRS, + .code = 0x00, + .desc = "Instruction architecturally executed, condition code check pass, software increment" + }, + {.name = "L1I_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x01, + .desc = "Level 1 instruction cache refill" + }, + {.name = "L1I_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x02, + .desc = "Level 1 instruction TLB refill" + }, + {.name = "L1D_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x03, + .desc = "Level 1 data cache refill" + }, + {.name = "L1D_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x04, + .desc = "Level 1 data cache access" + }, + {.name = "L1D_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x05, + .desc = "Level 1 data TLB refill" + }, + {.name = "LD_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x06, + .desc = "Instruction architecturally executed, condition code check pass, load" + }, + {.name = "ST_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x07, + .desc = "Instruction architecturally executed, condition code check pass, store" + }, + {.name = "INST_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x08, + .desc = "Instruction architecturally executed" + }, + {.name = "EXC_TAKEN", + .modmsk = ARMV8_ATTRS, + .code = 0x09, + .desc = "Exception taken" + }, + {.name = "EXC_RETURN", + .modmsk = ARMV8_ATTRS, + .code = 0x0a, + .desc = "Instruction architecturally executed, condition code check pass, exception return" + }, + {.name = "CID_WRITE_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0b, + .desc = "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR" + }, + {.name = "PC_WRITE_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0c, + .desc = "Instruction architecturally executed, condition code check pass, software change of the PC" + 
}, + {.name = "BR_IMMED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0d, + .desc = "Instruction architecturally executed, immediate branch" + }, + {.name = "BR_RETURN_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0e, + .desc = "Instruction architecturally executed, condition code check pass, procedure return" + }, + {.name = "UNALIGNED_LDST_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0f, + .desc = "Instruction architecturally executed, condition code check pass, unaligned load or store" + }, + {.name = "BR_MIS_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0x10, + .desc = "Mispredicted or not predicted branch speculatively executed" + }, + {.name = "CPU_CYCLES", + .modmsk = ARMV8_ATTRS, + .code = 0x11, + .desc = "Cycle" + }, + {.name = "BR_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0x12, + .desc = "Predictable branch speculatively executed" + }, + {.name = "MEM_ACCESS", + .modmsk = ARMV8_ATTRS, + .code = 0x13, + .desc = "Data memory access" + }, + {.name = "L1I_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x14, + .desc = "Level 1 instruction cache access" + }, + {.name = "L1D_CACHE_WB", + .modmsk = ARMV8_ATTRS, + .code = 0x15, + .desc = "Level 1 data cache Write-Back" + }, + {.name = "L2D_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x16, + .desc = "Level 2 data cache access" + }, + {.name = "L2D_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x17, + .desc = "Level 2 data cache refill" + }, + {.name = "L2D_CACHE_WB", + .modmsk = ARMV8_ATTRS, + .code = 0x18, + .desc = "Level 2 data cache Write-Back" + }, + {.name = "BUS_ACCESS", + .modmsk = ARMV8_ATTRS, + .code = 0x19, + .desc = "Bus access" + }, + {.name = "MEMORY_ERROR", + .modmsk = ARMV8_ATTRS, + .code = 0x1a, + .desc = "Local memory error" + }, + {.name = "INST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x1b, + .desc = "Operation speculatively executed" + }, + {.name = "INT_SPEC", + .modmsk = ARMV8_ATTRS, + .equiv = "INST_SPEC", + .code = 0x1b, + .desc = "Operation speculatively executed" + }, + {.name = "TTBR_WRITE_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x1c, + .desc = "Instruction architecturally executed, condition code check pass, write to TTBR" + }, + {.name = "BUS_CYCLES", + .modmsk = ARMV8_ATTRS, + .code = 0x1d, + .desc = "Bus cycles" + }, + {.name = "L2D_CACHE_ALLOCATE", + .modmsk = ARMV8_ATTRS, + .code = 0x20, + .desc = "Level 2 data cache allocation without refill" + }, + {.name = "BR_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x21, + .desc = "Instruction architecturally executed, branch" + }, + {.name = "BR_MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x22, + .desc = "Instruction architecturally executed, mispredicted branch" + }, + {.name = "BR__MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .equiv = "BR_MIS_PRED_RETIRED", + .code = 0x22, + .desc = "Instruction architecturally executed, mispredicted branch" + }, + {.name = "STALL_FRONTEND", + .modmsk = ARMV8_ATTRS, + .code = 0x23, + .desc = "No operation issued because of the frontend" + }, + {.name = "STALL_BACKEND", + .modmsk = ARMV8_ATTRS, + .code = 0x24, + .desc = "No operation issued because of the backend" + }, + {.name = "L1D_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x25, + .desc = "Level 1 data TLB access" + }, + {.name = "L1I_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x26, + .desc = "Level 1 instruction TLB access" + }, + {.name = "L3D_CACHE_ALLOCATE", + .modmsk = ARMV8_ATTRS, + .code = 0x29, + .desc = "Attributable Level 3 unified cache allocation without refill" + }, + {.name = "L3D_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x2a, + .desc = 
"Attributable Level 3 unified cache refill" + }, + {.name = "L3D_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x2b, + .desc = "Attributable Level 3 unified cache access" + }, + {.name = "L2D_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x2d, + .desc = "Attributable Level 2 unified TLB refill" + }, + {.name = "L2D_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x2f, + .desc = "Attributable Level 2 unified TLB access" + }, + {.name = "DTLB_WALK", + .modmsk = ARMV8_ATTRS, + .code = 0x34, + .desc = "Access to data TLB that caused a page table walk" + }, + {.name = "ITLB_WALK", + .modmsk = ARMV8_ATTRS, + .code = 0x35, + .desc = "Access to instruction TLB that caused a page table walk" + }, + {.name = "LL_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x36, + .desc = "Last level cache access, read" + }, + {.name = "LL_CACHE_MISS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x37, + .desc = "Last level cache miss, read" + }, + {.name = "REMOTE_ACCESS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x38, + .desc = "Access to another socket in a multi-socket system, read" + }, + {.name = "L1D_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x40, + .desc = "Level 1 data cache access, read" + }, + {.name = "L1D_CACHE_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x41, + .desc = "Level 1 data cache access, write" + }, + {.name = "L1D_CACHE_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x42, + .desc = "Level 1 data cache refill, read" + }, + {.name = "L1D_CACHE_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x43, + .desc = "Level 1 data cache refill, write" + }, + {.name = "L1D_CACHE_REFILL_INNER", + .modmsk = ARMV8_ATTRS, + .code = 0x44, + .desc = "Level 1 data cache refill, inner" + }, + {.name = "L1D_CACHE_REFILL_OUTER", + .modmsk = ARMV8_ATTRS, + .code = 0x45, + .desc = "Level 1 data cache refill, outer" + }, + {.name = "L2D_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x50, + .desc = "Level 2 cache access, read" + }, + {.name = "L2D_CACHE_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x51, + .desc = "Level 2 cache access, write" + }, + {.name = "L2D_CACHE_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x52, + .desc = "Level 2 cache refill, read" + }, + {.name = "L2D_CACHE_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x53, + .desc = "Level 2 cache refill, write" + }, + {.name = "BUS_ACCESS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x60, + .desc = "Bus access, read" + }, + {.name = "BUS_ACCESS_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x61, + .desc = "Bus access, write" + }, + {.name = "MEM_ACCESS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x66, + .desc = "Data memory access, read" + }, + {.name = "MEM_ACCESS_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x67, + .desc = "Data memory access, write" + }, + {.name = "LD_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x70, + .desc = "Operation speculatively executed, load" + }, + {.name = "ST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x71, + .desc = "Operation speculatively executed, store" + }, + {.name = "LDST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x72, + .desc = "Operation speculatively executed, load or store" + }, + {.name = "DP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x73, + .desc = "Operation speculatively executed, integer data processing" + }, + {.name = "ASE_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x74, + .desc = "Operation speculatively executed, Advanced SIMD instruction" + }, + {.name = "VFP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x75, + .desc = "Operation speculatively executed, floating-point instruction" + }, + {.name = "PC_WRITE_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 
0x76, + .desc = "Operation speculatively executed, software change of the PC" + }, + {.name = "CRYPTO_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x77, + .desc = "Operation speculatively executed, Cryptographic instruction" + }, + {.name = "BR_IMMED_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x78, + .desc = "Branch speculatively executed, immediate branch" + }, + {.name = "BR_RETURN_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x79, + .desc = "Branch speculatively executed, procedure return" + }, + {.name = "BR_INDIRECT_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7a, + .desc = "Branch speculatively executed, indirect branch" + }, + {.name = "EXC_IRQ", + .modmsk = ARMV8_ATTRS, + .code = 0x86, + .desc = "Exception taken, IRQ" + }, + {.name = "EXC_FIQ", + .modmsk = ARMV8_ATTRS, + .code = 0x87, + .desc = "Exception taken, FIQ" + }, + {.name = "L3D_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0xa0, + .desc = "Attributable Level 3 unified cache access, read" + }, + {.name = "L3D_CACHE_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0xa2, + .desc = "Attributable Level 3 unified cache refill, read" + }, + {.name = "L3D_CACHE_REFILL_PREFETCH", + .modmsk = ARMV8_ATTRS, + .code = 0xc0, + .desc = "Level 3 cache refill due to prefetch" + }, + {.name = "L2D_CACHE_REFILL_PREFETCH", + .modmsk = ARMV8_ATTRS, + .code = 0xc1, + .desc = "Level 2 cache refill due to prefetch" + }, + {.name = "L1D_CACHE_REFILL_PREFETCH", + .modmsk = ARMV8_ATTRS, + .code = 0xc2, + .desc = "Level 1 data cache refill due to prefetch" + }, + {.name = "L2D_WS_MODE", + .modmsk = ARMV8_ATTRS, + .code = 0xc3, + .desc = "Level 2 cache write streaming mode" + }, + {.name = "L1D_WS_MODE_ENTRY", + .modmsk = ARMV8_ATTRS, + .code = 0xc4, + .desc = "Level 1 data cache entering write streaming mode" + }, + {.name = "L1D_WS_MODE", + .modmsk = ARMV8_ATTRS, + .code = 0xc5, + .desc = "Level 1 data cache write streaming mode" + }, + {.name = "PREDECODE_ERROR", + .modmsk = ARMV8_ATTRS, + .code = 0xc6, + .desc = "Predecode error" + }, + {.name = "L3D_WS_MODE", + .modmsk = ARMV8_ATTRS, + .code = 0xc7, + .desc = "Level 3 cache write streaming mode" + }, + {.name = "BR_COND_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0xc9, + .desc = "Predicted conditional branch executed" + }, + {.name = "BR_INDIRECT_MIS_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0xca, + .desc = "Indirect branch mis-predicted" + }, + {.name = "BR_INDIRECT_ADDR_MIS_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0xcb, + .desc = "Indirect branch mis-predicted due to address mis-compare" + }, + {.name = "BR_COND_MIS_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0xcc, + .desc = "Conditional branch mis-predicted" + }, + {.name = "BR_INDIRECT_ADDR_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0xcd, + .desc = "Indirect branch with predicted address executed" + }, + {.name = "BR_RETURN_ADDR_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0xce, + .desc = "Procedure return with predicted address executed" + }, + {.name = "BR_RETURN_ADDR_MIS_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0xcf, + .desc = "Procedure return mis-predicted due to address mis-compare" + }, + {.name = "L2D_LLWALK_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0xd0, + .desc = "Level 2 TLB last-level walk cache access" + }, + {.name = "L2D_LLWALK_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0xd1, + .desc = "Level 2 TLB last-level walk cache refill" + }, + {.name = "L2D_L2WALK_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0xd2, + .desc = "Level 2 TLB level-2 walk cache access" + }, + {.name = "L2D_L2WALK_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0xd3, + .desc 
= "Level 2 TLB level-2 walk cache refill" + }, + {.name = "L2D_S2_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0xd4, + .desc = "Level 2 TLB IPA cache access" + }, + {.name = "L2D_S2_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0xd5, + .desc = "Level 2 TLB IPA cache refill" + }, + {.name = "L2D_CACHE_STASH_DROPPED", + .modmsk = ARMV8_ATTRS, + .code = 0xd6, + .desc = "Level 2 cache stash dropped" + }, + {.name = "STALL_FRONTEND_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0xe1, + .desc = "No operation issued due to the frontend, cache miss" + }, + {.name = "STALL_FRONTEND_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0xe2, + .desc = "No operation issued due to the frontend, TLB miss" + }, + {.name = "STALL_FRONTEND_PDERR", + .modmsk = ARMV8_ATTRS, + .code = 0xe3, + .desc = "No operation issued due to the frontend, pre-decode error" + }, + {.name = "STALL_BACKEND_ILOCK", + .modmsk = ARMV8_ATTRS, + .code = 0xe4, + .desc = "No operation issued due to the backend interlock" + }, + {.name = "STALL_BACKEND_ILOCK_AGU", + .modmsk = ARMV8_ATTRS, + .code = 0xe5, + .desc = "No operation issued due to the backend, interlock, AGU" + }, + {.name = "STALL_BACKEND_ILOCK_FPU", + .modmsk = ARMV8_ATTRS, + .code = 0xe6, + .desc = "No operation issued due to the backend, interlock, FPU" + }, + {.name = "STALL_BACKEND_LD", + .modmsk = ARMV8_ATTRS, + .code = 0xe7, + .desc = "No operation issued due to the backend, load" + }, + {.name = "STALL_BACKEND_ST", + .modmsk = ARMV8_ATTRS, + .code = 0xe8, + .desc = "No operation issued due to the backend, store" + }, + {.name = "STALL_BACKEND_LD_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0xe9, + .desc = "No operation issued due to the backend, load, cache miss" + }, + {.name = "STALL_BACKEND_LD_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0xea, + .desc = "No operation issued due to the backend, load, TLB miss" + }, + {.name = "STALL_BACKEND_ST_STB", + .modmsk = ARMV8_ATTRS, + .code = 0xeb, + .desc = "No operation issued due to the backend, store, STB full" + }, + {.name = "STALL_BACKEND_ST_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0xec, + .desc = "No operation issued due to the backend, store, TLB miss" + }, +}; diff --git a/src/libpfm4/lib/events/arm_cortex_a76_events.h b/src/libpfm4/lib/events/arm_cortex_a76_events.h new file mode 100644 index 000000000..ab37e7d85 --- /dev/null +++ b/src/libpfm4/lib/events/arm_cortex_a76_events.h @@ -0,0 +1,564 @@ +/* Copyright (c) 2024 Google, Inc + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * ARM Cortex A76 + * References: + * - Arm Cortex A76 TRM: https://developer.arm.com/documentation/100798/0401/Performance-Monitoring-Unit/PMU-events + * - https://github.com/ARM-software/data/blob/master/pmu/cortex-a76.json + */ +static const arm_entry_t arm_cortex_a76_pe[]={ + {.name = "SW_INCR", + .modmsk = ARMV8_ATTRS, + .code = 0x00, + .desc = "Software increment" + }, + {.name = "L1I_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x01, + .desc = "L1 instruction cache refill" + }, + {.name = "L1I_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x02, + .desc = "L1 instruction TLB refill" + }, + {.name = "L1D_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x03, + .desc = "L1 data cache refill" + }, + {.name = "L1D_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x04, + .desc = "L1 data cache access" + }, + {.name = "L1D_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x05, + .desc = "L1 data TLB refill" + }, + {.name = "INST_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x08, + .desc = "Instruction architecturally executed" + }, + {.name = "EXC_TAKEN", + .modmsk = ARMV8_ATTRS, + .code = 0x09, + .desc = "Exception taken" + }, + {.name = "EXC_RETURN", + .modmsk = ARMV8_ATTRS, + .code = 0x0a, + .desc = "Instruction architecturally executed, condition code check pass, exception return" + }, + {.name = "CID_WRITE_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0b, + .desc = "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR" + }, + {.name = "BR_MIS_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0x10, + .desc = "Mispredicted or not predicted branch speculatively executed" + }, + {.name = "CPU_CYCLES", + .modmsk = ARMV8_ATTRS, + .code = 0x11, + .desc = "Cycle" + }, + {.name = "BR_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0x12, + .desc = "Predictable branch speculatively executed" + }, + {.name = "MEM_ACCESS", + .modmsk = ARMV8_ATTRS, + .code = 0x13, + .desc = "Data memory access" + }, + {.name = "L1I_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x14, + .desc = "Level 1 instruction cache access or Level 0 Macro-op cache access" + }, + {.name = "L1D_CACHE_WB", + .modmsk = ARMV8_ATTRS, + .code = 0x15, + .desc = "L1 data cache Write-Back" + }, + {.name = "L2D_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x16, + .desc = "L2 unified cache access" + }, + {.name = "L2D_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x17, + .desc = "L2 unified cache refill" + }, + {.name = "L2D_CACHE_WB", + .modmsk = ARMV8_ATTRS, + .code = 0x18, + .desc = "L2 unified cache write-back" + }, + {.name = "BUS_ACCESS", + .modmsk = ARMV8_ATTRS, + .code = 0x19, + .desc = "Bus access" + }, + {.name = "MEMORY_ERROR", + .modmsk = ARMV8_ATTRS, + .code = 0x1a, + .desc = "Local memory error" + }, + {.name = "INST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x1b, + .desc = "Operation speculatively executed" + }, + {.name = "TTBR_WRITE_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x1c, + .desc = "Instruction architecturally executed, condition code check pass, write to TTBR" + }, + {.name = "BUS_CYCLES", + .modmsk = ARMV8_ATTRS, + .code = 0x1d, + .desc = "Bus cycles" + }, + {.name = "L2D_CACHE_ALLOCATE", + .modmsk = ARMV8_ATTRS, + .code = 0x20, + .desc = "L2 unified cache allocation without refill" + }, + {.name = "BR_RETIRED", + 
.modmsk = ARMV8_ATTRS, + .code = 0x21, + .desc = "Instruction architecturally executed, branch" + }, + {.name = "BR_MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x22, + .desc = "Instruction architecturally executed, mispredicted branch" + }, + {.name = "STALL_FRONTEND", + .modmsk = ARMV8_ATTRS, + .code = 0x23, + .desc = "No operation issued because of the frontend" + }, + {.name = "STALL_BACKEND", + .modmsk = ARMV8_ATTRS, + .code = 0x24, + .desc = "No operation issued because of the backend" + }, + {.name = "L1D_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x25, + .desc = "Level 1 data TLB access" + }, + {.name = "L1I_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x26, + .desc = "Level 1 instruction TLB access" + }, + {.name = "L3D_CACHE_ALLOCATE", + .modmsk = ARMV8_ATTRS, + .code = 0x29, + .desc = "Attributable L3 data or unified cache allocation without refill" + }, + {.name = "L3D_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x2a, + .desc = "Attributable Level 3 unified cache refill" + }, + {.name = "L3D_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x2b, + .desc = "Attributable Level 3 unified cache access" + }, + {.name = "L2D_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x2d, + .desc = "Attributable L2 data or unified TLB refill" + }, + {.name = "L2D_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x2f, + .desc = "Attributable L2 data or unified TLB access" + }, + {.name = "REMOTE_ACCESS", + .modmsk = ARMV8_ATTRS, + .code = 0x31, + .desc = "Access to another socket in a multi-socket system" + }, + {.name = "DTLB_WALK", + .modmsk = ARMV8_ATTRS, + .code = 0x34, + .desc = "Access to data TLB that caused a page table walk" + }, + {.name = "ITLB_WALK", + .modmsk = ARMV8_ATTRS, + .code = 0x35, + .desc = "Access to instruction TLB that caused a page table walk" + }, + {.name = "LL_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x36, + .desc = "Last level cache access, read" + }, + {.name = "LL_CACHE_MISS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x37, + .desc = "Last level cache miss, read" + }, + {.name = "L1D_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x40, + .desc = "L1 data cache access, read" + }, + {.name = "L1D_CACHE_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x41, + .desc = "L1 data cache access, write" + }, + {.name = "L1D_CACHE_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x42, + .desc = "L1 data cache refill, read" + }, + {.name = "L1D_CACHE_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x43, + .desc = "L1 data cache refill, write" + }, + {.name = "L1D_CACHE_REFILL_INNER", + .modmsk = ARMV8_ATTRS, + .code = 0x44, + .desc = "L1 data cache refill, inner" + }, + {.name = "L1D_CACHE_REFILL_OUTER", + .modmsk = ARMV8_ATTRS, + .code = 0x45, + .desc = "L1 data cache refill, outer" + }, + {.name = "L1D_CACHE_WB_VICTIM", + .modmsk = ARMV8_ATTRS, + .code = 0x46, + .desc = "L1 data cache write-back, victim" + }, + {.name = "L1D_CACHE_WB_CLEAN", + .modmsk = ARMV8_ATTRS, + .code = 0x47, + .desc = "L1 data cache write-back cleaning and coherency" + }, + {.name = "L1D_CACHE_INVAL", + .modmsk = ARMV8_ATTRS, + .code = 0x48, + .desc = "L1 data cache invalidate" + }, + {.name = "L1D_TLB_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x4c, + .desc = "L1 data TLB refill, read" + }, + {.name = "L1D_TLB_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x4d, + .desc = "L1 data TLB refill, write" + }, + {.name = "L1D_TLB_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x4e, + .desc = "L1 data TLB access, read" + }, + {.name = "L1D_TLB_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x4f, + .desc = "L1 data TLB access, write" 
+ }, + {.name = "L2D_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x50, + .desc = "L2 unified cache access, read" + }, + {.name = "L2D_CACHE_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x51, + .desc = "L2 unified cache access, write" + }, + {.name = "L2D_CACHE_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x52, + .desc = "L2 unified cache refill, read" + }, + {.name = "L2D_CACHE_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x53, + .desc = "L2 unified cache refill, write" + }, + {.name = "L2D_CACHE_WB_VICTIM", + .modmsk = ARMV8_ATTRS, + .code = 0x56, + .desc = "L2 unified cache write-back, victim" + }, + {.name = "L2D_CACHE_WB_CLEAN", + .modmsk = ARMV8_ATTRS, + .code = 0x57, + .desc = "L2 unified cache write-back, cleaning, and coherency" + }, + {.name = "L2D_CACHE_INVAL", + .modmsk = ARMV8_ATTRS, + .code = 0x58, + .desc = "L2 unified cache invalidate" + }, + {.name = "L2D_TLB_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x5c, + .desc = "L2 data or unified TLB refill, read" + }, + {.name = "L2D_TLB_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x5d, + .desc = "L2 data or unified TLB refill, write" + }, + {.name = "L2D_TLB_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x5e, + .desc = "L2 data or unified TLB access, read" + }, + {.name = "L2D_TLB_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x5f, + .desc = "L2 data or unified TLB access, write" + }, + {.name = "BUS_ACCESS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x60, + .desc = "Bus access read" + }, + {.name = "BUS_ACCESS_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x61, + .desc = "Bus access write" + }, + {.name = "MEM_ACCESS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x66, + .desc = "Data memory access, read" + }, + {.name = "MEM_ACCESS_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x67, + .desc = "Data memory access, write" + }, + {.name = "UNALIGNED_LD_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x68, + .desc = "Unaligned access, read" + }, + {.name = "UNALIGNED_ST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x69, + .desc = "Unaligned access, write" + }, + {.name = "UNALIGNED_LDST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6a, + .desc = "Unaligned access Event mnemonic Event description" + }, + {.name = "LDREX_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6c, + .desc = "Exclusive operation speculatively executed, LDREX or LDX" + }, + {.name = "STREX_PASS_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6d, + .desc = "Exclusive operation speculatively executed, STREX or STX pass" + }, + {.name = "STREX_FAIL_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6e, + .desc = "Exclusive operation speculatively executed, STREX or STX fail" + }, + {.name = "STREX_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6f, + .desc = "Exclusive operation speculatively executed, STREX or STX" + }, + {.name = "LD_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x70, + .desc = "Operation speculatively executed, load" + }, + {.name = "ST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x71, + .desc = "Operation speculatively executed, store" + }, + {.name = "LDST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x72, + .desc = "Operation speculatively executed, load or store" + }, + {.name = "DP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x73, + .desc = "Operation speculatively executed, integer data-processing" + }, + {.name = "ASE_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x74, + .desc = "Operation speculatively executed, Advanced SIMD instruction" + }, + {.name = "VFP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x75, + .desc = "Operation speculatively executed, floating-point instruction" + }, + {.name = 
"PC_WRITE_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x76, + .desc = "Operation speculatively executed, software change of the PC" + }, + {.name = "CRYPTO_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x77, + .desc = "Operation speculatively executed, Cryptographic instruction" + }, + {.name = "BR_IMMED_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x78, + .desc = "Branch speculatively executed, immediate branch" + }, + {.name = "BR_RETURN_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x79, + .desc = "Branch speculatively executed, procedure return" + }, + {.name = "BR_INDIRECT_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7a, + .desc = "Branch speculatively executed, indirect branch" + }, + {.name = "ISB_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7c, + .desc = "Barrier speculatively executed, ISB" + }, + {.name = "DSB_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7d, + .desc = "Barrier speculatively executed, DSB" + }, + {.name = "DMB_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7e, + .desc = "Barrier speculatively executed, DMB" + }, + {.name = "EXC_UNDEF", + .modmsk = ARMV8_ATTRS, + .code = 0x81, + .desc = "Counts the number of undefined exceptions taken locally" + }, + {.name = "EXC_SVC", + .modmsk = ARMV8_ATTRS, + .code = 0x82, + .desc = "Exception taken locally, Supervisor Call" + }, + {.name = "EXC_PABORT", + .modmsk = ARMV8_ATTRS, + .code = 0x83, + .desc = "Exception taken locally, Instruction Abort" + }, + {.name = "EXC_DABORT", + .modmsk = ARMV8_ATTRS, + .code = 0x84, + .desc = "Exception taken locally, Data Abort and SError" + }, + {.name = "EXC_IRQ", + .modmsk = ARMV8_ATTRS, + .code = 0x86, + .desc = "Exception taken locally, IRQ" + }, + {.name = "EXC_FIQ", + .modmsk = ARMV8_ATTRS, + .code = 0x87, + .desc = "Exception taken locally, FIQ" + }, + {.name = "EXC_SMC", + .modmsk = ARMV8_ATTRS, + .code = 0x88, + .desc = "Exception taken locally, Secure Monitor Call" + }, + {.name = "EXC_HVC", + .modmsk = ARMV8_ATTRS, + .code = 0x8a, + .desc = "Exception taken locally, Hypervisor Call" + }, + {.name = "EXC_TRAP_PABORT", + .modmsk = ARMV8_ATTRS, + .code = 0x8b, + .desc = "Exception taken, Instruction Abort not taken locally" + }, + {.name = "EXC_TRAP_DABORT", + .modmsk = ARMV8_ATTRS, + .code = 0x8c, + .desc = "Exception taken, Data Abort or SError not taken locally" + }, + {.name = "EXC_TRAP_OTHER", + .modmsk = ARMV8_ATTRS, + .code = 0x8d, + .desc = "Exception taken, Other traps not taken locally" + }, + {.name = "EXC_TRAP_IRQ", + .modmsk = ARMV8_ATTRS, + .code = 0x8e, + .desc = "Exception taken, IRQ not taken locally" + }, + {.name = "EXC_TRAP_FIQ", + .modmsk = ARMV8_ATTRS, + .code = 0x8f, + .desc = "Exception taken, FIQ not taken locally" + }, + {.name = "RC_LD_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x90, + .desc = "Release consistency operation speculatively executed, load-acquire" + }, + {.name = "RC_ST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x91, + .desc = "Release consistency operation speculatively executed, store-release" + }, + {.name = "L3D_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0xa0, + .desc = "L3 cache read" + }, + {.name = "L3_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .equiv = "L3D_CACHE_RD", + .code = 0xa0, + .desc = "L3 cache read" + }, +}; diff --git a/src/libpfm4/lib/events/arm_hisilicon_kunpeng_unc_events.h b/src/libpfm4/lib/events/arm_hisilicon_kunpeng_unc_events.h index 6e4010941..90a8375d0 100644 --- a/src/libpfm4/lib/events/arm_hisilicon_kunpeng_unc_events.h +++ b/src/libpfm4/lib/events/arm_hisilicon_kunpeng_unc_events.h @@ -189,9 +189,3 @@ static const arm_entry_t 
arm_kunpeng_unc_l3c_pe[ ] = { .desc = "Count of the number of prefetch drops from this L3C." } }; - -//Uncore accessor functions -int -pfm_kunpeng_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e); -int -pfm_kunpeng_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); diff --git a/src/libpfm4/lib/events/arm_marvell_tx2_unc_events.h b/src/libpfm4/lib/events/arm_marvell_tx2_unc_events.h index 51e6b4df0..ec3f0ea88 100644 --- a/src/libpfm4/lib/events/arm_marvell_tx2_unc_events.h +++ b/src/libpfm4/lib/events/arm_marvell_tx2_unc_events.h @@ -125,8 +125,3 @@ static const arm_entry_t arm_thunderx2_unc_llc_pe[]={ }; #define ARM_TX2_CORE_LLC_COUNT (sizeof(arm_thunderx2_unc_llc_pe)/sizeof(arm_entry_t)) -//Uncore accessor functions -int -pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e); -int -pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); diff --git a/src/libpfm4/lib/events/arm_neoverse_v3_events.h b/src/libpfm4/lib/events/arm_neoverse_v3_events.h new file mode 100644 index 000000000..188c73357 --- /dev/null +++ b/src/libpfm4/lib/events/arm_neoverse_v3_events.h @@ -0,0 +1,1170 @@ +/* + * Copyright (c) 2024 Google, Inc + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * ARM Neoverse V3 + * References: + * - Arm Neoverse V3 Core TRM: https://developer.arm.com/documentation/107734/ + * - https://github.com/ARM-software/data/blob/master/pmu/neoverse-v3.json + */ +static const arm_entry_t arm_neoverse_v3_pe[]={ + {.name = "SW_INCR", + .modmsk = ARMV8_ATTRS, + .code = 0x00, + .desc = "Instruction architecturally executed, Condition code check pass, software increment Counts software writes to the PMSWINC_EL0 (software PMU increment) register" + }, + {.name = "L1I_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x01, + .desc = "Level 1 instruction cache refill Counts cache line refills in the level 1 instruction cache caused by a missed instruction fetch" + }, + {.name = "L1I_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x02, + .desc = "Level 1 instruction TLB refill Counts level 1 instruction TLB refills from any Instruction fetch" + }, + {.name = "L1D_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x03, + .desc = "Level 1 data cache refill Counts level 1 data cache refills caused by speculatively executed load or store operations that missed in the level 1 data cache" + }, + {.name = "L1D_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x04, + .desc = "Level 1 data cache access Counts level 1 data cache accesses from any load/store operations" + }, + {.name = "L1D_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x05, + .desc = "Level 1 data TLB refill Counts level 1 data TLB accesses that resulted in TLB refills" + }, + {.name = "INST_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x08, + .desc = "Instruction architecturally executed Counts instructions that have been architecturally executed" + }, + {.name = "EXC_TAKEN", + .modmsk = ARMV8_ATTRS, + .code = 0x09, + .desc = "Exception taken Counts any taken architecturally visible exceptions such as IRQ, FIQ, SError, and other synchronous exceptions" + }, + {.name = "EXC_RETURN", + .modmsk = ARMV8_ATTRS, + .code = 0x0a, + .desc = "Instruction architecturally executed, Condition code check pass, exception return Counts any architecturally executed exception return instructions" + }, + {.name = "CID_WRITE_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0b, + .desc = "Instruction architecturally executed, Condition code check pass, write to CONTEXTIDR Counts architecturally executed writes to the CONTEXTIDR_EL1 register, which usually contain the kernel PID and can be output with hardware trace" + }, + {.name = "PC_WRITE_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0c, + .desc = "Instruction architecturally executed, Condition code check pass, Software change of the PC Counts branch instructions that caused a change of Program Counter, which effectively causes a change in the control flow of the program" + }, + {.name = "BR_IMMED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0d, + .desc = "Branch instruction architecturally executed, immediate Counts architecturally executed direct branches" + }, + {.name = "BR_RETURN_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x0e, + .desc = "Branch instruction architecturally executed, procedure return, taken Counts architecturally executed procedure returns" + }, + {.name = "BR_MIS_PRED", + .modmsk = ARMV8_ATTRS, + .code = 0x10, + .desc = "Branch instruction speculatively executed, mispredicted or not predicted Counts branches which are speculatively executed and mispredicted" + }, + {.name = "CPU_CYCLES", + .modmsk = ARMV8_ATTRS, + .code = 0x11, + .desc = "Cycle Counts CPU clock cycles (not timer cycles)" + }, + {.name = "BR_PRED", + .modmsk = ARMV8_ATTRS, + .code 
= 0x12, + .desc = "Predictable branch instruction speculatively executed Counts all speculatively executed branches" + }, + {.name = "MEM_ACCESS", + .modmsk = ARMV8_ATTRS, + .code = 0x13, + .desc = "Data memory access Counts memory accesses issued by the CPU load store unit, where those accesses are issued due to load or store operations" + }, + {.name = "L1I_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x14, + .desc = "Level 1 instruction cache access Counts instruction fetches which access the level 1 instruction cache" + }, + {.name = "L1D_CACHE_WB", + .modmsk = ARMV8_ATTRS, + .code = 0x15, + .desc = "Level 1 data cache write-back Counts write-backs of dirty data from the L1 data cache to the L2 cache" + }, + {.name = "L2D_CACHE", + .modmsk = ARMV8_ATTRS, + .code = 0x16, + .desc = "Level 2 data cache access Counts accesses to the level 2 cache due to data accesses" + }, + {.name = "L2D_CACHE_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x17, + .desc = "Level 2 data cache refill Counts cache line refills into the level 2 cache" + }, + {.name = "L2D_CACHE_WB", + .modmsk = ARMV8_ATTRS, + .code = 0x18, + .desc = "Level 2 data cache write-back Counts write-backs of data from the L2 cache to outside the CPU" + }, + {.name = "BUS_ACCESS", + .modmsk = ARMV8_ATTRS, + .code = 0x19, + .desc = "Bus access Counts memory transactions issued by the CPU to the external bus, including snoop requests and snoop responses" + }, + {.name = "MEMORY_ERROR", + .modmsk = ARMV8_ATTRS, + .code = 0x1a, + .desc = "Local memory error Counts any detected correctable or uncorrectable physical memory errors (ECC or parity) in protected CPUs RAMs" + }, + {.name = "INST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x1b, + .desc = "Operation speculatively executed Counts operations that have been speculatively executed" + }, + {.name = "TTBR_WRITE_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x1c, + .desc = "Instruction architecturally executed, Condition code check pass, write to TTBR Counts architectural writes to TTBR0/1_EL1" + }, + {.name = "BUS_CYCLES", + .modmsk = ARMV8_ATTRS, + .code = 0x1d, + .desc = "Bus cycle Counts bus cycles in the CPU" + }, + {.name = "BR_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x21, + .desc = "Instruction architecturally executed, branch Counts architecturally executed branches, whether the branch is taken or not" + }, + {.name = "BR_MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x22, + .desc = "Branch instruction architecturally executed, mispredicted Counts branches counted by BR_RETIRED which were mispredicted and caused a pipeline flush" + }, + {.name = "STALL_FRONTEND", + .modmsk = ARMV8_ATTRS, + .code = 0x23, + .desc = "No operation sent for execution due to the frontend Counts cycles when frontend could not send any micro-operations to the rename stage because of frontend resource stalls caused by fetch memory latency or branch prediction flow stalls" + }, + {.name = "STALL_BACKEND", + .modmsk = ARMV8_ATTRS, + .code = 0x24, + .desc = "No operation sent for execution due to the backend Counts cycles whenever the rename unit is unable to send any micro-operations to the backend of the pipeline because of backend resource constraints" + }, + {.name = "L1D_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x25, + .desc = "Level 1 data TLB access Counts level 1 data TLB accesses caused by any memory load or store operation" + }, + {.name = "L1I_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x26, + .desc = "Level 1 instruction TLB access Counts level 1 instruction TLB accesses, whether the access 
hits or misses in the TLB" + }, + {.name = "L2D_TLB_REFILL", + .modmsk = ARMV8_ATTRS, + .code = 0x2d, + .desc = "Level 2 data TLB refill Counts level 2 TLB refills caused by memory operations from both data and instruction fetch, except for those caused by TLB maintenance operations and hardware prefetches" + }, + {.name = "L2D_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x2f, + .desc = "Level 2 data TLB access Counts level 2 TLB accesses except those caused by TLB maintenance operations" + }, + {.name = "REMOTE_ACCESS", + .modmsk = ARMV8_ATTRS, + .code = 0x31, + .desc = "Access to another socket in a multi-socket system Counts accesses to another chip, which is implemented as a different CMN mesh in the system" + }, + {.name = "DTLB_WALK", + .modmsk = ARMV8_ATTRS, + .code = 0x34, + .desc = "Data TLB access with at least one translation table walk Counts number of demand data translation table walks caused by a miss in the L2 TLB and performing at least one memory access" + }, + {.name = "ITLB_WALK", + .modmsk = ARMV8_ATTRS, + .code = 0x35, + .desc = "Instruction TLB access with at least one translation table walk Counts number of instruction translation table walks caused by a miss in the L2 TLB and performing at least one memory access" + }, + {.name = "LL_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x36, + .desc = "Last level cache access, read Counts read transactions that were returned from outside the core cluster" + }, + {.name = "LL_CACHE_MISS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x37, + .desc = "Last level cache miss, read Counts read transactions that were returned from outside the core cluster but missed in the system level cache" + }, + {.name = "L1D_CACHE_LMISS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x39, + .desc = "Level 1 data cache long-latency read miss Counts cache line refills into the level 1 data cache from any memory read operations, that incurred additional latency" + }, + {.name = "OP_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x3a, + .desc = "Micro-operation architecturally executed Counts micro-operations that are architecturally executed" + }, + {.name = "OP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x3b, + .desc = "Micro-operation speculatively executed Counts micro-operations speculatively executed" + }, + {.name = "STALL", + .modmsk = ARMV8_ATTRS, + .code = 0x3c, + .desc = "No operation sent for execution Counts cycles when no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)" + }, + {.name = "STALL_SLOT_BACKEND", + .modmsk = ARMV8_ATTRS, + .code = 0x3d, + .desc = "No operation sent for execution on a Slot due to the backend Counts slots per cycle in which no operations are sent from the rename unit to the backend due to backend resource constraints" + }, + {.name = "STALL_SLOT_FRONTEND", + .modmsk = ARMV8_ATTRS, + .code = 0x3e, + .desc = "No operation sent for execution on a Slot due to the frontend Counts slots per cycle in which no operations are sent to the rename unit from the frontend due to frontend resource constraints" + }, + {.name = "STALL_SLOT", + .modmsk = ARMV8_ATTRS, + .code = 0x3f, + .desc = "No operation sent for execution on a Slot Counts slots per cycle in which no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)" + }, + {.name = "L1D_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x40, + .desc = "Level 1 data cache access, read Counts level 1 data 
cache accesses from any load operation" + }, + {.name = "L1D_CACHE_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x41, + .desc = "Level 1 data cache access, write Counts level 1 data cache accesses generated by store operations" + }, + {.name = "L1D_CACHE_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x42, + .desc = "Level 1 data cache refill, read Counts level 1 data cache refills caused by speculatively executed load instructions where the memory read operation misses in the level 1 data cache" + }, + {.name = "L1D_CACHE_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x43, + .desc = "Level 1 data cache refill, write Counts level 1 data cache refills caused by speculatively executed store instructions where the memory write operation misses in the level 1 data cache" + }, + {.name = "L1D_CACHE_REFILL_INNER", + .modmsk = ARMV8_ATTRS, + .code = 0x44, + .desc = "Level 1 data cache refill, inner Counts level 1 data cache refills where the cache line data came from caches inside the immediate cluster of the core" + }, + {.name = "L1D_CACHE_REFILL_OUTER", + .modmsk = ARMV8_ATTRS, + .code = 0x45, + .desc = "Level 1 data cache refill, outer Counts level 1 data cache refills for which the cache line data came from outside the immediate cluster of the core, like an SLC in the system interconnect or DRAM" + }, + {.name = "L1D_CACHE_WB_VICTIM", + .modmsk = ARMV8_ATTRS, + .code = 0x46, + .desc = "Level 1 data cache write-back, victim Counts dirty cache line evictions from the level 1 data cache caused by a new cache line allocation" + }, + {.name = "L1D_CACHE_WB_CLEAN", + .modmsk = ARMV8_ATTRS, + .code = 0x47, + .desc = "Level 1 data cache write-back, cleaning and coherency Counts write-backs from the level 1 data cache that are a result of a coherency operation made by another CPU" + }, + {.name = "L1D_CACHE_INVAL", + .modmsk = ARMV8_ATTRS, + .code = 0x48, + .desc = "Level 1 data cache invalidate Counts each explicit invalidation of a cache line in the level 1 data cache caused by: - Cache Maintenance Operations (CMO) that operate by a virtual address" + }, + {.name = "L1D_TLB_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x4c, + .desc = "Level 1 data TLB refill, read Counts level 1 data TLB refills caused by memory read operations" + }, + {.name = "L1D_TLB_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x4d, + .desc = "Level 1 data TLB refill, write Counts level 1 data TLB refills caused by data side memory write operations" + }, + {.name = "L1D_TLB_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x4e, + .desc = "Level 1 data TLB access, read Counts level 1 data TLB accesses caused by memory read operations" + }, + {.name = "L1D_TLB_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x4f, + .desc = "Level 1 data TLB access, write Counts any L1 data side TLB accesses caused by memory write operations" + }, + {.name = "L2D_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x50, + .desc = "Level 2 data cache access, read Counts level 2 data cache accesses due to memory read operations" + }, + {.name = "L2D_CACHE_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x51, + .desc = "Level 2 data cache access, write Counts level 2 cache accesses due to memory write operations" + }, + {.name = "L2D_CACHE_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x52, + .desc = "Level 2 data cache refill, read Counts refills for memory accesses due to memory read operation counted by L2D_CACHE_RD" + }, + {.name = "L2D_CACHE_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x53, + .desc = "Level 2 data cache refill, write Counts refills for memory accesses due 
to memory write operation counted by L2D_CACHE_WR" + }, + {.name = "L2D_CACHE_WB_VICTIM", + .modmsk = ARMV8_ATTRS, + .code = 0x56, + .desc = "Level 2 data cache write-back, victim Counts evictions from the level 2 cache because of a line being allocated into the L2 cache" + }, + {.name = "L2D_CACHE_WB_CLEAN", + .modmsk = ARMV8_ATTRS, + .code = 0x57, + .desc = "Level 2 data cache write-back, cleaning and coherency Counts write-backs from the level 2 cache that are a result of either: 1" + }, + {.name = "L2D_CACHE_INVAL", + .modmsk = ARMV8_ATTRS, + .code = 0x58, + .desc = "Level 2 data cache invalidate Counts each explicit invalidation of a cache line in the level 2 cache by cache maintenance operations that operate by a virtual address, or by external coherency operations" + }, + {.name = "L2D_TLB_REFILL_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x5c, + .desc = "Level 2 data TLB refill, read Counts level 2 TLB refills caused by memory read operations from both data and instruction fetch except for those caused by TLB maintenance operations or hardware prefetches" + }, + {.name = "L2D_TLB_REFILL_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x5d, + .desc = "Level 2 data TLB refill, write Counts level 2 TLB refills caused by memory write operations from both data and instruction fetch except for those caused by TLB maintenance operations" + }, + {.name = "L2D_TLB_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x5e, + .desc = "Level 2 data TLB access, read Counts level 2 TLB accesses caused by memory read operations from both data and instruction fetch except for those caused by TLB maintenance operations" + }, + {.name = "L2D_TLB_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x5f, + .desc = "Level 2 data TLB access, write Counts level 2 TLB accesses caused by memory write operations from both data and instruction fetch except for those caused by TLB maintenance operations" + }, + {.name = "BUS_ACCESS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x60, + .desc = "Bus access, read Counts memory read transactions seen on the external bus" + }, + {.name = "BUS_ACCESS_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x61, + .desc = "Bus access, write Counts memory write transactions seen on the external bus" + }, + {.name = "MEM_ACCESS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x66, + .desc = "Data memory access, read Counts memory accesses issued by the CPU due to load operations" + }, + {.name = "MEM_ACCESS_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x67, + .desc = "Data memory access, write Counts memory accesses issued by the CPU due to store operations" + }, + {.name = "UNALIGNED_LD_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x68, + .desc = "Unaligned access, read Counts unaligned memory read operations issued by the CPU" + }, + {.name = "UNALIGNED_ST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x69, + .desc = "Unaligned access, write Counts unaligned memory write operations issued by the CPU" + }, + {.name = "UNALIGNED_LDST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6a, + .desc = "Unaligned access Counts unaligned memory operations issued by the CPU" + }, + {.name = "LDREX_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6c, + .desc = "Exclusive operation speculatively executed, Load-Exclusive Counts Load-Exclusive operations that have been speculatively executed" + }, + {.name = "STREX_PASS_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6d, + .desc = "Exclusive operation speculatively executed, Store-Exclusive pass Counts store-exclusive operations that have been speculatively executed and have successfully completed the store operation" + 
}, + {.name = "STREX_FAIL_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6e, + .desc = "Exclusive operation speculatively executed, Store-Exclusive fail Counts store-exclusive operations that have been speculatively executed and have not successfully completed the store operation" + }, + {.name = "STREX_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x6f, + .desc = "Exclusive operation speculatively executed, Store-Exclusive Counts store-exclusive operations that have been speculatively executed" + }, + {.name = "LD_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x70, + .desc = "Operation speculatively executed, load Counts speculatively executed load operations including Single Instruction Multiple Data (SIMD) load operations" + }, + {.name = "ST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x71, + .desc = "Operation speculatively executed, store Counts speculatively executed store operations including Single Instruction Multiple Data (SIMD) store operations" + }, + {.name = "LDST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x72, + .desc = "Operation speculatively executed, load or store Counts load and store operations that have been speculatively executed" + }, + {.name = "DP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x73, + .desc = "Operation speculatively executed, integer data processing Counts speculatively executed logical or arithmetic instructions such as MOV/MVN operations" + }, + {.name = "ASE_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x74, + .desc = "Operation speculatively executed, Advanced SIMD Counts speculatively executed Advanced SIMD operations excluding load, store and move micro-operations that move data to or from SIMD (vector) registers" + }, + {.name = "VFP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x75, + .desc = "Operation speculatively executed, scalar floating-point Counts speculatively executed floating point operations" + }, + {.name = "PC_WRITE_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x76, + .desc = "Operation speculatively executed, Software change of the PC Counts speculatively executed operations which cause software changes of the PC" + }, + {.name = "CRYPTO_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x77, + .desc = "Operation speculatively executed, Cryptographic instruction Counts speculatively executed cryptographic operations except for PMULL and VMULL operations" + }, + {.name = "BR_IMMED_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x78, + .desc = "Branch speculatively executed, immediate branch Counts direct branch operations which are speculatively executed" + }, + {.name = "BR_RETURN_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x79, + .desc = "Branch speculatively executed, procedure return Counts procedure return operations (RET, RETAA and RETAB) which are speculatively executed" + }, + {.name = "BR_INDIRECT_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7a, + .desc = "Branch speculatively executed, indirect branch Counts indirect branch operations including procedure returns, which are speculatively executed" + }, + {.name = "ISB_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7c, + .desc = "Barrier speculatively executed, ISB Counts ISB operations that are executed" + }, + {.name = "DSB_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7d, + .desc = "Barrier speculatively executed, DSB Counts DSB operations that are speculatively issued to Load/Store unit in the CPU" + }, + {.name = "DMB_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7e, + .desc = "Barrier speculatively executed, DMB Counts DMB operations that are speculatively issued to the Load/Store unit in the CPU" + }, + 
{.name = "CSDB_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x7f, + .desc = "Barrier speculatively executed, CSDB Counts CDSB operations that are speculatively issued to the Load/Store unit in the CPU" + }, + {.name = "EXC_UNDEF", + .modmsk = ARMV8_ATTRS, + .code = 0x81, + .desc = "Exception taken, other synchronous Counts the number of synchronous exceptions which are taken locally that are due to attempting to execute an instruction that is UNDEFINED" + }, + {.name = "EXC_SVC", + .modmsk = ARMV8_ATTRS, + .code = 0x82, + .desc = "Exception taken, Supervisor Call Counts SVC exceptions taken locally" + }, + {.name = "EXC_PABORT", + .modmsk = ARMV8_ATTRS, + .code = 0x83, + .desc = "Exception taken, Instruction Abort Counts synchronous exceptions that are taken locally and caused by Instruction Aborts" + }, + {.name = "EXC_DABORT", + .modmsk = ARMV8_ATTRS, + .code = 0x84, + .desc = "Exception taken, Data Abort or SError Counts exceptions that are taken locally and are caused by data aborts or SErrors" + }, + {.name = "EXC_IRQ", + .modmsk = ARMV8_ATTRS, + .code = 0x86, + .desc = "Exception taken, IRQ Counts IRQ exceptions including the virtual IRQs that are taken locally" + }, + {.name = "EXC_FIQ", + .modmsk = ARMV8_ATTRS, + .code = 0x87, + .desc = "Exception taken, FIQ Counts FIQ exceptions including the virtual FIQs that are taken locally" + }, + {.name = "EXC_SMC", + .modmsk = ARMV8_ATTRS, + .code = 0x88, + .desc = "Exception taken, Secure Monitor Call Counts SMC exceptions take to EL3" + }, + {.name = "EXC_HVC", + .modmsk = ARMV8_ATTRS, + .code = 0x8a, + .desc = "Exception taken, Hypervisor Call Counts HVC exceptions taken to EL2" + }, + {.name = "EXC_TRAP_PABORT", + .modmsk = ARMV8_ATTRS, + .code = 0x8b, + .desc = "Exception taken, Instruction Abort not Taken locally Counts exceptions which are traps not taken locally and are caused by Instruction Aborts" + }, + {.name = "EXC_TRAP_DABORT", + .modmsk = ARMV8_ATTRS, + .code = 0x8c, + .desc = "Exception taken, Data Abort or SError not Taken locally Counts exceptions which are traps not taken locally and are caused by Data Aborts or SError interrupts" + }, + {.name = "EXC_TRAP_OTHER", + .modmsk = ARMV8_ATTRS, + .code = 0x8d, + .desc = "Exception taken, other traps not Taken locally Counts the number of synchronous trap exceptions which are not taken locally and are not SVC, SMC, HVC, data aborts, Instruction Aborts, or interrupts" + }, + {.name = "EXC_TRAP_IRQ", + .modmsk = ARMV8_ATTRS, + .code = 0x8e, + .desc = "Exception taken, IRQ not Taken locally Counts IRQ exceptions including the virtual IRQs that are not taken locally" + }, + {.name = "EXC_TRAP_FIQ", + .modmsk = ARMV8_ATTRS, + .code = 0x8f, + .desc = "Exception taken, FIQ not Taken locally Counts FIQs which are not taken locally but taken from EL0, EL1, or EL2 to EL3 (which would be the normal behavior for FIQs when not executing in EL3)" + }, + {.name = "RC_LD_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x90, + .desc = "Release consistency operation speculatively executed, Load-Acquire Counts any load acquire operations that are speculatively executed" + }, + {.name = "RC_ST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x91, + .desc = "Release consistency operation speculatively executed, Store-Release Counts any store release operations that are speculatively executed" + }, + {.name = "SAMPLE_POP", + .modmsk = ARMV8_ATTRS, + .code = 0x4000, + .desc = "Sample Population" + }, + {.name = "SAMPLE_FEED", + .modmsk = ARMV8_ATTRS, + .code = 0x4001, + .desc = "Sample Taken" + }, + {.name = 
"SAMPLE_FILTRATE", + .modmsk = ARMV8_ATTRS, + .code = 0x4002, + .desc = "Sample Taken and not removed by filtering" + }, + {.name = "SAMPLE_COLLISION", + .modmsk = ARMV8_ATTRS, + .code = 0x4003, + .desc = "Sample collided with previous sample" + }, + {.name = "CNT_CYCLES", + .modmsk = ARMV8_ATTRS, + .code = 0x4004, + .desc = "Constant frequency cycles Increments at a constant frequency equal to the rate of increment of the System Counter, CNTPCT_EL0" + }, + {.name = "STALL_BACKEND_MEM", + .modmsk = ARMV8_ATTRS, + .code = 0x4005, + .desc = "Memory stall cycles Counts cycles when the backend is stalled because there is a pending demand load request in progress in the last level core cache" + }, + {.name = "L1I_CACHE_LMISS", + .modmsk = ARMV8_ATTRS, + .code = 0x4006, + .desc = "Level 1 instruction cache long-latency miss Counts cache line refills into the level 1 instruction cache, that incurred additional latency" + }, + {.name = "L2D_CACHE_LMISS_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x4009, + .desc = "Level 2 data cache long-latency read miss Counts cache line refills into the level 2 unified cache from any memory read operations that incurred additional latency" + }, + {.name = "LDST_ALIGN_LAT", + .modmsk = ARMV8_ATTRS, + .code = 0x4020, + .desc = "Access with additional latency from alignment Counts the number of memory read and write accesses in a cycle that incurred additional latency, due to the alignment of the address and the size of data being accessed, which results in store crossing a single cache line" + }, + {.name = "LD_ALIGN_LAT", + .modmsk = ARMV8_ATTRS, + .code = 0x4021, + .desc = "Load with additional latency from alignment Counts the number of memory read accesses in a cycle that incurred additional latency, due to the alignment of the address and size of data being accessed, which results in load crossing a single cache line" + }, + {.name = "ST_ALIGN_LAT", + .modmsk = ARMV8_ATTRS, + .code = 0x4022, + .desc = "Store with additional latency from alignment Counts the number of memory write access in a cycle that incurred additional latency, due to the alignment of the address and size of data being accessed incurred additional latency" + }, + {.name = "MEM_ACCESS_CHECKED", + .modmsk = ARMV8_ATTRS, + .code = 0x4024, + .desc = "Checked data memory access Counts the number of memory read and write accesses counted by MEM_ACCESS that are tag checked by the Memory Tagging Extension (MTE)" + }, + {.name = "MEM_ACCESS_CHECKED_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x4025, + .desc = "Checked data memory access, read Counts the number of memory read accesses in a cycle that are tag checked by the Memory Tagging Extension (MTE)" + }, + {.name = "MEM_ACCESS_CHECKED_WR", + .modmsk = ARMV8_ATTRS, + .code = 0x4026, + .desc = "Checked data memory access, write Counts the number of memory write accesses in a cycle that is tag checked by the Memory Tagging Extension (MTE)" + }, + {.name = "SIMD_INST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8004, + .desc = "Operation speculatively executed, SIMD Counts speculatively executed operations that are SIMD or SVE vector operations or Advanced SIMD non-scalar operations" + }, + {.name = "ASE_INST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8005, + .desc = "Operation speculatively executed, Advanced SIMD Counts speculatively executed Advanced SIMD operations" + }, + {.name = "SVE_INST_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8006, + .desc = "Operation speculatively executed, SVE, including load and store Counts speculatively executed 
operations that are SVE operations" + }, + {.name = "FP_HP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8014, + .desc = "Floating-point operation speculatively executed, half precision Counts speculatively executed half precision floating point operations" + }, + {.name = "FP_SP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8018, + .desc = "Floating-point operation speculatively executed, single precision Counts speculatively executed single precision floating point operations" + }, + {.name = "FP_DP_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x801c, + .desc = "Floating-point operation speculatively executed, double precision Counts speculatively executed double precision floating point operations" + }, + {.name = "INT_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8040, + .desc = "Integer operation speculatively executed Counts speculatively executed integer arithmetic operations" + }, + {.name = "SVE_PRED_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8074, + .desc = "Operation speculatively executed, SVE predicated Counts speculatively executed predicated SVE operations" + }, + {.name = "SVE_PRED_EMPTY_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8075, + .desc = "Operation speculatively executed, SVE predicated with no active predicates Counts speculatively executed predicated SVE operations with no active predicate elements" + }, + {.name = "SVE_PRED_FULL_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8076, + .desc = "Operation speculatively executed, SVE predicated with all active predicates Counts speculatively executed predicated SVE operations with all predicate elements active" + }, + {.name = "SVE_PRED_PARTIAL_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8077, + .desc = "Operation speculatively executed, SVE predicated with partially active predicates Counts speculatively executed predicated SVE operations with at least one but not all active predicate elements" + }, + {.name = "SVE_PRED_NOT_FULL_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8079, + .desc = "SVE predicated operations speculatively executed with no active or partially active predicates Counts speculatively executed predicated SVE operations with at least one non active predicate elements" + }, + {.name = "PRF_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x8087, + .desc = "Operation speculatively executed, Prefetch Counts speculatively executed operations that prefetch memory" + }, + {.name = "SVE_LDFF_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x80bc, + .desc = "Operation speculatively executed, SVE first-fault load Counts speculatively executed SVE first fault or non-fault load operations" + }, + {.name = "SVE_LDFF_FAULT_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x80bd, + .desc = "Operation speculatively executed, SVE first-fault load which set FFR bit to 0b0 Counts speculatively executed SVE first fault or non-fault load operations that clear at least one bit in the FFR" + }, + {.name = "FP_SCALE_OPS_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x80c0, + .desc = "Scalable floating-point element ALU operations speculatively executed Counts speculatively executed scalable single precision floating point operations" + }, + {.name = "FP_FIXED_OPS_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x80c1, + .desc = "Non-scalable floating-point element ALU operations speculatively executed Counts speculatively executed non-scalable single precision floating point operations" + }, + {.name = "ASE_SVE_INT8_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x80e3, + .desc = "Integer operation speculatively executed, Advanced SIMD or SVE 8-bit Counts 
speculatively executed Advanced SIMD or SVE integer operations with the largest data type an 8-bit integer" + }, + {.name = "ASE_SVE_INT16_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x80e7, + .desc = "Integer operation speculatively executed, Advanced SIMD or SVE 16-bit Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type a 16-bit integer" + }, + {.name = "ASE_SVE_INT32_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x80eb, + .desc = "Integer operation speculatively executed, Advanced SIMD or SVE 32-bit Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type a 32-bit integer" + }, + {.name = "ASE_SVE_INT64_SPEC", + .modmsk = ARMV8_ATTRS, + .code = 0x80ef, + .desc = "Integer operation speculatively executed, Advanced SIMD or SVE 64-bit Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type a 64-bit integer" + }, + {.name = "BR_IMMED_TAKEN_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8108, + .desc = "Branch instruction architecturally executed, immediate, taken Counts architecturally executed direct branches that were taken" + }, + {.name = "BR_INDNR_TAKEN_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x810c, + .desc = "Branch instruction architecturally executed, indirect excluding procedure return, taken Counts architecturally executed indirect branches excluding procedure returns that were taken" + }, + {.name = "BR_IMMED_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8110, + .desc = "Branch instruction architecturally executed, predicted immediate Counts architecturally executed direct branches that were correctly predicted" + }, + {.name = "BR_IMMED_MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8111, + .desc = "Branch instruction architecturally executed, mispredicted immediate Counts architecturally executed direct branches that were mispredicted and caused a pipeline flush" + }, + {.name = "BR_IND_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8112, + .desc = "Branch instruction architecturally executed, predicted indirect Counts architecturally executed indirect branches including procedure returns that were correctly predicted" + }, + {.name = "BR_IND_MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8113, + .desc = "Branch instruction architecturally executed, mispredicted indirect Counts architecturally executed indirect branches including procedure returns that were mispredicted and caused a pipeline flush" + }, + {.name = "BR_RETURN_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8114, + .desc = "Branch instruction architecturally executed, predicted procedure return Counts architecturally executed procedure returns that were correctly predicted" + }, + {.name = "BR_RETURN_MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8115, + .desc = "Branch instruction architecturally executed, mispredicted procedure return Counts architecturally executed procedure returns that were mispredicted and caused a pipeline flush" + }, + {.name = "BR_INDNR_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8116, + .desc = "Branch instruction architecturally executed, predicted indirect excluding procedure return Counts architecturally executed indirect branches excluding procedure returns that were correctly predicted" + }, + {.name = "BR_INDNR_MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8117, + .desc = "Branch instruction architecturally executed, mispredicted indirect excluding procedure return Counts architecturally executed 
indirect branches excluding procedure returns that were mispredicted and caused a pipeline flush" + }, + {.name = "BR_TAKEN_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8118, + .desc = "Branch instruction architecturally executed, predicted branch, taken Counts architecturally executed branches that were taken and were correctly predicted" + }, + {.name = "BR_TAKEN_MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x8119, + .desc = "Branch instruction architecturally executed, mispredicted branch, taken Counts architecturally executed branches that were taken and were mispredicted causing a pipeline flush" + }, + {.name = "BR_SKIP_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x811a, + .desc = "Branch instruction architecturally executed, predicted branch, not taken Counts architecturally executed branches that were not taken and were correctly predicted" + }, + {.name = "BR_SKIP_MIS_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x811b, + .desc = "Branch instruction architecturally executed, mispredicted branch, not taken Counts architecturally executed branches that were not taken and were mispredicted causing a pipeline flush" + }, + {.name = "BR_PRED_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x811c, + .desc = "Branch instruction architecturally executed, predicted branch Counts branch instructions counted by BR_RETIRED which were correctly predicted" + }, + {.name = "BR_IND_RETIRED", + .modmsk = ARMV8_ATTRS, + .code = 0x811d, + .desc = "Instruction architecturally executed, indirect branch Counts architecturally executed indirect branches including procedure returns" + }, + {.name = "INST_FETCH_PERCYC", + .modmsk = ARMV8_ATTRS, + .code = 0x8120, + .desc = "Event in progress, INST FETCH Counts number of instruction fetches outstanding per cycle, which will provide an average latency of instruction fetch" + }, + {.name = "MEM_ACCESS_RD_PERCYC", + .modmsk = ARMV8_ATTRS, + .code = 0x8121, + .desc = "Event in progress, MEM ACCESS RD Counts the number of outstanding loads or memory read accesses per cycle" + }, + {.name = "INST_FETCH", + .modmsk = ARMV8_ATTRS, + .code = 0x8124, + .desc = "Instruction memory access Counts Instruction memory accesses that the PE makes" + }, + {.name = "DTLB_WALK_PERCYC", + .modmsk = ARMV8_ATTRS, + .code = 0x8128, + .desc = "Event in progress, DTLB WALK Counts the number of data translation table walks in progress per cycle" + }, + {.name = "ITLB_WALK_PERCYC", + .modmsk = ARMV8_ATTRS, + .code = 0x8129, + .desc = "Event in progress, ITLB WALK Counts the number of instruction translation table walks in progress per cycle" + }, + {.name = "SAMPLE_FEED_BR", + .modmsk = ARMV8_ATTRS, + .code = 0x812a, + .desc = "Statistical Profiling sample taken, branch Counts statistical profiling samples taken which are branches" + }, + {.name = "SAMPLE_FEED_LD", + .modmsk = ARMV8_ATTRS, + .code = 0x812b, + .desc = "Statistical Profiling sample taken, load Counts statistical profiling samples taken which are loads or load atomic operations" + }, + {.name = "SAMPLE_FEED_ST", + .modmsk = ARMV8_ATTRS, + .code = 0x812c, + .desc = "Statistical Profiling sample taken, store Counts statistical profiling samples taken which are stores or store atomic operations" + }, + {.name = "SAMPLE_FEED_OP", + .modmsk = ARMV8_ATTRS, + .code = 0x812d, + .desc = "Statistical Profiling sample taken, matching operation type Counts statistical profiling samples taken which are matching any operation type filters supported" + }, + {.name = "SAMPLE_FEED_EVENT", + .modmsk = ARMV8_ATTRS, + 
.code = 0x812e, + .desc = "Statistical Profiling sample taken, matching events Counts statistical profiling samples taken which are matching event packet filter constraints" + }, + {.name = "SAMPLE_FEED_LAT", + .modmsk = ARMV8_ATTRS, + .code = 0x812f, + .desc = "Statistical Profiling sample taken, exceeding minimum latency Counts statistical profiling samples taken which are exceeding minimum latency set by operation latency filter constraints" + }, + {.name = "L1D_TLB_RW", + .modmsk = ARMV8_ATTRS, + .code = 0x8130, + .desc = "Level 1 data TLB demand access Counts level 1 data TLB demand accesses caused by memory read or write operations" + }, + {.name = "L1I_TLB_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x8131, + .desc = "Level 1 instruction TLB demand access Counts level 1 instruction TLB demand accesses whether the access hits or misses in the TLB" + }, + {.name = "L1D_TLB_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x8132, + .desc = "Level 1 data TLB software preload Counts level 1 data TLB accesses generated by software prefetch or preload memory accesses" + }, + {.name = "L1I_TLB_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x8133, + .desc = "Level 1 instruction TLB software preload Counts level 1 instruction TLB accesses generated by software preload or prefetch instructions" + }, + {.name = "DTLB_HWUPD", + .modmsk = ARMV8_ATTRS, + .code = 0x8134, + .desc = "Data TLB hardware update of translation table Counts number of memory accesses triggered by a data translation table walk and performing an update of a translation table entry" + }, + {.name = "ITLB_HWUPD", + .modmsk = ARMV8_ATTRS, + .code = 0x8135, + .desc = "Instruction TLB hardware update of translation table Counts number of memory accesses triggered by an instruction translation table walk and performing an update of a translation table entry" + }, + {.name = "DTLB_STEP", + .modmsk = ARMV8_ATTRS, + .code = 0x8136, + .desc = "Data TLB translation table walk, step Counts number of memory accesses triggered by a demand data translation table walk and performing a read of a translation table entry" + }, + {.name = "ITLB_STEP", + .modmsk = ARMV8_ATTRS, + .code = 0x8137, + .desc = "Instruction TLB translation table walk, step Counts number of memory accesses triggered by an instruction translation table walk and performing a read of a translation table entry" + }, + {.name = "DTLB_WALK_LARGE", + .modmsk = ARMV8_ATTRS, + .code = 0x8138, + .desc = "Data TLB large page translation table walk Counts number of demand data translation table walks caused by a miss in the L2 TLB and yielding a large page" + }, + {.name = "ITLB_WALK_LARGE", + .modmsk = ARMV8_ATTRS, + .code = 0x8139, + .desc = "Instruction TLB large page translation table walk Counts number of instruction translation table walks caused by a miss in the L2 TLB and yielding a large page" + }, + {.name = "DTLB_WALK_SMALL", + .modmsk = ARMV8_ATTRS, + .code = 0x813a, + .desc = "Data TLB small page translation table walk Counts number of data translation table walks caused by a miss in the L2 TLB and yielding a small page" + }, + {.name = "ITLB_WALK_SMALL", + .modmsk = ARMV8_ATTRS, + .code = 0x813b, + .desc = "Instruction TLB small page translation table walk Counts number of instruction translation table walks caused by a miss in the L2 TLB and yielding a small page" + }, + {.name = "DTLB_WALK_RW", + .modmsk = ARMV8_ATTRS, + .code = 0x813c, + .desc = "Data TLB demand access with at least one translation table walk Counts number of demand data translation table walks caused by a miss in 
the L2 TLB and performing at least one memory access" + }, + {.name = "ITLB_WALK_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x813d, + .desc = "Instruction TLB demand access with at least one translation table walk Counts number of demand instruction translation table walks caused by a miss in the L2 TLB and performing at least one memory access" + }, + {.name = "DTLB_WALK_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x813e, + .desc = "Data TLB software preload access with at least one translation table walk Counts number of software prefetches or preloads generated data translation table walks caused by a miss in the L2 TLB and performing at least one memory access" + }, + {.name = "ITLB_WALK_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x813f, + .desc = "Instruction TLB software preload access with at least one translation table walk Counts number of software prefetches or preloads generated instruction translation table walks caused by a miss in the L2 TLB and performing at least one memory access" + }, + {.name = "L1D_CACHE_RW", + .modmsk = ARMV8_ATTRS, + .code = 0x8140, + .desc = "Level 1 data cache demand access Counts level 1 data demand cache accesses from any load or store operation" + }, + {.name = "L1I_CACHE_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x8141, + .desc = "Level 1 instruction cache demand fetch Counts demand instruction fetches which access the level 1 instruction cache" + }, + {.name = "L1D_CACHE_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x8142, + .desc = "Level 1 data cache software preload Counts level 1 data cache accesses from software preload or prefetch instructions" + }, + {.name = "L1I_CACHE_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x8143, + .desc = "Level 1 instruction cache software preload Counts instruction fetches generated by software preload or prefetch instructions which access the level 1 instruction cache" + }, + {.name = "L1D_CACHE_MISS", + .modmsk = ARMV8_ATTRS, + .code = 0x8144, + .desc = "Level 1 data cache demand access miss Counts cache line misses in the level 1 data cache" + }, + {.name = "L1I_CACHE_HWPRF", + .modmsk = ARMV8_ATTRS, + .code = 0x8145, + .desc = "Level 1 instruction cache hardware prefetch Counts instruction fetches which access the level 1 instruction cache generated by the hardware prefetcher" + }, + {.name = "L1D_CACHE_REFILL_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x8146, + .desc = "Level 1 data cache refill, software preload Counts level 1 data cache refills where the cache line access was generated by software preload or prefetch instructions" + }, + {.name = "L1I_CACHE_REFILL_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x8147, + .desc = "Level 1 instruction cache refill, software preload Counts cache line refills in the level 1 instruction cache caused by a missed instruction fetch generated by software preload or prefetch instructions" + }, + {.name = "L2D_CACHE_RW", + .modmsk = ARMV8_ATTRS, + .code = 0x8148, + .desc = "Level 2 data cache demand access Counts level 2 cache demand accesses from any load/store operations" + }, + {.name = "L2D_CACHE_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x814a, + .desc = "Level 2 data cache software preload Counts level 2 data cache accesses generated by software preload or prefetch instructions" + }, + {.name = "L2D_CACHE_MISS", + .modmsk = ARMV8_ATTRS, + .code = 0x814c, + .desc = "Level 2 data cache demand access miss Counts cache line misses in the level 2 cache" + }, + {.name = "L2D_CACHE_REFILL_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x814e, + .desc = "Level 2 data cache refill, software 
preload Counts refills due to accesses generated as a result of software preload or prefetch instructions as counted by L2D_CACHE_PRFM" + }, + {.name = "L1D_CACHE_HWPRF", + .modmsk = ARMV8_ATTRS, + .code = 0x8154, + .desc = "Level 1 data cache hardware prefetch Counts level 1 data cache accesses from any load/store operations generated by the hardware prefetcher" + }, + {.name = "L2D_CACHE_HWPRF", + .modmsk = ARMV8_ATTRS, + .code = 0x8155, + .desc = "Level 2 data cache hardware prefetch Counts level 2 data cache accesses generated by L2D hardware prefetchers" + }, + {.name = "STALL_FRONTEND_MEMBOUND", + .modmsk = ARMV8_ATTRS, + .code = 0x8158, + .desc = "Frontend stall cycles, memory bound Counts cycles when the frontend could not send any micro-operations to the rename stage due to resource constraints in the memory resources" + }, + {.name = "STALL_FRONTEND_L1I", + .modmsk = ARMV8_ATTRS, + .code = 0x8159, + .desc = "Frontend stall cycles, level 1 instruction cache Counts cycles when the frontend is stalled because there is an instruction fetch request pending in the level 1 instruction cache" + }, + {.name = "STALL_FRONTEND_MEM", + .modmsk = ARMV8_ATTRS, + .code = 0x815b, + .desc = "Frontend stall cycles, last level PE cache or memory Counts cycles when the frontend is stalled because there is an instruction fetch request pending in the last level core cache" + }, + {.name = "STALL_FRONTEND_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x815c, + .desc = "Frontend stall cycles, TLB Counts when the frontend is stalled on any TLB misses being handled" + }, + {.name = "STALL_FRONTEND_CPUBOUND", + .modmsk = ARMV8_ATTRS, + .code = 0x8160, + .desc = "Frontend stall cycles, processor bound Counts cycles when the frontend could not send any micro-operations to the rename stage due to resource constraints in the CPU resources excluding memory resources" + }, + {.name = "STALL_FRONTEND_FLOW", + .modmsk = ARMV8_ATTRS, + .code = 0x8161, + .desc = "Frontend stall cycles, flow control Counts cycles when the frontend could not send any micro-operations to the rename stage due to resource constraints in the branch prediction unit" + }, + {.name = "STALL_FRONTEND_FLUSH", + .modmsk = ARMV8_ATTRS, + .code = 0x8162, + .desc = "Frontend stall cycles, flush recovery Counts cycles when the frontend could not send any micro-operations to the rename stage as the frontend is recovering from a machine flush or resteer" + }, + {.name = "STALL_BACKEND_MEMBOUND", + .modmsk = ARMV8_ATTRS, + .code = 0x8164, + .desc = "Backend stall cycles, memory bound Counts cycles when the backend could not accept any micro-operations due to resource constraints in the memory resources" + }, + {.name = "STALL_BACKEND_L1D", + .modmsk = ARMV8_ATTRS, + .code = 0x8165, + .desc = "Backend stall cycles, level 1 data cache Counts cycles when the backend is stalled because there is a pending demand load request in progress in the level 1 data cache" + }, + {.name = "STALL_BACKEND_L2D", + .modmsk = ARMV8_ATTRS, + .code = 0x8166, + .desc = "Backend stall cycles, level 2 data cache Counts cycles when the backend is stalled because there is a pending demand load request in progress in the level 2 data cache" + }, + {.name = "STALL_BACKEND_TLB", + .modmsk = ARMV8_ATTRS, + .code = 0x8167, + .desc = "Backend stall cycles, TLB Counts cycles when the backend is stalled on any demand TLB misses being handled" + }, + {.name = "STALL_BACKEND_ST", + .modmsk = ARMV8_ATTRS, + .code = 0x8168, + .desc = "Backend stall cycles, store Counts cycles when the backend 
is stalled and there is a store that has not reached the pre-commit stage" + }, + {.name = "STALL_BACKEND_CPUBOUND", + .modmsk = ARMV8_ATTRS, + .code = 0x816a, + .desc = "Backend stall cycles, processor bound Counts cycles when the backend could not accept any micro-operations due to any resource constraints in the CPU excluding memory resources" + }, + {.name = "STALL_BACKEND_BUSY", + .modmsk = ARMV8_ATTRS, + .code = 0x816b, + .desc = "Backend stall cycles, backend busy Counts cycles when the backend could not accept any micro-operations because the issue queues are full to take any operations for execution" + }, + {.name = "STALL_BACKEND_ILOCK", + .modmsk = ARMV8_ATTRS, + .code = 0x816c, + .desc = "Backend stall cycles, input dependency Counts cycles when the backend could not accept any micro-operations due to resource constraints imposed by input dependency" + }, + {.name = "STALL_BACKEND_RENAME", + .modmsk = ARMV8_ATTRS, + .code = 0x816d, + .desc = "Backend stall cycles, rename full Counts cycles when backend is stalled even when operations are available from the frontend but at least one is not ready to be sent to the backend because no rename register is available" + }, + {.name = "L1I_CACHE_HIT_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x81c0, + .desc = "Level 1 instruction cache demand fetch hit Counts demand instruction fetches that access the level 1 instruction cache and hit in the L1 instruction cache" + }, + {.name = "L1I_CACHE_HIT_RD_FPRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x81d0, + .desc = "Level 1 instruction cache demand fetch first hit, fetched by software preload Counts demand instruction fetches that access the level 1 instruction cache that hit in the L1 instruction cache and the line was requested by a software prefetch" + }, + {.name = "L1I_CACHE_HIT_RD_FHWPRF", + .modmsk = ARMV8_ATTRS, + .code = 0x81e0, + .desc = "Level 1 instruction cache demand fetch first hit, fetched by hardware prefetcher Counts demand instruction fetches generated by hardware prefetch that access the level 1 instruction cache and hit in the L1 instruction cache" + }, + {.name = "L1I_CACHE_HIT", + .modmsk = ARMV8_ATTRS, + .code = 0x8200, + .desc = "Level 1 instruction cache hit Counts instruction fetches that access the level 1 instruction cache and hit in the level 1 instruction cache" + }, + {.name = "L1I_CACHE_HIT_PRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x8208, + .desc = "Level 1 instruction cache software preload hit Counts instruction fetches generated by software preload or prefetch instructions that access the level 1 instruction cache and hit in the level 1 instruction cache" + }, + {.name = "L1I_LFB_HIT_RD", + .modmsk = ARMV8_ATTRS, + .code = 0x8240, + .desc = "Level 1 instruction cache demand fetch line-fill buffer hit Counts demand instruction fetches that access the level 1 instruction cache and hit in a line that is in the process of being loaded into the level 1 instruction cache" + }, + {.name = "L1I_LFB_HIT_RD_FPRFM", + .modmsk = ARMV8_ATTRS, + .code = 0x8250, + .desc = "Level 1 instruction cache demand fetch line-fill buffer first hit, recently fetched by software preload Counts demand instruction fetches generated by software prefetch instructions that access the level 1 instruction cache and hit in a line that is in the process of being loaded into the level 1 instruction cache" + }, + {.name = "L1I_LFB_HIT_RD_FHWPRF", + .modmsk = ARMV8_ATTRS, + .code = 0x8260, + .desc = "Level 1 instruction cache demand fetch line-fill buffer first hit, recently fetched by hardware 
prefetcher Counts demand instruction fetches generated by hardware prefetch that access the level 1 instruction cache and hit in a line that is in the process of being loaded into the level 1 instruction cache" + }, + /* END Neoverse V3 specific events */ +}; diff --git a/src/libpfm4/lib/events/power10_events.h b/src/libpfm4/lib/events/power10_events.h index 2f2ad527e..e55caf5e8 100644 --- a/src/libpfm4/lib/events/power10_events.h +++ b/src/libpfm4/lib/events/power10_events.h @@ -1,12 +1,12 @@ /* * File: power10_events.h -* (C) Copyright IBM Corporation, 2021-2022. All Rights Reserved. +* (C) Copyright IBM Corporation, 2023-2024. All Rights Reserved. * Author: Will Schmidt * will_schmidt@vnet.ibm.com * Author: Carl Love * cel@us.ibm.com # -* Content reworked May 11, 2022, - Will Schmidt. +* Content reworked Aug 12, 2024, - Sachin Monga, Jeevitha P. * This file was automatically generated from event lists as * provided by the IBM PowerPC PMU team. Any manual * updates should be clearly marked so they are not lost in @@ -90,11 +90,21 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "NA;A conditional branch finished with mispredicted direction using the Global Branch History Table.", .pme_long_desc = "NA;A conditional branch finished with mispredicted direction using the Global Branch History Table. Resolved not taken", }, + {.pme_name = "PM_BR_COND_CMPL", + .pme_code = 0x4E058, + .pme_short_desc = "frontend;A conditional branch completed.", + .pme_long_desc = "frontend;A conditional branch completed.", + }, {.pme_name = "PM_BR_MPRED_NTKN_COND_DIR_LBHT_GSEL", .pme_code = 0x000000E080, .pme_short_desc = "NA;A conditional branch finished with mispredicted direction using the Local Branch History Table selected with the global selector.", .pme_long_desc = "NA;A conditional branch finished with mispredicted direction using the Local Branch History Table selected with the global selector. 
Resolved not taken", }, + {.pme_name = "PM_BR_TKN_FIN", + .pme_code = 0x00000040B4, + .pme_short_desc = "frontend; A taken branch (conditional or unconditional) finished", + .pme_long_desc = "frontend;A taken branch (conditional or unconditional) finished", + }, {.pme_name = "PM_BR_MPRED_NTKN_COND_DIR_LBHT_LSEL", .pme_code = 0x00000058BC, .pme_short_desc = "NA;A conditional branch finished with mispredicted direction using the Local Branch History Table selected by the local selector.", @@ -470,6 +480,26 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded with a valid line that was not in the M (exclusive) state from another core's L2 on the same chip in the same regent due to a demand miss.", .pme_long_desc = "Data Source;The processor's L1 data cache was reloaded with a valid line that was not in the M (exclusive) state from another core's L2 on the same chip in the same regent due to a demand miss.", }, + {.pme_name = "PM_DATA_FROM_L1MISS", + .pme_code = 0x003F40000001C040, + .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + .pme_long_desc = "Data Source;The processor's L1 data cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + }, + {.pme_name = "PM_DATA_FROM_L1MISS_ALT2", + .pme_code = 0x003F40000002C040, + .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + .pme_long_desc = "Data Source;The processor's L1 data cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + }, + {.pme_name = "PM_DATA_FROM_L1MISS_ALT3", + .pme_code = 0x003F40000003C040, + .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + .pme_long_desc = "Data Source;The processor's L1 data cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + }, + {.pme_name = "PM_DATA_FROM_L1MISS_ALT4", + .pme_code = 0x003F40000004C040, + .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + .pme_long_desc = "Data Source;The processor's L1 data cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + }, {.pme_name = "PM_DATA_FROM_L2MISS", .pme_code = 0x0003C0000001C040, .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss.", @@ -510,6 +540,11 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded from the local core's L2 due to a demand miss.", .pme_long_desc = "Data Source;The processor's L1 data cache was reloaded from the local core's L2 due to a demand miss.", }, + {.pme_name = "PM_ST_DATA_FROM_L2", + .pme_code = 0x0C0000016080, + .pme_short_desc = "Data Source;Store data line hit in the local L2. Includes cache-line states Sx, Tx, Mx.", + .pme_long_desc = "Data Source;Store data line hit in the local L2. 
Includes cache-line states Sx, Tx, Mx.Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2.", + }, {.pme_name = "PM_DATA_FROM_L31_NON_REGENT_MOD", .pme_code = 0x0AC040000001C040, .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded with a line in the M (exclusive) state from another core's L3 on the same chip in a different regent due to a demand miss.", @@ -650,6 +685,11 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded from the local core's L3 due to a demand miss.", .pme_long_desc = "Data Source;The processor's L1 data cache was reloaded from the local core's L3 due to a demand miss.", }, + {.pme_name = "PM_ST_DATA_FROM_L3", + .pme_code = 0x0C0000016880, + .pme_short_desc = "Data Source;Store data line hit in the local L3. Includes cache-line states Tx and Mx.", + .pme_long_desc = "Data Source;Store data line hit in the local L3. Includes cache-line states Tx and Mx. If the cache line is in the Sx state, the RC machine will send a RWITM command. Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2.", + }, {.pme_name = "PM_DATA_FROM_LMEM", .pme_code = 0x094040000001C040, .pme_short_desc = "Data Source;The processor's L1 data cache was reloaded from the local chip's memory due to a demand miss.", @@ -1845,6 +1885,11 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "pipeline;Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU).", .pme_long_desc = "pipeline;Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU).", }, + {.pme_name = "PM_EXT_INT", + .pme_code = 0x200F8, + .pme_short_desc = "pipeline;Cycles an external interrupt was active", + .pme_long_desc = "pipeline;Cycles an external interrupt was active", + }, {.pme_name = "PM_FLOP_CMPL", .pme_code = 0x100F4, .pme_short_desc = "floating point;Floating Point Operations Completed.", @@ -1915,6 +1960,16 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "pmc;PowerPC instruction completed", .pme_long_desc = "pmc;PowerPC instruction completed", }, + {.pme_name = "PM_INST_DISP", + .pme_code = 0x200F2, + .pme_short_desc = "frontend;PowerPC instruction dispatched", + .pme_long_desc = "frontend;PowerPC instruction dispatched", + }, + {.pme_name = "PM_INST_DISP_ALT", + .pme_code = 0x300F2, + .pme_short_desc = "frontend;PowerPC instruction dispatched", + .pme_long_desc = "frontend;PowerPC instruction dispatched", + }, {.pme_name = "PM_INST_CMPL_ALT2", .pme_code = 0x20002, .pme_short_desc = "pmc;PowerPC instruction completed", @@ -1935,6 +1990,31 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "pmc;Instruction finished", .pme_long_desc = "pmc;Instruction finished", }, + {.pme_name = "PM_INST_FROM_L1", + .pme_code = 0x0000004080, + .pme_short_desc = "NA;An instruction fetch hit in the L1.", + .pme_long_desc = "NA;An instruction fetch hit in the L1. Each fetch group contains 8 instructions. 
The same line can hit 4 times if 32 sequential instructions are fetched.", + }, + {.pme_name = "PM_INST_FROM_L1MISS", + .pme_code = 0x003F00000001C040, + .pme_short_desc = "NA;The processor's instruction cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + .pme_long_desc = "NA;The processor's instruction cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + }, + {.pme_name = "PM_INST_FROM_L1MISS_ALT2", + .pme_code = 0x003F00000002C040, + .pme_short_desc = "NA;The processor's instruction cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + .pme_long_desc = "NA;The processor's instruction cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + }, + {.pme_name = "PM_INST_FROM_L1MISS_ALT3", + .pme_code = 0x003F00000003C040, + .pme_short_desc = "NA;The processor's instruction cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + .pme_long_desc = "NA;The processor's instruction cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + }, + {.pme_name = "PM_INST_FROM_L1MISS_ALT4", + .pme_code = 0x003F00000004C040, + .pme_short_desc = "NA;The processor's instruction cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + .pme_long_desc = "NA;The processor's instruction cache was reloaded from a source beyond the local core's L1 due to a demand miss.", + }, {.pme_name = "PM_INST_FROM_DMEM", .pme_code = 0x0F4100000001C040, .pme_short_desc = "Data Source;The processor's instruction cache was reloaded from distant memory (MC slow) due to a demand miss.", @@ -2745,6 +2825,16 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "NA;All successful I-side-instruction-fetch (e.", .pme_long_desc = "NA;All successful I-side-instruction-fetch (e.g. i-demand, i-prefetch) dispatches for this thread. Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2.", }, + {.pme_name = "PM_L2_INST_MISS", + .pme_code = 0x000000036880, + .pme_short_desc = "NA;All successful instruction (demand and prefetch) dispatches for this thread that missed in the L2.", + .pme_long_desc = "NA;All successful instruction (demand and prefetch) dispatches for this thread that missed in the L2. Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2.", + }, + {.pme_name = "PM_L2_INST_MISS_ALT", + .pme_code = 0x0F0000046080, + .pme_short_desc = "NA;All successful instruction (demand and prefetch) dispatches for this thread that missed in the L2.", + .pme_long_desc = "NA;All successful instruction (demand and prefetch) dispatches for this thread that missed in the L2. Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2.", + }, {.pme_name = "PM_L2_ISIDE_DSIDE_ATTEMPT", .pme_code = 0x020000016080, .pme_short_desc = "NA;All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread.", @@ -2830,6 +2920,11 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "NA;All successful D-Side Store dispatches for this thread that missed in the L2.", .pme_long_desc = "NA;All successful D-Side Store dispatches for this thread that missed in the L2. 
Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2.", }, + {.pme_name = "PM_L2_ST_HIT", + .pme_code = 0x0F0000026880, + .pme_short_desc = "NA;All successful D-side store dispatches for this thread that were L2 hits", + .pme_long_desc = "NA;All successful D-side store dispatches for this thread that were L2 hits. Since the event happens in a 2:1 clock domain and is time-sliced across all 4 threads, the event count should be multiplied by 2.", + }, {.pme_name = "PM_L2_ST", .pme_code = 0x000000016880, .pme_short_desc = "NA;All successful D-side store dispatches for this thread (L2 miss + L2 hits).", @@ -2970,6 +3065,16 @@ static const pme_power_entry_t power10_pe[] = { .pme_short_desc = "pipeline;MMA instruction issued", .pme_long_desc = "pipeline;MMA instruction issued", }, + {.pme_name = "PM_PRED_BR_TKN_COND_DIR", + .pme_code = 0x00000040B8, + .pme_short_desc = "frontend;A conditional branch finished with correctly predicted direction.", + .pme_long_desc = "frontend;A conditional branch finished with correctly predicted direction. Resolved taken", + }, + {.pme_name = "PM_PRED_BR_NTKN_COND_DIR", + .pme_code = 0x00000048B8, + .pme_short_desc = "frontend;A conditional branch finished with correctly predicted direction.", + .pme_long_desc = "frontend;A conditional branch finished with correctly predicted direction. Resolved not taken", + }, {.pme_name = "PM_MPRED_BR_NTKN_COND_DIR", .pme_code = 0x00000048BC, .pme_short_desc = "NA;A conditional branch finished with mispredicted direction.", diff --git a/src/libpfm4/lib/pfmlib_arm_armv8.c b/src/libpfm4/lib/pfmlib_arm_armv8.c index 374aa2c4c..39d1a2db2 100644 --- a/src/libpfm4/lib/pfmlib_arm_armv8.c +++ b/src/libpfm4/lib/pfmlib_arm_armv8.c @@ -35,14 +35,14 @@ #include "events/arm_cortex_a57_events.h" /* A57 event tables */ #include "events/arm_cortex_a53_events.h" /* A53 event tables */ +#include "events/arm_cortex_a55_events.h" /* A55 event tables */ +#include "events/arm_cortex_a76_events.h" /* A76 event tables */ #include "events/arm_xgene_events.h" /* Applied Micro X-Gene tables */ #include "events/arm_cavium_tx2_events.h" /* Marvell ThunderX2 tables */ -#include "events/arm_marvell_tx2_unc_events.h" /* Marvell ThunderX2 PMU tables */ #include "events/arm_fujitsu_a64fx_events.h" /* Fujitsu A64FX PMU tables */ #include "events/arm_neoverse_n1_events.h" /* ARM Neoverse N1 table */ #include "events/arm_neoverse_v1_events.h" /* Arm Neoverse V1 table */ #include "events/arm_hisilicon_kunpeng_events.h" /* HiSilicon Kunpeng PMU tables */ -#include "events/arm_hisilicon_kunpeng_unc_events.h" /* Hisilicon Kunpeng PMU uncore tables */ static int pfm_arm_detect_n1(void *this) @@ -124,6 +124,38 @@ pfm_arm_detect_cortex_a53(void *this) return PFM_ERR_NOTSUPP; } +static int +pfm_arm_detect_cortex_a55(void *this) +{ + int ret; + + ret = pfm_arm_detect(this); + if (ret != PFM_SUCCESS) + return PFM_ERR_NOTSUPP; + + if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */ + (pfm_arm_cfg.part == 0xd05)) { /* Cortex A55 */ + return PFM_SUCCESS; + } + return PFM_ERR_NOTSUPP; +} + +static int +pfm_arm_detect_cortex_a76(void *this) +{ + int ret; + + ret = pfm_arm_detect(this); + if (ret != PFM_SUCCESS) + return PFM_ERR_NOTSUPP; + + if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */ + (pfm_arm_cfg.part == 0xd0b)) { /* Cortex A76 */ + return PFM_SUCCESS; + } + return PFM_ERR_NOTSUPP; +} + static int pfm_arm_detect_xgene(void *this) { @@ -270,6 +302,60 @@ pfmlib_pmu_t arm_cortex_a53_support={ 
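The two detection helpers above key the new PMUs off MIDR implementer 0x41 and part numbers 0xd05 (Cortex A55) and 0xd0b (Cortex A76). A minimal user-side sketch of how that support is expected to surface through the public libpfm4 API, assuming the PFM_PMU_ARM_CORTEX_A55/A76 enum values and the arm_ac55/arm_ac76 PMU names added by this update, and assuming the generated A76 table carries the architected CPU_CYCLES event:

/*
 * Sketch only: check whether the new Cortex A55/A76 PMU descriptors are
 * detected on the running machine and encode one event for perf_event_open().
 * PFM_PMU_ARM_CORTEX_A55/A76 and the "arm_ac76" name come from this update;
 * CPU_CYCLES is assumed to be present in the generated A76 event table.
 */
#include <stdio.h>
#include <string.h>
#include <perfmon/pfmlib.h>
#include <perfmon/pfmlib_perf_event.h>

static void show_pmu(pfm_pmu_t pmu)
{
        pfm_pmu_info_t pinfo;

        memset(&pinfo, 0, sizeof(pinfo));
        pinfo.size = sizeof(pinfo);

        if (pfm_get_pmu_info(pmu, &pinfo) == PFM_SUCCESS)
                printf("%-10s %-16s %s, %d events\n",
                       pinfo.name, pinfo.desc,
                       pinfo.is_present ? "present" : "not present",
                       pinfo.nevents);
}

int main(void)
{
        struct perf_event_attr attr;
        pfm_perf_encode_arg_t arg;
        int ret;

        if (pfm_initialize() != PFM_SUCCESS)
                return 1;

        show_pmu(PFM_PMU_ARM_CORTEX_A55);
        show_pmu(PFM_PMU_ARM_CORTEX_A76);

        memset(&attr, 0, sizeof(attr));
        memset(&arg, 0, sizeof(arg));
        arg.attr = &attr;
        arg.size = sizeof(arg);

        /* fully qualified event name, measured at user+kernel level */
        ret = pfm_get_os_event_encoding("arm_ac76::CPU_CYCLES",
                                        PFM_PLM0 | PFM_PLM3,
                                        PFM_OS_PERF_EVENT, &arg);
        if (ret == PFM_SUCCESS)
                printf("type=%u config=0x%llx\n", attr.type,
                       (unsigned long long)attr.config);
        else
                fprintf(stderr, "encoding failed: %s\n", pfm_strerror(ret));

        pfm_terminate();
        return 0;
}

On hardware whose MIDR does not report one of these parts, the detect callbacks return PFM_ERR_NOTSUPP, is_present should read 0, and the qualified encoding call is expected to fail rather than fall back to another PMU.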
.get_event_nattrs = pfm_arm_get_event_nattrs, }; +/* ARM Cortex A55 support */ +pfmlib_pmu_t arm_cortex_a55_support={ + .desc = "ARM Cortex A55", + .name = "arm_ac55", + .perf_name = "armv8_cortex_a55", + .pmu = PFM_PMU_ARM_CORTEX_A55, + .pme_count = LIBPFM_ARRAY_SIZE(arm_cortex_a55_pe), + .type = PFM_PMU_TYPE_CORE, + .supported_plm = ARMV8_PLM, + .pe = arm_cortex_a55_pe, + + .pmu_detect = pfm_arm_detect_cortex_a55, + .max_encoding = 1, + .num_cntrs = 6, + + .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, + PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), + .get_event_first = pfm_arm_get_event_first, + .get_event_next = pfm_arm_get_event_next, + .event_is_valid = pfm_arm_event_is_valid, + .validate_table = pfm_arm_validate_table, + .get_event_info = pfm_arm_get_event_info, + .get_event_attr_info = pfm_arm_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), + .get_event_nattrs = pfm_arm_get_event_nattrs, +}; + +/* ARM Cortex A76 support */ +pfmlib_pmu_t arm_cortex_a76_support={ + .desc = "ARM Cortex A76", + .name = "arm_ac76", + .perf_name = "armv8_cortex_a76", + .pmu = PFM_PMU_ARM_CORTEX_A76, + .pme_count = LIBPFM_ARRAY_SIZE(arm_cortex_a76_pe), + .type = PFM_PMU_TYPE_CORE, + .supported_plm = ARMV8_PLM, + .pe = arm_cortex_a76_pe, + + .pmu_detect = pfm_arm_detect_cortex_a76, + .max_encoding = 1, + .num_cntrs = 6, + + .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, + PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), + .get_event_first = pfm_arm_get_event_first, + .get_event_next = pfm_arm_get_event_next, + .event_is_valid = pfm_arm_event_is_valid, + .validate_table = pfm_arm_validate_table, + .get_event_info = pfm_arm_get_event_info, + .get_event_attr_info = pfm_arm_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), + .get_event_nattrs = pfm_arm_get_event_nattrs, +}; + /* Applied Micro X-Gene support */ pfmlib_pmu_t arm_xgene_support={ .desc = "Applied Micro X-Gene", @@ -374,223 +460,6 @@ pfmlib_pmu_t arm_hisilicon_kunpeng_support={ .get_event_nattrs = pfm_arm_get_event_nattrs, }; -/* Hisilicon Kunpeng support */ -// For uncore, each socket has a separate perf name, otherwise they are the same, use macro - -#define DEFINE_KUNPENG_DDRC(n,m) \ -pfmlib_pmu_t arm_hisilicon_kunpeng_sccl##n##_ddrc##m##_support={ \ - .desc = "Hisilicon Kunpeng SCCL"#n" DDRC"#m, \ - .name = "hisi_sccl"#n"_ddrc"#m, \ - .perf_name = "hisi_sccl"#n"_ddrc"#m, \ - .pmu = PFM_PMU_ARM_KUNPENG_UNC_SCCL##n##_DDRC##m, \ - .pme_count = LIBPFM_ARRAY_SIZE(arm_kunpeng_unc_ddrc_pe), \ - .type = PFM_PMU_TYPE_UNCORE, \ - .pe = arm_kunpeng_unc_ddrc_pe, \ - .pmu_detect = pfm_arm_detect_hisilicon_kunpeng, \ - .max_encoding = 1, \ - .num_cntrs = 4, \ - .get_event_encoding[PFM_OS_NONE] = pfm_kunpeng_unc_get_event_encoding, \ - PFMLIB_ENCODE_PERF(pfm_kunpeng_unc_get_perf_encoding), \ - .get_event_first = pfm_arm_get_event_first, \ - .get_event_next = pfm_arm_get_event_next, \ - .event_is_valid = pfm_arm_event_is_valid, \ - .validate_table = pfm_arm_validate_table, \ - .get_event_info = pfm_arm_get_event_info, \ - .get_event_attr_info = pfm_arm_get_event_attr_info, \ - PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), \ - .get_event_nattrs = pfm_arm_get_event_nattrs, \ -}; - -DEFINE_KUNPENG_DDRC(1,0); -DEFINE_KUNPENG_DDRC(1,1); -DEFINE_KUNPENG_DDRC(1,2); -DEFINE_KUNPENG_DDRC(1,3); -DEFINE_KUNPENG_DDRC(3,0); -DEFINE_KUNPENG_DDRC(3,1); -DEFINE_KUNPENG_DDRC(3,2); -DEFINE_KUNPENG_DDRC(3,3); -DEFINE_KUNPENG_DDRC(5,0); -DEFINE_KUNPENG_DDRC(5,1); -DEFINE_KUNPENG_DDRC(5,2); 
-DEFINE_KUNPENG_DDRC(5,3); -DEFINE_KUNPENG_DDRC(7,0); -DEFINE_KUNPENG_DDRC(7,1); -DEFINE_KUNPENG_DDRC(7,2); -DEFINE_KUNPENG_DDRC(7,3); - -#define DEFINE_KUNPENG_HHA(n,m) \ -pfmlib_pmu_t arm_hisilicon_kunpeng_sccl##n##_hha##m##_support={ \ - .desc = "Hisilicon Kunpeng SCCL"#n" HHA"#m, \ - .name = "hisi_sccl"#n"_hha"#m, \ - .perf_name = "hisi_sccl"#n"_hha"#m, \ - .pmu = PFM_PMU_ARM_KUNPENG_UNC_SCCL##n##_HHA##m, \ - .pme_count = LIBPFM_ARRAY_SIZE(arm_kunpeng_unc_hha_pe), \ - .type = PFM_PMU_TYPE_UNCORE, \ - .pe = arm_kunpeng_unc_hha_pe, \ - .pmu_detect = pfm_arm_detect_hisilicon_kunpeng, \ - .max_encoding = 1, \ - .num_cntrs = 4, \ - .get_event_encoding[PFM_OS_NONE] = pfm_kunpeng_unc_get_event_encoding, \ - PFMLIB_ENCODE_PERF(pfm_kunpeng_unc_get_perf_encoding), \ - .get_event_first = pfm_arm_get_event_first, \ - .get_event_next = pfm_arm_get_event_next, \ - .event_is_valid = pfm_arm_event_is_valid, \ - .validate_table = pfm_arm_validate_table, \ - .get_event_info = pfm_arm_get_event_info, \ - .get_event_attr_info = pfm_arm_get_event_attr_info, \ - PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), \ - .get_event_nattrs = pfm_arm_get_event_nattrs, \ -}; - -DEFINE_KUNPENG_HHA(1,2); -DEFINE_KUNPENG_HHA(1,3); -DEFINE_KUNPENG_HHA(3,0); -DEFINE_KUNPENG_HHA(3,1); -DEFINE_KUNPENG_HHA(5,6); -DEFINE_KUNPENG_HHA(5,7); -DEFINE_KUNPENG_HHA(7,4); -DEFINE_KUNPENG_HHA(7,5); - -#define DEFINE_KUNPENG_L3C(n,m) \ -pfmlib_pmu_t arm_hisilicon_kunpeng_sccl##n##_l3c##m##_support={ \ - .desc = "Hisilicon Kunpeng SCCL"#n" L3C"#m, \ - .name = "hisi_sccl"#n"_l3c"#m, \ - .perf_name = "hisi_sccl"#n"_l3c"#m, \ - .pmu = PFM_PMU_ARM_KUNPENG_UNC_SCCL##n##_L3C##m, \ - .pme_count = LIBPFM_ARRAY_SIZE(arm_kunpeng_unc_l3c_pe), \ - .type = PFM_PMU_TYPE_UNCORE, \ - .pe = arm_kunpeng_unc_l3c_pe, \ - .pmu_detect = pfm_arm_detect_hisilicon_kunpeng, \ - .max_encoding = 1, \ - .num_cntrs = 4, \ - .get_event_encoding[PFM_OS_NONE] = pfm_kunpeng_unc_get_event_encoding, \ - PFMLIB_ENCODE_PERF(pfm_kunpeng_unc_get_perf_encoding), \ - .get_event_first = pfm_arm_get_event_first, \ - .get_event_next = pfm_arm_get_event_next, \ - .event_is_valid = pfm_arm_event_is_valid, \ - .validate_table = pfm_arm_validate_table, \ - .get_event_info = pfm_arm_get_event_info, \ - .get_event_attr_info = pfm_arm_get_event_attr_info, \ - PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), \ - .get_event_nattrs = pfm_arm_get_event_nattrs, \ -}; - -DEFINE_KUNPENG_L3C(1,10); -DEFINE_KUNPENG_L3C(1,11); -DEFINE_KUNPENG_L3C(1,12); -DEFINE_KUNPENG_L3C(1,13); -DEFINE_KUNPENG_L3C(1,14); -DEFINE_KUNPENG_L3C(1,15); -DEFINE_KUNPENG_L3C(1,8); -DEFINE_KUNPENG_L3C(1,9); -DEFINE_KUNPENG_L3C(3,0); -DEFINE_KUNPENG_L3C(3,1); -DEFINE_KUNPENG_L3C(3,2); -DEFINE_KUNPENG_L3C(3,3); -DEFINE_KUNPENG_L3C(3,4); -DEFINE_KUNPENG_L3C(3,5); -DEFINE_KUNPENG_L3C(3,6); -DEFINE_KUNPENG_L3C(3,7); -DEFINE_KUNPENG_L3C(5,24); -DEFINE_KUNPENG_L3C(5,25); -DEFINE_KUNPENG_L3C(5,26); -DEFINE_KUNPENG_L3C(5,27); -DEFINE_KUNPENG_L3C(5,28); -DEFINE_KUNPENG_L3C(5,29); -DEFINE_KUNPENG_L3C(5,30); -DEFINE_KUNPENG_L3C(5,31); -DEFINE_KUNPENG_L3C(7,16); -DEFINE_KUNPENG_L3C(7,17); -DEFINE_KUNPENG_L3C(7,18); -DEFINE_KUNPENG_L3C(7,19); -DEFINE_KUNPENG_L3C(7,20); -DEFINE_KUNPENG_L3C(7,21); -DEFINE_KUNPENG_L3C(7,22); -DEFINE_KUNPENG_L3C(7,23); - -// For uncore, each socket has a separate perf name, otherwise they are the same, use macro - -#define DEFINE_TX2_DMC(n) \ -pfmlib_pmu_t arm_thunderx2_dmc##n##_support={ \ - .desc = "Marvell ThunderX2 Node"#n" DMC", \ - .name = "tx2_dmc"#n, \ - .perf_name = "uncore_dmc_"#n, 
\ - .pmu = PFM_PMU_ARM_THUNDERX2_DMC##n, \ - .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_dmc_pe), \ - .type = PFM_PMU_TYPE_UNCORE, \ - .pe = arm_thunderx2_unc_dmc_pe, \ - .pmu_detect = pfm_arm_detect_thunderx2, \ - .max_encoding = 1, \ - .num_cntrs = 4, \ - .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ - PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ - .get_event_first = pfm_arm_get_event_first, \ - .get_event_next = pfm_arm_get_event_next, \ - .event_is_valid = pfm_arm_event_is_valid, \ - .validate_table = pfm_arm_validate_table, \ - .get_event_info = pfm_arm_get_event_info, \ - .get_event_attr_info = pfm_arm_get_event_attr_info, \ - PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ - .get_event_nattrs = pfm_arm_get_event_nattrs, \ -}; - -DEFINE_TX2_DMC(0); -DEFINE_TX2_DMC(1); - -#define DEFINE_TX2_LLC(n) \ -pfmlib_pmu_t arm_thunderx2_llc##n##_support={ \ - .desc = "Marvell ThunderX2 node "#n" LLC", \ - .name = "tx2_llc"#n, \ - .perf_name = "uncore_l3c_"#n, \ - .pmu = PFM_PMU_ARM_THUNDERX2_LLC##n, \ - .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_llc_pe), \ - .type = PFM_PMU_TYPE_UNCORE, \ - .pe = arm_thunderx2_unc_llc_pe, \ - .pmu_detect = pfm_arm_detect_thunderx2, \ - .max_encoding = 1, \ - .num_cntrs = 4, \ - .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ - PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ - .get_event_first = pfm_arm_get_event_first, \ - .get_event_next = pfm_arm_get_event_next, \ - .event_is_valid = pfm_arm_event_is_valid, \ - .validate_table = pfm_arm_validate_table, \ - .get_event_info = pfm_arm_get_event_info, \ - .get_event_attr_info = pfm_arm_get_event_attr_info, \ - PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ - .get_event_nattrs = pfm_arm_get_event_nattrs, \ -}; - -DEFINE_TX2_LLC(0); -DEFINE_TX2_LLC(1); - -#define DEFINE_TX2_CCPI(n) \ -pfmlib_pmu_t arm_thunderx2_ccpi##n##_support={ \ - .desc = "Marvell ThunderX2 node "#n" Cross-Socket Interconnect", \ - .name = "tx2_ccpi"#n, \ - .perf_name = "uncore_ccpi2_"#n, \ - .pmu = PFM_PMU_ARM_THUNDERX2_CCPI##n, \ - .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_ccpi_pe), \ - .type = PFM_PMU_TYPE_UNCORE, \ - .pe = arm_thunderx2_unc_ccpi_pe, \ - .pmu_detect = pfm_arm_detect_thunderx2, \ - .max_encoding = 1, \ - .num_cntrs = 4, \ - .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ - PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ - .get_event_first = pfm_arm_get_event_first, \ - .get_event_next = pfm_arm_get_event_next, \ - .event_is_valid = pfm_arm_event_is_valid, \ - .validate_table = pfm_arm_validate_table, \ - .get_event_info = pfm_arm_get_event_info, \ - .get_event_attr_info = pfm_arm_get_event_attr_info, \ - PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ - .get_event_nattrs = pfm_arm_get_event_nattrs, \ -}; - -DEFINE_TX2_CCPI(0); -DEFINE_TX2_CCPI(1); - pfmlib_pmu_t arm_n1_support={ .desc = "ARM Neoverse N1", .name = "arm_n1", diff --git a/src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc.c b/src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc.c new file mode 100644 index 000000000..703b2a943 --- /dev/null +++ b/src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc.c @@ -0,0 +1,223 @@ +/* + * pfmlib_arm_armv8_kunpeng_unc.c : support for HiSilicon Kunpeng uncore PMUs + * + * Copyright (c) 2024 Google Inc. 
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" /* library private */ +#include "pfmlib_arm_priv.h" +#include "pfmlib_arm_armv8_unc_priv.h" + +#include "events/arm_hisilicon_kunpeng_unc_events.h" /* Hisilicon Kunpeng PMU uncore tables */ + +static int +pfm_arm_detect_hisilicon_kunpeng(void *this) +{ + int ret; + + ret = pfm_arm_detect(this); + if (ret != PFM_SUCCESS) + return PFM_ERR_NOTSUPP; + + if ((pfm_arm_cfg.implementer == 0x48) && /* Hisilicon */ + (pfm_arm_cfg.part == 0xd01)) { /* Kunpeng */ + return PFM_SUCCESS; + } + return PFM_ERR_NOTSUPP; +} + +static void +display_com(void *this, pfmlib_event_desc_t *e, void *val) +{ + const arm_entry_t *pe = this_pe(this); + kunpeng_unc_data_t *reg = val; + + __pfm_vbprintf("[UNC=0x%"PRIx64"] %s\n", + reg->val, + pe[e->event].name); +} + +static void +display_reg(void *this, pfmlib_event_desc_t *e, kunpeng_unc_data_t reg) +{ + pfmlib_pmu_t *pmu = this; + if (pmu->display_reg) + pmu->display_reg(this, e, ®); + else + display_com(this, e, ®); +} + +int +pfm_kunpeng_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e) +{ + //from pe field in for the uncore, get the array with all the event defs + const arm_entry_t *event_list = this_pe(this); + kunpeng_unc_data_t reg; + //get code for the event from the table + reg.val = event_list[e->event].code; + //pass the data back to the caller + e->codes[0] = reg.val; + e->count = 1; + evt_strcat(e->fstr, "%s", event_list[e->event].name); + display_reg(this, e, reg); + return PFM_SUCCESS; +} + + + +/* Hisilicon Kunpeng support */ +// For uncore, each socket has a separate perf name, otherwise they are the same, use macro + +#define DEFINE_KUNPENG_DDRC(n,m) \ +pfmlib_pmu_t arm_hisilicon_kunpeng_sccl##n##_ddrc##m##_support={ \ + .desc = "Hisilicon Kunpeng SCCL"#n" DDRC"#m, \ + .name = "hisi_sccl"#n"_ddrc"#m, \ + .perf_name = "hisi_sccl"#n"_ddrc"#m, \ + .pmu = PFM_PMU_ARM_KUNPENG_UNC_SCCL##n##_DDRC##m, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_kunpeng_unc_ddrc_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_kunpeng_unc_ddrc_pe, \ + .pmu_detect = pfm_arm_detect_hisilicon_kunpeng, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_kunpeng_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_kunpeng_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = 
pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), \ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_KUNPENG_DDRC(1,0); +DEFINE_KUNPENG_DDRC(1,1); +DEFINE_KUNPENG_DDRC(1,2); +DEFINE_KUNPENG_DDRC(1,3); +DEFINE_KUNPENG_DDRC(3,0); +DEFINE_KUNPENG_DDRC(3,1); +DEFINE_KUNPENG_DDRC(3,2); +DEFINE_KUNPENG_DDRC(3,3); +DEFINE_KUNPENG_DDRC(5,0); +DEFINE_KUNPENG_DDRC(5,1); +DEFINE_KUNPENG_DDRC(5,2); +DEFINE_KUNPENG_DDRC(5,3); +DEFINE_KUNPENG_DDRC(7,0); +DEFINE_KUNPENG_DDRC(7,1); +DEFINE_KUNPENG_DDRC(7,2); +DEFINE_KUNPENG_DDRC(7,3); + +#define DEFINE_KUNPENG_HHA(n,m) \ +pfmlib_pmu_t arm_hisilicon_kunpeng_sccl##n##_hha##m##_support={ \ + .desc = "Hisilicon Kunpeng SCCL"#n" HHA"#m, \ + .name = "hisi_sccl"#n"_hha"#m, \ + .perf_name = "hisi_sccl"#n"_hha"#m, \ + .pmu = PFM_PMU_ARM_KUNPENG_UNC_SCCL##n##_HHA##m, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_kunpeng_unc_hha_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_kunpeng_unc_hha_pe, \ + .pmu_detect = pfm_arm_detect_hisilicon_kunpeng, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_kunpeng_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_kunpeng_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), \ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_KUNPENG_HHA(1,2); +DEFINE_KUNPENG_HHA(1,3); +DEFINE_KUNPENG_HHA(3,0); +DEFINE_KUNPENG_HHA(3,1); +DEFINE_KUNPENG_HHA(5,6); +DEFINE_KUNPENG_HHA(5,7); +DEFINE_KUNPENG_HHA(7,4); +DEFINE_KUNPENG_HHA(7,5); + +#define DEFINE_KUNPENG_L3C(n,m) \ +pfmlib_pmu_t arm_hisilicon_kunpeng_sccl##n##_l3c##m##_support={ \ + .desc = "Hisilicon Kunpeng SCCL"#n" L3C"#m, \ + .name = "hisi_sccl"#n"_l3c"#m, \ + .perf_name = "hisi_sccl"#n"_l3c"#m, \ + .pmu = PFM_PMU_ARM_KUNPENG_UNC_SCCL##n##_L3C##m, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_kunpeng_unc_l3c_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_kunpeng_unc_l3c_pe, \ + .pmu_detect = pfm_arm_detect_hisilicon_kunpeng, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_kunpeng_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_kunpeng_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), \ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_KUNPENG_L3C(1,10); +DEFINE_KUNPENG_L3C(1,11); +DEFINE_KUNPENG_L3C(1,12); +DEFINE_KUNPENG_L3C(1,13); +DEFINE_KUNPENG_L3C(1,14); +DEFINE_KUNPENG_L3C(1,15); +DEFINE_KUNPENG_L3C(1,8); +DEFINE_KUNPENG_L3C(1,9); +DEFINE_KUNPENG_L3C(3,0); +DEFINE_KUNPENG_L3C(3,1); +DEFINE_KUNPENG_L3C(3,2); +DEFINE_KUNPENG_L3C(3,3); +DEFINE_KUNPENG_L3C(3,4); +DEFINE_KUNPENG_L3C(3,5); +DEFINE_KUNPENG_L3C(3,6); +DEFINE_KUNPENG_L3C(3,7); +DEFINE_KUNPENG_L3C(5,24); +DEFINE_KUNPENG_L3C(5,25); 
+DEFINE_KUNPENG_L3C(5,26); +DEFINE_KUNPENG_L3C(5,27); +DEFINE_KUNPENG_L3C(5,28); +DEFINE_KUNPENG_L3C(5,29); +DEFINE_KUNPENG_L3C(5,30); +DEFINE_KUNPENG_L3C(5,31); +DEFINE_KUNPENG_L3C(7,16); +DEFINE_KUNPENG_L3C(7,17); +DEFINE_KUNPENG_L3C(7,18); +DEFINE_KUNPENG_L3C(7,19); +DEFINE_KUNPENG_L3C(7,20); +DEFINE_KUNPENG_L3C(7,21); +DEFINE_KUNPENG_L3C(7,22); +DEFINE_KUNPENG_L3C(7,23); diff --git a/src/libpfm4/lib/pfmlib_kunpeng_unc_perf_event.c b/src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc_perf_event.c similarity index 72% rename from src/libpfm4/lib/pfmlib_kunpeng_unc_perf_event.c rename to src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc_perf_event.c index 34c57a540..65c3855af 100644 --- a/src/libpfm4/lib/pfmlib_kunpeng_unc_perf_event.c +++ b/src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc_perf_event.c @@ -32,47 +32,7 @@ #include "pfmlib_priv.h" #include "pfmlib_perf_event_priv.h" #include "pfmlib_arm_priv.h" - -typedef struct { - uint64_t val; -} kunpeng_unc_data_t; - -static void -display_com(void *this, pfmlib_event_desc_t *e, void *val) -{ - const arm_entry_t *pe = this_pe(this); - kunpeng_unc_data_t *reg = val; - - __pfm_vbprintf("[UNC=0x%"PRIx64"] %s\n", - reg->val, - pe[e->event].name); -} - -static void -display_reg(void *this, pfmlib_event_desc_t *e, kunpeng_unc_data_t reg) -{ - pfmlib_pmu_t *pmu = this; - if (pmu->display_reg) - pmu->display_reg(this, e, ®); - else - display_com(this, e, ®); -} - -int -pfm_kunpeng_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e) -{ - //from pe field in for the uncore, get the array with all the event defs - const arm_entry_t *event_list = this_pe(this); - kunpeng_unc_data_t reg; - //get code for the event from the table - reg.val = event_list[e->event].code; - //pass the data back to the caller - e->codes[0] = reg.val; - e->count = 1; - evt_strcat(e->fstr, "%s", event_list[e->event].name); - display_reg(this, e, reg); - return PFM_SUCCESS; -} +#include "pfmlib_arm_armv8_unc_priv.h" static int find_pmu_type_by_name(const char *name) diff --git a/src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc.c b/src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc.c new file mode 100644 index 000000000..107163a89 --- /dev/null +++ b/src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc.c @@ -0,0 +1,156 @@ +/* + * pfmlib_arm_armv8_thunderx2_unc.c : support for Marvell ThunderX2 uncore PMUs + * + * Copyright (c) 2024 Google Inc. All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" /* library private */ +#include "pfmlib_arm_priv.h" + +#include "pfmlib_arm_armv8_unc_priv.h" +#include "events/arm_marvell_tx2_unc_events.h" /* Marvell ThunderX2 PMU tables */ + +static int +pfm_arm_detect_thunderx2(void *this) +{ + int ret; + + ret = pfm_arm_detect(this); + if (ret != PFM_SUCCESS) + return PFM_ERR_NOTSUPP; + + if ((pfm_arm_cfg.implementer == 0x42) && /* Broadcom */ + (pfm_arm_cfg.part == 0x516)) { /* Thunder2x */ + return PFM_SUCCESS; + } + if ((pfm_arm_cfg.implementer == 0x43) && /* Cavium */ + (pfm_arm_cfg.part == 0xaf)) { /* Thunder2x */ + return PFM_SUCCESS; + } + return PFM_ERR_NOTSUPP; +} + +static int +pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e) +{ + //from pe field in for the uncore, get the array with all the event defs + const arm_entry_t *event_list = this_pe(this); + tx2_unc_data_t reg; + + //get code for the event from the table + reg.val = event_list[e->event].code; + + //pass the data back to the caller + e->codes[0] = reg.val; + e->count = 1; + + evt_strcat(e->fstr, "%s", event_list[e->event].name); + + return PFM_SUCCESS; +} + +// For uncore, each socket has a separate perf name, otherwise they are the same, use macro + +#define DEFINE_TX2_DMC(n) \ +pfmlib_pmu_t arm_thunderx2_dmc##n##_support={ \ + .desc = "Marvell ThunderX2 Node"#n" DMC", \ + .name = "tx2_dmc"#n, \ + .perf_name = "uncore_dmc_"#n, \ + .pmu = PFM_PMU_ARM_THUNDERX2_DMC##n, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_dmc_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_thunderx2_unc_dmc_pe, \ + .pmu_detect = pfm_arm_detect_thunderx2, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_TX2_DMC(0); +DEFINE_TX2_DMC(1); + +#define DEFINE_TX2_LLC(n) \ +pfmlib_pmu_t arm_thunderx2_llc##n##_support={ \ + .desc = "Marvell ThunderX2 node "#n" LLC", \ + .name = "tx2_llc"#n, \ + .perf_name = "uncore_l3c_"#n, \ + .pmu = PFM_PMU_ARM_THUNDERX2_LLC##n, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_llc_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_thunderx2_unc_llc_pe, \ + .pmu_detect = pfm_arm_detect_thunderx2, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_TX2_LLC(0); +DEFINE_TX2_LLC(1); + +#define DEFINE_TX2_CCPI(n) \ +pfmlib_pmu_t arm_thunderx2_ccpi##n##_support={ \ + .desc = "Marvell ThunderX2 node "#n" Cross-Socket Interconnect", \ + .name = "tx2_ccpi"#n, \ + .perf_name = "uncore_ccpi2_"#n, \ + .pmu = 
PFM_PMU_ARM_THUNDERX2_CCPI##n, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_ccpi_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_thunderx2_unc_ccpi_pe, \ + .pmu_detect = pfm_arm_detect_thunderx2, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_TX2_CCPI(0); +DEFINE_TX2_CCPI(1); diff --git a/src/libpfm4/lib/pfmlib_tx2_unc_perf_event.c b/src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc_perf_event.c similarity index 57% rename from src/libpfm4/lib/pfmlib_tx2_unc_perf_event.c rename to src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc_perf_event.c index 154cb0a63..58aceae9c 100644 --- a/src/libpfm4/lib/pfmlib_tx2_unc_perf_event.c +++ b/src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc_perf_event.c @@ -9,35 +9,7 @@ #include "pfmlib_priv.h" #include "pfmlib_perf_event_priv.h" #include "pfmlib_arm_priv.h" - -typedef union { - uint64_t val; - struct { - unsigned long unc_res1:32; /* reserved */ - } com; /* reserved space for future extensions */ -} tx2_unc_data_t; - -static void -display_com(void *this, pfmlib_event_desc_t *e, void *val) -{ - const arm_entry_t *pe = this_pe(this); - tx2_unc_data_t *reg = val; - - __pfm_vbprintf("[UNC=0x%"PRIx64"] %s\n", - reg->val, - pe[e->event].name); -} - -static void -display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg) -{ - pfmlib_pmu_t *pmu = this; - if (pmu->display_reg) - pmu->display_reg(this, e, ®); - else - display_com(this, e, ®); -} - +#include "pfmlib_arm_armv8_unc_priv.h" static int find_pmu_type_by_name(const char *name) @@ -64,22 +36,6 @@ find_pmu_type_by_name(const char *name) return type; } -int -pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e) -{ - //from pe field in for the uncore, get the array with all the event defs - const arm_entry_t *event_list = this_pe(this); - tx2_unc_data_t reg; - //get code for the event from the table - reg.val = event_list[e->event].code; - //pass the data back to the caller - e->codes[0] = reg.val; - e->count = 1; - evt_strcat(e->fstr, "%s", event_list[e->event].name); - display_reg(this, e, reg); - return PFM_SUCCESS; -} - int pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e) { diff --git a/src/libpfm4/lib/pfmlib_arm_armv8_unc.c b/src/libpfm4/lib/pfmlib_arm_armv8_unc.c new file mode 100644 index 000000000..e129240a4 --- /dev/null +++ b/src/libpfm4/lib/pfmlib_arm_armv8_unc.c @@ -0,0 +1,308 @@ +/* + * pfmlib_arm_armv8_unc.c : support for ARMv8 uncore PMUs + * + * Copyright (c) 2024 Google Inc. 
All rights reserved + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include +#include + +/* private headers */ +#include "pfmlib_priv.h" /* library private */ +#include "pfmlib_arm_priv.h" + +#include "pfmlib_arm_armv8_unc_priv.h" +#include "events/arm_marvell_tx2_unc_events.h" /* Marvell ThunderX2 PMU tables */ +#include "events/arm_hisilicon_kunpeng_unc_events.h" /* Hisilicon Kunpeng PMU uncore tables */ + +static int +pfm_arm_detect_thunderx2(void *this) +{ + int ret; + + ret = pfm_arm_detect(this); + if (ret != PFM_SUCCESS) + return PFM_ERR_NOTSUPP; + + if ((pfm_arm_cfg.implementer == 0x42) && /* Broadcom */ + (pfm_arm_cfg.part == 0x516)) { /* Thunder2x */ + return PFM_SUCCESS; + } + if ((pfm_arm_cfg.implementer == 0x43) && /* Cavium */ + (pfm_arm_cfg.part == 0xaf)) { /* Thunder2x */ + return PFM_SUCCESS; + } + return PFM_ERR_NOTSUPP; +} + +static int +pfm_arm_detect_hisilicon_kunpeng(void *this) +{ + int ret; + + ret = pfm_arm_detect(this); + if (ret != PFM_SUCCESS) + return PFM_ERR_NOTSUPP; + + if ((pfm_arm_cfg.implementer == 0x48) && /* Hisilicon */ + (pfm_arm_cfg.part == 0xd01)) { /* Kunpeng */ + return PFM_SUCCESS; + } + return PFM_ERR_NOTSUPP; +} + +static int +pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e) +{ + //from pe field in for the uncore, get the array with all the event defs + const arm_entry_t *event_list = this_pe(this); + tx2_unc_data_t reg; + + //get code for the event from the table + reg.val = event_list[e->event].code; + + //pass the data back to the caller + e->codes[0] = reg.val; + e->count = 1; + + evt_strcat(e->fstr, "%s", event_list[e->event].name); + + return PFM_SUCCESS; +} + +/* Hisilicon Kunpeng support */ +// For uncore, each socket has a separate perf name, otherwise they are the same, use macro + +#define DEFINE_KUNPENG_DDRC(n,m) \ +pfmlib_pmu_t arm_hisilicon_kunpeng_sccl##n##_ddrc##m##_support={ \ + .desc = "Hisilicon Kunpeng SCCL"#n" DDRC"#m, \ + .name = "hisi_sccl"#n"_ddrc"#m, \ + .perf_name = "hisi_sccl"#n"_ddrc"#m, \ + .pmu = PFM_PMU_ARM_KUNPENG_UNC_SCCL##n##_DDRC##m, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_kunpeng_unc_ddrc_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_kunpeng_unc_ddrc_pe, \ + .pmu_detect = pfm_arm_detect_hisilicon_kunpeng, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_kunpeng_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_kunpeng_unc_get_perf_encoding), \ + .get_event_first = 
pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), \ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_KUNPENG_DDRC(1,0); +DEFINE_KUNPENG_DDRC(1,1); +DEFINE_KUNPENG_DDRC(1,2); +DEFINE_KUNPENG_DDRC(1,3); +DEFINE_KUNPENG_DDRC(3,0); +DEFINE_KUNPENG_DDRC(3,1); +DEFINE_KUNPENG_DDRC(3,2); +DEFINE_KUNPENG_DDRC(3,3); +DEFINE_KUNPENG_DDRC(5,0); +DEFINE_KUNPENG_DDRC(5,1); +DEFINE_KUNPENG_DDRC(5,2); +DEFINE_KUNPENG_DDRC(5,3); +DEFINE_KUNPENG_DDRC(7,0); +DEFINE_KUNPENG_DDRC(7,1); +DEFINE_KUNPENG_DDRC(7,2); +DEFINE_KUNPENG_DDRC(7,3); + +#define DEFINE_KUNPENG_HHA(n,m) \ +pfmlib_pmu_t arm_hisilicon_kunpeng_sccl##n##_hha##m##_support={ \ + .desc = "Hisilicon Kunpeng SCCL"#n" HHA"#m, \ + .name = "hisi_sccl"#n"_hha"#m, \ + .perf_name = "hisi_sccl"#n"_hha"#m, \ + .pmu = PFM_PMU_ARM_KUNPENG_UNC_SCCL##n##_HHA##m, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_kunpeng_unc_hha_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_kunpeng_unc_hha_pe, \ + .pmu_detect = pfm_arm_detect_hisilicon_kunpeng, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_kunpeng_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_kunpeng_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), \ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_KUNPENG_HHA(1,2); +DEFINE_KUNPENG_HHA(1,3); +DEFINE_KUNPENG_HHA(3,0); +DEFINE_KUNPENG_HHA(3,1); +DEFINE_KUNPENG_HHA(5,6); +DEFINE_KUNPENG_HHA(5,7); +DEFINE_KUNPENG_HHA(7,4); +DEFINE_KUNPENG_HHA(7,5); + +#define DEFINE_KUNPENG_L3C(n,m) \ +pfmlib_pmu_t arm_hisilicon_kunpeng_sccl##n##_l3c##m##_support={ \ + .desc = "Hisilicon Kunpeng SCCL"#n" L3C"#m, \ + .name = "hisi_sccl"#n"_l3c"#m, \ + .perf_name = "hisi_sccl"#n"_l3c"#m, \ + .pmu = PFM_PMU_ARM_KUNPENG_UNC_SCCL##n##_L3C##m, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_kunpeng_unc_l3c_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_kunpeng_unc_l3c_pe, \ + .pmu_detect = pfm_arm_detect_hisilicon_kunpeng, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_kunpeng_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_kunpeng_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), \ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_KUNPENG_L3C(1,10); +DEFINE_KUNPENG_L3C(1,11); +DEFINE_KUNPENG_L3C(1,12); +DEFINE_KUNPENG_L3C(1,13); +DEFINE_KUNPENG_L3C(1,14); +DEFINE_KUNPENG_L3C(1,15); +DEFINE_KUNPENG_L3C(1,8); +DEFINE_KUNPENG_L3C(1,9); +DEFINE_KUNPENG_L3C(3,0); +DEFINE_KUNPENG_L3C(3,1); +DEFINE_KUNPENG_L3C(3,2); +DEFINE_KUNPENG_L3C(3,3); +DEFINE_KUNPENG_L3C(3,4); +DEFINE_KUNPENG_L3C(3,5); +DEFINE_KUNPENG_L3C(3,6); +DEFINE_KUNPENG_L3C(3,7); +DEFINE_KUNPENG_L3C(5,24); 
+DEFINE_KUNPENG_L3C(5,25); +DEFINE_KUNPENG_L3C(5,26); +DEFINE_KUNPENG_L3C(5,27); +DEFINE_KUNPENG_L3C(5,28); +DEFINE_KUNPENG_L3C(5,29); +DEFINE_KUNPENG_L3C(5,30); +DEFINE_KUNPENG_L3C(5,31); +DEFINE_KUNPENG_L3C(7,16); +DEFINE_KUNPENG_L3C(7,17); +DEFINE_KUNPENG_L3C(7,18); +DEFINE_KUNPENG_L3C(7,19); +DEFINE_KUNPENG_L3C(7,20); +DEFINE_KUNPENG_L3C(7,21); +DEFINE_KUNPENG_L3C(7,22); +DEFINE_KUNPENG_L3C(7,23); + +// For uncore, each socket has a separate perf name, otherwise they are the same, use macro + +#define DEFINE_TX2_DMC(n) \ +pfmlib_pmu_t arm_thunderx2_dmc##n##_support={ \ + .desc = "Marvell ThunderX2 Node"#n" DMC", \ + .name = "tx2_dmc"#n, \ + .perf_name = "uncore_dmc_"#n, \ + .pmu = PFM_PMU_ARM_THUNDERX2_DMC##n, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_dmc_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_thunderx2_unc_dmc_pe, \ + .pmu_detect = pfm_arm_detect_thunderx2, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_TX2_DMC(0); +DEFINE_TX2_DMC(1); + +#define DEFINE_TX2_LLC(n) \ +pfmlib_pmu_t arm_thunderx2_llc##n##_support={ \ + .desc = "Marvell ThunderX2 node "#n" LLC", \ + .name = "tx2_llc"#n, \ + .perf_name = "uncore_l3c_"#n, \ + .pmu = PFM_PMU_ARM_THUNDERX2_LLC##n, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_llc_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_thunderx2_unc_llc_pe, \ + .pmu_detect = pfm_arm_detect_thunderx2, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_TX2_LLC(0); +DEFINE_TX2_LLC(1); + +#define DEFINE_TX2_CCPI(n) \ +pfmlib_pmu_t arm_thunderx2_ccpi##n##_support={ \ + .desc = "Marvell ThunderX2 node "#n" Cross-Socket Interconnect", \ + .name = "tx2_ccpi"#n, \ + .perf_name = "uncore_ccpi2_"#n, \ + .pmu = PFM_PMU_ARM_THUNDERX2_CCPI##n, \ + .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_ccpi_pe), \ + .type = PFM_PMU_TYPE_UNCORE, \ + .pe = arm_thunderx2_unc_ccpi_pe, \ + .pmu_detect = pfm_arm_detect_thunderx2, \ + .max_encoding = 1, \ + .num_cntrs = 4, \ + .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ + PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ + .get_event_first = pfm_arm_get_event_first, \ + .get_event_next = pfm_arm_get_event_next, \ + .event_is_valid = pfm_arm_event_is_valid, \ + .validate_table = pfm_arm_validate_table, \ + .get_event_info = pfm_arm_get_event_info, \ + .get_event_attr_info = pfm_arm_get_event_attr_info, \ + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ + .get_event_nattrs = pfm_arm_get_event_nattrs, \ +}; + +DEFINE_TX2_CCPI(0); 
+DEFINE_TX2_CCPI(1); + diff --git a/src/libpfm4/lib/pfmlib_arm_armv8_unc_priv.h b/src/libpfm4/lib/pfmlib_arm_armv8_unc_priv.h new file mode 100644 index 000000000..97c0328e4 --- /dev/null +++ b/src/libpfm4/lib/pfmlib_arm_armv8_unc_priv.h @@ -0,0 +1,23 @@ +#ifndef PFMLIB_ARM_ARMV8_UNC_PRIV_H +#define PFMLIB_ARM_ARMV8_UNC_PRIV_H + +#include + +typedef union { + uint64_t val; + struct { + unsigned long unc_res1:32; /* reserved */ + } com; /* reserved space for future extensions */ +} tx2_unc_data_t; + +typedef struct { + uint64_t val; +} kunpeng_unc_data_t; + +extern int pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); + +//extern int pfm_kunpeng_get_perf_encoding(void *this, pfmlib_event_desc_t *e); + +extern int pfm_kunpeng_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e); +extern int pfm_kunpeng_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); +#endif /* PFMLIB_ARM_ARMV8_UNC_PRIV_H */ diff --git a/src/libpfm4/lib/pfmlib_arm_armv9.c b/src/libpfm4/lib/pfmlib_arm_armv9.c index d7cc2e3b2..20193a300 100644 --- a/src/libpfm4/lib/pfmlib_arm_armv9.c +++ b/src/libpfm4/lib/pfmlib_arm_armv9.c @@ -35,6 +35,7 @@ #include "events/arm_neoverse_n2_events.h" /* Arm Neoverse N2 table */ #include "events/arm_neoverse_v2_events.h" /* Arm Neoverse V2 table */ +#include "events/arm_neoverse_v3_events.h" /* Arm Neoverse V3 table */ static int pfm_arm_detect_n2(void *this) @@ -68,6 +69,21 @@ pfm_arm_detect_v2(void *this) return PFM_ERR_NOTSUPP; } +static int +pfm_arm_detect_v3(void *this) +{ + int ret; + + ret = pfm_arm_detect(this); + if (ret != PFM_SUCCESS) + return PFM_ERR_NOTSUPP; + + if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */ + (pfm_arm_cfg.part == 0xd84)) { /* Neoverse V3 */ + return PFM_SUCCESS; + } + return PFM_ERR_NOTSUPP; +} pfmlib_pmu_t arm_n2_support={ .desc = "Arm Neoverse N2", @@ -118,3 +134,28 @@ pfmlib_pmu_t arm_v2_support={ PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), .get_event_nattrs = pfm_arm_get_event_nattrs, }; + +pfmlib_pmu_t arm_v3_support={ + .desc = "Arm Neoverse V3", + .name = "arm_v3", + .pmu = PFM_PMU_ARM_V3, + .pme_count = LIBPFM_ARRAY_SIZE(arm_neoverse_v3_pe), + .type = PFM_PMU_TYPE_CORE, + .supported_plm = ARMV9_PLM, + .pe = arm_neoverse_v3_pe, + + .pmu_detect = pfm_arm_detect_v3, + .max_encoding = 1, + .num_cntrs = 6, + + .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, + PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), + .get_event_first = pfm_arm_get_event_first, + .get_event_next = pfm_arm_get_event_next, + .event_is_valid = pfm_arm_event_is_valid, + .validate_table = pfm_arm_validate_table, + .get_event_info = pfm_arm_get_event_info, + .get_event_attr_info = pfm_arm_get_event_attr_info, + PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), + .get_event_nattrs = pfm_arm_get_event_nattrs, +}; diff --git a/src/libpfm4/lib/pfmlib_common.c b/src/libpfm4/lib/pfmlib_common.c index 47ad2168e..425462e2a 100644 --- a/src/libpfm4/lib/pfmlib_common.c +++ b/src/libpfm4/lib/pfmlib_common.c @@ -658,7 +658,9 @@ static pfmlib_pmu_t *pfmlib_pmus[]= &arm_qcom_krait_support, &arm_cortex_a57_support, &arm_cortex_a53_support, + &arm_cortex_a55_support, &arm_cortex_a72_support, + &arm_cortex_a76_support, &arm_xgene_support, &arm_thunderx2_support, &arm_thunderx2_dmc0_support, @@ -671,6 +673,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= &arm_n2_support, &arm_v1_support, &arm_v2_support, + &arm_v3_support, &arm_hisilicon_kunpeng_support, &arm_hisilicon_kunpeng_sccl1_ddrc0_support, &arm_hisilicon_kunpeng_sccl1_ddrc1_support, @@ -732,7 +735,9 
@@ static pfmlib_pmu_t *pfmlib_pmus[]= #ifdef CONFIG_PFMLIB_ARCH_ARM64 &arm_cortex_a57_support, &arm_cortex_a53_support, + &arm_cortex_a55_support, &arm_cortex_a72_support, + &arm_cortex_a76_support, &arm_xgene_support, &arm_thunderx2_support, &arm_thunderx2_dmc0_support, @@ -802,6 +807,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= &arm_n2_support, &arm_v1_support, &arm_v2_support, + &arm_v3_support, &arm_hisilicon_kunpeng_support, #endif diff --git a/src/libpfm4/lib/pfmlib_priv.h b/src/libpfm4/lib/pfmlib_priv.h index 673392070..da7597a87 100644 --- a/src/libpfm4/lib/pfmlib_priv.h +++ b/src/libpfm4/lib/pfmlib_priv.h @@ -834,12 +834,15 @@ extern pfmlib_pmu_t arm_1176_support; extern pfmlib_pmu_t arm_qcom_krait_support; extern pfmlib_pmu_t arm_cortex_a57_support; extern pfmlib_pmu_t arm_cortex_a53_support; +extern pfmlib_pmu_t arm_cortex_a55_support; extern pfmlib_pmu_t arm_cortex_a72_support; +extern pfmlib_pmu_t arm_cortex_a76_support; extern pfmlib_pmu_t arm_xgene_support; extern pfmlib_pmu_t arm_n1_support; extern pfmlib_pmu_t arm_n2_support; extern pfmlib_pmu_t arm_v1_support; extern pfmlib_pmu_t arm_v2_support; +extern pfmlib_pmu_t arm_v3_support; extern pfmlib_pmu_t arm_thunderx2_support; extern pfmlib_pmu_t arm_thunderx2_dmc0_support; diff --git a/src/libpfm4/tests/validate_arm.c b/src/libpfm4/tests/validate_arm.c index db0439f69..58e0b2a1d 100644 --- a/src/libpfm4/tests/validate_arm.c +++ b/src/libpfm4/tests/validate_arm.c @@ -258,6 +258,41 @@ static const test_event_t arm_test_events[]={ .codes[0] = 0x8000007, .fstr = "arm_ac53::ST_RETIRED:k=1:u=1:hv=0", }, + { SRC_LINE, + .name = "arm_v3::CPU_CYCLES", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x8000011, + .fstr = "arm_v3::CPU_CYCLES:k=1:u=1:hv=0", + }, + { SRC_LINE, + .name = "arm_v3::INST_FETCH_PERCYC", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x8008120, + .fstr = "arm_v3::INST_FETCH_PERCYC:k=1:u=1:hv=0", + }, + { SRC_LINE, + .name = "arm_v3::INST_RETIRED", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x8000008, + .fstr = "arm_v3::INST_RETIRED:k=1:u=1:hv=0", + }, + { SRC_LINE, + .name = "arm_v3::DTLB_WALK_PERCYC", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x8008128, + .fstr = "arm_v3::DTLB_WALK_PERCYC:k=1:u=1:hv=0", + }, + { SRC_LINE, + .name = "arm_v3::SAMPLE_FEED_LD", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x800812b, + .fstr = "arm_v3::SAMPLE_FEED_LD:k=1:u=1:hv=0", + }, }; #define NUM_TEST_EVENTS (int)(sizeof(arm_test_events)/sizeof(test_event_t)) diff --git a/src/libpfm4/tests/validate_arm64.c b/src/libpfm4/tests/validate_arm64.c index c4535539f..a3d75a0e0 100644 --- a/src/libpfm4/tests/validate_arm64.c +++ b/src/libpfm4/tests/validate_arm64.c @@ -403,7 +403,28 @@ static const test_event_t arm64_test_events[]={ .count = 1, .codes[0] = 0x00, .fstr = "hisi_sccl1_ddrc0::flux_wr", - } + }, + { SRC_LINE, + .name = "arm_v3::INST_RETIRED", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x8000008, + .fstr = "arm_v3::INST_RETIRED:k=1:u=1:hv=0", + }, + { SRC_LINE, + .name = "arm_v3::DTLB_WALK_PERCYC", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x8008128, + .fstr = "arm_v3::DTLB_WALK_PERCYC:k=1:u=1:hv=0", + }, + { SRC_LINE, + .name = "arm_v3::SAMPLE_FEED_LD", + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0x800812b, + .fstr = "arm_v3::SAMPLE_FEED_LD:k=1:u=1:hv=0", + }, }; #define NUM_TEST_EVENTS (int)(sizeof(arm64_test_events)/sizeof(test_event_t))
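Note: as a usage illustration only (not part of the patch), the sketch below shows how the event tables added above can be exercised through the existing libpfm4 API, the same way the new tests/validate_arm.c entries do. The event name arm_v3::INST_RETIRED, its expected code 0x8000008, and the formatted string come from the test entries in this patch; the rest is the standard pfm_get_os_event_encoding()/PFM_OS_NONE flow and is an assumption about how a caller would typically drive it, not code introduced here.

/*
 * Sketch: encode one of the newly added events through the raw (PFM_OS_NONE)
 * encoding path, mirroring tests/validate_arm.c. Typical build (assumed):
 *   gcc sketch.c -lpfm -o sketch
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>

#include <perfmon/pfmlib.h>

int
main(void)
{
	pfm_pmu_encode_arg_t arg;
	char *fstr = NULL;	/* library allocates the fully qualified event string */
	int i, ret;

	ret = pfm_initialize();
	if (ret != PFM_SUCCESS) {
		fprintf(stderr, "pfm_initialize: %s\n", pfm_strerror(ret));
		return 1;
	}

	memset(&arg, 0, sizeof(arg));
	arg.size = sizeof(arg);	/* required by the library for ABI checking */
	arg.fstr = &fstr;

	/*
	 * Explicit "pmu::" prefix, as used by the validation tests, so the
	 * arm_v3 table is searched by name. Expected from validate_arm.c:
	 * codes[0] = 0x8000008, fstr = "arm_v3::INST_RETIRED:k=1:u=1:hv=0".
	 */
	ret = pfm_get_os_event_encoding("arm_v3::INST_RETIRED",
					PFM_PLM0 | PFM_PLM3, PFM_OS_NONE, &arg);
	if (ret != PFM_SUCCESS) {
		fprintf(stderr, "cannot encode event: %s\n", pfm_strerror(ret));
		return 1;
	}

	for (i = 0; i < arg.count; i++)
		printf("codes[%d] = 0x%"PRIx64"\n", i, arg.codes[i]);
	printf("fstr = %s\n", fstr);

	free(arg.codes);	/* allocated by the library when arg.codes is NULL */
	free(fstr);
	pfm_terminate();
	return 0;
}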