From ffde394e4969300f23779718796d72092813d595 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 8 Jan 2025 06:30:16 -0800 Subject: [PATCH 01/23] perf/x86/intel/uncore: Clean up func_id mainline inclusion from mainline-v6.14-rc1 commit 3f710be02ea648001ba18fb2c9fa7765e743dec2 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3f710be02ea648001ba18fb2c9fa7765e743dec2 ------------------------------------- Intel-SIG: commit 3f710be02ea6 ("perf/x86/intel/uncore: Clean up func_id") ------------------------------------- perf/x86/intel/uncore: Clean up func_id The below warning may be triggered on GNR when the PCIE uncore units are exposed. WARNING: CPU: 4 PID: 1 at arch/x86/events/intel/uncore.c:1169 uncore_pci_pmu_register+0x158/0x190 The current uncore driver assumes that all the devices in the same PMU have the exact same devfn. It's true for the previous platforms. But it doesn't work for the new PCIE uncore units on GNR. The assumption doesn't make sense. There is no reason to limit the devices from the same PMU to the same devfn. Also, the current code just throws the warning, but still registers the device. The WARN_ON_ONCE() should be removed. The func_id is used by the later event_init() to check if a event->pmu has valid devices. For cpu and mmio uncore PMUs, they are always valid. For pci uncore PMUs, it's set when the PMU is registered. It can be replaced by the pmu->registered. Clean up the func_id. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Eric Hu Link: https://lkml.kernel.org/r/20250108143017.1793781-1-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore.c | 20 +++++++------------- arch/x86/events/intel/uncore.h | 1 - arch/x86/events/intel/uncore_snb.c | 2 +- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 35220524d0ed9..4e3dbecd1cd14 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -732,7 +732,7 @@ static int uncore_pmu_event_init(struct perf_event *event) pmu = uncore_event_to_pmu(event); /* no device found for this pmu */ - if (pmu->func_id < 0) + if (!pmu->registered) return -ENOENT; /* Sampling not supported yet */ @@ -983,7 +983,7 @@ static void uncore_types_exit(struct intel_uncore_type **types) uncore_type_exit(*types); } -static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) +static int __init uncore_type_init(struct intel_uncore_type *type) { struct intel_uncore_pmu *pmus; size_t size; @@ -996,7 +996,6 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) size = uncore_max_dies() * sizeof(struct intel_uncore_box *); for (i = 0; i < type->num_boxes; i++) { - pmus[i].func_id = setid ? 
i : -1; pmus[i].pmu_idx = i; pmus[i].type = type; pmus[i].boxes = kzalloc(size, GFP_KERNEL); @@ -1046,12 +1045,12 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) } static int __init -uncore_types_init(struct intel_uncore_type **types, bool setid) +uncore_types_init(struct intel_uncore_type **types) { int ret; for (; *types; types++) { - ret = uncore_type_init(*types, setid); + ret = uncore_type_init(*types); if (ret) return ret; } @@ -1151,11 +1150,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev, if (!box) return -ENOMEM; - if (pmu->func_id < 0) - pmu->func_id = pdev->devfn; - else - WARN_ON_ONCE(pmu->func_id != pdev->devfn); - atomic_inc(&box->refcnt); box->dieid = die; box->pci_dev = pdev; @@ -1401,7 +1395,7 @@ static int __init uncore_pci_init(void) goto err; } - ret = uncore_types_init(uncore_pci_uncores, false); + ret = uncore_types_init(uncore_pci_uncores); if (ret) goto errtype; @@ -1669,7 +1663,7 @@ static int __init uncore_cpu_init(void) { int ret; - ret = uncore_types_init(uncore_msr_uncores, true); + ret = uncore_types_init(uncore_msr_uncores); if (ret) goto err; @@ -1688,7 +1682,7 @@ static int __init uncore_mmio_init(void) struct intel_uncore_type **types = uncore_mmio_uncores; int ret; - ret = uncore_types_init(types, true); + ret = uncore_types_init(types); if (ret) goto err; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 3434c955c5fbf..06ebb17b038c9 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -123,7 +123,6 @@ struct intel_uncore_pmu { struct pmu pmu; char name[UNCORE_PMU_NAME_LEN]; int pmu_idx; - int func_id; bool registered; atomic_t activeboxes; cpumask_t cpu_mask; diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 6112a6c5b8f4e..c2588e2eafb96 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -724,7 +724,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event) pmu = uncore_event_to_pmu(event); /* no device found for this pmu */ - if (pmu->func_id < 0) + if (!pmu->registered) return -ENOENT; /* Sampling not supported yet */ -- Gitee From b82f748edaf9e6afe3031172c70c9b06c25c13de Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 8 Jan 2025 06:30:17 -0800 Subject: [PATCH 02/23] perf/x86/intel/uncore: Support more units on Granite Rapids mainline inclusion from mainline-v6.14-rc1 commit 6d642735cdb6cdb814d2b6c81652caa53ce04842 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d642735cdb6cdb814d2b6c81652caa53ce04842 ------------------------------------- Intel-SIG: commit 6d642735cdb6 ("perf/x86/intel/uncore: Support more units on Granite Rapids") ------------------------------------- perf/x86/intel/uncore: Support more units on Granite Rapids The same CXL PMONs support is also avaiable on GNR. Apply spr_uncore_cxlcm and spr_uncore_cxldp to GNR as well. The other units were broken on early HW samples, so they were ignored in the early enabling patch. The issue has been fixed and verified on the later production HW. Add UPI, B2UPI, B2HOT, PCIEX16 and PCIEX8 for GNR. 
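
As a quick sanity check after applying this patch, the newly supported units should show up as additional uncore PMUs in sysfs. The sketch below is an editor's illustration only (not part of the patch); the exact PMU names (e.g. uncore_pciex8_0, uncore_upi_0) and box numbering are assumptions that depend on the platform.

#include <dirent.h>
#include <stdio.h>
#include <string.h>

/* List registered uncore PMUs from sysfs (run on the target GNR system). */
int main(void)
{
	DIR *d = opendir("/sys/bus/event_source/devices");
	struct dirent *de;

	if (!d) {
		perror("opendir");
		return 1;
	}
	while ((de = readdir(d)) != NULL) {
		if (strncmp(de->d_name, "uncore_", 7) == 0)
			printf("%s\n", de->d_name);
	}
	closedir(d);
	return 0;
}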
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Eric Hu Link: https://lkml.kernel.org/r/20250108143017.1793781-2-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore_snbep.c | 48 ++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b723bf5b717e3..fa981dfde254a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6138,17 +6138,8 @@ void spr_uncore_mmio_init(void) /* GNR uncore support */ #define UNCORE_GNR_NUM_UNCORE_TYPES 23 -#define UNCORE_GNR_TYPE_15 15 -#define UNCORE_GNR_B2UPI 18 -#define UNCORE_GNR_TYPE_21 21 -#define UNCORE_GNR_TYPE_22 22 int gnr_uncore_units_ignore[] = { - UNCORE_SPR_UPI, - UNCORE_GNR_TYPE_15, - UNCORE_GNR_B2UPI, - UNCORE_GNR_TYPE_21, - UNCORE_GNR_TYPE_22, UNCORE_IGNORE_END }; @@ -6157,6 +6148,31 @@ static struct intel_uncore_type gnr_uncore_ubox = { .attr_update = uncore_alias_groups, }; +static struct intel_uncore_type gnr_uncore_pciex8 = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "pciex8", +}; + +static struct intel_uncore_type gnr_uncore_pciex16 = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "pciex16", +}; + +static struct intel_uncore_type gnr_uncore_upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "upi", +}; + +static struct intel_uncore_type gnr_uncore_b2upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "b2upi", +}; + +static struct intel_uncore_type gnr_uncore_b2hot = { + .name = "b2hot", + .attr_update = uncore_alias_groups, +}; + static struct intel_uncore_type gnr_uncore_b2cmi = { SPR_UNCORE_PCI_COMMON_FORMAT(), .name = "b2cmi", @@ -6181,21 +6197,21 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = { &gnr_uncore_ubox, &spr_uncore_imc, NULL, + &gnr_uncore_upi, NULL, NULL, NULL, + &spr_uncore_cxlcm, + &spr_uncore_cxldp, NULL, - NULL, - NULL, - NULL, - NULL, + &gnr_uncore_b2hot, &gnr_uncore_b2cmi, &gnr_uncore_b2cxl, - NULL, + &gnr_uncore_b2upi, NULL, &gnr_uncore_mdf_sbo, - NULL, - NULL, + &gnr_uncore_pciex16, + &gnr_uncore_pciex8, }; static struct freerunning_counters gnr_iio_freerunning[] = { -- Gitee From 8e1d210321aaef656ab8fdbb524ad400c825b4ee Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 11 Feb 2025 13:30:15 -0800 Subject: [PATCH 03/23] perf vendor events: Add Clearwaterforest events mainline inclusion from mainline-v6.15-rc1 commit e415c1493fa1e93afaec697385b8952d932c41bc category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e415c1493fa1e93afaec697385b8952d932c41bc ------------------------------------- Intel-SIG: commit e415c1493fa1 ("perf vendor events: Add Clearwaterforest events") ------------------------------------- perf vendor events: Add Clearwaterforest events Add events v1.00. 
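
For context, the EventCode/UMask pairs in the JSON files added below map directly onto a raw core-PMU encoding (config = UMask << 8 | EventCode). A minimal userspace sketch, as an editor's illustration only and not part of the patch, counting LONGEST_LAT_CACHE.MISS (EventCode 0x2e, UMask 0x41) on the calling thread:

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = (0x41ULL << 8) | 0x2e;	/* LONGEST_LAT_CACHE.MISS */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload of interest here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("LLC misses: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}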
Bring in the events from: https://github.com/intel/perfmon/tree/main/CWF/events Co-developed-by: Caleb Biggers Signed-off-by: Caleb Biggers Acked-by: Kan Liang Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250211213031.114209-9-irogers@google.com Signed-off-by: Namhyung Kim Signed-off-by: Quanxian Wang --- .../arch/x86/clearwaterforest/cache.json | 144 ++++++++++++++++++ .../arch/x86/clearwaterforest/counter.json | 7 + .../arch/x86/clearwaterforest/frontend.json | 18 +++ .../arch/x86/clearwaterforest/memory.json | 22 +++ .../arch/x86/clearwaterforest/other.json | 22 +++ .../arch/x86/clearwaterforest/pipeline.json | 113 ++++++++++++++ .../x86/clearwaterforest/virtual-memory.json | 29 ++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + 8 files changed, 356 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/counter.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/frontend.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/other.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/pipeline.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/virtual-memory.json diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/cache.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/cache.json new file mode 100644 index 0000000000000..875361b30f1df --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/cache.json @@ -0,0 +1,144 @@ +[ + { + "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "1000003", + "UMask": "0x41" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. 
Counts on a per core basis.", + "SampleAfterValue": "1000003", + "UMask": "0x4f" + }, + { + "BriefDescription": "Counts the number of load ops retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "SampleAfterValue": "1000003", + "UMask": "0x81" + }, + { + "BriefDescription": "Counts the number of store ops retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "SampleAfterValue": "1000003", + "UMask": "0x82" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_1024", + "MSRIndex": "0x3F6", + "MSRValue": "0x400", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_2048", + "MSRIndex": "0x3F6", + "MSRValue": "0x800", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "SampleAfterValue": "1000003", 
+ "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x6" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/counter.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/counter.json new file mode 100644 index 0000000000000..a0eaf5b6f2bc8 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/counter.json @@ -0,0 +1,7 @@ +[ + { + "Unit": "core", + "CountersNumFixed": "3", + "CountersNumGeneric": "39" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/frontend.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/frontend.json new file mode 100644 index 0000000000000..7a9250e5c8f29 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/frontend.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE.ACCESSES", + "SampleAfterValue": "1000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump and the instruction cache registers bytes are not present. 
-", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE.MISSES", + "SampleAfterValue": "1000003", + "UMask": "0x2" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/memory.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/memory.json new file mode 100644 index 0000000000000..f5007e56f39bb --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/memory.json @@ -0,0 +1,22 @@ +[ + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x33FBFC00001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x33FBFC00002", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/other.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/other.json new file mode 100644 index 0000000000000..80454e497f835 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/other.json @@ -0,0 +1,22 @@ +[ + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/pipeline.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/pipeline.json new file mode 100644 index 0000000000000..6a5faa704b852 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/pipeline.json @@ -0,0 +1,113 @@ +[ + { + "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. 
The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.CORE", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. [This event is alias to CPU_CLK_UNHALTED.THREAD_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles.", + "Counter": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "SampleAfterValue": "1000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. [This event is alias to CPU_CLK_UNHALTED.CORE_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of instructions retired.", + "Counter": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of instructions retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "Counter": "36", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. [This event is alias to TOPDOWN_BE_BOUND.ALL_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xa4", + "EventName": "TOPDOWN_BE_BOUND.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. 
[This event is alias to TOPDOWN_BE_BOUND.ALL]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xa4", + "EventName": "TOPDOWN_BE_BOUND.ALL_P", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls.", + "Counter": "37", + "EventName": "TOPDOWN_FE_BOUND.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x6" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.", + "Counter": "38", + "EventName": "TOPDOWN_RETIRING.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x7" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/virtual-memory.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/virtual-memory.json new file mode 100644 index 0000000000000..78f2b835c1fa4 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/virtual-memory.json @@ -0,0 +1,29 @@ +[ + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. 
Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 9f97b32533a05..b9e2034d038bd 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -33,6 +33,7 @@ GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-2F,v2,westmereex,core GenuineIntel-6-55-[01234],v1,skylakex,core GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core +GenuineIntel-6-DD,v1.00,clearwaterforest,core GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core -- Gitee From 3849a9917c2af988503fadc60edeaeafb664955c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 11 Dec 2024 08:11:46 -0800 Subject: [PATCH 04/23] perf/x86/intel/uncore: Add Clearwater Forest support mainline inclusion from mainline-v6.13-rc5 commit b6ccddd6fe1fd49c7a82b6fbed01cccad21a29c7 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b6ccddd6fe1fd49c7a82b6fbed01cccad21a29c7 ------------------------------------- Intel-SIG: commit b6ccddd6fe1f ("perf/x86/intel/uncore: Add Clearwater Forest support") ------------------------------------- perf/x86/intel/uncore: Add Clearwater Forest support From the perspective of the uncore PMU, the Clearwater Forest is the same as the previous Sierra Forest. The only difference is the event list, which will be supported in the perf tool later. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241211161146.235253-1-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 4e3dbecd1cd14..a2e18807887e3 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1872,6 +1872,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &gnr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &gnr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_DARKMONT_X, &gnr_uncore_init), {}, }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); -- Gitee From 4c45f308333064c455c11acf278e8d96163e8177 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 7 Jul 2025 13:17:47 -0700 Subject: [PATCH 05/23] perf/x86/intel/uncore: Support MSR portal for discovery tables mainline inclusion from mainline-v6.17-rc1 commit cf002dafedd06241175e4dbce39ba90a4b75822c category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cf002dafedd06241175e4dbce39ba90a4b75822c ------------------------------------- Intel-SIG: commit cf002dafedd0 ("perf/x86/intel/uncore: Support MSR portal for discovery tables") ------------------------------------- perf/x86/intel/uncore: Support MSR portal for discovery tables Starting from the Panther Lake, the discovery table mechanism is also supported in client platforms. The difference is that the portal of the global discovery table is retrieved from an MSR. The layout of discovery tables are the same as the server platforms. Factor out __parse_discovery_table() to parse discover tables. 
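
For debugging on such a client part, the portal can also be inspected from userspace: the value read from the new UNCORE_DISCOVERY_MSR (0x201e, defined below) is passed directly to __parse_discovery_table() as the physical base of the global discovery table. A minimal sketch, as an editor's illustration only, assuming the msr driver is loaded and the tool runs as root:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define UNCORE_DISCOVERY_MSR	0x201e

int main(void)
{
	uint64_t base = 0;
	/* One CPU per die is enough; CPU 0 is used here for simplicity. */
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	if (pread(fd, &base, sizeof(base), UNCORE_DISCOVERY_MSR) != sizeof(base)) {
		perror("pread");
		close(fd);
		return 1;
	}
	printf("global discovery table base: %#llx\n", (unsigned long long)base);
	close(fd);
	return 0;
}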
The uncore PMON is Die scope. Need to parse the discovery tables for each die. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Link: https://lore.kernel.org/r/20250707201750.616527-2-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore_discovery.c | 79 ++++++++++++++++++++---- arch/x86/events/intel/uncore_discovery.h | 3 + 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 9da52386e91d5..79656d4d46df2 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -273,24 +273,15 @@ uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore) return false; } -static int parse_discovery_table(struct pci_dev *dev, int die, - u32 bar_offset, bool *parsed, - int *ignore) +static int __parse_discovery_table(resource_size_t addr, int die, + bool *parsed, int *ignore) { struct uncore_global_discovery global; struct uncore_unit_discovery unit; void __iomem *io_addr; - resource_size_t addr; unsigned long size; - u32 val; int i; - pci_read_config_dword(dev, bar_offset, &val); - - if (val & UNCORE_DISCOVERY_MASK) - return -EINVAL; - - addr = (resource_size_t)(val & ~UNCORE_DISCOVERY_MASK); size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE; io_addr = ioremap(addr, size); if (!io_addr) @@ -333,7 +324,32 @@ static int parse_discovery_table(struct pci_dev *dev, int die, return 0; } -bool intel_uncore_has_discovery_tables(int *ignore) +static int parse_discovery_table(struct pci_dev *dev, int die, + u32 bar_offset, bool *parsed, + int *ignore) +{ + resource_size_t addr; + u32 val; + + pci_read_config_dword(dev, bar_offset, &val); + + if (val & ~PCI_BASE_ADDRESS_MEM_MASK & ~PCI_BASE_ADDRESS_MEM_TYPE_64) + return -EINVAL; + + addr = (resource_size_t)(val & PCI_BASE_ADDRESS_MEM_MASK); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + if ((val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) { + u32 val2; + + pci_read_config_dword(dev, bar_offset + 4, &val2); + addr |= ((resource_size_t)val2) << 32; + } +#endif + + return __parse_discovery_table(addr, die, parsed, ignore); +} + +static bool intel_uncore_has_discovery_tables_pci(int *ignore) { u32 device, val, entry_id, bar_offset; int die, dvsec = 0, ret = true; @@ -382,6 +398,45 @@ bool intel_uncore_has_discovery_tables(int *ignore) return ret; } +static bool intel_uncore_has_discovery_tables_msr(int *ignore) +{ + unsigned long *die_mask; + bool parsed = false; + int cpu, die; + u64 base; + + die_mask = kcalloc(BITS_TO_LONGS(uncore_max_dies()), + sizeof(unsigned long), GFP_KERNEL); + if (!die_mask) + return false; + + cpus_read_lock(); + for_each_online_cpu(cpu) { + die = topology_logical_die_id(cpu); + if (__test_and_set_bit(die, die_mask)) + continue; + + if (rdmsrl_safe_on_cpu(cpu, UNCORE_DISCOVERY_MSR, &base)) + continue; + + if (!base) + continue; + + __parse_discovery_table(base, die, &parsed, ignore); + } + + cpus_read_unlock(); + + kfree(die_mask); + return parsed; +} + +bool intel_uncore_has_discovery_tables(int *ignore) +{ + return intel_uncore_has_discovery_tables_msr(ignore) || + intel_uncore_has_discovery_tables_pci(ignore); +} + void intel_uncore_clear_discovery_tables(void) { struct intel_uncore_discovery_type *type, *next; diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 17d017059b797..e08c0d07d8ad8 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ 
b/arch/x86/events/intel/uncore_discovery.h @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +/* Store the full address of the global discovery table */ +#define UNCORE_DISCOVERY_MSR 0x201e + /* Generic device ID of a discovery table device */ #define UNCORE_DISCOVERY_TABLE_DEVICE 0x09a7 /* Capability ID for a discovery table device */ -- Gitee From c641b585ec8e663619816bbf34537a5765c54350 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 7 Jul 2025 13:17:48 -0700 Subject: [PATCH 06/23] perf/x86/intel/uncore: Support customized MMIO map size mainline inclusion from mainline-v6.17-rc1 commit fca24bf2b6b619770d7f1222c0284791d7766239 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fca24bf2b6b619770d7f1222c0284791d7766239 ------------------------------------- Intel-SIG: commit fca24bf2b6b6 ("perf/x86/intel/uncore: Support customized MMIO map size") ------------------------------------- perf/x86/intel/uncore: Support customized MMIO map size For a server platform, the MMIO map size is always 0x4000. However, a client platform may have a smaller map size. Make the map size customizable. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Link: https://lore.kernel.org/r/20250707201750.616527-3-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore_discovery.c | 2 +- arch/x86/events/intel/uncore_snbep.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 79656d4d46df2..a543a54fd7140 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -650,7 +650,7 @@ void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) } addr = unit->addr; - box->io_addr = ioremap(addr, UNCORE_GENERIC_MMIO_SIZE); + box->io_addr = ioremap(addr, type->mmio_map_size); if (!box->io_addr) { pr_warn("Uncore type %d box %d: ioremap error for 0x%llx.\n", type->type_id, unit->id, (unsigned long long)addr); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index fa981dfde254a..fd6496fd9bf3c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -5949,6 +5949,8 @@ static void uncore_type_customized_copy(struct intel_uncore_type *to_type, to_type->format_group = from_type->format_group; if (from_type->attr_update) to_type->attr_update = from_type->attr_update; + if (from_type->mmio_map_size) + to_type->mmio_map_size = from_type->mmio_map_size; } static struct intel_uncore_type ** -- Gitee From 641b6eec8d9458a614c8b27e798ddc728a76be7f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:01 -0700 Subject: [PATCH 07/23] perf/x86/intel: Use the common uarch name for the shared functions mainline inclusion from mainline-v6.7-rc1 commit d4b5694c75d4eba8238d541a55da0c67e876213e category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d4b5694c75d4eba8238d541a55da0c67e876213e ------------------------------------- Intel-SIG: commit d4b5694c75d4 ("perf/x86/intel: Use the common uarch name for the shared functions") ------------------------------------- perf/x86/intel: Use the common uarch name for the shared functions From PMU's perspective, the SPR/GNR server 
has a similar uarch to the ADL/MTL client p-core. Many functions are shared. However, the shared function name uses the abbreviation of the server product code name, rather than the common uarch code name. Rename these internal shared functions by the common uarch name. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-2-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 63 ++++++++++++++++++------------------ arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 2 +- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 1786e8d85b6b0..b953fdeaff573 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -297,7 +297,7 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct extra_reg intel_spr_extra_regs[] __read_mostly = { +static struct extra_reg intel_glc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -307,7 +307,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct event_constraint intel_spr_event_constraints[] = { +static struct event_constraint intel_glc_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -347,7 +347,7 @@ static struct event_constraint intel_spr_event_constraints[] = { EVENT_CONSTRAINT_END }; -static struct extra_reg intel_gnr_extra_regs[] __read_mostly = { +static struct extra_reg intel_rwc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -471,7 +471,7 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } -static __initconst const u64 spr_hw_cache_event_ids +static __initconst const u64 glc_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -550,7 +550,7 @@ static __initconst const u64 spr_hw_cache_event_ids }, }; -static __initconst const u64 spr_hw_cache_extra_regs +static __initconst const u64 glc_hw_cache_extra_regs [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -4227,7 +4227,7 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, } static struct event_constraint * -spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx, +glc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { struct event_constraint *c; @@ -4316,7 +4316,7 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); if (pmu->cpu_type == hybrid_big) - return spr_get_event_constraints(cpuc, idx, event); + return glc_get_event_constraints(cpuc, idx, event); else if (pmu->cpu_type == hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); @@ -4363,7 +4363,7 @@ rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct event_constraint *c; - c = spr_get_event_constraints(cpuc, idx, event); + c = 
glc_get_event_constraints(cpuc, idx, event); /* The Retire Latency is not supported by the fixed counter 0. */ if (event->attr.precise_ip && @@ -4463,7 +4463,7 @@ static u64 nhm_limit_period(struct perf_event *event, u64 left) return max(left, 32ULL); } -static u64 spr_limit_period(struct perf_event *event, u64 left) +static u64 glc_limit_period(struct perf_event *event, u64 left) { if (event->attr.precise_ip == 3) return max(left, 128ULL); @@ -5344,14 +5344,14 @@ static struct attribute *icl_tsx_events_attrs[] = { EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2"); EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82"); -static struct attribute *spr_events_attrs[] = { +static struct attribute *glc_events_attrs[] = { EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_spr), EVENT_PTR(mem_ld_aux), NULL, }; -static struct attribute *spr_td_events_attrs[] = { +static struct attribute *glc_td_events_attrs[] = { EVENT_PTR(slots), EVENT_PTR(td_retiring), EVENT_PTR(td_bad_spec), @@ -5364,7 +5364,7 @@ static struct attribute *spr_td_events_attrs[] = { NULL, }; -static struct attribute *spr_tsx_events_attrs[] = { +static struct attribute *glc_tsx_events_attrs[] = { EVENT_PTR(tx_start), EVENT_PTR(tx_abort), EVENT_PTR(tx_commit), @@ -6252,7 +6252,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6283,7 +6283,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; @@ -6591,26 +6591,27 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - x86_pmu.extra_regs = intel_spr_extra_regs; + x86_pmu.extra_regs = intel_glc_extra_regs; pr_cont("Sapphire Rapids events, "); name = "sapphire_rapids"; goto glc_common; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: - x86_pmu.extra_regs = intel_gnr_extra_regs; + if (!x86_pmu.extra_regs) + x86_pmu.extra_regs = intel_rwc_extra_regs; pr_cont("Granite Rapids events, "); name = "granite_rapids"; glc_common: pmem = true; x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - x86_pmu.event_constraints = intel_spr_event_constraints; - x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.event_constraints = intel_glc_event_constraints; + x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; + x86_pmu.limit_period = glc_limit_period; x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; @@ -6620,13 +6621,13 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.hw_config = hsw_hw_config; - x86_pmu.get_event_constraints = 
spr_get_event_constraints; + x86_pmu.get_event_constraints = glc_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_skl_attr = skl_format_attr; - mem_attr = spr_events_attrs; - td_attr = spr_td_events_attrs; - tsx_attr = spr_tsx_events_attrs; + mem_attr = glc_events_attrs; + td_attr = glc_td_events_attrs; + tsx_attr = glc_tsx_events_attrs; x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); @@ -6670,7 +6671,7 @@ __init int intel_pmu_init(void) x86_pmu.filter_match = intel_pmu_filter_match; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; /* * The rtm_abort_event is used to check whether to enable GPRs @@ -6719,11 +6720,11 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_spr_event_constraints; - pmu->pebs_constraints = intel_spr_pebs_event_constraints; - pmu->extra_regs = intel_spr_extra_regs; + memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); + memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); + pmu->event_constraints = intel_glc_event_constraints; + pmu->pebs_constraints = intel_glc_pebs_event_constraints; + pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index e490a9e3b403d..885affe47e568 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1046,7 +1046,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_spr_pebs_event_constraints[] = { +struct event_constraint intel_glc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ae3d8a0b701e0..bba85fb5e234f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1432,7 +1432,7 @@ extern struct event_constraint intel_skl_pebs_event_constraints[]; extern struct event_constraint intel_icl_pebs_event_constraints[]; -extern struct event_constraint intel_spr_pebs_event_constraints[]; +extern struct event_constraint intel_glc_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); -- Gitee From 71506b6732e668e21142a3f80c742006157b76b6 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:02 -0700 Subject: [PATCH 08/23] perf/x86/intel: Factor out the initialization code for SPR mainline inclusion from mainline-v6.7-rc1 commit 0ba0c03528e918a8f6b5aa63d502fdc6a9d80fc7 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0ba0c03528e918a8f6b5aa63d502fdc6a9d80fc7 ------------------------------------- Intel-SIG: commit 0ba0c03528e9 ("perf/x86/intel: 
Factor out the initialization code for SPR") ------------------------------------- perf/x86/intel: Factor out the initialization code for SPR The SPR and ADL p-core have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_glc() for the common initialization code. The common part of the ADL p-core will be replaced by the later patch. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-3-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 45 +++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index b953fdeaff573..4dd2e1d6620e2 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5950,6 +5950,28 @@ static __always_inline bool is_mtl(u8 x86_model) (x86_model == INTEL_FAM6_METEORLAKE_L); } +static __always_inline void intel_pmu_init_glc(struct pmu *pmu) +{ + x86_pmu.late_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + x86_pmu.lbr_pt_coexist = true; + x86_pmu.num_topdown_events = 8; + x86_pmu.update_topdown_event = icl_update_topdown_event; + x86_pmu.set_topdown_event_period = icl_set_topdown_event_period; + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid(pmu, event_constraints) = intel_glc_event_constraints; + hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6604,21 +6626,7 @@ __init int intel_pmu_init(void) name = "granite_rapids"; glc_common: - pmem = true; - x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - - x86_pmu.event_constraints = intel_glc_event_constraints; - x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; - x86_pmu.limit_period = glc_limit_period; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_PEBS_ALL; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + intel_pmu_init_glc(NULL); x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = glc_get_event_constraints; @@ -6628,12 +6636,7 @@ __init int intel_pmu_init(void) mem_attr = glc_events_attrs; td_attr = glc_td_events_attrs; tsx_attr = glc_tsx_events_attrs; - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); - x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_skl(pmem); - x86_pmu.num_topdown_events = 8; - x86_pmu.update_topdown_event = icl_update_topdown_event; - x86_pmu.set_topdown_event_period = icl_set_topdown_event_period; + intel_pmu_pebs_data_source_skl(true); break; case INTEL_FAM6_ALDERLAKE: -- Gitee From 64a16db8eb899c2e6cd4fdf1c0a47ad3182fb214 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:03 -0700 Subject: [PATCH 09/23] perf/x86/intel: Factor out the 
initialization code for ADL e-core mainline inclusion from mainline-v6.7-rc1 commit d87d221f854b62f5e8026505497d33404ef6050c category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d87d221f854b62f5e8026505497d33404ef6050c ------------------------------------- Intel-SIG: commit d87d221f854b ("perf/x86/intel: Factor out the initialization code for ADL e-core") ------------------------------------- perf/x86/intel: Factor out the initialization code for ADL e-core From PMU's perspective, the ADL e-core and newer SRF/GRR have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_grt() for the common initialization code. The common part of the ADL e-core will be replaced by the later patch. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-4-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 58 +++++++++++++----------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 4dd2e1d6620e2..8328c729bd31e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5972,6 +5972,25 @@ static __always_inline void intel_pmu_init_glc(struct pmu *pmu) hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; } +static __always_inline void intel_pmu_init_grt(struct pmu *pmu) +{ + x86_pmu.mid_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), tnt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + hybrid(pmu, event_constraints) = intel_slm_event_constraints; + hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints; + hybrid(pmu, extra_regs) = intel_grt_extra_regs; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6253,28 +6272,10 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GRACEMONT: - x86_pmu.mid_ack = true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; - x86_pmu.extra_regs = intel_grt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - + intel_pmu_init_grt(NULL); intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6284,28 +6285,11 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_CRESTMONT: case INTEL_FAM6_ATOM_CRESTMONT_X: - x86_pmu.mid_ack 
= true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + intel_pmu_init_grt(NULL); x86_pmu.extra_regs = intel_cmt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; -- Gitee From 8ac43092884a3526d71deedd620c38f18650abf6 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:04 -0700 Subject: [PATCH 10/23] perf/x86/intel: Apply the common initialization code for ADL mainline inclusion from mainline-v6.7-rc1 commit 299a5fc8e783eed705015e83e381912dbbf3eabc category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=299a5fc8e783eed705015e83e381912dbbf3eabc ------------------------------------- Intel-SIG: commit 299a5fc8e783 ("perf/x86/intel: Apply the common initialization code for ADL") ------------------------------------- perf/x86/intel: Apply the common initialization code for ADL Use the intel_pmu_init_glc() and intel_pmu_init_grt() to replace the duplicate code for ADL. The current code already checks the PERF_X86_EVENT_TOPDOWN flag before invoking the Topdown metrics functions. (The PERF_X86_EVENT_TOPDOWN flag is to indicate the Topdown metric feature, which is only available for the p-core.) Drop the unnecessary adl_set_topdown_event_period() and adl_update_topdown_event(). 
Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-5-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 52 ++---------------------------------- 1 file changed, 2 insertions(+), 50 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8328c729bd31e..678822c03d65e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2514,16 +2514,6 @@ static int icl_set_topdown_event_period(struct perf_event *event) return 0; } -static int adl_set_topdown_event_period(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_set_topdown_event_period(event); -} - static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) { u32 val; @@ -2664,17 +2654,6 @@ static u64 icl_update_topdown_event(struct perf_event *event) x86_pmu.num_topdown_events - 1); } -static u64 adl_update_topdown_event(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_update_topdown_event(event); -} - - static void intel_pmu_read_topdown_event(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -6642,31 +6621,12 @@ __init int intel_pmu_init(void) static_branch_enable(&perf_is_hybrid); x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_PEBS_ALL; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.lbr_pt_coexist = true; x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.num_topdown_events = 8; - x86_pmu.update_topdown_event = adl_update_topdown_event; - x86_pmu.set_topdown_event_period = adl_set_topdown_event_period; x86_pmu.filter_match = intel_pmu_filter_match; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; - /* - * The rtm_abort_event is used to check whether to enable GPRs - * for the RTM abort event. Atom doesn't have the RTM abort - * event. There is no harmful to set it in the common - * x86_pmu.rtm_abort_event. 
- */ - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); td_attr = adl_hybrid_events_attrs; mem_attr = adl_hybrid_mem_attrs; @@ -6678,6 +6638,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; + intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; @@ -6707,16 +6668,13 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_glc_event_constraints; - pmu->pebs_constraints = intel_glc_pebs_event_constraints; pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; pmu->cpu_type = hybrid_small; + intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; @@ -6728,12 +6686,6 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 0; pmu->intel_cap.pebs_output_pt_available = 1; - memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - pmu->event_constraints = intel_slm_event_constraints; - pmu->pebs_constraints = intel_grt_pebs_event_constraints; - pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { x86_pmu.pebs_latency_data = mtl_latency_data_small; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? -- Gitee From a8fff363f2d0b2f4f8cdf7fb19205361deee7a56 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:05 -0700 Subject: [PATCH 11/23] perf/x86/intel: Clean up the hybrid CPU type handling code mainline inclusion from mainline-v6.7-rc1 commit b0560bfd4b70277a4936c82e50e940aa253c95bf category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b0560bfd4b70277a4936c82e50e940aa253c95bf ------------------------------------- Intel-SIG: commit b0560bfd4b70 ("perf/x86/intel: Clean up the hybrid CPU type handling code") ------------------------------------- perf/x86/intel: Clean up the hybrid CPU type handling code There is a fairly long list of grievances about the current code. The main beefs: 1. hybrid_big_small assumes that the *HARDWARE* (CPUID) provided core types are a bitmap. They are not. If Intel happened to make a core type of 0xff, hilarity would ensue. 2. adl_get_hybrid_cpu_type() utterly inscrutable. There are precisely zero comments and zero changelog about what it is attempting to do. According to Kan, the adl_get_hybrid_cpu_type() is there because some Alder Lake (ADL) CPUs can do some silly things. Some ADL models are *supposed* to be hybrid CPUs with big and little cores, but there are some SKUs that only have big cores. CPUID(0x1a) on those CPUs does not say that the CPUs are big cores. It apparently just returns 0x0. It confuses perf because it expects to see either 0x40 (Core) or 0x20 (Atom). 
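As an aside (not part of the patch), the enumeration in question is easy to reproduce from user space; a minimal sketch, assuming a GCC/Clang toolchain that provides <cpuid.h>:

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

          if (!__get_cpuid_count(0x1a, 0, &eax, &ebx, &ecx, &edx)) {
                  puts("CPUID leaf 0x1a not supported");
                  return 1;
          }

          /* EAX[31:24] is the core type: 0x40 = Core, 0x20 = Atom.
           * The troublesome ADL SKUs described above report 0x0 here. */
          printf("core type: %#x\n", eax >> 24);
          return 0;
  }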
The perf workaround for this is to watch for a CPU core saying it is type 0x0. If that happens on an Alder Lake, it calls x86_pmu.get_hybrid_cpu_type() and just assumes that the core is a Core (0x40) CPU. To fix up the mess, separate out the CPU types and the 'pmu' types. This allows 'hybrid_pmu_type' bitmaps without worrying that some future CPU type will set multiple bits. Since the types are now separate, add a function to glue them back together again. Actual comment on the situation in the glue function (find_hybrid_pmu_for_cpu()). Also, give ->get_hybrid_cpu_type() a real return type and make it clear that it is overriding the *CPU* type, not the PMU type. Rename cpu_type to pmu_type in the struct x86_hybrid_pmu to reflect the change. Originally-by: Dave Hansen Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-6-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/core.c | 8 ++-- arch/x86/events/intel/core.c | 71 ++++++++++++++++++++++++------------ arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 35 +++++++++++------- 4 files changed, 74 insertions(+), 42 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 283aa4602eb18..7fe35038cefd6 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1892,9 +1892,9 @@ ssize_t events_hybrid_sysfs_show(struct device *dev, str = pmu_attr->event_str; for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (!(x86_pmu.hybrid_pmu[i].cpu_type & pmu_attr->pmu_type)) + if (!(x86_pmu.hybrid_pmu[i].pmu_type & pmu_attr->pmu_type)) continue; - if (x86_pmu.hybrid_pmu[i].cpu_type & pmu->cpu_type) { + if (x86_pmu.hybrid_pmu[i].pmu_type & pmu->pmu_type) { next_str = strchr(str, ';'); if (next_str) return snprintf(page, next_str - str + 1, "%s", str); @@ -2177,11 +2177,11 @@ static int __init init_hw_perf_events(void) hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE; err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name, - (hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1); + (hybrid_pmu->pmu_type == hybrid_big) ? 
PERF_TYPE_RAW : -1); if (err) break; - if (cpu_type == hybrid_pmu->cpu_type) + if (cpu_type == hybrid_pmu->pmu_type) x86_pmu_update_cpu_context(&hybrid_pmu->pmu, raw_smp_processor_id()); } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 678822c03d65e..b7833a2fc9916 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3845,7 +3845,7 @@ static inline bool require_mem_loads_aux_event(struct perf_event *event) return false; if (is_hybrid()) - return hybrid_pmu(event->pmu)->cpu_type == hybrid_big; + return hybrid_pmu(event->pmu)->pmu_type == hybrid_big; return true; } @@ -4294,9 +4294,9 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return glc_get_event_constraints(cpuc, idx, event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4366,9 +4366,9 @@ mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return rwc_get_event_constraints(cpuc, idx, event); - if (pmu->cpu_type == hybrid_small) + if (pmu->pmu_type == hybrid_small) return cmt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4379,18 +4379,18 @@ static int adl_hw_config(struct perf_event *event) { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return hsw_hw_config(event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return intel_pmu_hw_config(event); WARN_ON(1); return -EOPNOTSUPP; } -static u8 adl_get_hybrid_cpu_type(void) +static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) { - return hybrid_big; + return HYBRID_INTEL_CORE; } static inline bool erratum_hsw11(struct perf_event *event) @@ -4587,22 +4587,47 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) } } -static bool init_hybrid_pmu(int cpu) +static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); u8 cpu_type = get_this_hybrid_cpu_type(); - struct x86_hybrid_pmu *pmu = NULL; int i; - if (!cpu_type && x86_pmu.get_hybrid_cpu_type) - cpu_type = x86_pmu.get_hybrid_cpu_type(); + /* + * This is running on a CPU model that is known to have hybrid + * configurations. But the CPU told us it is not hybrid, shame + * on it. There should be a fixup function provided for these + * troublesome CPUs (->get_hybrid_cpu_type). 
+ */ + if (cpu_type == HYBRID_INTEL_NONE) { + if (x86_pmu.get_hybrid_cpu_type) + cpu_type = x86_pmu.get_hybrid_cpu_type(); + else + return NULL; + } + /* + * This essentially just maps between the 'hybrid_cpu_type' + * and 'hybrid_pmu_type' enums: + */ for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (x86_pmu.hybrid_pmu[i].cpu_type == cpu_type) { - pmu = &x86_pmu.hybrid_pmu[i]; - break; - } + enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; + + if (cpu_type == HYBRID_INTEL_CORE && + pmu_type == hybrid_big) + return &x86_pmu.hybrid_pmu[i]; + if (cpu_type == HYBRID_INTEL_ATOM && + pmu_type == hybrid_small) + return &x86_pmu.hybrid_pmu[i]; } + + return NULL; +} + +static bool init_hybrid_pmu(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct x86_hybrid_pmu *pmu = find_hybrid_pmu_for_cpu(); + if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) { cpuc->pmu = NULL; return false; @@ -5699,7 +5724,7 @@ static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) struct perf_pmu_events_hybrid_attr *pmu_attr = container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr); - return pmu->cpu_type & pmu_attr->pmu_type; + return pmu->pmu_type & pmu_attr->pmu_type; } static umode_t hybrid_events_is_visible(struct kobject *kobj, @@ -5736,7 +5761,7 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj, container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr); int cpu = hybrid_find_supported_cpu(pmu); - return (cpu >= 0) && (pmu->cpu_type & pmu_attr->pmu_type) ? attr->mode : 0; + return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0; } static umode_t hybrid_td_is_visible(struct kobject *kobj, @@ -5750,7 +5775,7 @@ static umode_t hybrid_td_is_visible(struct kobject *kobj, return 0; /* Only the big core supports perf metrics */ - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return pmu->intel_cap.perf_metrics ? 
attr->mode : 0; return attr->mode; @@ -6637,7 +6662,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; - pmu->cpu_type = hybrid_big; + pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { @@ -6673,7 +6698,7 @@ __init int intel_pmu_init(void) /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; - pmu->cpu_type = hybrid_small; + pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 885affe47e568..b7ad205681b82 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -259,7 +259,7 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status, { u64 val; - WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); + WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); dse &= PERF_PEBS_DATA_SOURCE_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index bba85fb5e234f..7584671563cd4 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -648,10 +648,29 @@ enum { #define PERF_PEBS_DATA_SOURCE_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) +enum hybrid_cpu_type { + HYBRID_INTEL_NONE, + HYBRID_INTEL_ATOM = 0x20, + HYBRID_INTEL_CORE = 0x40, +}; + +enum hybrid_pmu_type { + not_hybrid, + hybrid_small = BIT(0), + hybrid_big = BIT(1), + + hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */ +}; + +#define X86_HYBRID_PMU_ATOM_IDX 0 +#define X86_HYBRID_PMU_CORE_IDX 1 + +#define X86_HYBRID_NUM_PMUS 2 + struct x86_hybrid_pmu { struct pmu pmu; const char *name; - u8 cpu_type; + enum hybrid_pmu_type pmu_type; cpumask_t supported_cpus; union perf_capabilities intel_cap; u64 intel_ctrl; @@ -717,18 +736,6 @@ extern struct static_key_false perf_is_hybrid; __Fp; \ }) -enum hybrid_pmu_type { - hybrid_big = 0x40, - hybrid_small = 0x20, - - hybrid_big_small = hybrid_big | hybrid_small, -}; - -#define X86_HYBRID_PMU_ATOM_IDX 0 -#define X86_HYBRID_PMU_CORE_IDX 1 - -#define X86_HYBRID_NUM_PMUS 2 - /* * struct x86_pmu - generic x86 pmu */ @@ -930,7 +937,7 @@ struct x86_pmu { */ int num_hybrid_pmus; struct x86_hybrid_pmu *hybrid_pmu; - u8 (*get_hybrid_cpu_type) (void); + enum hybrid_cpu_type (*get_hybrid_cpu_type) (void); }; struct x86_perf_task_context_opt { -- Gitee From 6f5c7e20d8ef1bffa7b1ed906689e5a2e52e7dd2 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:06 -0700 Subject: [PATCH 12/23] perf/x86/intel: Add common intel_pmu_init_hybrid() mainline inclusion from mainline-v6.7-rc1 commit 97588df87b56e27fd2b5d928d61c7a53e38afbb0 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=97588df87b56e27fd2b5d928d61c7a53e38afbb0 ------------------------------------- Intel-SIG: commit 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") ------------------------------------- perf/x86/intel: Add common intel_pmu_init_hybrid() The current hybrid initialization codes aren't well organized and are hard to read. 
Factor out intel_pmu_init_hybrid() to do a common setup for each hybrid PMU. The PMU-specific capability will be updated later via either hard code (ADL) or CPUID hybrid enumeration (MTL). Splitting the ADL and MTL initialization codes, since they have different uarches. The hard code PMU capabilities are not required for MTL either. They can be enumerated by the new leaf 0x23 and IA32_PERF_CAPABILITIES MSR. The hybrid enumeration of the IA32_PERF_CAPABILITIES MSR is broken on MTL. Using the default value. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-7-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 150 +++++++++++++++++++++++------------ 1 file changed, 101 insertions(+), 49 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index b7833a2fc9916..a9a3bd5e27b02 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4572,6 +4572,16 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static inline bool intel_pmu_broken_perf_cap(void) +{ + /* The Perf Metric (Bit 15) is always cleared */ + if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) || + (boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L)) + return true; + + return false; +} + static void update_pmu_cap(struct x86_hybrid_pmu *pmu) { unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF); @@ -4584,7 +4594,27 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->num_counters_fixed = fls(ebx); intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, &pmu->intel_ctrl, ebx); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + } + + + if (!intel_pmu_broken_perf_cap()) { + /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ + rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); } + + if (pmu->intel_cap.perf_metrics) + pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + else + pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); + + if (pmu->intel_cap.pebs_output_pt_available) + pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + else + pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -5948,10 +5978,52 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) } } -static __always_inline bool is_mtl(u8 x86_model) +static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { + { hybrid_small, "cpu_atom" }, + { hybrid_big, "cpu_core" }, +}; + +static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) { - return (x86_model == INTEL_FAM6_METEORLAKE) || - (x86_model == INTEL_FAM6_METEORLAKE_L); + unsigned long pmus_mask = pmus; + struct x86_hybrid_pmu *pmu; + int idx = 0, bit; + + x86_pmu.num_hybrid_pmus = hweight_long(pmus_mask); + x86_pmu.hybrid_pmu = kcalloc(x86_pmu.num_hybrid_pmus, + sizeof(struct x86_hybrid_pmu), + GFP_KERNEL); + if (!x86_pmu.hybrid_pmu) + return -ENOMEM; + + static_branch_enable(&perf_is_hybrid); + x86_pmu.filter_match = intel_pmu_filter_match; + + for_each_set_bit(bit, &pmus_mask, ARRAY_SIZE(intel_hybrid_pmu_type_map)) { + pmu = &x86_pmu.hybrid_pmu[idx++]; + pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id; + pmu->name = 
intel_hybrid_pmu_type_map[bit].name; + + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + + pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; + if (pmu->pmu_type & hybrid_small) { + pmu->intel_cap.perf_metrics = 0; + pmu->intel_cap.pebs_output_pt_available = 1; + pmu->mid_ack = true; + } else if (pmu->pmu_type & hybrid_big) { + pmu->intel_cap.perf_metrics = 1; + pmu->intel_cap.pebs_output_pt_available = 0; + pmu->late_ack = true; + } + } + + return 0; } static __always_inline void intel_pmu_init_glc(struct pmu *pmu) @@ -6631,24 +6703,15 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ALDERLAKE_L: case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: /* * Alder Lake has 2 types of CPU, core and atom. * * Initialize the common PerfMon capabilities here. */ - x86_pmu.hybrid_pmu = kcalloc(X86_HYBRID_NUM_PMUS, - sizeof(struct x86_hybrid_pmu), - GFP_KERNEL); - if (!x86_pmu.hybrid_pmu) - return -ENOMEM; - static_branch_enable(&perf_is_hybrid); - x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; + intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.filter_match = intel_pmu_filter_match; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; @@ -6661,10 +6724,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; - pmu->name = "cpu_core"; - pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); - pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; @@ -6689,44 +6749,36 @@ __init int intel_pmu_init(void) pmu->unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 1; - pmu->intel_cap.pebs_output_pt_available = 0; pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; - pmu->name = "cpu_atom"; - pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); - pmu->mid_ack = true; - pmu->num_counters = x86_pmu.num_counters; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed; - pmu->max_pebs_events = x86_pmu.max_pebs_events; - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 0; - pmu->intel_cap.pebs_output_pt_available = 1; - - if (is_mtl(boot_cpu_data.x86_model)) { - x86_pmu.pebs_latency_data = mtl_latency_data_small; - extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
- mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; - mem_attr = mtl_hybrid_mem_attrs; - intel_pmu_pebs_data_source_mtl(); - x86_pmu.get_event_constraints = mtl_get_event_constraints; - pmu->extra_regs = intel_cmt_extra_regs; - pr_cont("Meteorlake Hybrid events, "); - name = "meteorlake_hybrid"; - } else { - x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - intel_pmu_pebs_data_source_adl(); - pr_cont("Alderlake Hybrid events, "); - name = "alderlake_hybrid"; - } + + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + intel_pmu_pebs_data_source_adl(); + pr_cont("Alderlake Hybrid events, "); + name = "alderlake_hybrid"; + break; + + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: + intel_pmu_init_hybrid(hybrid_big_small); + + x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.get_event_constraints = mtl_get_event_constraints; + x86_pmu.hw_config = adl_hw_config; + + td_attr = adl_hybrid_events_attrs; + mem_attr = mtl_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_glc(&pmu->pmu); break; default: @@ -6832,7 +6884,7 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid()) + if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) intel_pmu_check_hybrid_pmus((u64)fixed_mask); if (x86_pmu.intel_cap.pebs_timing_info) -- Gitee From 478acb673dd8ef1f2a88faef109a87f050e8b7df Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 11 Sep 2023 06:51:28 -0700 Subject: [PATCH 13/23] perf/x86/intel: Fix broken fixed event constraints extension mainline inclusion from mainline-v6.7-rc1 commit 950ecdc672aec9cd29036b2e2535b07c103af494 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=950ecdc672aec9cd29036b2e2535b07c103af494 ------------------------------------- Intel-SIG: commit 950ecdc672ae ("perf/x86/intel: Fix broken fixed event constraints extension") ------------------------------------- perf/x86/intel: Fix broken fixed event constraints extension Unnecessary multiplexing is triggered when running an "instructions" event on an MTL. perf stat -e cpu_core/instructions/,cpu_core/instructions/ -a sleep 1 Performance counter stats for 'system wide': 115,489,000 cpu_core/instructions/ (50.02%) 127,433,777 cpu_core/instructions/ (49.98%) 1.002294504 seconds time elapsed Linux architectural perf events, e.g., cycles and instructions, usually have dedicated fixed counters. These events also have equivalent events which can be used in the general-purpose counters. The counters are precious. In the intel_pmu_check_event_constraints(), perf check/extend the event constraints of these events. So these events can utilize both fixed counters and general-purpose counters. The following cleanup commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") forgot adding the intel_pmu_check_event_constraints() into update_pmu_cap(). The architectural perf events cannot utilize the general-purpose counters. The code to check and update the counters, event constraints and extra_regs is the same among hybrid systems. Move intel_pmu_check_hybrid_pmus() to init_hybrid_pmu(), and emove the duplicate check in update_pmu_cap(). 
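The effect of the extension is easy to picture with a simplified model (illustrative only, not the kernel's event_constraint layout; the counter counts are assumed): a constraint is a bitmask of usable counters, and the fix ORs the general-purpose counters back into the fixed-counter-only mask of the architectural events, so two copies of "instructions" no longer have to multiplex.

  #include <stdio.h>
  #include <stdint.h>

  #define NUM_GP_COUNTERS 8                 /* assumed, for the sketch */
  #define FIXED_IDX_BASE  32                /* fixed counters modelled above the GP ones */

  int main(void)
  {
          uint64_t gp_mask   = (1ULL << NUM_GP_COUNTERS) - 1;
          uint64_t inst_mask = 1ULL << (FIXED_IDX_BASE + 0); /* fixed counter 0 only */

          printf("before extension: %#llx\n", (unsigned long long)inst_mask);

          inst_mask |= gp_mask;             /* the extension step */

          printf("after  extension: %#llx\n", (unsigned long long)inst_mask);
          return 0;
  }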
Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230911135128.2322833-1-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 65 +++++++++++++++--------------------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a9a3bd5e27b02..505a2cc51743d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4572,6 +4572,13 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, + int num_counters, + int num_counters_fixed, + u64 intel_ctrl); + +static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); + static inline bool intel_pmu_broken_perf_cap(void) { /* The Perf Metric (Bit 15) is always cleared */ @@ -4592,12 +4599,6 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) &eax, &ebx, &ecx, &edx); pmu->num_counters = fls(eax); pmu->num_counters_fixed = fls(ebx); - intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, - &pmu->intel_ctrl, ebx); - pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); } @@ -4605,6 +4606,16 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); } +} + +static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) +{ + intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, + &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); if (pmu->intel_cap.perf_metrics) pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; @@ -4615,6 +4626,13 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + + intel_pmu_check_event_constraints(pmu->event_constraints, + pmu->num_counters, + pmu->num_counters_fixed, + pmu->intel_ctrl); + + intel_pmu_check_extra_regs(pmu->extra_regs); } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -4670,6 +4688,8 @@ static bool init_hybrid_pmu(int cpu) if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) update_pmu_cap(pmu); + intel_pmu_check_hybrid_pmus(pmu); + if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) return false; @@ -5948,36 +5968,6 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs) } } -static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) -{ - struct x86_hybrid_pmu *pmu; - int i; - - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - pmu = &x86_pmu.hybrid_pmu[i]; - - intel_pmu_check_num_counters(&pmu->num_counters, - &pmu->num_counters_fixed, - &pmu->intel_ctrl, - fixed_mask); - - if (pmu->intel_cap.perf_metrics) { - pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS; - } - - if (pmu->intel_cap.pebs_output_pt_available) - pmu->pmu.capabilities |= 
PERF_PMU_CAP_AUX_OUTPUT; - - intel_pmu_check_event_constraints(pmu->event_constraints, - pmu->num_counters, - pmu->num_counters_fixed, - pmu->intel_ctrl); - - intel_pmu_check_extra_regs(pmu->extra_regs); - } -} - static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { { hybrid_small, "cpu_atom" }, { hybrid_big, "cpu_core" }, @@ -6884,9 +6874,6 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) - intel_pmu_check_hybrid_pmus((u64)fixed_mask); - if (x86_pmu.intel_cap.pebs_timing_info) x86_pmu.flags |= PMU_FL_RETIRE_LATENCY; -- Gitee From fbbfc7b8329de5b95f526f809da9ac0fa1b68655 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 21 Nov 2023 09:46:28 +0800 Subject: [PATCH 14/23] perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities mainline inclusion from mainline-v6.7-rc3 commit e8df9d9f4209c04161321d8c12640ae560f65939 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e8df9d9f4209c04161321d8c12640ae560f65939 ------------------------------------- Intel-SIG: commit e8df9d9f4209 ("perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities") ------------------------------------- perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities When running perf-stat command on Intel hybrid platform, perf-stat reports the following errors: sudo taskset -c 7 ./perf stat -vvvv -e cpu_atom/instructions/ sleep 1 Opening: cpu/cycles/:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xa00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 sys_perf_event_open failed, error -16 Performance counter stats for 'sleep 1': cpu_atom/instructions/ It looks the cpu_atom/instructions/ event can't be enabled on atom PMU even when the process is pinned on atom core. Investigation shows that exclusive_event_init() helper always returns -EBUSY error in the perf event creation. That's strange since the atom PMU should not be an exclusive PMU. Further investigation shows the issue was introduced by commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") The commit originally intents to clear the bit PERF_PMU_CAP_AUX_OUTPUT from PMU capabilities if intel_cap.pebs_output_pt_available is not set, but it incorrectly uses 'or' operation and leads to all PMU capabilities bits are set to 1 except bit PERF_PMU_CAP_AUX_OUTPUT. Testing this fix on Intel hybrid platforms, the observed issues disappear. 
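The operator mix-up is easy to demonstrate in isolation (stand-alone snippet with a placeholder capability bit): OR-ing with the complement sets every bit except the one that was supposed to be cleared.

  #include <stdio.h>

  #define CAP_AUX_OUTPUT 0x0040             /* placeholder value, illustration only */

  int main(void)
  {
          unsigned int wrong = 0, right = 0;

          wrong |= ~CAP_AUX_OUTPUT;         /* buggy: sets all bits except bit 6 */
          right &= ~CAP_AUX_OUTPUT;         /* intended: merely clears bit 6 */

          printf("wrong: %#x\nright: %#x\n", wrong, right);
          return 0;
  }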
Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Dapeng Mi Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231121014628.729989-1-dapeng1.mi@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 505a2cc51743d..e723e1e0e461f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4625,7 +4625,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) if (pmu->intel_cap.pebs_output_pt_available) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else - pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; intel_pmu_check_event_constraints(pmu->event_constraints, pmu->num_counters, -- Gitee From 85b358df178f46598199efd62bfa6e7f4511e71b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 26 Jun 2024 07:35:35 -0700 Subject: [PATCH 15/23] perf/x86: Add Lunar Lake and Arrow Lake support mainline inclusion from mainline-v6.11-rc1 commit a932aa0e868f92c5219d84b58c1131fc05ddcf79 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a932aa0e868f92c5219d84b58c1131fc05ddcf79 ------------------------------------- Intel-SIG: commit a932aa0e868f ("perf/x86: Add Lunar Lake and Arrow Lake support") ------------------------------------- perf/x86: Add Lunar Lake and Arrow Lake support From PMU's perspective, Lunar Lake and Arrow Lake are similar to the previous generation Meteor Lake. Both are hybrid platforms, with e-core and p-core. The key differences include: - The e-core supports 3 new fixed counters - The p-core supports an updated PEBS Data Source format - More GP counters (Updated event constraint table) - New Architectural performance monitoring V6 (New Perfmon MSRs aliasing, umask2, eq). - New PEBS format V6 (Counters Snapshotting group) - New RDPMC metrics clear mode The legacy features, the 3 new fixed counters and updated event constraint table are enabled in this patch. The new PEBS data source format, the architectural performance monitoring V6, the PEBS format V6, and the new RDPMC metrics clear mode are supported in the following patches. 
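The three new e-core fixed counters can be read straight off the constraint table added below; as a sketch (simplified, not the kernel macro expansion), each entry pins one (umask << 8) | event selector to one fixed counter index:

  #include <stdio.h>

  struct fixed_ctr_map {
          unsigned short code;              /* (umask << 8) | event, copied from the table below */
          unsigned char  fixed_idx;
          const char    *name;
  };

  static const struct fixed_ctr_map skt_new_fixed[] = {
          { 0x0073, 4, "TOPDOWN_BAD_SPECULATION.ALL" },
          { 0x019c, 5, "TOPDOWN_FE_BOUND.ALL" },
          { 0x02c2, 6, "TOPDOWN_RETIRING.ALL" },
  };

  int main(void)
  {
          for (unsigned int i = 0; i < sizeof(skt_new_fixed) / sizeof(skt_new_fixed[0]); i++)
                  printf("fixed counter %u: %s (code %#06x)\n",
                         skt_new_fixed[i].fixed_idx, skt_new_fixed[i].name,
                         skt_new_fixed[i].code);
          return 0;
  }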
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Reviewed-by: Ian Rogers Link: https://lkml.kernel.org/r/20240626143545.480761-4-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 117 ++++++++++++++++++++++++++++++ arch/x86/events/intel/ds.c | 24 ++++++ arch/x86/events/perf_event.h | 2 + arch/x86/include/asm/perf_event.h | 4 + 4 files changed, 147 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e723e1e0e461f..272b002b4ba57 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -210,6 +210,17 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_skt_event_constraints[] __read_mostly = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ + FIXED_EVENT_CONSTRAINT(0x0073, 4), /* TOPDOWN_BAD_SPECULATION.ALL */ + FIXED_EVENT_CONSTRAINT(0x019c, 5), /* TOPDOWN_FE_BOUND.ALL */ + FIXED_EVENT_CONSTRAINT(0x02c2, 6), /* TOPDOWN_RETIRING.ALL */ + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -358,6 +369,55 @@ static struct extra_reg intel_rwc_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +static struct event_constraint intel_lnc_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), + + INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), + INTEL_UEVENT_CONSTRAINT(0x0175, 0x4), + + INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff), + INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff), + /* + * Generally event codes < 0x90 are restricted to counters 0-3. + * The 0x2E and 0x3C are exception, which has no restriction. + */ + INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), + INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), + INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8), + INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3), + INTEL_EVENT_CONSTRAINT(0xce, 0x1), + + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), + /* + * Generally event codes >= 0x90 are likely to have no restrictions. + * The exception are defined as above. 
+ */ + INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0x3ff), + + EVENT_CONSTRAINT_END +}; + + EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); @@ -5686,6 +5746,23 @@ static struct attribute *adl_hybrid_events_attrs[] = { NULL, }; +EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_lnl, "event=0xc2,umask=0x02;event=0x00,umask=0x80", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_lnl, "event=0x9c,umask=0x01;event=0x00,umask=0x82", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_lnl, "event=0xa4,umask=0x02;event=0x00,umask=0x83", hybrid_big_small); + +static struct attribute *lnl_hybrid_events_attrs[] = { + EVENT_PTR(slots_adl), + EVENT_PTR(td_retiring_lnl), + EVENT_PTR(td_bad_spec_adl), + EVENT_PTR(td_fe_bound_lnl), + EVENT_PTR(td_be_bound_lnl), + EVENT_PTR(td_heavy_ops_adl), + EVENT_PTR(td_br_mis_adl), + EVENT_PTR(td_fetch_lat_adl), + EVENT_PTR(td_mem_bound_adl), + NULL +}; + /* Must be in IDX order */ EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small); EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small); @@ -6057,6 +6134,21 @@ static __always_inline void intel_pmu_init_grt(struct pmu *pmu) hybrid(pmu, extra_regs) = intel_grt_extra_regs; } +static __always_inline void intel_pmu_init_lnc(struct pmu *pmu) +{ + intel_pmu_init_glc(pmu); + hybrid(pmu, event_constraints) = intel_lnc_event_constraints; + hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints; + hybrid(pmu, extra_regs) = intel_rwc_extra_regs; +} + +static __always_inline void intel_pmu_init_skt(struct pmu *pmu) +{ + intel_pmu_init_grt(pmu); + hybrid(pmu, event_constraints) = intel_skt_event_constraints; + hybrid(pmu, extra_regs) = intel_cmt_extra_regs; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6771,6 +6863,31 @@ __init int intel_pmu_init(void) intel_pmu_init_glc(&pmu->pmu); break; + case INTEL_FAM6_LUNARLAKE_M: + case INTEL_FAM6_ARROWLAKE: + intel_pmu_init_hybrid(hybrid_big_small); + + x86_pmu.get_event_constraints = mtl_get_event_constraints; + x86_pmu.hw_config = adl_hw_config; + + td_attr = lnl_hybrid_events_attrs; + mem_attr = mtl_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
+ mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_lnc(&pmu->pmu); + + /* Initialize Atom core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + intel_pmu_init_skt(&pmu->pmu); + + pr_cont("Lunarlake Hybrid events, "); + name = "lunarlake_hybrid"; + break; + default: switch (x86_pmu.version) { case 1: diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index b7ad205681b82..1fa0a03a516c8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1073,6 +1073,30 @@ struct event_constraint intel_glc_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_lnc_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), + + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. + */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 7584671563cd4..ac5605772a2a3 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1441,6 +1441,8 @@ extern struct event_constraint intel_icl_pebs_event_constraints[]; extern struct event_constraint intel_glc_pebs_event_constraints[]; +extern struct event_constraint intel_lnc_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_add(struct perf_event *event); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 45fabe4f9dfe1..dd1941a6469f5 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -302,6 +302,10 @@ struct x86_pmu_capability { #define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) #define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) +/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */ +/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */ +/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */ + /* * We model BTS tracing as another fixed-mode PMC. 
* -- Gitee From 9f903640fc83921775cf69be613b39e433759fdc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 26 Jun 2024 07:35:36 -0700 Subject: [PATCH 16/23] perf/x86/intel: Rename model-specific pebs_latency_data functions mainline inclusion from mainline-v6.11-rc1 commit 090262439f66df03d4e9d0e52e14104b729e2ef8 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=090262439f66df03d4e9d0e52e14104b729e2ef8 ------------------------------------- Intel-SIG: commit 090262439f66 ("perf/x86/intel: Rename model-specific pebs_latency_data functions") ------------------------------------- perf/x86/intel: Rename model-specific pebs_latency_data functions The model-specific pebs_latency_data functions of ADL and MTL use the "small" as a postfix to indicate the e-core. The postfix is too generic for a model-specific function. It cannot provide useful information that can directly map it to a specific uarch, which can facilitate the development and maintenance. Use the abbr of the uarch to rename the model-specific functions. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ian Rogers Link: https://lkml.kernel.org/r/20240626143545.480761-5-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 9 ++++----- arch/x86/events/intel/ds.c | 20 ++++++++++---------- arch/x86/events/perf_event.h | 4 ++-- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 272b002b4ba57..27a4e44e4702d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6432,7 +6432,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_GRACEMONT: intel_pmu_init_grt(NULL); intel_pmu_pebs_data_source_grt(); - x86_pmu.pebs_latency_data = adl_latency_data_small; + x86_pmu.pebs_latency_data = grt_latency_data; x86_pmu.get_event_constraints = tnt_get_event_constraints; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; @@ -6446,7 +6446,7 @@ __init int intel_pmu_init(void) intel_pmu_init_grt(NULL); x86_pmu.extra_regs = intel_cmt_extra_regs; intel_pmu_pebs_data_source_cmt(); - x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.pebs_latency_data = cmt_latency_data; x86_pmu.get_event_constraints = cmt_get_event_constraints; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; @@ -6792,8 +6792,7 @@ __init int intel_pmu_init(void) */ intel_pmu_init_hybrid(hybrid_big_small); - x86_pmu.pebs_latency_data = adl_latency_data_small; - + x86_pmu.pebs_latency_data = grt_latency_data; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; @@ -6848,7 +6847,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_METEORLAKE_L: intel_pmu_init_hybrid(hybrid_big_small); - x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.pebs_latency_data = cmt_latency_data; x86_pmu.get_event_constraints = mtl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 1fa0a03a516c8..1c4ba038a0c31 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -254,8 +254,8 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) } /* Retrieve the latency data for e-core of ADL */ -static u64 __adl_latency_data_small(struct perf_event *event, u64 
status, - u8 dse, bool tlb, bool lock, bool blk) +static u64 __grt_latency_data(struct perf_event *event, u64 status, + u8 dse, bool tlb, bool lock, bool blk) { u64 val; @@ -274,27 +274,27 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status, return val; } -u64 adl_latency_data_small(struct perf_event *event, u64 status) +u64 grt_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; dse.val = status; - return __adl_latency_data_small(event, status, dse.ld_dse, - dse.ld_locked, dse.ld_stlb_miss, - dse.ld_data_blk); + return __grt_latency_data(event, status, dse.ld_dse, + dse.ld_locked, dse.ld_stlb_miss, + dse.ld_data_blk); } /* Retrieve the latency data for e-core of MTL */ -u64 mtl_latency_data_small(struct perf_event *event, u64 status) +u64 cmt_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; dse.val = status; - return __adl_latency_data_small(event, status, dse.mtl_dse, - dse.mtl_stlb_miss, dse.mtl_locked, - dse.mtl_fwd_blk); + return __grt_latency_data(event, status, dse.mtl_dse, + dse.mtl_stlb_miss, dse.mtl_locked, + dse.mtl_fwd_blk); } static u64 load_latency_data(struct perf_event *event, u64 status) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ac5605772a2a3..70676bcf2b5dd 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1407,9 +1407,9 @@ void intel_pmu_disable_bts(void); int intel_pmu_drain_bts_buffer(void); -u64 adl_latency_data_small(struct perf_event *event, u64 status); +u64 grt_latency_data(struct perf_event *event, u64 status); -u64 mtl_latency_data_small(struct perf_event *event, u64 status); +u64 cmt_latency_data(struct perf_event *event, u64 status); extern struct event_constraint intel_core2_pebs_event_constraints[]; -- Gitee From 497a444010a34192218fea8d62c44df0926feb48 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 26 Jun 2024 07:35:37 -0700 Subject: [PATCH 17/23] perf/x86/intel: Support new data source for Lunar Lake mainline inclusion from mainline-v6.11-rc1 commit 608f6976c309793ceea37292c54b057dab091944 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=608f6976c309793ceea37292c54b057dab091944 ------------------------------------- Intel-SIG: commit 608f6976c309 ("perf/x86/intel: Support new data source for Lunar Lake") ------------------------------------- perf/x86/intel: Support new data source for Lunar Lake A new PEBS data source format is introduced for the p-core of Lunar Lake. The data source field is extended to 8 bits with new encodings. A new layout is introduced into the union intel_x86_pebs_dse. Introduce the lnl_latency_data() to parse the new format. Enlarge the pebs_data_source[] accordingly to include new encodings. Only the mem load and the mem store events can generate the data source. Introduce INTEL_HYBRID_LDLAT_CONSTRAINT and INTEL_HYBRID_STLAT_CONSTRAINT to mark them. Add two new bits for the new cache-related data src, L2_MHB and MSC. The L2_MHB is short for L2 Miss Handling Buffer, which is similar to LFB (Line Fill Buffer), but to track the L2 Cache misses. The MSC stands for the memory-side cache. 
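A simplified sketch of the decode step being widened here (illustrative only, not the ds.c code): the p-core lookup now indexes a 256-entry table through an 8-bit mask, the e-core keeps its 16-entry table and 4-bit mask, and unknown encodings fall back to an "N/A" value.

  #include <stdio.h>
  #include <stdint.h>

  #define LNC_SOURCE_MAX 0x100              /* p-core: the field is now 8 bits */
  #define GRT_SOURCE_MAX 0x10               /* e-core keeps the 4-bit field */
  #define SRC_NA         0x0f               /* stand-in for an "N/A" encoding */

  static uint64_t lnc_table[LNC_SOURCE_MAX]; /* sparse; unknown codes stay 0 */

  static uint64_t decode_lnc(uint64_t status)
  {
          uint64_t val = lnc_table[status & (LNC_SOURCE_MAX - 1)];

          return val ? val : SRC_NA;        /* unknown code: report N/A */
  }

  int main(void)
  {
          lnc_table[0x11] = 0x1234;         /* pretend: 0x11 = local memory hit */

          printf("%#llx %#llx\n",
                 (unsigned long long)decode_lnc(0x11),
                 (unsigned long long)decode_lnc(0x7f));
          return 0;
  }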
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Reviewed-by: Ian Rogers Link: https://lkml.kernel.org/r/20240626143545.480761-6-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 2 + arch/x86/events/intel/ds.c | 94 ++++++++++++++++++++++++++++++++- arch/x86/events/perf_event.h | 16 +++++- include/uapi/linux/perf_event.h | 6 ++- 4 files changed, 113 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 27a4e44e4702d..f2e1c216292bf 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6866,6 +6866,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ARROWLAKE: intel_pmu_init_hybrid(hybrid_big_small); + x86_pmu.pebs_latency_data = lnl_latency_data; x86_pmu.get_event_constraints = mtl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; @@ -6883,6 +6884,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; intel_pmu_init_skt(&pmu->pmu); + intel_pmu_pebs_data_source_lnl(); pr_cont("Lunarlake Hybrid events, "); name = "lunarlake_hybrid"; break; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 1c4ba038a0c31..882f8b23e7f99 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -60,6 +60,15 @@ union intel_x86_pebs_dse { unsigned int mtl_fwd_blk:1; unsigned int ld_reserved4:24; }; + struct { + unsigned int lnc_dse:8; + unsigned int ld_reserved5:2; + unsigned int lnc_stlb_miss:1; + unsigned int lnc_locked:1; + unsigned int lnc_data_blk:1; + unsigned int lnc_addr_blk:1; + unsigned int ld_reserved6:18; + }; }; @@ -74,7 +83,7 @@ union intel_x86_pebs_dse { #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) /* Version for Sandy Bridge and later */ -static u64 pebs_data_source[] = { +static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = { P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ @@ -170,6 +179,40 @@ void __init intel_pmu_pebs_data_source_cmt(void) __intel_pmu_pebs_data_source_cmt(pebs_data_source); } +/* Version for Lion Cove and later */ +static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: LFB/L1 Miss Handling Buffer hit */ + 0, /* 0x04: Reserved */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x05: L2 Hit */ + OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE), /* 0x06: L2 Miss Handling Buffer Hit */ + 0, /* 0x07: Reserved */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x08: L3 Hit */ + 0, /* 0x09: Reserved */ + 0, /* 0x0a: Reserved */ + 0, /* 0x0b: Reserved */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* 0x0c: L3 Hit Snoop Fwd */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0d: L3 Hit Snoop HitM */ + 0, /* 0x0e: Reserved */ + P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0f: L3 Miss Snoop HitM */ + OP_LH | LEVEL(MSC) | P(SNOOP, NONE), /* 0x10: Memory-side Cache Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */ +}; + +void __init intel_pmu_pebs_data_source_lnl(void) +{ + u64 *data_source; + + 
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; + memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source)); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_cmt(data_source); +} + static u64 precise_store_data(u64 status) { union intel_x86_pebs_dse dse; @@ -261,7 +304,7 @@ static u64 __grt_latency_data(struct perf_event *event, u64 status, WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); - dse &= PERF_PEBS_DATA_SOURCE_MASK; + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; pebs_set_tlb_lock(&val, tlb, lock); @@ -297,6 +340,51 @@ u64 cmt_latency_data(struct perf_event *event, u64 status) dse.mtl_fwd_blk); } +static u64 lnc_latency_data(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + union perf_mem_data_src src; + u64 val; + + dse.val = status; + + /* LNC core latency data */ + val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK]; + if (!val) + val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA); + + if (dse.lnc_stlb_miss) + val |= P(TLB, MISS) | P(TLB, L2); + else + val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + if (dse.lnc_locked) + val |= P(LOCK, LOCKED); + + if (dse.lnc_data_blk) + val |= P(BLK, DATA); + if (dse.lnc_addr_blk) + val |= P(BLK, ADDR); + if (!dse.lnc_data_blk && !dse.lnc_addr_blk) + val |= P(BLK, NA); + + src.val = val; + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + src.mem_op = P(OP, STORE); + + return src.val; +} + +u64 lnl_latency_data(struct perf_event *event, u64 status) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->pmu_type == hybrid_small) + return cmt_latency_data(event, status); + + return lnc_latency_data(event, status); +} + static u64 load_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; @@ -1077,6 +1165,8 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), + INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff), + INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 70676bcf2b5dd..6dd64aa2ac95e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -466,6 +466,14 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID) +#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW) + +#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW) + /* Event constraint, but match on all event flags too. 
*/ #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) @@ -645,8 +653,10 @@ enum { x86_lbr_exclusive_max, }; -#define PERF_PEBS_DATA_SOURCE_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_MAX 0x100 #define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) enum hybrid_cpu_type { HYBRID_INTEL_NONE, @@ -1411,6 +1421,8 @@ u64 grt_latency_data(struct perf_event *event, u64 status); u64 cmt_latency_data(struct perf_event *event, u64 status); +u64 lnl_latency_data(struct perf_event *event, u64 status); + extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[]; @@ -1526,6 +1538,8 @@ void intel_pmu_pebs_data_source_mtl(void); void intel_pmu_pebs_data_source_cmt(void); +void intel_pmu_pebs_data_source_lnl(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 414ebae700920..efc16ff9717f1 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1276,11 +1276,13 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x8 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_EXTN_MEM 0x09 /* Extension memory */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ -- Gitee From 8c4254747f01eb7aae9cff6c2239ac562a8f17d3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: [PATCH 18/23] tools/include: Sync uapi/linux/perf.h with the kernel sources mainline inclusion from mainline-v6.11-rc4 commit 8ec9497d3ef34fab216e277eca5035811f06b421 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8ec9497d3ef34fab216e277eca5035811f06b421 ------------------------------------- Intel-SIG: commit 8ec9497d3ef3 ("tools/include: Sync uapi/linux/perf.h with the kernel sources") ------------------------------------- tools/include: Sync uapi/linux/perf.h with the kernel sources To pick up changes from: 608f6976c309 perf/x86/intel: Support new data source for Lunar Lake This should be used to beautify perf syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Please see tools/include/uapi/README for details (it's in the first patch of this series). 
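Once this sync lands, user space can decode the two new level numbers from a PERF_SAMPLE_DATA_SRC word. A minimal sketch of such a decoder is below; it assumes the synced perf_event.h is on the include path, only looks at the mem_lvl_num field (real consumers also check mem_op, mem_snoop, etc.), and the helper name lvlnum_name() is made up for illustration:

#include <linux/perf_event.h>

/* Map the cache-level number carried in a PERF_SAMPLE_DATA_SRC word to a
 * human-readable name, including the L2_MHB and MSC encodings added here. */
static const char *lvlnum_name(__u64 data_src)
{
        union perf_mem_data_src src = { .val = data_src };

        switch (src.mem_lvl_num) {
        case PERF_MEM_LVLNUM_L1:        return "L1";
        case PERF_MEM_LVLNUM_L2:        return "L2";
        case PERF_MEM_LVLNUM_L2_MHB:    return "L2 miss handling buffer";
        case PERF_MEM_LVLNUM_MSC:       return "memory-side cache";
        case PERF_MEM_LVLNUM_L3:        return "L3";
        case PERF_MEM_LVLNUM_LFB:       return "LFB / L1 miss handling buffer";
        case PERF_MEM_LVLNUM_RAM:       return "RAM";
        default:                        return "other/NA";
        }
}

perf's own memory-profiling output performs an equivalent mapping when it prints data-source information for mem-load samples.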
Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Ian Rogers Cc: Adrian Hunter Cc: "Liang, Kan" Cc: linux-perf-users@vger.kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Quanxian Wang --- tools/include/uapi/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 03923346cc053..1744b4c20a7bb 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1252,9 +1252,11 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0xa available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 7-0xa available */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ -- Gitee From 6147a69c8b5f5a0b42f68e2800669508a2557fd6 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 16 Dec 2024 12:45:02 -0800 Subject: [PATCH 19/23] perf/x86/intel/ds: Add PEBS format 6 mainline inclusion from mainline-v6.13-rc5 commit b8c3a2502a205321fe66c356f4b70cabd8e1a5fc category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b8c3a2502a205321fe66c356f4b70cabd8e1a5fc ------------------------------------- Intel-SIG: commit b8c3a2502a20 ("perf/x86/intel/ds: Add PEBS format 6") ------------------------------------- perf/x86/intel/ds: Add PEBS format 6 The only difference between 5 and 6 is the new counters snapshotting group, without the following counters snapshotting enabling patches, it's impossible to utilize the feature in a PEBS record. It's safe to share the same code path with format 5. Add format 6, so the end user can at least utilize the legacy PEBS features. 
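Because format 6 reuses the format-5 drain path, the legacy PEBS request model is unchanged from user space: a sampling event simply asks for a precise IP. A minimal sketch under that assumption is below (error handling omitted; open_precise_cycles() is an illustrative name, not an API), and the kernel is expected to fail the open if the requested precise_ip level cannot be provided:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

/* Open a PEBS-backed (precise) cycles sampling event on the calling thread. */
static int open_precise_cycles(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.sample_period = 100003;
        attr.sample_type = PERF_SAMPLE_IP;
        attr.precise_ip = 2;            /* request PEBS-quality sample IPs */
        attr.exclude_kernel = 1;
        attr.disabled = 1;

        /* this thread, any CPU */
        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

This is essentially what perf record -e cycles:pp arranges before enabling the event.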
Fixes: a932aa0e868f ("perf/x86: Add Lunar Lake and Arrow Lake support") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20241216204505.748363-1-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/ds.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 882f8b23e7f99..e4d2f04f8a7e7 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2440,6 +2440,7 @@ void __init intel_ds_init(void) case 4: case 5: + case 6: x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; x86_pmu.pebs_record_size = sizeof(struct pebs_basic); if (x86_pmu.intel_cap.pebs_baseline) { -- Gitee From 99294a207d4ee379b22a416a3a1021c531fb8b71 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 16 Dec 2024 08:02:52 -0800 Subject: [PATCH 20/23] perf/x86/intel: Fix bitmask of OCR and FRONTEND events for LNC mainline inclusion from mainline-v6.13-rc5 commit aa5d2ca7c179c40669edb5e96d931bf9828dea3d category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=aa5d2ca7c179c40669edb5e96d931bf9828dea3d ------------------------------------- Intel-SIG: commit aa5d2ca7c179 ("perf/x86/intel: Fix bitmask of OCR and FRONTEND events for LNC") ------------------------------------- perf/x86/intel: Fix bitmask of OCR and FRONTEND events for LNC The released OCR and FRONTEND events utilized more bits on Lunar Lake p-core. The corresponding mask in the extra_regs has to be extended to unblock the extra bits. Add a dedicated intel_lnc_extra_regs. Fixes: a932aa0e868f ("perf/x86: Add Lunar Lake and Arrow Lake support") Reported-by: Andi Kleen Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20241216160252.430858-1-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f2e1c216292bf..3f03ffc5ea2f4 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -417,6 +417,16 @@ static struct event_constraint intel_lnc_event_constraints[] = { EVENT_CONSTRAINT_END }; +static struct extra_reg intel_lnc_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0xfffffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0xfffffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), + INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); @@ -6139,7 +6149,7 @@ static __always_inline void intel_pmu_init_lnc(struct pmu *pmu) intel_pmu_init_glc(pmu); hybrid(pmu, event_constraints) = intel_lnc_event_constraints; hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints; - hybrid(pmu, extra_regs) = intel_rwc_extra_regs; + hybrid(pmu, extra_regs) = intel_lnc_extra_regs; } static __always_inline void intel_pmu_init_skt(struct pmu *pmu) -- Gitee From 86aa81bcb85d19306ad2ef95b3e9dc20f8b7a599 Mon Sep 
17 00:00:00 2001 From: Kan Liang Date: Wed, 19 Feb 2025 06:10:05 -0800 Subject: [PATCH 21/23] perf/x86/intel: Fix event constraints for LNC mainline inclusion from mainline-v6.14-rc4 commit 782cffeec9ad96daa64ffb2d527b2a052fb02552 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=782cffeec9ad96daa64ffb2d527b2a052fb02552 ------------------------------------- Intel-SIG: commit 782cffeec9ad ("perf/x86/intel: Fix event constraints for LNC") ------------------------------------- perf/x86/intel: Fix event constraints for LNC According to the latest event list, update the event constraint tables for Lion Cove core. The general rule (the event codes < 0x90 are restricted to counters 0-3.) has been removed. There is no restriction for most of the performance monitoring events. Fixes: a932aa0e868f ("perf/x86: Add Lunar Lake and Arrow Lake support") Reported-by: Amiri Khalil Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250219141005.2446823-1-kan.liang@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 20 +++++++------------- arch/x86/events/intel/ds.c | 2 +- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3f03ffc5ea2f4..d3d6aef2549a4 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -385,34 +385,28 @@ static struct event_constraint intel_lnc_event_constraints[] = { METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), + INTEL_EVENT_CONSTRAINT(0x20, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x012a, 0xf), + INTEL_UEVENT_CONSTRAINT(0x012b, 0xf), INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), INTEL_UEVENT_CONSTRAINT(0x0175, 0x4), INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff), INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff), - /* - * Generally event codes < 0x90 are restricted to counters 0-3. - * The 0x2E and 0x3C are exception, which has no restriction. - */ - INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf), - INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf), - INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8), + INTEL_UEVENT_CONSTRAINT(0x01cd, 0x3fc), INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3), - INTEL_EVENT_CONSTRAINT(0xce, 0x1), INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), - /* - * Generally event codes >= 0x90 are likely to have no restrictions. - * The exception are defined as above. 
- */ - INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0x3ff), + + INTEL_UEVENT_CONSTRAINT(0x00e0, 0xf), EVENT_CONSTRAINT_END }; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index e4d2f04f8a7e7..54563c6d0a5f2 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1165,7 +1165,7 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), - INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff), + INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc), INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ -- Gitee From 721da222a1bd3386616a8af05a080f078ab96c71 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Jan 2025 17:22:52 +0100 Subject: [PATCH 22/23] x86/cpu: Update Intel Family comments mainline inclusion from mainline-v6.15-rc1 commit 43bb700cff6bc2f0d337006b864192227fb05dc1 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=43bb700cff6bc2f0d337006b864192227fb05dc1 ------------------------------------- Intel-SIG: commit 43bb700cff6b ("x86/cpu: Update Intel Family comments") ------------------------------------- x86/cpu: Update Intel Family comments Because who can ever remember all these names. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250127162252.GK16742@noisy.programming.kicks-ass.net Signed-off-by: Quanxian Wang --- arch/x86/include/asm/intel-family.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 1eae393a81a91..3d5a87f307e1c 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -102,9 +102,9 @@ #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ -#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF +#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF /* Raptor Cove */ -#define INTEL_FAM6_GRANITERAPIDS_X 0xAD +#define INTEL_FAM6_GRANITERAPIDS_X 0xAD /* Redwood Cove */ #define INTEL_FAM6_GRANITERAPIDS_D 0xAE /* "Hybrid" Processors (P-Core/E-Core) */ @@ -117,9 +117,17 @@ #define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ #define INTEL_FAM6_RAPTORLAKE_P 0xBA -#define INTEL_FAM6_METEORLAKE 0xAC +#define INTEL_FAM6_METEORLAKE 0xAC /* Redwood Cove / Crestmont */ #define INTEL_FAM6_METEORLAKE_L 0xAA +#define INTEL_FAM6_ARROWLAKE 0xC6 +#define INTEL_FAM6_ARROWLAKE_H 0xC5 /* Lion Cove / Skymont */ +#define INTEL_FAM6_ARROWLAKE_U 0xB5 + +#define INTEL_FAM6_LUNARLAKE_M 0xBD /* Lion Cove / Skymont */ + +#define INTEL_FAM6_PANTHERLAKE_L 0xCC /* Cougar Cove / Crestmont */ + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ -- Gitee From 598d38e10b6216d0c3e3ad4450428faccdc22da2 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 15 Apr 2025 11:44:08 +0000 Subject: [PATCH 23/23] perf/x86/intel: Add PMU support for Clearwater Forest mainline inclusion from mainline-v6.16-rc1 commit 48d66c89dce1e3687174608a5f5c31d5961a9916 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID2L3S CVE: N/A Reference: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=48d66c89dce1e3687174608a5f5c31d5961a9916 ------------------------------------- Intel-SIG: commit 48d66c89dce1 ("perf/x86/intel: Add PMU support for Clearwater Forest") ------------------------------------- perf/x86/intel: Add PMU support for Clearwater Forest From the PMU's perspective, Clearwater Forest is similar to the previous generation Sierra Forest. The key differences are the ARCH PEBS feature and the three newly added fixed counters for topdown L1 metrics events. The ARCH PEBS is supported in the following patches. This patch provides support for the basic perfmon features and the three newly added fixed counters. Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20250415114428.341182-3-dapeng1.mi@linux.intel.com Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index d3d6aef2549a4..34f9e7efe6fbb 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2212,6 +2212,18 @@ static struct extra_reg intel_cmt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_skt, "event=0x9c,umask=0x01"); +EVENT_ATTR_STR(topdown-retiring, td_retiring_skt, "event=0xc2,umask=0x02"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound_skt, "event=0xa4,umask=0x02"); + +static struct attribute *skt_events_attrs[] = { + EVENT_PTR(td_fe_bound_skt), + EVENT_PTR(td_retiring_skt), + EVENT_PTR(td_bad_spec_cmt), + EVENT_PTR(td_be_bound_skt), + NULL, +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -6459,6 +6471,18 @@ __init int intel_pmu_init(void) name = "crestmont"; break; + case INTEL_FAM6_ATOM_DARKMONT_X: + intel_pmu_init_skt(NULL); + intel_pmu_pebs_data_source_cmt(); + x86_pmu.pebs_latency_data = cmt_latency_data; + x86_pmu.get_event_constraints = cmt_get_event_constraints; + td_attr = skt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = cmt_format_attr; + pr_cont("Darkmont events, "); + name = "darkmont"; + break; + case INTEL_FAM6_WESTMERE: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: -- Gitee
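The three EVENT_ATTR_STR() entries above (together with the reused td_bad_spec_cmt) become the symbolic topdown event names that perf stat can resolve on a Darkmont part. The same counter can also be programmed through a raw config; the sketch below is illustrative only, assumes the conventional x86 raw-config layout (event select in bits 0-7, unit mask in bits 8-15), and open_topdown_fe_bound()/X86_RAW_EVENT() are made-up names:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

/* x86 raw config: event select in bits 0-7, unit mask in bits 8-15. */
#define X86_RAW_EVENT(event, umask)     ((event) | ((umask) << 8))

/* Count topdown-fe-bound (event=0x9c, umask=0x01) system-wide on one CPU. */
static int open_topdown_fe_bound(int cpu)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = X86_RAW_EVENT(0x9c, 0x01);
        attr.disabled = 1;

        return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
}

In practice the sysfs-exported names are the simpler route, e.g. perf stat -e topdown-fe-bound,topdown-retiring,topdown-be-bound on such a system; per-CPU system-wide opens like the one above normally need CAP_PERFMON or a relaxed perf_event_paranoid setting.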