From c10d951b8652703f7fa55600704e2f0b60a88622 Mon Sep 17 00:00:00 2001 From: Zhou Wang Date: Thu, 7 May 2020 10:58:25 +0800 Subject: [PATCH 01/39] drivers/perf: hisi: Permit modular builds of HiSilicon uncore drivers This patch lets HiSilicon uncore PMU driver can be built as modules. A common module and three specific uncore PMU driver modules will be built. Export necessary functions in hisi_uncore_pmu module, and change irq_set_affinity to irq_set_affinity_hint to pass compile. Signed-off-by: Zhou Wang Tested-by: Qi Liu Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/1588820305-174479-1-git-send-email-wangzhou1@hisilicon.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- drivers/perf/Kconfig | 9 ++------ drivers/perf/hisilicon/Kconfig | 7 ++++++ drivers/perf/hisilicon/Makefile | 3 ++- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 10 ++++---- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 10 ++++---- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 10 ++++---- drivers/perf/hisilicon/hisi_uncore_pmu.c | 23 +++++++++++++++++-- 7 files changed, 50 insertions(+), 22 deletions(-) create mode 100644 drivers/perf/hisilicon/Kconfig diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 09ae8a970880..a9261cf48293 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -79,13 +79,6 @@ config FSL_IMX8_DDR_PMU can give information about memory throughput and other related events. -config HISI_PMU - bool "HiSilicon SoC PMU" - depends on ARM64 && ACPI - help - Support for HiSilicon SoC uncore performance monitoring - unit (PMU), such as: L3C, HHA and DDRC. - config QCOM_L2_PMU bool "Qualcomm Technologies L2-cache PMU" depends on ARCH_QCOM && ARM64 && ACPI @@ -129,4 +122,6 @@ config ARM_SPE_PMU Extension, which provides periodic sampling of operations in the CPU pipeline and reports this via the perf AUX interface. +source "drivers/perf/hisilicon/Kconfig" + endmenu diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig new file mode 100644 index 000000000000..c5d1b7019fff --- /dev/null +++ b/drivers/perf/hisilicon/Kconfig @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +config HISI_PMU + tristate "HiSilicon SoC PMU drivers" + depends on ARM64 && ACPI + help + Support for HiSilicon SoC L3 Cache performance monitor, Hydra Home + Agent performance monitor and DDR Controller performance monitor. diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index c3a96ec2bf66..e8377061845f 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o +obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ + hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index b79c96b14328..883764deba3b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -398,8 +398,9 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); if (ret) { dev_err(ddrc_pmu->dev, "DDRC PMU register failed!\n"); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, - &ddrc_pmu->node); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, &ddrc_pmu->node); + irq_set_affinity_hint(ddrc_pmu->irq, NULL); } return ret; @@ -410,8 +411,9 @@ static int hisi_ddrc_pmu_remove(struct platform_device *pdev) struct hisi_pmu *ddrc_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&ddrc_pmu->pmu); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, - &ddrc_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, + &ddrc_pmu->node); + irq_set_affinity_hint(ddrc_pmu->irq, NULL); return 0; } diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 78865b4ac4a6..d2cdece7cb5c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -409,8 +409,9 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { dev_err(hha_pmu->dev, "HHA PMU register failed!\n"); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, - &hha_pmu->node); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node); + irq_set_affinity_hint(hha_pmu->irq, NULL); } return ret; @@ -421,8 +422,9 @@ static int hisi_hha_pmu_remove(struct platform_device *pdev) struct hisi_pmu *hha_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&hha_pmu->pmu); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, - &hha_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, + &hha_pmu->node); + irq_set_affinity_hint(hha_pmu->irq, NULL); return 0; } diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 9dd50c3bc74e..c07689833e5f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -399,8 +399,9 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { dev_err(l3c_pmu->dev, "L3C PMU register failed!\n"); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, - &l3c_pmu->node); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node); + irq_set_affinity_hint(l3c_pmu->irq, NULL); } return ret; @@ -411,8 +412,9 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev) struct hisi_pmu *l3c_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&l3c_pmu->pmu); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, - &l3c_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + &l3c_pmu->node); + irq_set_affinity_hint(l3c_pmu->irq, NULL); return 0; } diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 79f76f8dda8e..7a7e583893cb 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -34,6 +34,7 @@ ssize_t hisi_format_sysfs_show(struct device *dev, return sprintf(buf, "%s\n", (char *)eattr->var); } +EXPORT_SYMBOL_GPL(hisi_format_sysfs_show); /* * PMU event attributes @@ -47,6 +48,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var); } +EXPORT_SYMBOL_GPL(hisi_event_sysfs_show); /* * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show @@ -58,6 +60,7 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, return sprintf(buf, "%d\n", hisi_pmu->on_cpu); } +EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show); static bool hisi_validate_event_group(struct perf_event *event) { @@ -96,6 +99,7 @@ int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx) { return idx >= 0 && idx < hisi_pmu->num_counters; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_counter_valid); int hisi_uncore_pmu_get_event_idx(struct perf_event *event) { @@ -112,6 +116,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) return idx; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { @@ -172,6 +177,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return 0; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init); /* * Set the counter to count the event that we're interested in, @@ -219,6 +225,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) /* Write start value to the hardware event counter */ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period); void hisi_uncore_pmu_event_update(struct perf_event *event) { @@ -239,6 +246,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update); void hisi_uncore_pmu_start(struct perf_event *event, int flags) { @@ -261,6 +269,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { @@ -277,6 +286,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags) hisi_uncore_pmu_event_update(event); hwc->state |= PERF_HES_UPTODATE; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop); int hisi_uncore_pmu_add(struct perf_event *event, int flags) { @@ -299,6 +309,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags) return 0; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add); void hisi_uncore_pmu_del(struct perf_event *event, int flags) { @@ -310,12 +321,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del); void hisi_uncore_pmu_read(struct perf_event *event) { /* Read hardware counter and update the perf counter statistics */ hisi_uncore_pmu_event_update(event); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read); void hisi_uncore_pmu_enable(struct pmu *pmu) { @@ -328,6 +341,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu) hisi_pmu->ops->start_counters(hisi_pmu); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable); void hisi_uncore_pmu_disable(struct pmu *pmu) { @@ -335,6 +349,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) hisi_pmu->ops->stop_counters(hisi_pmu); } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable); /* * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. @@ -398,10 +413,11 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) hisi_pmu->on_cpu = cpu; /* Overflow interrupt also should use the same CPU */ - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); + WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(cpu))); return 0; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { @@ -430,7 +446,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target); /* Use this CPU for event counting */ hisi_pmu->on_cpu = target; - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); + WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(target))); return 0; } +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); + +MODULE_LICENSE("GPL v2"); -- Gitee From 0f7c6b66c362e4a9008ce2befcc6a3b25eb02d4b Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Tue, 9 Aug 2022 23:06:41 +0800 Subject: [PATCH 02/39] drivers/perf: hisi: Add driver for HiSilicon PCIe PMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCIe PMU Root Complex Integrated End Point(RCiEP) device is supported to sample bandwidth, latency, buffer occupation etc. Each PMU RCiEP device monitors multiple Root Ports, and each RCiEP is registered as a PMU in /sys/bus/event_source/devices, so users can select target PMU, and use filter to do further sets. Filtering options contains: event - select the event. port - select target Root Ports. Information of Root Ports are shown under sysfs. bdf - select requester_id of target EP device. trig_len - set trigger condition for starting event statistics. trig_mode - set trigger mode. 0 means starting to statistic when bigger than trigger condition, and 1 means smaller. thr_len - set threshold for statistics. thr_mode - set threshold mode. 0 means count when bigger than threshold, and 1 means smaller. Acked-by: Krzysztof WilczyƄski Reviewed-by: John Garry Signed-off-by: Qi Liu Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/20211202080633.2919-3-liuqi115@huawei.com Signed-off-by: Will Deacon Signed-off-by: Wangming Shao Reviewed-by: Junhao He Reviewed-by: Yang Jihong Signed-off-by: Zheng Zengkai Signed-off-by: huwentao Conflicts: MAINTAINERS drivers/perf/hisilicon/Makefile include/linux/cpuhotplug.h --- MAINTAINERS | 2 + drivers/perf/hisilicon/Kconfig | 9 + drivers/perf/hisilicon/Makefile | 2 + drivers/perf/hisilicon/hisi_pcie_pmu.c | 951 +++++++++++++++++++++++++ include/linux/cpuhotplug.h | 3 + 5 files changed, 967 insertions(+) create mode 100644 drivers/perf/hisilicon/hisi_pcie_pmu.c diff --git a/MAINTAINERS b/MAINTAINERS index dd2d56531d91..f70f52cc4b40 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7402,8 +7402,10 @@ F: Documentation/devicetree/bindings/net/hisilicon*.txt HISILICON PMU DRIVER M: Shaokun Zhang +M: Qi Liu W: http://www.hisilicon.com S: Supported +F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst F: drivers/perf/hisilicon F: Documentation/admin-guide/perf/hisi-pmu.rst diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig index c5d1b7019fff..5546218b5598 100644 --- a/drivers/perf/hisilicon/Kconfig +++ b/drivers/perf/hisilicon/Kconfig @@ -5,3 +5,12 @@ config HISI_PMU help Support for HiSilicon SoC L3 Cache performance monitor, Hydra Home Agent performance monitor and DDR Controller performance monitor. + +config HISI_PCIE_PMU + tristate "HiSilicon PCIE PERF PMU" + depends on PCI && ARM64 + help + Provide support for HiSilicon PCIe performance monitoring unit (PMU) + RCiEP devices. + Adds the PCIe PMU into perf events system for monitoring latency, + bandwidth etc. diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index e8377061845f..63c558fcffb1 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o + +obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c new file mode 100644 index 000000000000..2f18838754ec --- /dev/null +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -0,0 +1,951 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This driver adds support for PCIe PMU RCiEP device. Related + * perf events are bandwidth, latency etc. + * + * Copyright (C) 2021 HiSilicon Limited + * Author: Qi Liu + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "hisi_pcie_pmu" +/* Define registers */ +#define HISI_PCIE_GLOBAL_CTRL 0x00 +#define HISI_PCIE_EVENT_CTRL 0x010 +#define HISI_PCIE_CNT 0x090 +#define HISI_PCIE_EXT_CNT 0x110 +#define HISI_PCIE_INT_STAT 0x150 +#define HISI_PCIE_INT_MASK 0x154 +#define HISI_PCIE_REG_BDF 0xfe0 +#define HISI_PCIE_REG_VERSION 0xfe4 +#define HISI_PCIE_REG_INFO 0xfe8 + +/* Define command in HISI_PCIE_GLOBAL_CTRL */ +#define HISI_PCIE_GLOBAL_EN 0x01 +#define HISI_PCIE_GLOBAL_NONE 0 + +/* Define command in HISI_PCIE_EVENT_CTRL */ +#define HISI_PCIE_EVENT_EN BIT_ULL(20) +#define HISI_PCIE_RESET_CNT BIT_ULL(22) +#define HISI_PCIE_INIT_SET BIT_ULL(34) +#define HISI_PCIE_THR_EN BIT_ULL(26) +#define HISI_PCIE_TARGET_EN BIT_ULL(32) +#define HISI_PCIE_TRIG_EN BIT_ULL(52) + +/* Define offsets in HISI_PCIE_EVENT_CTRL */ +#define HISI_PCIE_EVENT_M GENMASK_ULL(15, 0) +#define HISI_PCIE_THR_MODE_M GENMASK_ULL(27, 27) +#define HISI_PCIE_THR_M GENMASK_ULL(31, 28) +#define HISI_PCIE_TARGET_M GENMASK_ULL(52, 36) +#define HISI_PCIE_TRIG_MODE_M GENMASK_ULL(53, 53) +#define HISI_PCIE_TRIG_M GENMASK_ULL(59, 56) + +#define HISI_PCIE_MAX_COUNTERS 8 +#define HISI_PCIE_REG_STEP 8 +#define HISI_PCIE_THR_MAX_VAL 10 +#define HISI_PCIE_TRIG_MAX_VAL 10 +#define HISI_PCIE_MAX_PERIOD (GENMASK_ULL(63, 0)) +#define HISI_PCIE_INIT_VAL BIT_ULL(63) + +struct hisi_pcie_pmu { + struct perf_event *hw_events[HISI_PCIE_MAX_COUNTERS]; + struct hlist_node node; + struct pci_dev *pdev; + struct pmu pmu; + void __iomem *base; + int irq; + u32 identifier; + /* Minimum and maximum BDF of root ports monitored by PMU */ + u16 bdf_min; + u16 bdf_max; + int on_cpu; +}; + +struct hisi_pcie_reg_pair { + u16 lo; + u16 hi; +}; + +#define to_pcie_pmu(p) (container_of((p), struct hisi_pcie_pmu, pmu)) +#define GET_PCI_DEVFN(bdf) ((bdf) & 0xff) + +#define HISI_PCIE_PMU_FILTER_ATTR(_name, _config, _hi, _lo) \ + static u64 hisi_pcie_get_##_name(struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK(_hi, _lo), event->attr._config); \ + } \ + +HISI_PCIE_PMU_FILTER_ATTR(event, config, 16, 0); +HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0); +HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4); +HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5); +HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9); +HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0); +HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16); + +static ssize_t hisi_pcie_format_sysfs_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sysfs_emit(buf, "%s\n", (char *)eattr->var); +} + +static ssize_t hisi_pcie_event_sysfs_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + + return sysfs_emit(buf, "config=0x%llx\n", pmu_attr->id); +} + +#define HISI_PCIE_PMU_FORMAT_ATTR(_name, _format) \ + (&((struct dev_ext_attribute[]){ \ + { .attr = __ATTR(_name, 0444, hisi_pcie_format_sysfs_show, \ + NULL), \ + .var = (void *)_format } \ + })[0].attr.attr) + +#define HISI_PCIE_PMU_EVENT_ATTR(_name, _id) \ + (&((struct perf_pmu_events_attr[]) { \ + { .attr = __ATTR(_name, 0444, hisi_pcie_event_sysfs_show, NULL), \ + .id = _id, } \ + })[0].attr.attr) + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pcie_pmu->on_cpu)); +} +static DEVICE_ATTR_RO(cpumask); + +static ssize_t identifier_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#x\n", pcie_pmu->identifier); +} +static DEVICE_ATTR_RO(identifier); + +static ssize_t bus_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#04x\n", PCI_BUS_NUM(pcie_pmu->bdf_min)); +} +static DEVICE_ATTR_RO(bus); + +static struct hisi_pcie_reg_pair +hisi_pcie_parse_reg_value(struct hisi_pcie_pmu *pcie_pmu, u32 reg_off) +{ + u32 val = readl_relaxed(pcie_pmu->base + reg_off); + struct hisi_pcie_reg_pair regs = { + .lo = val, + .hi = val >> 16, + }; + + return regs; +} + +/* + * Hardware counter and ext_counter work together for bandwidth, latency, bus + * utilization and buffer occupancy events. For example, RX memory write latency + * events(index = 0x0010), counter counts total delay cycles and ext_counter + * counts RX memory write PCIe packets number. + * + * As we don't want PMU driver to process these two data, "delay cycles" can + * be treated as an independent event(index = 0x0010), "RX memory write packets + * number" as another(index = 0x10010). BIT 16 is used to distinguish and 0-15 + * bits are "real" event index, which can be used to set HISI_PCIE_EVENT_CTRL. + */ +#define EXT_COUNTER_IS_USED(idx) ((idx) & BIT(16)) + +static u32 hisi_pcie_get_real_event(struct perf_event *event) +{ + return hisi_pcie_get_event(event) & GENMASK(15, 0); +} + +static u32 hisi_pcie_pmu_get_offset(u32 offset, u32 idx) +{ + return offset + HISI_PCIE_REG_STEP * idx; +} + +static u32 hisi_pcie_pmu_readl(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, + u32 idx) +{ + u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx); + + return readl_relaxed(pcie_pmu->base + offset); +} + +static void hisi_pcie_pmu_writel(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, u32 idx, u32 val) +{ + u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx); + + writel_relaxed(val, pcie_pmu->base + offset); +} + +static u64 hisi_pcie_pmu_readq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, u32 idx) +{ + u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx); + + return readq_relaxed(pcie_pmu->base + offset); +} + +static void hisi_pcie_pmu_writeq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, u32 idx, u64 val) +{ + u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx); + + writeq_relaxed(val, pcie_pmu->base + offset); +} + +static void hisi_pcie_pmu_config_filter(struct perf_event *event) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 reg = HISI_PCIE_INIT_SET; + u64 port, trig_len, thr_len; + + /* Config HISI_PCIE_EVENT_CTRL according to event. */ + reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event)); + + /* Config HISI_PCIE_EVENT_CTRL according to root port or EP device. */ + port = hisi_pcie_get_port(event); + if (port) + reg |= FIELD_PREP(HISI_PCIE_TARGET_M, port); + else + reg |= HISI_PCIE_TARGET_EN | + FIELD_PREP(HISI_PCIE_TARGET_M, hisi_pcie_get_bdf(event)); + + /* Config HISI_PCIE_EVENT_CTRL according to trigger condition. */ + trig_len = hisi_pcie_get_trig_len(event); + if (trig_len) { + reg |= FIELD_PREP(HISI_PCIE_TRIG_M, trig_len); + reg |= FIELD_PREP(HISI_PCIE_TRIG_MODE_M, hisi_pcie_get_trig_mode(event)); + reg |= HISI_PCIE_TRIG_EN; + } + + /* Config HISI_PCIE_EVENT_CTRL according to threshold condition. */ + thr_len = hisi_pcie_get_thr_len(event); + if (thr_len) { + reg |= FIELD_PREP(HISI_PCIE_THR_M, thr_len); + reg |= FIELD_PREP(HISI_PCIE_THR_MODE_M, hisi_pcie_get_thr_mode(event)); + reg |= HISI_PCIE_THR_EN; + } + + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg); +} + +static void hisi_pcie_pmu_clear_filter(struct perf_event *event) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, HISI_PCIE_INIT_SET); +} + +static bool hisi_pcie_pmu_valid_requester_id(struct hisi_pcie_pmu *pcie_pmu, u32 bdf) +{ + struct pci_dev *root_port, *pdev; + u16 rp_bdf; + + pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pcie_pmu->pdev->bus), PCI_BUS_NUM(bdf), + GET_PCI_DEVFN(bdf)); + if (!pdev) + return false; + + root_port = pcie_find_root_port(pdev); + if (!root_port) { + pci_dev_put(pdev); + return false; + } + + pci_dev_put(pdev); + rp_bdf = pci_dev_id(root_port); + return rp_bdf >= pcie_pmu->bdf_min && rp_bdf <= pcie_pmu->bdf_max; +} + +static bool hisi_pcie_pmu_valid_filter(struct perf_event *event, + struct hisi_pcie_pmu *pcie_pmu) +{ + u32 requester_id = hisi_pcie_get_bdf(event); + + if (hisi_pcie_get_thr_len(event) > HISI_PCIE_THR_MAX_VAL) + return false; + + if (hisi_pcie_get_trig_len(event) > HISI_PCIE_TRIG_MAX_VAL) + return false; + + if (requester_id) { + if (!hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id)) + return false; + } + + return true; +} + +static bool hisi_pcie_pmu_cmp_event(struct perf_event *target, + struct perf_event *event) +{ + return hisi_pcie_get_real_event(target) == hisi_pcie_get_real_event(event); +} + +static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct perf_event *event_group[HISI_PCIE_MAX_COUNTERS]; + int counters = 1; + int num; + + event_group[0] = leader; + if (!is_software_event(leader)) { + if (leader->pmu != event->pmu) + return false; + + if (leader != event && !hisi_pcie_pmu_cmp_event(leader, event)) + event_group[counters++] = event; + } + + for_each_sibling_event(sibling, event->group_leader) { + if (is_software_event(sibling)) + continue; + + if (sibling->pmu != event->pmu) + return false; + + for (num = 0; num < counters; num++) { + if (hisi_pcie_pmu_cmp_event(event_group[num], sibling)) + break; + } + + if (num == counters) + event_group[counters++] = sibling; + } + + return counters <= HISI_PCIE_MAX_COUNTERS; +} + +static int hisi_pcie_pmu_event_init(struct perf_event *event) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + event->cpu = pcie_pmu->on_cpu; + + if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event))) + hwc->event_base = HISI_PCIE_EXT_CNT; + else + hwc->event_base = HISI_PCIE_CNT; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Sampling is not supported. */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + if (!hisi_pcie_pmu_valid_filter(event, pcie_pmu)) + return -EINVAL; + + if (!hisi_pcie_pmu_validate_event_group(event)) + return -EINVAL; + + return 0; +} + +static u64 hisi_pcie_pmu_read_counter(struct perf_event *event) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + u32 idx = event->hw.idx; + + return hisi_pcie_pmu_readq(pcie_pmu, event->hw.event_base, idx); +} + +static int hisi_pcie_pmu_find_related_event(struct hisi_pcie_pmu *pcie_pmu, + struct perf_event *event) +{ + struct perf_event *sibling; + int idx; + + for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) { + sibling = pcie_pmu->hw_events[idx]; + if (!sibling) + continue; + + if (!hisi_pcie_pmu_cmp_event(sibling, event)) + continue; + + /* Related events must be used in group */ + if (sibling->group_leader == event->group_leader) + return idx; + else + return -EINVAL; + } + + return idx; +} + +static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu) +{ + int idx; + + for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) { + if (!pcie_pmu->hw_events[idx]) + return idx; + } + + return -EINVAL; +} + +static void hisi_pcie_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 new_cnt, prev_cnt, delta; + + do { + prev_cnt = local64_read(&hwc->prev_count); + new_cnt = hisi_pcie_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_cnt, + new_cnt) != prev_cnt); + + delta = (new_cnt - prev_cnt) & HISI_PCIE_MAX_PERIOD; + local64_add(delta, &event->count); +} + +static void hisi_pcie_pmu_read(struct perf_event *event) +{ + hisi_pcie_pmu_event_update(event); +} + +static void hisi_pcie_pmu_set_period(struct perf_event *event) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + local64_set(&hwc->prev_count, HISI_PCIE_INIT_VAL); + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_CNT, idx, HISI_PCIE_INIT_VAL); + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EXT_CNT, idx, HISI_PCIE_INIT_VAL); +} + +static void hisi_pcie_pmu_enable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u64 val; + + val = hisi_pcie_pmu_readq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx); + val |= HISI_PCIE_EVENT_EN; + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, val); +} + +static void hisi_pcie_pmu_disable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u64 val; + + val = hisi_pcie_pmu_readq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx); + val &= ~HISI_PCIE_EVENT_EN; + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, val); +} + +static void hisi_pcie_pmu_enable_int(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + + hisi_pcie_pmu_writel(pcie_pmu, HISI_PCIE_INT_MASK, idx, 0); +} + +static void hisi_pcie_pmu_disable_int(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + + hisi_pcie_pmu_writel(pcie_pmu, HISI_PCIE_INT_MASK, idx, 1); +} + +static void hisi_pcie_pmu_reset_counter(struct hisi_pcie_pmu *pcie_pmu, int idx) +{ + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, HISI_PCIE_RESET_CNT); + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, HISI_PCIE_INIT_SET); +} + +static void hisi_pcie_pmu_start(struct perf_event *event, int flags) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u64 prev_cnt; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + hisi_pcie_pmu_config_filter(event); + hisi_pcie_pmu_enable_counter(pcie_pmu, hwc); + hisi_pcie_pmu_enable_int(pcie_pmu, hwc); + hisi_pcie_pmu_set_period(event); + + if (flags & PERF_EF_RELOAD) { + prev_cnt = local64_read(&hwc->prev_count); + hisi_pcie_pmu_writeq(pcie_pmu, hwc->event_base, idx, prev_cnt); + } + + perf_event_update_userpage(event); +} + +static void hisi_pcie_pmu_stop(struct perf_event *event, int flags) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hisi_pcie_pmu_event_update(event); + hisi_pcie_pmu_disable_int(pcie_pmu, hwc); + hisi_pcie_pmu_disable_counter(pcie_pmu, hwc); + hisi_pcie_pmu_clear_filter(event); + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + hwc->state |= PERF_HES_UPTODATE; +} + +static int hisi_pcie_pmu_add(struct perf_event *event, int flags) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + /* Check all working events to find a related event. */ + idx = hisi_pcie_pmu_find_related_event(pcie_pmu, event); + if (idx < 0) + return idx; + + /* Current event shares an enabled counter with the related event */ + if (idx < HISI_PCIE_MAX_COUNTERS) { + hwc->idx = idx; + goto start_count; + } + + idx = hisi_pcie_pmu_get_event_idx(pcie_pmu); + if (idx < 0) + return idx; + + hwc->idx = idx; + pcie_pmu->hw_events[idx] = event; + /* Reset Counter to avoid previous statistic interference. */ + hisi_pcie_pmu_reset_counter(pcie_pmu, idx); + +start_count: + if (flags & PERF_EF_START) + hisi_pcie_pmu_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void hisi_pcie_pmu_del(struct perf_event *event, int flags) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hisi_pcie_pmu_stop(event, PERF_EF_UPDATE); + pcie_pmu->hw_events[hwc->idx] = NULL; + perf_event_update_userpage(event); +} + +static void hisi_pcie_pmu_enable(struct pmu *pmu) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(pmu); + int num; + + for (num = 0; num < HISI_PCIE_MAX_COUNTERS; num++) { + if (pcie_pmu->hw_events[num]) + break; + } + + if (num == HISI_PCIE_MAX_COUNTERS) + return; + + writel(HISI_PCIE_GLOBAL_EN, pcie_pmu->base + HISI_PCIE_GLOBAL_CTRL); +} + +static void hisi_pcie_pmu_disable(struct pmu *pmu) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(pmu); + + writel(HISI_PCIE_GLOBAL_NONE, pcie_pmu->base + HISI_PCIE_GLOBAL_CTRL); +} + +static irqreturn_t hisi_pcie_pmu_irq(int irq, void *data) +{ + struct hisi_pcie_pmu *pcie_pmu = data; + irqreturn_t ret = IRQ_NONE; + struct perf_event *event; + u32 overflown; + int idx; + + for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) { + overflown = hisi_pcie_pmu_readl(pcie_pmu, HISI_PCIE_INT_STAT, idx); + if (!overflown) + continue; + + /* Clear status of interrupt. */ + hisi_pcie_pmu_writel(pcie_pmu, HISI_PCIE_INT_STAT, idx, 1); + event = pcie_pmu->hw_events[idx]; + if (!event) + continue; + + hisi_pcie_pmu_event_update(event); + hisi_pcie_pmu_set_period(event); + ret = IRQ_HANDLED; + } + + return ret; +} + +static int hisi_pcie_pmu_irq_register(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu) +{ + int irq, ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + if (ret < 0) { + pci_err(pdev, "Failed to enable MSI vectors: %d\n", ret); + return ret; + } + + irq = pci_irq_vector(pdev, 0); + ret = request_irq(irq, hisi_pcie_pmu_irq, IRQF_NOBALANCING | IRQF_NO_THREAD, DRV_NAME, + pcie_pmu); + if (ret) { + pci_err(pdev, "Failed to register IRQ: %d\n", ret); + pci_free_irq_vectors(pdev); + return ret; + } + + pcie_pmu->irq = irq; + + return 0; +} + +static void hisi_pcie_pmu_irq_unregister(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu) +{ + free_irq(pcie_pmu->irq, pcie_pmu); + pci_free_irq_vectors(pdev); +} + +static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node); + + if (pcie_pmu->on_cpu == -1) { + pcie_pmu->on_cpu = cpu; + WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu))); + } + + return 0; +} + +static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node); + unsigned int target; + + /* Nothing to do if this CPU doesn't own the PMU */ + if (pcie_pmu->on_cpu != cpu) + return 0; + + pcie_pmu->on_cpu = -1; + /* Choose a new CPU from all online cpus. */ + target = cpumask_first(cpu_online_mask); + if (target >= nr_cpu_ids) { + pci_err(pcie_pmu->pdev, "There is no CPU to set\n"); + return 0; + } + + perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target); + /* Use this CPU for event counting */ + pcie_pmu->on_cpu = target; + WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(target))); + + return 0; +} + +static struct attribute *hisi_pcie_pmu_events_attr[] = { + HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_latency, 0x0010), + HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_cnt, 0x10010), + HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_latency, 0x0210), + HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210), + HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011), + HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011), + HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x1005), + HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x11005), + HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x2004), + HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x12004), + NULL +}; + +static struct attribute_group hisi_pcie_pmu_events_group = { + .name = "events", + .attrs = hisi_pcie_pmu_events_attr, +}; + +static struct attribute *hisi_pcie_pmu_format_attr[] = { + HISI_PCIE_PMU_FORMAT_ATTR(event, "config:0-16"), + HISI_PCIE_PMU_FORMAT_ATTR(thr_len, "config1:0-3"), + HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"), + HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"), + HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"), + HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"), + HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"), + NULL +}; + +static const struct attribute_group hisi_pcie_pmu_format_group = { + .name = "format", + .attrs = hisi_pcie_pmu_format_attr, +}; + +static struct attribute *hisi_pcie_pmu_bus_attrs[] = { + &dev_attr_bus.attr, + NULL +}; + +static const struct attribute_group hisi_pcie_pmu_bus_attr_group = { + .attrs = hisi_pcie_pmu_bus_attrs, +}; + +static struct attribute *hisi_pcie_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static const struct attribute_group hisi_pcie_pmu_cpumask_attr_group = { + .attrs = hisi_pcie_pmu_cpumask_attrs, +}; + +static struct attribute *hisi_pcie_pmu_identifier_attrs[] = { + &dev_attr_identifier.attr, + NULL +}; + +static const struct attribute_group hisi_pcie_pmu_identifier_attr_group = { + .attrs = hisi_pcie_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_pcie_pmu_attr_groups[] = { + &hisi_pcie_pmu_events_group, + &hisi_pcie_pmu_format_group, + &hisi_pcie_pmu_bus_attr_group, + &hisi_pcie_pmu_cpumask_attr_group, + &hisi_pcie_pmu_identifier_attr_group, + NULL +}; + +static int hisi_pcie_alloc_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu) +{ + struct hisi_pcie_reg_pair regs; + u16 sicl_id, core_id; + char *name; + + regs = hisi_pcie_parse_reg_value(pcie_pmu, HISI_PCIE_REG_BDF); + pcie_pmu->bdf_min = regs.lo; + pcie_pmu->bdf_max = regs.hi; + + regs = hisi_pcie_parse_reg_value(pcie_pmu, HISI_PCIE_REG_INFO); + sicl_id = regs.hi; + core_id = regs.lo; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_pcie%u_core%u", sicl_id, core_id); + if (!name) + return -ENOMEM; + + pcie_pmu->pdev = pdev; + pcie_pmu->on_cpu = -1; + pcie_pmu->identifier = readl(pcie_pmu->base + HISI_PCIE_REG_VERSION); + pcie_pmu->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .event_init = hisi_pcie_pmu_event_init, + .pmu_enable = hisi_pcie_pmu_enable, + .pmu_disable = hisi_pcie_pmu_disable, + .add = hisi_pcie_pmu_add, + .del = hisi_pcie_pmu_del, + .start = hisi_pcie_pmu_start, + .stop = hisi_pcie_pmu_stop, + .read = hisi_pcie_pmu_read, + .task_ctx_nr = perf_invalid_context, + .attr_groups = hisi_pcie_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + return 0; +} + +static int hisi_pcie_init_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu) +{ + int ret; + + pcie_pmu->base = pci_ioremap_bar(pdev, 2); + if (!pcie_pmu->base) { + pci_err(pdev, "Ioremap failed for pcie_pmu resource\n"); + return -ENOMEM; + } + + ret = hisi_pcie_alloc_pmu(pdev, pcie_pmu); + if (ret) + goto err_iounmap; + + ret = hisi_pcie_pmu_irq_register(pdev, pcie_pmu); + if (ret) + goto err_iounmap; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node); + if (ret) { + pci_err(pdev, "Failed to register hotplug: %d\n", ret); + goto err_irq_unregister; + } + + ret = perf_pmu_register(&pcie_pmu->pmu, pcie_pmu->pmu.name, -1); + if (ret) { + pci_err(pdev, "Failed to register PCIe PMU: %d\n", ret); + goto err_hotplug_unregister; + } + + return ret; + +err_hotplug_unregister: + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node); + +err_irq_unregister: + hisi_pcie_pmu_irq_unregister(pdev, pcie_pmu); + +err_iounmap: + iounmap(pcie_pmu->base); + + return ret; +} + +static void hisi_pcie_uninit_pmu(struct pci_dev *pdev) +{ + struct hisi_pcie_pmu *pcie_pmu = pci_get_drvdata(pdev); + + perf_pmu_unregister(&pcie_pmu->pmu); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node); + hisi_pcie_pmu_irq_unregister(pdev, pcie_pmu); + iounmap(pcie_pmu->base); +} + +static int hisi_pcie_init_dev(struct pci_dev *pdev) +{ + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + pci_err(pdev, "Failed to enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME); + if (ret < 0) { + pci_err(pdev, "Failed to request PCI mem regions: %d\n", ret); + return ret; + } + + pci_set_master(pdev); + + return 0; +} + +static int hisi_pcie_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct hisi_pcie_pmu *pcie_pmu; + int ret; + + pcie_pmu = devm_kzalloc(&pdev->dev, sizeof(*pcie_pmu), GFP_KERNEL); + if (!pcie_pmu) + return -ENOMEM; + + ret = hisi_pcie_init_dev(pdev); + if (ret) + return ret; + + ret = hisi_pcie_init_pmu(pdev, pcie_pmu); + if (ret) + return ret; + + pci_set_drvdata(pdev, pcie_pmu); + + return ret; +} + +static void hisi_pcie_pmu_remove(struct pci_dev *pdev) +{ + hisi_pcie_uninit_pmu(pdev); + pci_set_drvdata(pdev, NULL); +} + +static const struct pci_device_id hisi_pcie_pmu_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12d) }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, hisi_pcie_pmu_ids); + +static struct pci_driver hisi_pcie_pmu_driver = { + .name = DRV_NAME, + .id_table = hisi_pcie_pmu_ids, + .probe = hisi_pcie_pmu_probe, + .remove = hisi_pcie_pmu_remove, +}; + +static int __init hisi_pcie_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, + "AP_PERF_ARM_HISI_PCIE_PMU_ONLINE", + hisi_pcie_pmu_online_cpu, + hisi_pcie_pmu_offline_cpu); + if (ret) { + pr_err("Failed to setup PCIe PMU hotplug: %d\n", ret); + return ret; + } + + ret = pci_register_driver(&hisi_pcie_pmu_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE); + + return ret; +} +module_init(hisi_pcie_module_init); + +static void __exit hisi_pcie_module_exit(void) +{ + pci_unregister_driver(&hisi_pcie_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE); +} +module_exit(hisi_pcie_module_exit); + +MODULE_DESCRIPTION("HiSilicon PCIe PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Qi Liu "); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2d55cee638fc..2bdc493f7594 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -165,6 +165,9 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + #ifndef __GENKSYMS__ + CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, + #endif CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, -- Gitee From f33e6f3614b58fdcccf0bffb8846edd424b3f496 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 21 Nov 2022 22:02:11 +0800 Subject: [PATCH 03/39] drivers/perf: hisi: Fix some event id for hisi-pcie-pmu Some event id of hisi-pcie-pmu is incorrect, fix them. Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Yicong Yang Signed-off-by: Junhao He Reviewed-by: Yang Jihong Reviewed-by: Jay Fang Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 2f18838754ec..e54f37a1dc92 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -696,10 +696,10 @@ static struct attribute *hisi_pcie_pmu_events_attr[] = { HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011), - HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x1005), - HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x11005), - HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x2004), - HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x12004), + HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804), + HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804), + HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405), + HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405), NULL }; -- Gitee From 75a063462424d0e897c7d873a8f5c3122c019637 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Thu, 2 Dec 2021 16:06:32 +0800 Subject: [PATCH 04/39] docs: perf: Add description for HiSilicon PCIe PMU driver PCIe PMU Root Complex Integrated End Point(RCiEP) device is supported on HiSilicon HIP09 platform. Document it to provide guidance on how to use it. Reviewed-by: John Garry Signed-off-by: Qi Liu Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/20211202080633.2919-2-liuqi115@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: huwentao --- .../admin-guide/perf/hisi-pcie-pmu.rst | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 Documentation/admin-guide/perf/hisi-pcie-pmu.rst diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst new file mode 100644 index 000000000000..294ebbdb22af --- /dev/null +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -0,0 +1,106 @@ +================================================ +HiSilicon PCIe Performance Monitoring Unit (PMU) +================================================ + +On Hip09, HiSilicon PCIe Performance Monitoring Unit (PMU) could monitor +bandwidth, latency, bus utilization and buffer occupancy data of PCIe. + +Each PCIe Core has a PMU to monitor multi Root Ports of this PCIe Core and +all Endpoints downstream these Root Ports. + + +HiSilicon PCIe PMU driver +========================= + +The PCIe PMU driver registers a perf PMU with the name of its sicl-id and PCIe +Core id.:: + + /sys/bus/event_source/hisi_pcie_ + +PMU driver provides description of available events and filter options in sysfs, +see /sys/bus/event_source/devices/hisi_pcie_. + +The "format" directory describes all formats of the config (events) and config1 +(filter options) fields of the perf_event_attr structure. The "events" directory +describes all documented events shown in perf list. + +The "identifier" sysfs file allows users to identify the version of the +PMU hardware device. + +The "bus" sysfs file allows users to get the bus number of Root Ports +monitored by PMU. + +Example usage of perf:: + + $# perf list + hisi_pcie0_0/rx_mwr_latency/ [kernel PMU event] + hisi_pcie0_0/rx_mwr_cnt/ [kernel PMU event] + ------------------------------------------ + + $# perf stat -e hisi_pcie0_0/rx_mwr_latency/ + $# perf stat -e hisi_pcie0_0/rx_mwr_cnt/ + $# perf stat -g -e hisi_pcie0_0/rx_mwr_latency/ -e hisi_pcie0_0/rx_mwr_cnt/ + +The current driver does not support sampling. So "perf record" is unsupported. +Also attach to a task is unsupported for PCIe PMU. + +Filter options +-------------- + +1. Target filter +PMU could only monitor the performance of traffic downstream target Root Ports +or downstream target Endpoint. PCIe PMU driver support "port" and "bdf" +interfaces for users, and these two interfaces aren't supported at the same +time. + +-port +"port" filter can be used in all PCIe PMU events, target Root Port can be +selected by configuring the 16-bits-bitmap "port". Multi ports can be selected +for AP-layer-events, and only one port can be selected for TL/DL-layer-events. + +For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of bitmap +should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4 lanes), +bit8 is set, port=0x100; if these two Root Ports are both monitored, port=0x101. + +Example usage of perf:: + + $# perf stat -e hisi_pcie0_0/rx_mwr_latency,port=0x1/ sleep 5 + +-bdf + +"bdf" filter can only be used in bandwidth events, target Endpoint is selected +by configuring BDF to "bdf". Counter only counts the bandwidth of message +requested by target Endpoint. + +For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0. + +Example usage of perf:: + + $# perf stat -e hisi_pcie0_0/rx_mrd_flux,bdf=0x3900/ sleep 5 + +2. Trigger filter +Event statistics start when the first time TLP length is greater/smaller +than trigger condition. You can set the trigger condition by writing "trig_len", +and set the trigger mode by writing "trig_mode". This filter can only be used +in bandwidth events. + +For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0" +means statistics start when TLP length > trigger condition, "trig_mode=1" +means start when TLP length < condition. + +Example usage of perf:: + + $# perf stat -e hisi_pcie0_0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5 + +3. Threshold filter +Counter counts when TLP length within the specified range. You can set the +threshold by writing "thr_len", and set the threshold mode by writing +"thr_mode". This filter can only be used in bandwidth events. + +For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means +counter counts when TLP length >= threshold, and "thr_mode=1" means counts +when TLP length < threshold. + +Example usage of perf:: + + $# perf stat -e hisi_pcie0_0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5 -- Gitee From 5301e17a90128ee584780dcceb2caf198fa96c18 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 17 Nov 2022 16:41:34 +0800 Subject: [PATCH 05/39] docs: perf: Fix PMU instance name of hisi-pcie-pmu The PMU instance will be called hisi_pcie_core rather than hisi_pcie_. Fix this in the documentation. Fixes: c8602008e247 ("docs: perf: Add description for HiSilicon PCIe PMU driver") Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20221117084136.53572-3-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: huwentao --- .../admin-guide/perf/hisi-pcie-pmu.rst | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index 294ebbdb22af..bbe66480ff85 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -15,10 +15,10 @@ HiSilicon PCIe PMU driver The PCIe PMU driver registers a perf PMU with the name of its sicl-id and PCIe Core id.:: - /sys/bus/event_source/hisi_pcie_ + /sys/bus/event_source/hisi_pcie_core PMU driver provides description of available events and filter options in sysfs, -see /sys/bus/event_source/devices/hisi_pcie_. +see /sys/bus/event_source/devices/hisi_pcie_core. The "format" directory describes all formats of the config (events) and config1 (filter options) fields of the perf_event_attr structure. The "events" directory @@ -33,13 +33,13 @@ monitored by PMU. Example usage of perf:: $# perf list - hisi_pcie0_0/rx_mwr_latency/ [kernel PMU event] - hisi_pcie0_0/rx_mwr_cnt/ [kernel PMU event] + hisi_pcie0_core0/rx_mwr_latency/ [kernel PMU event] + hisi_pcie0_core0/rx_mwr_cnt/ [kernel PMU event] ------------------------------------------ - $# perf stat -e hisi_pcie0_0/rx_mwr_latency/ - $# perf stat -e hisi_pcie0_0/rx_mwr_cnt/ - $# perf stat -g -e hisi_pcie0_0/rx_mwr_latency/ -e hisi_pcie0_0/rx_mwr_cnt/ + $# perf stat -e hisi_pcie0_core0/rx_mwr_latency/ + $# perf stat -e hisi_pcie0_core0/rx_mwr_cnt/ + $# perf stat -g -e hisi_pcie0_core0/rx_mwr_latency/ -e hisi_pcie0_core0/rx_mwr_cnt/ The current driver does not support sampling. So "perf record" is unsupported. Also attach to a task is unsupported for PCIe PMU. @@ -64,7 +64,7 @@ bit8 is set, port=0x100; if these two Root Ports are both monitored, port=0x101. Example usage of perf:: - $# perf stat -e hisi_pcie0_0/rx_mwr_latency,port=0x1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5 -bdf @@ -76,7 +76,7 @@ For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0. Example usage of perf:: - $# perf stat -e hisi_pcie0_0/rx_mrd_flux,bdf=0x3900/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5 2. Trigger filter Event statistics start when the first time TLP length is greater/smaller @@ -90,7 +90,7 @@ means start when TLP length < condition. Example usage of perf:: - $# perf stat -e hisi_pcie0_0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5 3. Threshold filter Counter counts when TLP length within the specified range. You can set the @@ -103,4 +103,4 @@ when TLP length < threshold. Example usage of perf:: - $# perf stat -e hisi_pcie0_0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5 -- Gitee From 6c7c8ec301e1b125088f3c444a44abd0c697efde Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 17 Nov 2022 16:41:35 +0800 Subject: [PATCH 06/39] Documentation: perf: Indent filter options list of hisi-pcie-pmu The "Filter options" list have a rather ugly indentation. Also, the first paragraph after list name is rendered without separator (as continuation from the name). Align the list by indenting the list items and add a blank line separator for each list name. Reviewed-by: Jonathan Cameron Signed-off-by: Bagas Sanjaya Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20221117084136.53572-4-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: huwentao --- .../admin-guide/perf/hisi-pcie-pmu.rst | 80 ++++++++++--------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index bbe66480ff85..645f08f65429 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -48,59 +48,65 @@ Filter options -------------- 1. Target filter -PMU could only monitor the performance of traffic downstream target Root Ports -or downstream target Endpoint. PCIe PMU driver support "port" and "bdf" -interfaces for users, and these two interfaces aren't supported at the same -time. --port -"port" filter can be used in all PCIe PMU events, target Root Port can be -selected by configuring the 16-bits-bitmap "port". Multi ports can be selected -for AP-layer-events, and only one port can be selected for TL/DL-layer-events. + PMU could only monitor the performance of traffic downstream target Root + Ports or downstream target Endpoint. PCIe PMU driver support "port" and + "bdf" interfaces for users, and these two interfaces aren't supported at the + same time. -For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of bitmap -should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4 lanes), -bit8 is set, port=0x100; if these two Root Ports are both monitored, port=0x101. + - port -Example usage of perf:: + "port" filter can be used in all PCIe PMU events, target Root Port can be + selected by configuring the 16-bits-bitmap "port". Multi ports can be + selected for AP-layer-events, and only one port can be selected for + TL/DL-layer-events. - $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5 + For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of + bitmap should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4 + lanes), bit8 is set, port=0x100; if these two Root Ports are both + monitored, port=0x101. --bdf + Example usage of perf:: -"bdf" filter can only be used in bandwidth events, target Endpoint is selected -by configuring BDF to "bdf". Counter only counts the bandwidth of message -requested by target Endpoint. + $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5 -For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0. + - bdf -Example usage of perf:: + "bdf" filter can only be used in bandwidth events, target Endpoint is + selected by configuring BDF to "bdf". Counter only counts the bandwidth of + message requested by target Endpoint. + + For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0. + + Example usage of perf:: - $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5 2. Trigger filter -Event statistics start when the first time TLP length is greater/smaller -than trigger condition. You can set the trigger condition by writing "trig_len", -and set the trigger mode by writing "trig_mode". This filter can only be used -in bandwidth events. -For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0" -means statistics start when TLP length > trigger condition, "trig_mode=1" -means start when TLP length < condition. + Event statistics start when the first time TLP length is greater/smaller + than trigger condition. You can set the trigger condition by writing + "trig_len", and set the trigger mode by writing "trig_mode". This filter can + only be used in bandwidth events. -Example usage of perf:: + For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0" + means statistics start when TLP length > trigger condition, "trig_mode=1" + means start when TLP length < condition. + + Example usage of perf:: - $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5 3. Threshold filter -Counter counts when TLP length within the specified range. You can set the -threshold by writing "thr_len", and set the threshold mode by writing -"thr_mode". This filter can only be used in bandwidth events. -For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means -counter counts when TLP length >= threshold, and "thr_mode=1" means counts -when TLP length < threshold. + Counter counts when TLP length within the specified range. You can set the + threshold by writing "thr_len", and set the threshold mode by writing + "thr_mode". This filter can only be used in bandwidth events. -Example usage of perf:: + For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means + counter counts when TLP length >= threshold, and "thr_mode=1" means counts + when TLP length < threshold. + + Example usage of perf:: - $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5 -- Gitee From 41ed1d93f70a272b92529cedd31f6bc0e4696252 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 17 Nov 2022 16:41:36 +0800 Subject: [PATCH 07/39] drivers/perf: hisi: Add TLP filter support The PMU support to filter the TLP when counting the bandwidth with below options: - only count the TLP headers - only count the TLP payloads - count both TLP headers and payloads In the current driver it's default to count the TLP payloads only, which will have an implicity side effects that on the traffic only have header only TLPs, we'll get no data. Make this user configuration through "len_mode" parameter and make it default to count both TLP headers and payloads when user not specified. Also update the documentation for it. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20221117084136.53572-5-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: huwentao --- .../admin-guide/perf/hisi-pcie-pmu.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index 645f08f65429..7e863662e2d4 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -110,3 +110,21 @@ Filter options Example usage of perf:: $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5 + +4. TLP Length filter + + When counting bandwidth, the data can be composed of certain parts of TLP + packets. You can specify it through "len_mode": + + - 2'b00: Reserved (Do not use this since the behaviour is undefined) + - 2'b01: Bandwidth of TLP payloads + - 2'b10: Bandwidth of TLP headers + - 2'b11: Bandwidth of both TLP payloads and headers + + For example, "len_mode=2" means only counting the bandwidth of TLP headers + and "len_mode=3" means the final bandwidth data is composed of both TLP + headers and payloads. Default value if not specified is 2'b11. + + Example usage of perf:: + + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5 -- Gitee From 7b4999548c3b1d316ab2399f8a72e832a546d510 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 23 Feb 2024 18:33:59 +0800 Subject: [PATCH 08/39] docs: perf: Update usage for target filter of hisi-pcie-pmu One of the "port" and "bdf" target filter interface must be set, and the related events should preferably used in the same group. Update the usage in the documentation. Signed-off-by: Junhao He Signed-off-by: Yicong Yang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240223103359.18669-9-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: huwentao --- .../admin-guide/perf/hisi-pcie-pmu.rst | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index 7e863662e2d4..678d3865560c 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -37,9 +37,20 @@ Example usage of perf:: hisi_pcie0_core0/rx_mwr_cnt/ [kernel PMU event] ------------------------------------------ - $# perf stat -e hisi_pcie0_core0/rx_mwr_latency/ - $# perf stat -e hisi_pcie0_core0/rx_mwr_cnt/ - $# perf stat -g -e hisi_pcie0_core0/rx_mwr_latency/ -e hisi_pcie0_core0/rx_mwr_cnt/ + $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0xffff/ + $# perf stat -e hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/ + +The related events usually used to calculate the bandwidth, latency or others. +They need to start and end counting at the same time, therefore related events +are best used in the same event group to get the expected value. There are two +ways to know if they are related events: +a) By event name, such as the latency events "xxx_latency, xxx_cnt" or + bandwidth events "xxx_flux, xxx_time". +b) By event type, such as "event=0xXXXX, event=0x1XXXX". + +Example usage of perf group:: + + $# perf stat -e "{hisi_pcie0_core0/rx_mwr_latency,port=0xffff/,hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/}" The current driver does not support sampling. So "perf record" is unsupported. Also attach to a task is unsupported for PCIe PMU. @@ -51,8 +62,12 @@ Filter options PMU could only monitor the performance of traffic downstream target Root Ports or downstream target Endpoint. PCIe PMU driver support "port" and - "bdf" interfaces for users, and these two interfaces aren't supported at the - same time. + "bdf" interfaces for users. + Please notice that, one of these two interfaces must be set, and these two + interfaces aren't supported at the same time. If they are both set, only + "port" filter is valid. + If "port" filter not being set or is set explicitly to zero (default), the + "bdf" filter will be in effect, because "bdf=0" meaning 0000:000:00.0. - port @@ -95,7 +110,7 @@ Filter options Example usage of perf:: - $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,trig_len=0x4,trig_mode=1/ sleep 5 3. Threshold filter @@ -109,7 +124,7 @@ Filter options Example usage of perf:: - $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,thr_len=0x4,thr_mode=1/ sleep 5 4. TLP Length filter @@ -127,4 +142,4 @@ Filter options Example usage of perf:: - $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,len_mode=0x1/ sleep 5 -- Gitee From bd9e1a0de7a6f855221d323a658bd6f1b6f7faff Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 5 Mar 2024 20:25:17 +0800 Subject: [PATCH 09/39] docs: perf: Fix build warning of hisi-pcie-pmu.rst `make htmldocs SPHINXDIRS="admin-guide"` shows below warnings: Documentation/admin-guide/perf/hisi-pcie-pmu.rst:48: ERROR: Unexpected indentation. Documentation/admin-guide/perf/hisi-pcie-pmu.rst:49: WARNING: Block quote ends without a blank line; unexpected unindent. Fix this. Closes: https://lore.kernel.org/lkml/20231011172250.5a6498e5@canb.auug.org.au/ Fixes: 89a032923d4b ("docs: perf: Update usage for target filter of hisi-pcie-pmu") Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240305122517.12179-1-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: huwentao --- Documentation/admin-guide/perf/hisi-pcie-pmu.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index 678d3865560c..5541ff40e06a 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -44,6 +44,7 @@ The related events usually used to calculate the bandwidth, latency or others. They need to start and end counting at the same time, therefore related events are best used in the same event group to get the expected value. There are two ways to know if they are related events: + a) By event name, such as the latency events "xxx_latency, xxx_cnt" or bandwidth events "xxx_flux, xxx_time". b) By event type, such as "event=0xXXXX, event=0x1XXXX". -- Gitee From f19524395347254008e4540bb2b6d465028d3e66 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Fri, 13 Sep 2024 17:03:50 +0800 Subject: [PATCH 10/39] docs: fix 'make htmldocs' warning in perf Fix following 'make htmldocs' warnings: ./Documentation/admin-guide/perf/hisi-pcie-pmu.rst: WARNING: document isn't included in any toctree Fixes: c8602008e247 ("docs: perf: Add description for HiSilicon PCIe PMU driver") Signed-off-by: Wan Jiabing Reviewed-by: John Garry Link: https://lore.kernel.org/r/20220228031700.1669086-1-wanjiabing@vivo.com Signed-off-by: Jonathan Corbet Signed-off-by: huwentao Conflicts: Documentation/admin-guide/perf/index.rst --- Documentation/admin-guide/perf/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index ee4bfd2a740f..9d385055fd09 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -8,6 +8,7 @@ Performance monitor support :maxdepth: 1 hisi-pmu + hisi-pcie-pmu qcom_l2_pmu qcom_l3_pmu arm-ccn -- Gitee From 3d7b6a09be4bc54a9ff1735907b46da04b308e9f Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 29 Aug 2024 17:03:30 +0800 Subject: [PATCH 11/39] drivers/perf: hisi_pcie: Record hardware counts correctly Currently we set the period and record it as the initial value of the counter without checking it's set to the hardware successfully or not. However the counter maybe unwritable if the target event is unsupported by the device. In such case we will pass user a wrong count: [start counts when setting the period] hwc->prev_count = 0x8000000000000000 device.counter_value = 0 // the counter is not set as the period [when user reads the counter] event->count = device.counter_value - hwc->prev_count = 0x8000000000000000 // wrong. should be 0. Fix this by record the hardware counter counts correctly when setting the period. Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240829090332.28756-2-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index e54f37a1dc92..bf2ecb4e16e0 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -437,10 +437,24 @@ static void hisi_pcie_pmu_set_period(struct perf_event *event) struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + u64 orig_cnt, cnt; + + orig_cnt = hisi_pcie_pmu_read_counter(event); local64_set(&hwc->prev_count, HISI_PCIE_INIT_VAL); hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_CNT, idx, HISI_PCIE_INIT_VAL); hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EXT_CNT, idx, HISI_PCIE_INIT_VAL); + + /* + * The counter maybe unwritable if the target event is unsupported. + * Check this by comparing the counts after setting the period. If + * the counts stay unchanged after setting the period then update + * the hwc->prev_count correctly. Otherwise the final counts user + * get maybe totally wrong. + */ + cnt = hisi_pcie_pmu_read_counter(event); + if (orig_cnt == cnt) + local64_set(&hwc->prev_count, cnt); } static void hisi_pcie_pmu_enable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc) -- Gitee From 599857223ab54330a2cc80dabefe17ee5c9af64d Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 29 Aug 2024 17:03:31 +0800 Subject: [PATCH 12/39] drivers/perf: hisi_pcie: Fix TLP headers bandwidth counting We make the initial value of event ctrl register as HISI_PCIE_INIT_SET and modify according to the user options. This will make TLP headers bandwidth only counting never take effect since HISI_PCIE_INIT_SET configures to count the TLP payloads bandwidth. Fix this by making the initial value of event ctrl register as 0. Fixes: 17d573984d4d ("drivers/perf: hisi: Add TLP filter support") Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240829090332.28756-3-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: huwentao Conflicts: drivers/perf/hisilicon/hisi_pcie_pmu.c --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index bf2ecb4e16e0..a5e22bb673b1 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -218,7 +218,7 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event) { struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - u64 reg = HISI_PCIE_INIT_SET; + u64 reg = 0; u64 port, trig_len, thr_len; /* Config HISI_PCIE_EVENT_CTRL according to event. */ -- Gitee From 894c51b63a215960145ba40da3a645570faf85bb Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 29 Aug 2024 17:03:32 +0800 Subject: [PATCH 13/39] drivers/perf: hisi_pcie: Export supported Root Ports [bdf_min, bdf_max] Currently users can get the Root Ports supported by the PCIe PMU by "bus" sysfs attributes which indicates the PCIe bus number where Root Ports are located. This maybe insufficient since Root Ports supported by different PCIe PMUs may be located on the same PCIe bus. So export the BDF range the Root Ports additionally. Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240829090332.28756-4-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: huwentao --- .../admin-guide/perf/hisi-pcie-pmu.rst | 4 +++- drivers/perf/hisilicon/hisi_pcie_pmu.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index 5541ff40e06a..083ca50de896 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -28,7 +28,9 @@ The "identifier" sysfs file allows users to identify the version of the PMU hardware device. The "bus" sysfs file allows users to get the bus number of Root Ports -monitored by PMU. +monitored by PMU. Furthermore users can get the Root Ports range in +[bdf_min, bdf_max] from "bdf_min" and "bdf_max" sysfs attributes +respectively. Example usage of perf:: diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index a5e22bb673b1..c4f399049266 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -150,6 +150,22 @@ static ssize_t bus_show(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR_RO(bus); +static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_min); +} +static DEVICE_ATTR_RO(bdf_min); + +static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_max); +} +static DEVICE_ATTR_RO(bdf_max); + static struct hisi_pcie_reg_pair hisi_pcie_parse_reg_value(struct hisi_pcie_pmu *pcie_pmu, u32 reg_off) { @@ -740,6 +756,8 @@ static const struct attribute_group hisi_pcie_pmu_format_group = { static struct attribute *hisi_pcie_pmu_bus_attrs[] = { &dev_attr_bus.attr, + &dev_attr_bdf_max.attr, + &dev_attr_bdf_min.attr, NULL }; -- Gitee From 136661f8ae1dd5f8ed091fba44325569da76fb7b Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 8 Jun 2023 14:07:55 +0800 Subject: [PATCH 14/39] drivers/perf: hisi: Don't migrate perf to the CPU going to teardown The driver needs to migrate the perf context if the current using CPU going to teardown. By the time calling the cpuhp::teardown() callback the cpu_online_mask() hasn't updated yet and still includes the CPU going to teardown. In current driver's implementation we may migrate the context to the teardown CPU and leads to the below calltrace: ... [ 368.104662][ T932] task:cpuhp/0 state:D stack: 0 pid: 15 ppid: 2 flags:0x00000008 [ 368.113699][ T932] Call trace: [ 368.116834][ T932] __switch_to+0x7c/0xbc [ 368.120924][ T932] __schedule+0x338/0x6f0 [ 368.125098][ T932] schedule+0x50/0xe0 [ 368.128926][ T932] schedule_preempt_disabled+0x18/0x24 [ 368.134229][ T932] __mutex_lock.constprop.0+0x1d4/0x5dc [ 368.139617][ T932] __mutex_lock_slowpath+0x1c/0x30 [ 368.144573][ T932] mutex_lock+0x50/0x60 [ 368.148579][ T932] perf_pmu_migrate_context+0x84/0x2b0 [ 368.153884][ T932] hisi_pcie_pmu_offline_cpu+0x90/0xe0 [hisi_pcie_pmu] [ 368.160579][ T932] cpuhp_invoke_callback+0x2a0/0x650 [ 368.165707][ T932] cpuhp_thread_fun+0xe4/0x190 [ 368.170316][ T932] smpboot_thread_fn+0x15c/0x1a0 [ 368.175099][ T932] kthread+0x108/0x13c [ 368.179012][ T932] ret_from_fork+0x10/0x18 ... Use function cpumask_any_but() to find one correct active cpu to fixes this issue. Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Junhao He Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index c4f399049266..ace71258fb0a 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -705,7 +705,7 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) pcie_pmu->on_cpu = -1; /* Choose a new CPU from all online cpus. */ - target = cpumask_first(cpu_online_mask); + target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) { pci_err(pcie_pmu->pdev, "There is no CPU to set\n"); return 0; -- Gitee From 318c5b1e9aa976be50e0cf6983edd2ce7fe568c2 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 28 Oct 2023 18:53:54 +0800 Subject: [PATCH 15/39] drivers/perf: hisi_pcie: Check the type first in pmu::event_init() Check whether the event type matches the PMU type firstly in pmu::event_init() before touching the event. Otherwise we'll change the events of others and lead to incorrect results. Since in perf_init_event() we may call every pmu's event_init() in a certain case, we should not modify the event if it's not ours. Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20231024092954.42297-2-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index ace71258fb0a..a1ea06a55b4a 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -361,6 +361,10 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event) struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + /* Check the type first before going on, otherwise it's not our event */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + event->cpu = pcie_pmu->on_cpu; if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event))) @@ -368,9 +372,6 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event) else hwc->event_base = HISI_PCIE_CNT; - if (event->attr.type != event->pmu->type) - return -ENOENT; - /* Sampling is not supported. */ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EOPNOTSUPP; -- Gitee From 8d638018f369c1fac87c27864be336c127006e17 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 28 Oct 2023 18:53:55 +0800 Subject: [PATCH 16/39] drivers/perf: hisi_pcie: Initialize event->cpu only on success Initialize the event->cpu only on success. To be more reasonable and keep consistent with other PMUs. Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20231024092954.42297-3-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Junhao He Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index a1ea06a55b4a..8dd398803825 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -365,8 +365,6 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - event->cpu = pcie_pmu->on_cpu; - if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event))) hwc->event_base = HISI_PCIE_EXT_CNT; else @@ -382,6 +380,8 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event) if (!hisi_pcie_pmu_validate_event_group(event)) return -EINVAL; + event->cpu = pcie_pmu->on_cpu; + return 0; } -- Gitee From d361c8c4bfe46e050b5e7f11fa5ea4615edec038 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 23 Feb 2024 18:33:52 +0800 Subject: [PATCH 17/39] drivers/perf: hisi_pcie: Rename hisi_pcie_pmu_{config,clear}_filter() hisi_pcie_pmu_{config,clear}_filter() are config/clear HISI_PCIE_EVENT_CTRL register which contains not only the filter but also the event code. The function names are bit misleading. Rename it to hisi_pcie_pmu_{config,clear}_event_ctrl() to reflects their functions more accurately. Signed-off-by: Yicong Yang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240223103359.18669-2-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 8dd398803825..83f3df7d2efe 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -230,7 +230,7 @@ static void hisi_pcie_pmu_writeq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, writeq_relaxed(val, pcie_pmu->base + offset); } -static void hisi_pcie_pmu_config_filter(struct perf_event *event) +static void hisi_pcie_pmu_config_event_ctrl(struct perf_event *event) { struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -267,7 +267,7 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event) hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg); } -static void hisi_pcie_pmu_clear_filter(struct perf_event *event) +static void hisi_pcie_pmu_clear_event_ctrl(struct perf_event *event) { struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -527,7 +527,7 @@ static void hisi_pcie_pmu_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; - hisi_pcie_pmu_config_filter(event); + hisi_pcie_pmu_config_event_ctrl(event); hisi_pcie_pmu_enable_counter(pcie_pmu, hwc); hisi_pcie_pmu_enable_int(pcie_pmu, hwc); hisi_pcie_pmu_set_period(event); @@ -548,7 +548,7 @@ static void hisi_pcie_pmu_stop(struct perf_event *event, int flags) hisi_pcie_pmu_event_update(event); hisi_pcie_pmu_disable_int(pcie_pmu, hwc); hisi_pcie_pmu_disable_counter(pcie_pmu, hwc); - hisi_pcie_pmu_clear_filter(event); + hisi_pcie_pmu_clear_event_ctrl(event); WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; -- Gitee From 71a928e5ce00a6151a0162d99c9fc2e85fe29ed6 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 23 Feb 2024 18:33:53 +0800 Subject: [PATCH 18/39] drivers/perf: hisi_pcie: Introduce hisi_pcie_pmu_get_event_ctrl_val() Factor out retrieving of the register value for the corresponding event from hisi_pcie_config_event_ctrl() into a new function hisi_pcie_pmu_get_event_ctrl_val() allowing future reuse. Signed-off-by: Yicong Yang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240223103359.18669-3-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 Signed-off-by: huwentao Conflicts: drivers/perf/hisilicon/hisi_pcie_pmu.c --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 83f3df7d2efe..5db8185e2f5f 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -230,10 +230,8 @@ static void hisi_pcie_pmu_writeq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, writeq_relaxed(val, pcie_pmu->base + offset); } -static void hisi_pcie_pmu_config_event_ctrl(struct perf_event *event) +static u64 hisi_pcie_pmu_get_event_ctrl_val(struct perf_event *event) { - struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; u64 reg = 0; u64 port, trig_len, thr_len; @@ -264,6 +262,15 @@ static void hisi_pcie_pmu_config_event_ctrl(struct perf_event *event) reg |= HISI_PCIE_THR_EN; } + return reg; +} + +static void hisi_pcie_pmu_config_event_ctrl(struct perf_event *event) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 reg = hisi_pcie_pmu_get_event_ctrl_val(event); + hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg); } -- Gitee From 49120fc63f441eafcc4459f13cd3893e7ee52b4b Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 23 Feb 2024 18:33:54 +0800 Subject: [PATCH 19/39] drivers/perf: hisi_pcie: Fix incorrect counting under metric mode The metric counting shows incorrect results if the events in the metric group using the same event but different filter options. This is because we only judge the event code to decide whether the event in the metric group should share the same hardware counter, but ignore the settings of the filter. For example, on a platform of 2 ports 0x1 and 0x2 but only port 0x1 has a downstream PCIe NVME device. The metric counting shows both ports have the same counts because we misassign these two events to one same hardware counter: [root@localhost perf-iostat]# ./perf stat -e '{hisi_pcie0_core1/event=0x0104,port=0x2/,hisi_pcie0_core1/event=0x0104,port=0x1/}' Performance counter stats for 'system wide': 7907484924 hisi_pcie0_core1/event=0x0104,port=0x2/ 7907484924 hisi_pcie0_core1/event=0x0104,port=0x1/ 10.153863691 seconds time elapsed Fix this by using the whole config rather than the event only to judge whether two events are the same and should share the same hardware counter. With this patch, the metric counting in the above case tends to be corrected: [root@localhost perf-iostat]# ./perf stat -e '{hisi_pcie0_core1/event=0x0104,port=0x2/,hisi_pcie0_core1/event=0x0104,port=0x1/}' Performance counter stats for 'system wide': 0 hisi_pcie0_core1/event=0x0104,port=0x2/ 8123122077 hisi_pcie0_core1/event=0x0104,port=0x1/ 10.152875631 seconds time elapsed Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Yicong Yang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240223103359.18669-4-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 5db8185e2f5f..1f067ccced44 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -322,10 +322,16 @@ static bool hisi_pcie_pmu_valid_filter(struct perf_event *event, return true; } +/* + * Check Whether two events share the same config. The same config means not + * only the event code, but also the filter settings of the two events are + * the same. + */ static bool hisi_pcie_pmu_cmp_event(struct perf_event *target, struct perf_event *event) { - return hisi_pcie_get_real_event(target) == hisi_pcie_get_real_event(event); + return hisi_pcie_pmu_get_event_ctrl_val(target) == + hisi_pcie_pmu_get_event_ctrl_val(event); } static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event) -- Gitee From 66d3f17ce7c55dfb00a671c3ea996f0c2fb3abe8 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 23 Feb 2024 18:33:55 +0800 Subject: [PATCH 20/39] drivers/perf: hisi_pcie: Add more events for counting TLP bandwidth A typical PCIe transaction is consisted of various TLP packets in both direction. For counting bandwidth only memory read events are exported currently. Add memory write and completion counting events of both direction to complete the bandwidth counting. Signed-off-by: Yicong Yang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240223103359.18669-5-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 1f067ccced44..67e443a3263e 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -740,10 +740,18 @@ static struct attribute *hisi_pcie_pmu_events_attr[] = { HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011), + HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_flux, 0x0104), + HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_time, 0x10104), HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804), HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804), + HISI_PCIE_PMU_EVENT_ATTR(rx_cpl_flux, 0x2004), + HISI_PCIE_PMU_EVENT_ATTR(rx_cpl_time, 0x12004), + HISI_PCIE_PMU_EVENT_ATTR(tx_mwr_flux, 0x0105), + HISI_PCIE_PMU_EVENT_ATTR(tx_mwr_time, 0x10105), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405), HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405), + HISI_PCIE_PMU_EVENT_ATTR(tx_cpl_flux, 0x1005), + HISI_PCIE_PMU_EVENT_ATTR(tx_cpl_time, 0x11005), NULL }; -- Gitee From 533b4c30b7c706d906385fdbf980b02b32802e37 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 23 Feb 2024 18:33:56 +0800 Subject: [PATCH 21/39] drivers/perf: hisi_pcie: Check the target filter properly The PMU can monitor traffic of certain target Root Port or downstream target Endpoint. User can specify the target filter by the "port" or "bdf" option respectively. The PMU can only monitor the Root Port or Endpoint on the same PCIe core so the value of "port" or "bdf" should be valid and will be checked by the driver. Currently at least and only one of "port" and "bdf" option must be set. If "port" filter is not set or is set explicitly to zero (default), driver will regard the user specifies a "bdf" option since "port" option is a bitmask of the target Root Ports and zero is not a valid value. If user not explicitly set "port" or "bdf" filter, the driver uses "bdf" default value (zero) to set target filter, but driver will skip the check of bdf=0, although it's a valid value (meaning 0000:000:00.0). Then the user just gets zero. Therefore, we need to check if both "port" and "bdf" are invalid, then return failure and report warning. Testing: before the patch: 0 hisi_pcie0_core1/rx_mrd_flux/ 0 hisi_pcie0_core1/rx_mrd_flux,port=0/ 24,124 hisi_pcie0_core1/rx_mrd_flux,port=1/ 0 hisi_pcie0_core1/rx_mrd_flux,bdf=0/ 0 hisi_pcie0_core1/rx_mrd_flux,port=0x800/ hisi_pcie0_core1/rx_mrd_flux,bdf=1/ 24,132 hisi_pcie0_core1/rx_mrd_flux,bdf=0x1700/ hisi_pcie0_core1/rx_mrd_flux,port=0x0,bdf=0x0/ hisi_pcie0_core1/rx_mrd_flux,port=0x0,bdf=0x1/ 24,138 hisi_pcie0_core1/rx_mrd_flux,port=0x0,bdf=0x1700/ 24,126 hisi_pcie0_core1/rx_mrd_flux,port=0x1,bdf=0x0/ after the patch: hisi_pcie0_core1/rx_mrd_flux/ hisi_pcie0_core1/rx_mrd_flux,port=0/ 24,153 hisi_pcie0_core1/rx_mrd_flux,port=1/ 0 hisi_pcie0_core1/rx_mrd_flux,port=0x800/ hisi_pcie0_core1/rx_mrd_flux,bdf=0/ hisi_pcie0_core1/rx_mrd_flux,bdf=1/ 24,117 hisi_pcie0_core1/rx_mrd_flux,bdf=0x1700/ hisi_pcie0_core1/rx_mrd_flux,port=0x0,bdf=0x0/ hisi_pcie0_core1/rx_mrd_flux,port=0x0,bdf=0x1/ 24,120 hisi_pcie0_core1/rx_mrd_flux,port=0x0,bdf=0x1700/ 24,123 hisi_pcie0_core1/rx_mrd_flux,port=0x1,bdf=0x0/ Signed-off-by: Junhao He Signed-off-by: Yicong Yang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240223103359.18669-6-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 67e443a3263e..89f505bd0ee8 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -314,10 +314,10 @@ static bool hisi_pcie_pmu_valid_filter(struct perf_event *event, if (hisi_pcie_get_trig_len(event) > HISI_PCIE_TRIG_MAX_VAL) return false; - if (requester_id) { - if (!hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id)) - return false; - } + /* Need to explicitly set filter of "port" or "bdf" */ + if (!hisi_pcie_get_port(event) && + !hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id)) + return false; return true; } -- Gitee From 9d57c6dd90d893476e95de75b1aafb1a8783d6be Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 23 Feb 2024 18:33:57 +0800 Subject: [PATCH 22/39] drivers/perf: hisi_pcie: Relax the check on related events If we use two events with the same filter and related event type (see the following example), the driver check whether they are related events and are in the same group, otherwise the function hisi_pcie_pmu_find_related_event() return -EINVAL, then the 2nd event cannot count but the 1st event is running, although the PCIe PMU has other idle counters. In this case, The perf event scheduler will make the two events to multiplex a counter, if the user use the formula (1st event_value / 2nd event_value) to calculate the bandwidth, he/she won't get the correct value, because they are not counting at the same period. This patch tries to fix this by making the related events to use different idle counters if they are not in the same event group. And finally, I'm going to say. The related events are best used in the same group [1]. There are two ways to know if they are related events. a) By event name, such as the latency events "xxx_latency, xxx_cnt" or bandwidth events "xxx_flux, xxx_time". b) By event type, such as "event=0xXXXX, event=0x1XXXX". Use group to count the related events: [1] -e "{pmu_name/xxx_latency,port=1/,pmu_name/xxx_cnt,port=1/}" example: 1st event: hisi_pcie0_core1/event=0x804,port=1 2nd event: hisi_pcie0_core1/event=0x10804,port=1 test cmd: perf stat -e hisi_pcie0_core1/event=0x804,port=1/ \ -e hisi_pcie0_core1/event=0x10804,port=1/ before patch: 25,281 hisi_pcie0_core1/event=0x804,port=1/ (49.91%) 470,598 hisi_pcie0_core1/event=0x10804,port=1/ (50.09%) after patch: 24,147 hisi_pcie0_core1/event=0x804,port=1/ 474,558 hisi_pcie0_core1/event=0x10804,port=1/ Signed-off-by: Junhao He Signed-off-by: Yicong Yang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240223103359.18669-7-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 89f505bd0ee8..64a0793ad14f 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -417,14 +417,10 @@ static int hisi_pcie_pmu_find_related_event(struct hisi_pcie_pmu *pcie_pmu, if (!sibling) continue; - if (!hisi_pcie_pmu_cmp_event(sibling, event)) - continue; - /* Related events must be used in group */ - if (sibling->group_leader == event->group_leader) + if (hisi_pcie_pmu_cmp_event(sibling, event) && + sibling->group_leader == event->group_leader) return idx; - else - return -EINVAL; } return idx; -- Gitee From abcc6195c4c1ba839245e575a18682128686a65b Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 23 Feb 2024 18:33:58 +0800 Subject: [PATCH 23/39] drivers/perf: hisi_pcie: Merge find_related_event() and get_event_idx() The function xxx_find_related_event() scan all working events to find related events. During this process, we also can find the idle counters. If not found related events, return the first idle counter to simplify the code. Signed-off-by: Junhao He Signed-off-by: Yicong Yang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240223103359.18669-8-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Slim6882 Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 51 ++++++++++---------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 64a0793ad14f..28919d1f9fac 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -406,16 +406,24 @@ static u64 hisi_pcie_pmu_read_counter(struct perf_event *event) return hisi_pcie_pmu_readq(pcie_pmu, event->hw.event_base, idx); } -static int hisi_pcie_pmu_find_related_event(struct hisi_pcie_pmu *pcie_pmu, - struct perf_event *event) +/* + * Check all work events, if a relevant event is found then we return it + * first, otherwise return the first idle counter (need to reset). + */ +static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu, + struct perf_event *event) { + int first_idle = -EAGAIN; struct perf_event *sibling; int idx; for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) { sibling = pcie_pmu->hw_events[idx]; - if (!sibling) + if (!sibling) { + if (first_idle == -EAGAIN) + first_idle = idx; continue; + } /* Related events must be used in group */ if (hisi_pcie_pmu_cmp_event(sibling, event) && @@ -423,19 +431,7 @@ static int hisi_pcie_pmu_find_related_event(struct hisi_pcie_pmu *pcie_pmu, return idx; } - return idx; -} - -static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu) -{ - int idx; - - for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) { - if (!pcie_pmu->hw_events[idx]) - return idx; - } - - return -EINVAL; + return first_idle; } static void hisi_pcie_pmu_event_update(struct perf_event *event) @@ -575,27 +571,18 @@ static int hisi_pcie_pmu_add(struct perf_event *event, int flags) hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; - /* Check all working events to find a related event. */ - idx = hisi_pcie_pmu_find_related_event(pcie_pmu, event); - if (idx < 0) - return idx; - - /* Current event shares an enabled counter with the related event */ - if (idx < HISI_PCIE_MAX_COUNTERS) { - hwc->idx = idx; - goto start_count; - } - - idx = hisi_pcie_pmu_get_event_idx(pcie_pmu); + idx = hisi_pcie_pmu_get_event_idx(pcie_pmu, event); if (idx < 0) return idx; hwc->idx = idx; - pcie_pmu->hw_events[idx] = event; - /* Reset Counter to avoid previous statistic interference. */ - hisi_pcie_pmu_reset_counter(pcie_pmu, idx); -start_count: + /* No enabled counter found with related event, reset it */ + if (!pcie_pmu->hw_events[idx]) { + hisi_pcie_pmu_reset_counter(pcie_pmu, idx); + pcie_pmu->hw_events[idx] = event; + } + if (flags & PERF_EF_START) hisi_pcie_pmu_start(event, PERF_EF_RELOAD); -- Gitee From 7b335f7e1b74b1957686431cad034d1515c72d4e Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 25 Apr 2024 20:46:25 +0800 Subject: [PATCH 24/39] drivers/perf: hisi_pcie: Fix out-of-bound access when valid event group The perf tool allows users to create event groups through following cmd [1], but the driver does not check whether the array index is out of bounds when writing data to the event_group array. If the number of events in an event_group is greater than HISI_PCIE_MAX_COUNTERS, the memory write overflow of event_group array occurs. Add array index check to fix the possible array out of bounds violation, and return directly when write new events are written to array bounds. There are 9 different events in an event_group. [1] perf stat -e '{pmu/event1/, ... ,pmu/event9/}' Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Junhao He Reviewed-by: Jijie Shao Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240425124627.13764-2-hejunhao3@huawei.com Signed-off-by: Will Deacon Signed-off-by: chenyi Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 28919d1f9fac..438780ec8141 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -358,15 +358,27 @@ static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event) return false; for (num = 0; num < counters; num++) { + /* + * If we find a related event, then it's a valid group + * since we don't need to allocate a new counter for it. + */ if (hisi_pcie_pmu_cmp_event(event_group[num], sibling)) break; } + /* + * Otherwise it's a new event but if there's no available counter, + * fail the check since we cannot schedule all the events in + * the group simultaneously. + */ + if (num == HISI_PCIE_MAX_COUNTERS) + return false; + if (num == counters) event_group[counters++] = sibling; } - return counters <= HISI_PCIE_MAX_COUNTERS; + return true; } static int hisi_pcie_pmu_event_init(struct perf_event *event) -- Gitee From 97d5370808fc47a929a7cedcf25d9f57385f1a0b Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Fri, 4 Sep 2020 17:57:38 +0800 Subject: [PATCH 25/39] arm64: perf: Remove unnecessary event_idx check event_idx is obtained from armv8pmu_get_event_idx(), and this idx must be between ARMV8_IDX_CYCLE_COUNTER and cpu_pmu->num_events. So it's unnecessary to do this check. Let's remove it. Signed-off-by: Qi Liu Link: https://lore.kernel.org/r/1599213458-28394-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon Signed-off-by: huwentao Conflicts: arch/arm64/kernel/perf_event.c --- arch/arm64/kernel/perf_event.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 19128d994ee9..08c919cbfb6b 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -344,8 +344,6 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { */ #define ARMV8_IDX_CYCLE_COUNTER 0 #define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ - (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) /* * We must chain two programmable counters for 64 bit events, @@ -389,12 +387,6 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr) return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; } -static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx) -{ - return idx >= ARMV8_IDX_CYCLE_COUNTER && - idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu); -} - static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) { return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); @@ -426,15 +418,11 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) static u64 armv8pmu_read_counter(struct perf_event *event) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; u64 value = 0; - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); else value = armv8pmu_read_hw_counter(event); @@ -463,14 +451,10 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event, static void armv8pmu_write_counter(struct perf_event *event, u64 value) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) { + if (idx == ARMV8_IDX_CYCLE_COUNTER) { /* * The cycles counter is really a 64-bit counter. * When treating it as a 32-bit counter, we only count -- Gitee From 3ee179cc1fb32ca9c401149211e9fd856459028a Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 7 Nov 2019 15:56:04 +0800 Subject: [PATCH 26/39] drivers/perf: hisi: update the sccl_id/ccl_id for certain HiSilicon platform For some HiSilicon platform, the originally designed SCCL_ID and CCL_ID are not satisfied with much rich topology when the MT is set, so we extend the SCCL_ID to MPIDR[aff3] and CCL_ID to MPIDR[aff2]. Let's update this for HiSilicon uncore PMU driver. Cc: John Garry Cc: Hanjun Guo Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 26 ++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 7a7e583893cb..65969b041e3d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -15,6 +15,7 @@ #include #include +#include #include #include "hisi_uncore_pmu.h" @@ -353,8 +354,10 @@ EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable); /* * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. - * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2] - * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID + * If multi-threading is supported, On Huawei Kunpeng 920 SoC whose cpu + * core is tsv110, CCL_ID is the low 3-bits in MPIDR[Aff2] and SCCL_ID + * is the upper 5-bits of Aff2 field; while for other cpu types, SCCL_ID + * is in MPIDR[Aff3] and CCL_ID is in MPIDR[Aff2], if not, SCCL_ID * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1]. */ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) @@ -362,12 +365,19 @@ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) u64 mpidr = read_cpuid_mpidr(); if (mpidr & MPIDR_MT_BITMASK) { - int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); - - if (sccl_id) - *sccl_id = aff2 >> 3; - if (ccl_id) - *ccl_id = aff2 & 0x7; + if (read_cpuid_part_number() == HISI_CPU_PART_TSV110) { + int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + + if (sccl_id) + *sccl_id = aff2 >> 3; + if (ccl_id) + *ccl_id = aff2 & 0x7; + } else { + if (sccl_id) + *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3); + if (ccl_id) + *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + } } else { if (sccl_id) *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); -- Gitee From ea86c5a2309e4b66b8a4a7085d15597796041535 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 14 Nov 2019 08:57:03 +0800 Subject: [PATCH 27/39] drivers/perf: hisi: Simplify hisi_read_sccl_and_ccl_id and its comment hisi_read_sccl_and_ccl_id is not readable and its comment is a little confused, so simplify the function and its comment as Mark's suggestion. Acked-by: Mark Rutland Reviewed-by: John Garry Suggested-by: Mark Rutland Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 58 +++++++++++++----------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 65969b041e3d..97aff877a4e7 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -352,38 +352,44 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable); + /* - * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. - * If multi-threading is supported, On Huawei Kunpeng 920 SoC whose cpu - * core is tsv110, CCL_ID is the low 3-bits in MPIDR[Aff2] and SCCL_ID - * is the upper 5-bits of Aff2 field; while for other cpu types, SCCL_ID - * is in MPIDR[Aff3] and CCL_ID is in MPIDR[Aff2], if not, SCCL_ID - * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1]. + * The Super CPU Cluster (SCCL) and CPU Cluster (CCL) IDs can be + * determined from the MPIDR_EL1, but the encoding varies by CPU: + * + * - For MT variants of TSV110: + * SCCL is Aff2[7:3], CCL is Aff2[2:0] + * + * - For other MT parts: + * SCCL is Aff3[7:0], CCL is Aff2[7:0] + * + * - For non-MT parts: + * SCCL is Aff2[7:0], CCL is Aff1[7:0] */ -static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) +static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp) { u64 mpidr = read_cpuid_mpidr(); - - if (mpidr & MPIDR_MT_BITMASK) { - if (read_cpuid_part_number() == HISI_CPU_PART_TSV110) { - int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); - - if (sccl_id) - *sccl_id = aff2 >> 3; - if (ccl_id) - *ccl_id = aff2 & 0x7; - } else { - if (sccl_id) - *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3); - if (ccl_id) - *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); - } + int aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3); + int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + int aff1 = MPIDR_AFFINITY_LEVEL(mpidr, 1); + bool mt = mpidr & MPIDR_MT_BITMASK; + int sccl, ccl; + + if (mt && read_cpuid_part_number() == HISI_CPU_PART_TSV110) { + sccl = aff2 >> 3; + ccl = aff2 & 0x7; + } else if (mt) { + sccl = aff3; + ccl = aff2; } else { - if (sccl_id) - *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); - if (ccl_id) - *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + sccl = aff2; + ccl = aff1; } + + if (scclp) + *scclp = sccl; + if (cclp) + *cclp = ccl; } /* -- Gitee From 1924c5605d0c6e4533f87688dc15e017ff589677 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 30 Jul 2021 15:44:02 +0800 Subject: [PATCH 28/39] drivers/perf: hisi: Remove unnecessary check of counter index The sanity check for counter index has been done in the function hisi_uncore_pmu_get_event_idx, so remove the redundant interface hisi_uncore_pmu_counter_valid() and sanity check. Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-2-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon Reviewed-by: Shaokun Zhang Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 20 +++---------------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 18 ++--------------- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 18 ++--------------- drivers/perf/hisilicon/hisi_uncore_pmu.c | 11 ---------- drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 - 5 files changed, 7 insertions(+), 61 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 883764deba3b..b76cb19bf23a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -64,29 +64,15 @@ static u32 hisi_ddrc_pmu_get_counter_offset(int cntr_idx) static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { - /* Use event code as counter index */ - u32 idx = GET_DDRC_EVENTID(hwc); - - if (!hisi_uncore_pmu_counter_valid(ddrc_pmu, idx)) { - dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", idx); - return 0; - } - - return readl(ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(idx)); + return readl(ddrc_pmu->base + + hisi_ddrc_pmu_get_counter_offset(hwc->idx)); } static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc, u64 val) { - u32 idx = GET_DDRC_EVENTID(hwc); - - if (!hisi_uncore_pmu_counter_valid(ddrc_pmu, idx)) { - dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", idx); - return; - } - writel((u32)val, - ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(idx)); + ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(hwc->idx)); } /* diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index d2cdece7cb5c..1712f3826fb7 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -50,29 +50,15 @@ static u32 hisi_hha_pmu_get_counter_offset(int cntr_idx) static u64 hisi_hha_pmu_read_counter(struct hisi_pmu *hha_pmu, struct hw_perf_event *hwc) { - u32 idx = hwc->idx; - - if (!hisi_uncore_pmu_counter_valid(hha_pmu, idx)) { - dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", idx); - return 0; - } - /* Read 64 bits and like L3C, top 16 bits are RAZ */ - return readq(hha_pmu->base + hisi_hha_pmu_get_counter_offset(idx)); + return readq(hha_pmu->base + hisi_hha_pmu_get_counter_offset(hwc->idx)); } static void hisi_hha_pmu_write_counter(struct hisi_pmu *hha_pmu, struct hw_perf_event *hwc, u64 val) { - u32 idx = hwc->idx; - - if (!hisi_uncore_pmu_counter_valid(hha_pmu, idx)) { - dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", idx); - return; - } - /* Write 64 bits and like L3C, top 16 bits are WI */ - writeq(val, hha_pmu->base + hisi_hha_pmu_get_counter_offset(idx)); + writeq(val, hha_pmu->base + hisi_hha_pmu_get_counter_offset(hwc->idx)); } static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index c07689833e5f..11d7d10b0858 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -49,29 +49,15 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc) { - u32 idx = hwc->idx; - - if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) { - dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx); - return 0; - } - /* Read 64-bits and the upper 16 bits are RAZ */ - return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx)); + return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); } static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc, u64 val) { - u32 idx = hwc->idx; - - if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) { - dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx); - return; - } - /* Write 64-bits and the upper 16 bits are WI */ - writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx)); + writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); } static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 97aff877a4e7..8819b1996d2e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -96,12 +96,6 @@ static bool hisi_validate_event_group(struct perf_event *event) return counters <= hisi_pmu->num_counters; } -int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx) -{ - return idx >= 0 && idx < hisi_pmu->num_counters; -} -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_counter_valid); - int hisi_uncore_pmu_get_event_idx(struct perf_event *event) { struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu); @@ -121,11 +115,6 @@ EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { - if (!hisi_uncore_pmu_counter_valid(hisi_pmu, idx)) { - dev_err(hisi_pmu->dev, "Unsupported event index:%d!\n", idx); - return; - } - clear_bit(idx, hisi_pmu->pmu_events.used_mask); } diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 25b0c97b3eb0..89cc356249c5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -76,7 +76,6 @@ struct hisi_pmu { int check_event; }; -int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx); int hisi_uncore_pmu_get_event_idx(struct perf_event *event); void hisi_uncore_pmu_read(struct perf_event *event); int hisi_uncore_pmu_add(struct perf_event *event, int flags); -- Gitee From d1d1de368d8af2db494be829d70fd25244f02535 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 30 Jul 2021 15:43:58 +0800 Subject: [PATCH 29/39] drivers/perf: hisi: Add identifier sysfs file To allow userspace to identify the specific implementation of the device, add an "identifier" sysfs file. Encoding is as follows (same for all uncore drivers): hi1620: 0x0 hi1630: 0x30 Signed-off-by: John Garry Link: https://lore.kernel.org/r/1602149181-237415-2-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon Reviewed-by: Shaokun Zhang Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 16 ++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 16 ++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 16 ++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_pmu.c | 10 ++++++++++ drivers/perf/hisilicon/hisi_uncore_pmu.h | 7 +++++++ 5 files changed, 65 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index b76cb19bf23a..20fa072ddd2e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -33,6 +33,7 @@ #define DDRC_INT_MASK 0x6c8 #define DDRC_INT_STATUS 0x6cc #define DDRC_INT_CLEAR 0x6d0 +#define DDRC_VERSION 0x710 /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 @@ -256,6 +257,8 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, return PTR_ERR(ddrc_pmu->base); } + ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); + return 0; } @@ -297,10 +300,23 @@ static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = { .attrs = hisi_ddrc_pmu_cpumask_attrs, }; +static struct device_attribute hisi_ddrc_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = { + &hisi_ddrc_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_ddrc_pmu_identifier_group = { + .attrs = hisi_ddrc_pmu_identifier_attrs, +}; + static const struct attribute_group *hisi_ddrc_pmu_attr_groups[] = { &hisi_ddrc_pmu_format_group, &hisi_ddrc_pmu_events_group, &hisi_ddrc_pmu_cpumask_attr_group, + &hisi_ddrc_pmu_identifier_group, NULL, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 1712f3826fb7..0acde982a45f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -23,6 +23,7 @@ #define HHA_INT_MASK 0x0804 #define HHA_INT_STATUS 0x0808 #define HHA_INT_CLEAR 0x080C +#define HHA_VERSION 0x1cf0 #define HHA_PERF_CTRL 0x1E00 #define HHA_EVENT_CTRL 0x1E04 #define HHA_EVENT_TYPE0 0x1E80 @@ -249,6 +250,8 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, return PTR_ERR(hha_pmu->base); } + hha_pmu->identifier = readl(hha_pmu->base + HHA_VERSION); + return 0; } @@ -308,10 +311,23 @@ static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = { .attrs = hisi_hha_pmu_cpumask_attrs, }; +static struct device_attribute hisi_hha_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_hha_pmu_identifier_attrs[] = { + &hisi_hha_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_hha_pmu_identifier_group = { + .attrs = hisi_hha_pmu_identifier_attrs, +}; + static const struct attribute_group *hisi_hha_pmu_attr_groups[] = { &hisi_hha_pmu_format_group, &hisi_hha_pmu_events_group, &hisi_hha_pmu_cpumask_attr_group, + &hisi_hha_pmu_identifier_group, NULL, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 11d7d10b0858..ea71e955265b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -25,6 +25,7 @@ #define L3C_INT_STATUS 0x0808 #define L3C_INT_CLEAR 0x080c #define L3C_EVENT_CTRL 0x1c00 +#define L3C_VERSION 0x1cf0 #define L3C_EVENT_TYPE0 0x1d00 /* * Each counter is 48-bits and [48:63] are reserved @@ -252,6 +253,8 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return PTR_ERR(l3c_pmu->base); } + l3c_pmu->identifier = readl(l3c_pmu->base + L3C_VERSION); + return 0; } @@ -298,10 +301,23 @@ static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = { .attrs = hisi_l3c_pmu_cpumask_attrs, }; +static struct device_attribute hisi_l3c_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_l3c_pmu_identifier_attrs[] = { + &hisi_l3c_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_l3c_pmu_identifier_group = { + .attrs = hisi_l3c_pmu_identifier_attrs, +}; + static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = { &hisi_l3c_pmu_format_group, &hisi_l3c_pmu_events_group, &hisi_l3c_pmu_cpumask_attr_group, + &hisi_l3c_pmu_identifier_group, NULL, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 8819b1996d2e..ae84a43ec991 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -113,6 +113,16 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx); +ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return snprintf(page, PAGE_SIZE, "0x%08x\n", hisi_pmu->identifier); +} +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show); + static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { clear_bit(idx, hisi_pmu->pmu_events.used_mask); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 89cc356249c5..8734632e15bf 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -74,6 +74,7 @@ struct hisi_pmu { int counter_bits; /* check event code range */ int check_event; + u32 identifier; }; int hisi_uncore_pmu_get_event_idx(struct perf_event *event); @@ -95,4 +96,10 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, struct device_attribute *attr, char *buf); int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node); + +ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, + struct device_attribute *attr, + char *page); + + #endif /* __HISI_UNCORE_PMU_H__ */ -- Gitee From 3b73ebb9074e67ee9c96e5af640ce097c9a4d534 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 30 Jul 2021 15:44:03 +0800 Subject: [PATCH 30/39] drivers/perf: hisi: Refactor code for more uncore PMUs On HiSilicon uncore PMU drivers, interrupt handling function and interrupt registration function are very similar in differents PMU modules. Let's refactor the frame. Two new callbacks are added for the HW accessors: * hisi_uncore_ops::get_int_status returns a bitmap of events which have overflowed and raised an interrupt * hisi_uncore_ops::clear_int_status clears the overflow status for a specific event These callback functions are used by a common IRQ handler, hisi_uncore_pmu_isr(). One more function hisi_uncore_pmu_init_irq() is added to replace each PMU initialization IRQ interface and simplify the code. Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-3-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon Reviewed-by: Shaokun Zhang Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 59 +++---------------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 59 +++---------------- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 59 +++---------------- drivers/perf/hisilicon/hisi_uncore_pmu.c | 54 +++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_pmu.h | 6 +- 5 files changed, 80 insertions(+), 157 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 20fa072ddd2e..333992e787f7 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "hisi_uncore_pmu.h" @@ -165,60 +164,14 @@ static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_INT_MASK); } -static irqreturn_t hisi_ddrc_pmu_isr(int irq, void *dev_id) +static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu) { - struct hisi_pmu *ddrc_pmu = dev_id; - struct perf_event *event; - unsigned long overflown; - int idx; - - /* Read the DDRC_INT_STATUS register */ - overflown = readl(ddrc_pmu->base + DDRC_INT_STATUS); - if (!overflown) - return IRQ_NONE; - - /* - * Find the counter index which overflowed if the bit was set - * and handle it - */ - for_each_set_bit(idx, &overflown, DDRC_NR_COUNTERS) { - /* Write 1 to clear the IRQ status flag */ - writel((1 << idx), ddrc_pmu->base + DDRC_INT_CLEAR); - - /* Get the corresponding event struct */ - event = ddrc_pmu->pmu_events.hw_events[idx]; - if (!event) - continue; - - hisi_uncore_pmu_event_update(event); - hisi_uncore_pmu_set_event_period(event); - } - - return IRQ_HANDLED; + return readl(ddrc_pmu->base + DDRC_INT_STATUS); } -static int hisi_ddrc_pmu_init_irq(struct hisi_pmu *ddrc_pmu, - struct platform_device *pdev) +static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu, int idx) { - int irq, ret; - - /* Read and init IRQ */ - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, hisi_ddrc_pmu_isr, - IRQF_NOBALANCING | IRQF_NO_THREAD, - dev_name(&pdev->dev), ddrc_pmu); - if (ret < 0) { - dev_err(&pdev->dev, - "Fail to request IRQ:%d ret:%d\n", irq, ret); - return ret; - } - - ddrc_pmu->irq = irq; - - return 0; + writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); } static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { @@ -331,6 +284,8 @@ static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = { .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, .write_counter = hisi_ddrc_pmu_write_counter, .read_counter = hisi_ddrc_pmu_read_counter, + .get_int_status = hisi_ddrc_pmu_get_int_status, + .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, @@ -342,7 +297,7 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - ret = hisi_ddrc_pmu_init_irq(ddrc_pmu, pdev); + ret = hisi_uncore_pmu_init_irq(ddrc_pmu, pdev); if (ret) return ret; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 0acde982a45f..aea952de7b3f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "hisi_uncore_pmu.h" @@ -155,60 +154,14 @@ static void hisi_hha_pmu_disable_counter_int(struct hisi_pmu *hha_pmu, writel(val, hha_pmu->base + HHA_INT_MASK); } -static irqreturn_t hisi_hha_pmu_isr(int irq, void *dev_id) +static u32 hisi_hha_pmu_get_int_status(struct hisi_pmu *hha_pmu) { - struct hisi_pmu *hha_pmu = dev_id; - struct perf_event *event; - unsigned long overflown; - int idx; - - /* Read HHA_INT_STATUS register */ - overflown = readl(hha_pmu->base + HHA_INT_STATUS); - if (!overflown) - return IRQ_NONE; - - /* - * Find the counter index which overflowed if the bit was set - * and handle it - */ - for_each_set_bit(idx, &overflown, HHA_NR_COUNTERS) { - /* Write 1 to clear the IRQ status flag */ - writel((1 << idx), hha_pmu->base + HHA_INT_CLEAR); - - /* Get the corresponding event struct */ - event = hha_pmu->pmu_events.hw_events[idx]; - if (!event) - continue; - - hisi_uncore_pmu_event_update(event); - hisi_uncore_pmu_set_event_period(event); - } - - return IRQ_HANDLED; + return readl(hha_pmu->base + HHA_INT_STATUS); } -static int hisi_hha_pmu_init_irq(struct hisi_pmu *hha_pmu, - struct platform_device *pdev) +static void hisi_hha_pmu_clear_int_status(struct hisi_pmu *hha_pmu, int idx) { - int irq, ret; - - /* Read and init IRQ */ - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, hisi_hha_pmu_isr, - IRQF_NOBALANCING | IRQF_NO_THREAD, - dev_name(&pdev->dev), hha_pmu); - if (ret < 0) { - dev_err(&pdev->dev, - "Fail to request IRQ:%d ret:%d\n", irq, ret); - return ret; - } - - hha_pmu->irq = irq; - - return 0; + writel(1 << idx, hha_pmu->base + HHA_INT_CLEAR); } static const struct acpi_device_id hisi_hha_pmu_acpi_match[] = { @@ -342,6 +295,8 @@ static const struct hisi_uncore_ops hisi_uncore_hha_ops = { .disable_counter_int = hisi_hha_pmu_disable_counter_int, .write_counter = hisi_hha_pmu_write_counter, .read_counter = hisi_hha_pmu_read_counter, + .get_int_status = hisi_hha_pmu_get_int_status, + .clear_int_status = hisi_hha_pmu_clear_int_status, }; static int hisi_hha_pmu_dev_probe(struct platform_device *pdev, @@ -353,7 +308,7 @@ static int hisi_hha_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - ret = hisi_hha_pmu_init_irq(hha_pmu, pdev); + ret = hisi_uncore_pmu_init_irq(hha_pmu, pdev); if (ret) return ret; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index ea71e955265b..4c1d274e5da2 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "hisi_uncore_pmu.h" @@ -154,60 +153,14 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, writel(val, l3c_pmu->base + L3C_INT_MASK); } -static irqreturn_t hisi_l3c_pmu_isr(int irq, void *dev_id) +static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) { - struct hisi_pmu *l3c_pmu = dev_id; - struct perf_event *event; - unsigned long overflown; - int idx; - - /* Read L3C_INT_STATUS register */ - overflown = readl(l3c_pmu->base + L3C_INT_STATUS); - if (!overflown) - return IRQ_NONE; - - /* - * Find the counter index which overflowed if the bit was set - * and handle it. - */ - for_each_set_bit(idx, &overflown, L3C_NR_COUNTERS) { - /* Write 1 to clear the IRQ status flag */ - writel((1 << idx), l3c_pmu->base + L3C_INT_CLEAR); - - /* Get the corresponding event struct */ - event = l3c_pmu->pmu_events.hw_events[idx]; - if (!event) - continue; - - hisi_uncore_pmu_event_update(event); - hisi_uncore_pmu_set_event_period(event); - } - - return IRQ_HANDLED; + return readl(l3c_pmu->base + L3C_INT_STATUS); } -static int hisi_l3c_pmu_init_irq(struct hisi_pmu *l3c_pmu, - struct platform_device *pdev) +static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) { - int irq, ret; - - /* Read and init IRQ */ - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, hisi_l3c_pmu_isr, - IRQF_NOBALANCING | IRQF_NO_THREAD, - dev_name(&pdev->dev), l3c_pmu); - if (ret < 0) { - dev_err(&pdev->dev, - "Fail to request IRQ:%d ret:%d\n", irq, ret); - return ret; - } - - l3c_pmu->irq = irq; - - return 0; + writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR); } static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { @@ -332,6 +285,8 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .disable_counter_int = hisi_l3c_pmu_disable_counter_int, .write_counter = hisi_l3c_pmu_write_counter, .read_counter = hisi_l3c_pmu_read_counter, + .get_int_status = hisi_l3c_pmu_get_int_status, + .clear_int_status = hisi_l3c_pmu_clear_int_status, }; static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, @@ -343,7 +298,7 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - ret = hisi_l3c_pmu_init_irq(l3c_pmu, pdev); + ret = hisi_uncore_pmu_init_irq(l3c_pmu, pdev); if (ret) return ret; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index ae84a43ec991..29e2f94a190d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -128,6 +128,60 @@ static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) clear_bit(idx, hisi_pmu->pmu_events.used_mask); } +static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) +{ + struct hisi_pmu *hisi_pmu = data; + struct perf_event *event; + unsigned long overflown; + int idx; + + overflown = hisi_pmu->ops->get_int_status(hisi_pmu); + if (!overflown) + return IRQ_NONE; + + /* + * Find the counter index which overflowed if the bit was set + * and handle it. + */ + for_each_set_bit(idx, &overflown, hisi_pmu->num_counters) { + /* Write 1 to clear the IRQ status flag */ + hisi_pmu->ops->clear_int_status(hisi_pmu, idx); + /* Get the corresponding event struct */ + event = hisi_pmu->pmu_events.hw_events[idx]; + if (!event) + continue; + + hisi_uncore_pmu_event_update(event); + hisi_uncore_pmu_set_event_period(event); + } + + return IRQ_HANDLED; +} + +int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, + struct platform_device *pdev) +{ + int irq, ret; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr, + IRQF_NOBALANCING | IRQF_NO_THREAD, + dev_name(&pdev->dev), hisi_pmu); + if (ret < 0) { + dev_err(&pdev->dev, + "Fail to request IRQ: %d ret: %d.\n", irq, ret); + return ret; + } + + hisi_pmu->irq = irq; + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq); + int hisi_uncore_pmu_event_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 8734632e15bf..cc638f839fc8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #undef pr_fmt @@ -46,6 +47,8 @@ struct hisi_uncore_ops { void (*disable_counter_int)(struct hisi_pmu *, struct hw_perf_event *); void (*start_counters)(struct hisi_pmu *); void (*stop_counters)(struct hisi_pmu *); + u32 (*get_int_status)(struct hisi_pmu *hisi_pmu); + void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx); }; struct hisi_pmu_hwevents { @@ -100,6 +103,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node); ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, struct device_attribute *attr, char *page); - +int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, + struct platform_device *pdev); #endif /* __HISI_UNCORE_PMU_H__ */ -- Gitee From 3385f716e883a8a1ca57d2cdc4f38ba79a142776 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 30 Jul 2021 15:44:04 +0800 Subject: [PATCH 31/39] drivers/perf: hisi: Add PMU version for uncore PMU drivers. For HiSilicon uncore PMU, more versions are supported and some variables shall be added suffix to distinguish the version which are prepared for the new drivers. Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Acked-by: Mark Rutland Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-4-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon Reviewed-by: Shaokun Zhang Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 96 ++++++++++--------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 27 +++--- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 23 ++--- 3 files changed, 75 insertions(+), 71 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 333992e787f7..cb980cf673ef 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -36,7 +36,8 @@ /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 -#define DDRC_PERF_CTRL_EN 0x2 +#define DDRC_V1_PERF_CTRL_EN 0x2 +#define DDRC_V1_NR_EVENTS 0x7 /* * For DDRC PMU, there are eight-events and every event has been mapped @@ -53,26 +54,26 @@ static const u32 ddrc_reg_off[] = { /* * Select the counter register offset using the counter index. - * In DDRC there are no programmable counter, the count - * is readed form the statistics counter register itself. + * In PMU v1, there are no programmable counter, the count + * is read form the statistics counter register itself. */ -static u32 hisi_ddrc_pmu_get_counter_offset(int cntr_idx) +static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx) { return ddrc_reg_off[cntr_idx]; } -static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu, +static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { return readl(ddrc_pmu->base + - hisi_ddrc_pmu_get_counter_offset(hwc->idx)); + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); } -static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc, u64 val) { writel((u32)val, - ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(hwc->idx)); + ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); } /* @@ -84,28 +85,28 @@ static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, { } -static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu) { u32 val; /* Set perf_enable in DDRC_PERF_CTRL to start event counting */ val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val |= DDRC_PERF_CTRL_EN; + val |= DDRC_V1_PERF_CTRL_EN; writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); } -static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu) { u32 val; /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */ val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val &= ~DDRC_PERF_CTRL_EN; + val &= ~DDRC_V1_PERF_CTRL_EN; writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); } -static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { u32 val; @@ -115,8 +116,8 @@ static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); } -static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { u32 val; @@ -126,7 +127,7 @@ static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); } -static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event) +static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) { struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu); unsigned long *used_mask = ddrc_pmu->pmu_events.used_mask; @@ -142,8 +143,8 @@ static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event) return idx; } -static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { u32 val; @@ -153,8 +154,8 @@ static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_INT_MASK); } -static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { u32 val; @@ -164,12 +165,13 @@ static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_INT_MASK); } -static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu) +static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu) { return readl(ddrc_pmu->base + DDRC_INT_STATUS); } -static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu, int idx) +static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu, + int idx) { writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); } @@ -215,17 +217,17 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, return 0; } -static struct attribute *hisi_ddrc_pmu_format_attr[] = { +static struct attribute *hisi_ddrc_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-4"), NULL, }; -static const struct attribute_group hisi_ddrc_pmu_format_group = { +static const struct attribute_group hisi_ddrc_pmu_v1_format_group = { .name = "format", - .attrs = hisi_ddrc_pmu_format_attr, + .attrs = hisi_ddrc_pmu_v1_format_attr, }; -static struct attribute *hisi_ddrc_pmu_events_attr[] = { +static struct attribute *hisi_ddrc_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(flux_wr, 0x00), HISI_PMU_EVENT_ATTR(flux_rd, 0x01), HISI_PMU_EVENT_ATTR(flux_wcmd, 0x02), @@ -237,9 +239,9 @@ static struct attribute *hisi_ddrc_pmu_events_attr[] = { NULL, }; -static const struct attribute_group hisi_ddrc_pmu_events_group = { +static const struct attribute_group hisi_ddrc_pmu_v1_events_group = { .name = "events", - .attrs = hisi_ddrc_pmu_events_attr, + .attrs = hisi_ddrc_pmu_v1_events_attr, }; static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); @@ -265,27 +267,27 @@ static struct attribute_group hisi_ddrc_pmu_identifier_group = { .attrs = hisi_ddrc_pmu_identifier_attrs, }; -static const struct attribute_group *hisi_ddrc_pmu_attr_groups[] = { - &hisi_ddrc_pmu_format_group, - &hisi_ddrc_pmu_events_group, +static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = { + &hisi_ddrc_pmu_v1_format_group, + &hisi_ddrc_pmu_v1_events_group, &hisi_ddrc_pmu_cpumask_attr_group, &hisi_ddrc_pmu_identifier_group, NULL, }; -static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = { +static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_get_event_idx, - .start_counters = hisi_ddrc_pmu_start_counters, - .stop_counters = hisi_ddrc_pmu_stop_counters, - .enable_counter = hisi_ddrc_pmu_enable_counter, - .disable_counter = hisi_ddrc_pmu_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, - .write_counter = hisi_ddrc_pmu_write_counter, - .read_counter = hisi_ddrc_pmu_read_counter, - .get_int_status = hisi_ddrc_pmu_get_int_status, - .clear_int_status = hisi_ddrc_pmu_clear_int_status, + .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx, + .start_counters = hisi_ddrc_pmu_v1_start_counters, + .stop_counters = hisi_ddrc_pmu_v1_stop_counters, + .enable_counter = hisi_ddrc_pmu_v1_enable_counter, + .disable_counter = hisi_ddrc_pmu_v1_disable_counter, + .enable_counter_int = hisi_ddrc_pmu_v1_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_v1_disable_counter_int, + .write_counter = hisi_ddrc_pmu_v1_write_counter, + .read_counter = hisi_ddrc_pmu_v1_read_counter, + .get_int_status = hisi_ddrc_pmu_v1_get_int_status, + .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status, }; static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, @@ -303,10 +305,10 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, ddrc_pmu->num_counters = DDRC_NR_COUNTERS; ddrc_pmu->counter_bits = 32; - ddrc_pmu->ops = &hisi_uncore_ddrc_ops; + ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; ddrc_pmu->dev = &pdev->dev; ddrc_pmu->on_cpu = -1; - ddrc_pmu->check_event = 7; + ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; return 0; } @@ -348,7 +350,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_ddrc_pmu_attr_groups, + .attr_groups = hisi_ddrc_pmu_v1_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index aea952de7b3f..854817aa4404 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -33,10 +33,11 @@ #define HHA_CNT0_LOWER 0x1F00 /* HHA has 16-counters */ -#define HHA_NR_COUNTERS 0x10 +#define HHA_V1_NR_COUNTERS 0x10 #define HHA_PERF_CTRL_EN 0x1 #define HHA_EVTYPE_NONE 0xff +#define HHA_V1_NR_EVENT 0x65 /* * Select the counter register offset using the counter index @@ -208,17 +209,17 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, return 0; } -static struct attribute *hisi_hha_pmu_format_attr[] = { +static struct attribute *hisi_hha_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL, }; -static const struct attribute_group hisi_hha_pmu_format_group = { +static const struct attribute_group hisi_hha_pmu_v1_format_group = { .name = "format", - .attrs = hisi_hha_pmu_format_attr, + .attrs = hisi_hha_pmu_v1_format_attr, }; -static struct attribute *hisi_hha_pmu_events_attr[] = { +static struct attribute *hisi_hha_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rx_ops_num, 0x00), HISI_PMU_EVENT_ATTR(rx_outer, 0x01), HISI_PMU_EVENT_ATTR(rx_sccl, 0x02), @@ -248,9 +249,9 @@ static struct attribute *hisi_hha_pmu_events_attr[] = { NULL, }; -static const struct attribute_group hisi_hha_pmu_events_group = { +static const struct attribute_group hisi_hha_pmu_v1_events_group = { .name = "events", - .attrs = hisi_hha_pmu_events_attr, + .attrs = hisi_hha_pmu_v1_events_attr, }; static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); @@ -276,9 +277,9 @@ static struct attribute_group hisi_hha_pmu_identifier_group = { .attrs = hisi_hha_pmu_identifier_attrs, }; -static const struct attribute_group *hisi_hha_pmu_attr_groups[] = { - &hisi_hha_pmu_format_group, - &hisi_hha_pmu_events_group, +static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = { + &hisi_hha_pmu_v1_format_group, + &hisi_hha_pmu_v1_events_group, &hisi_hha_pmu_cpumask_attr_group, &hisi_hha_pmu_identifier_group, NULL, @@ -312,12 +313,12 @@ static int hisi_hha_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - hha_pmu->num_counters = HHA_NR_COUNTERS; + hha_pmu->num_counters = HHA_V1_NR_COUNTERS; hha_pmu->counter_bits = 48; hha_pmu->ops = &hisi_uncore_hha_ops; hha_pmu->dev = &pdev->dev; hha_pmu->on_cpu = -1; - hha_pmu->check_event = 0x65; + hha_pmu->check_event = HHA_V1_NR_EVENT; return 0; } @@ -359,7 +360,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_hha_pmu_attr_groups, + .attr_groups = hisi_hha_pmu_v1_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 4c1d274e5da2..dd9409766689 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -37,6 +37,7 @@ #define L3C_PERF_CTRL_EN 0x10000 #define L3C_EVTYPE_NONE 0xff +#define L3C_V1_NR_EVENTS 0x59 /* * Select the counter register offset using the counter index @@ -211,17 +212,17 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return 0; } -static struct attribute *hisi_l3c_pmu_format_attr[] = { +static struct attribute *hisi_l3c_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL, }; -static const struct attribute_group hisi_l3c_pmu_format_group = { +static const struct attribute_group hisi_l3c_pmu_v1_format_group = { .name = "format", - .attrs = hisi_l3c_pmu_format_attr, + .attrs = hisi_l3c_pmu_v1_format_attr, }; -static struct attribute *hisi_l3c_pmu_events_attr[] = { +static struct attribute *hisi_l3c_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), HISI_PMU_EVENT_ATTR(rd_hit_cpipe, 0x02), @@ -238,9 +239,9 @@ static struct attribute *hisi_l3c_pmu_events_attr[] = { NULL, }; -static const struct attribute_group hisi_l3c_pmu_events_group = { +static const struct attribute_group hisi_l3c_pmu_v1_events_group = { .name = "events", - .attrs = hisi_l3c_pmu_events_attr, + .attrs = hisi_l3c_pmu_v1_events_attr, }; static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); @@ -266,9 +267,9 @@ static struct attribute_group hisi_l3c_pmu_identifier_group = { .attrs = hisi_l3c_pmu_identifier_attrs, }; -static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = { - &hisi_l3c_pmu_format_group, - &hisi_l3c_pmu_events_group, +static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { + &hisi_l3c_pmu_v1_format_group, + &hisi_l3c_pmu_v1_events_group, &hisi_l3c_pmu_cpumask_attr_group, &hisi_l3c_pmu_identifier_group, NULL, @@ -307,7 +308,7 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, l3c_pmu->ops = &hisi_uncore_l3c_ops; l3c_pmu->dev = &pdev->dev; l3c_pmu->on_cpu = -1; - l3c_pmu->check_event = 0x59; + l3c_pmu->check_event = L3C_V1_NR_EVENTS; return 0; } @@ -349,7 +350,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_l3c_pmu_attr_groups, + .attr_groups = hisi_l3c_pmu_v1_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; -- Gitee From 5d8ebc0ed02e0dabcafc0f4fd90bf6a2e243af07 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 30 Jul 2021 15:44:05 +0800 Subject: [PATCH 32/39] drivers/perf: hisi: Add new functions for L3C PMU On HiSilicon Hip09 platform, some new functions are enhanced on L3C PMU: * tt_req: it is the abbreviation of tracetag request and allows user to count only read/write/atomic operations. tt_req is 3-bit and details are listed in the hisi-pmu document. $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5 * tt_core: it is the abbreviation of tracetag core and allows user to filter by core/thread within the cluster, it is a 8-bit bitmap that each bit represents the corresponding core/thread in this L3C. $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0xf/ sleep 5 * datasrc_cfg: it is the abbreviation of data source configuration and allows user to check where the data comes from, such as: from local DDR, cross-die DDR or cross-socket DDR. Its is 5-bit and represents different data source in the SoC. $# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0xe/ sleep 5 * datasrc_skt: it is the abbreviation of data source from another socket and is used in the multi-chips, if user wants to check the cross-socket datat source, it shall be added in perf command. Only one bit is used to control this. $# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0x10,datasrc_skt=1/ sleep 5 Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-5-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon Reviewed-by: Shaokun Zhang Signed-off-by: Zheng Zengkai Signed-off-by: huwentao Conflicts: drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 259 +++++++++++++++++-- drivers/perf/hisilicon/hisi_uncore_pmu.c | 8 +- drivers/perf/hisilicon/hisi_uncore_pmu.h | 11 + 3 files changed, 258 insertions(+), 20 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index dd9409766689..0077df302e73 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -23,12 +23,17 @@ #define L3C_INT_MASK 0x0800 #define L3C_INT_STATUS 0x0808 #define L3C_INT_CLEAR 0x080c +#define L3C_CORE_CTRL 0x1b04 +#define L3C_TRACETAG_CTRL 0x1b20 +#define L3C_DATSRC_TYPE 0x1b48 +#define L3C_DATSRC_CTRL 0x1bf0 #define L3C_EVENT_CTRL 0x1c00 #define L3C_VERSION 0x1cf0 #define L3C_EVENT_TYPE0 0x1d00 /* - * Each counter is 48-bits and [48:63] are reserved - * which are Read-As-Zero and Writes-Ignored. + * If the HW version only supports a 48-bit counter, then + * bits [63:48] are reserved, which are Read-As-Zero and + * Writes-Ignored. */ #define L3C_CNTR0_LOWER 0x1e00 @@ -36,8 +41,186 @@ #define L3C_NR_COUNTERS 0x8 #define L3C_PERF_CTRL_EN 0x10000 +#define L3C_TRACETAG_EN BIT(31) +#define L3C_TRACETAG_REQ_SHIFT 7 +#define L3C_TRACETAG_MARK_EN BIT(0) +#define L3C_TRACETAG_REQ_EN (L3C_TRACETAG_MARK_EN | BIT(2)) +#define L3C_TRACETAG_CORE_EN (L3C_TRACETAG_MARK_EN | BIT(3)) +#define L3C_CORE_EN BIT(20) +#define L3C_COER_NONE 0x0 +#define L3C_DATSRC_MASK 0xFF +#define L3C_DATSRC_SKT_EN BIT(23) +#define L3C_DATSRC_NONE 0x0 #define L3C_EVTYPE_NONE 0xff #define L3C_V1_NR_EVENTS 0x59 +#define L3C_V2_NR_EVENTS 0xFF + +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8); +HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); +HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); + +static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 tt_req = hisi_get_tt_req(event); + + if (tt_req) { + u32 val; + + /* Set request-type for tracetag */ + val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val |= tt_req << L3C_TRACETAG_REQ_SHIFT; + val |= L3C_TRACETAG_REQ_EN; + writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + + /* Enable request-tracetag statistics */ + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val |= L3C_TRACETAG_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } +} + +static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 tt_req = hisi_get_tt_req(event); + + if (tt_req) { + u32 val; + + /* Clear request-type */ + val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT); + val &= ~L3C_TRACETAG_REQ_EN; + writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + + /* Disable request-tracetag statistics */ + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val &= ~L3C_TRACETAG_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } +} + +static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u32 reg, reg_idx, shift, val; + int idx = hwc->idx; + + /* + * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1). + * There are 2 datasource ctrl register for the 8 hardware counters. + * Datasrc is 8-bits and for the former 4 hardware counters, + * L3C_DATSRC_TYPE0 is chosen. For the latter 4 hardware counters, + * L3C_DATSRC_TYPE1 is chosen. + */ + reg = L3C_DATSRC_TYPE + (idx / 4) * 4; + reg_idx = idx % 4; + shift = 8 * reg_idx; + + val = readl(l3c_pmu->base + reg); + val &= ~(L3C_DATSRC_MASK << shift); + val |= ds_cfg << shift; + writel(val, l3c_pmu->base + reg); +} + +static void hisi_l3c_pmu_config_ds(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 ds_cfg = hisi_get_datasrc_cfg(event); + u32 ds_skt = hisi_get_datasrc_skt(event); + + if (ds_cfg) + hisi_l3c_pmu_write_ds(event, ds_cfg); + + if (ds_skt) { + u32 val; + + val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val |= L3C_DATSRC_SKT_EN; + writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + } +} + +static void hisi_l3c_pmu_clear_ds(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 ds_cfg = hisi_get_datasrc_cfg(event); + u32 ds_skt = hisi_get_datasrc_skt(event); + + if (ds_cfg) + hisi_l3c_pmu_write_ds(event, L3C_DATSRC_NONE); + + if (ds_skt) { + u32 val; + + val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val &= ~L3C_DATSRC_SKT_EN; + writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + } +} + +static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 core = hisi_get_tt_core(event); + + if (core) { + u32 val; + + /* Config and enable core information */ + writel(core, l3c_pmu->base + L3C_CORE_CTRL); + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val |= L3C_CORE_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + + /* Enable core-tracetag statistics */ + val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val |= L3C_TRACETAG_CORE_EN; + writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + } +} + +static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 core = hisi_get_tt_core(event); + + if (core) { + u32 val; + + /* Clear core information */ + writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL); + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val &= ~L3C_CORE_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + + /* Disable core-tracetag statistics */ + val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val &= ~L3C_TRACETAG_CORE_EN; + writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + } +} + +static void hisi_l3c_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_l3c_pmu_config_req_tracetag(event); + hisi_l3c_pmu_config_core_tracetag(event); + hisi_l3c_pmu_config_ds(event); + } +} + +static void hisi_l3c_pmu_disable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_l3c_pmu_clear_ds(event); + hisi_l3c_pmu_clear_core_tracetag(event); + hisi_l3c_pmu_clear_req_tracetag(event); + } +} /* * Select the counter register offset using the counter index @@ -50,14 +233,12 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc) { - /* Read 64-bits and the upper 16 bits are RAZ */ return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); } static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc, u64 val) { - /* Write 64-bits and the upper 16 bits are WI */ writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); } @@ -166,24 +347,15 @@ static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { { "HISI0213", }, - {}, + { "HISI0214", }, + {} }; MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { - unsigned long long id; struct resource *res; - acpi_status status; - - status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), - "_UID", NULL, &id); - if (ACPI_FAILURE(status)) - return -EINVAL; - - l3c_pmu->index_id = id; - /* * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. @@ -222,6 +394,20 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = { .attrs = hisi_l3c_pmu_v1_format_attr, }; +static struct attribute *hisi_l3c_pmu_v2_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"), + HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), + HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"), + HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"), + NULL +}; + +static const struct attribute_group hisi_l3c_pmu_v2_format_group = { + .name = "format", + .attrs = hisi_l3c_pmu_v2_format_attr, +}; + static struct attribute *hisi_l3c_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), @@ -244,6 +430,19 @@ static const struct attribute_group hisi_l3c_pmu_v1_events_group = { .attrs = hisi_l3c_pmu_v1_events_attr, }; +static struct attribute *hisi_l3c_pmu_v2_events_attr[] = { + HISI_PMU_EVENT_ATTR(l3c_hit, 0x48), + HISI_PMU_EVENT_ATTR(cycles, 0x7f), + HISI_PMU_EVENT_ATTR(l3c_ref, 0xb8), + HISI_PMU_EVENT_ATTR(dat_access, 0xb9), + NULL +}; + +static const struct attribute_group hisi_l3c_pmu_v2_events_group = { + .name = "events", + .attrs = hisi_l3c_pmu_v2_events_attr, +}; + static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = { @@ -275,6 +474,14 @@ static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { NULL, }; +static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { + &hisi_l3c_pmu_v2_format_group, + &hisi_l3c_pmu_v2_events_group, + &hisi_l3c_pmu_cpumask_attr_group, + &hisi_l3c_pmu_identifier_group, + NULL +}; + static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -288,6 +495,8 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .read_counter = hisi_l3c_pmu_read_counter, .get_int_status = hisi_l3c_pmu_get_int_status, .clear_int_status = hisi_l3c_pmu_clear_int_status, + .enable_filter = hisi_l3c_pmu_enable_filter, + .disable_filter = hisi_l3c_pmu_disable_filter, }; static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, @@ -303,12 +512,20 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; + if (l3c_pmu->identifier >= HISI_PMU_V2) { + l3c_pmu->counter_bits = 64; + l3c_pmu->check_event = L3C_V2_NR_EVENTS; + l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups; + } else { + l3c_pmu->counter_bits = 48; + l3c_pmu->check_event = L3C_V1_NR_EVENTS; + l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups; + } + l3c_pmu->num_counters = L3C_NR_COUNTERS; - l3c_pmu->counter_bits = 48; l3c_pmu->ops = &hisi_uncore_l3c_ops; l3c_pmu->dev = &pdev->dev; l3c_pmu->on_cpu = -1; - l3c_pmu->check_event = L3C_V1_NR_EVENTS; return 0; } @@ -336,8 +553,12 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) return ret; } + /* + * CCL_ID is used to identify the L3C in the same SCCL which was + * used _UID by mistake. + */ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->index_id); + l3c_pmu->sccl_id, l3c_pmu->ccl_id); l3c_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, @@ -350,7 +571,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_l3c_pmu_v1_attr_groups, + .attr_groups = l3c_pmu->pmu_events.attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 29e2f94a190d..c7a62a871183 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -21,7 +21,7 @@ #include "hisi_uncore_pmu.h" #define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) -#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1) +#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0)) /* * PMU format attributes @@ -245,6 +245,9 @@ static void hisi_uncore_pmu_enable_event(struct perf_event *event) hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx, HISI_GET_EVENTID(event)); + if (hisi_pmu->ops->enable_filter) + hisi_pmu->ops->enable_filter(event); + hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc); hisi_pmu->ops->enable_counter(hisi_pmu, hwc); } @@ -259,6 +262,9 @@ static void hisi_uncore_pmu_disable_event(struct perf_event *event) hisi_pmu->ops->disable_counter(hisi_pmu, hwc); hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc); + + if (hisi_pmu->ops->disable_filter) + hisi_pmu->ops->disable_filter(event); } void hisi_uncore_pmu_set_event_period(struct perf_event *event) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index cc638f839fc8..596b7914a38f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -11,6 +11,7 @@ #ifndef __HISI_UNCORE_PMU_H__ #define __HISI_UNCORE_PMU_H__ +#include #include #include #include @@ -21,6 +22,7 @@ #undef pr_fmt #define pr_fmt(fmt) "hisi_pmu: " fmt +#define HISI_PMU_V2 0x30 #define HISI_MAX_COUNTERS 0x10 #define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu)) @@ -34,6 +36,12 @@ #define HISI_PMU_EVENT_ATTR(_name, _config) \ HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config) +#define HISI_PMU_EVENT_ATTR_EXTRACTOR(name, config, hi, lo) \ + static inline u32 hisi_get_##name(struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \ + } + struct hisi_pmu; struct hisi_uncore_ops { @@ -49,11 +57,14 @@ struct hisi_uncore_ops { void (*stop_counters)(struct hisi_pmu *); u32 (*get_int_status)(struct hisi_pmu *hisi_pmu); void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx); + void (*enable_filter)(struct perf_event *event); + void (*disable_filter)(struct perf_event *event); }; struct hisi_pmu_hwevents { struct perf_event *hw_events[HISI_MAX_COUNTERS]; DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS); + const struct attribute_group **attr_groups; }; /* Generic pmu struct for different pmu types */ -- Gitee From 5b9ea0fadb660d91f490764e490f560d648815a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 30 Jul 2021 15:44:10 +0800 Subject: [PATCH 33/39] genirq: Export affinity setter for modules Perf modules abuse irq_set_affinity_hint() to set the affinity of system PMU interrupts just because irq_set_affinity() was not exported. The fact that irq_set_affinity_hint() actually sets the affinity is a non-documented side effect and the name is clearly saying it's a hint. To clean this up, export the real affinity setter. Signed-off-by: Thomas Gleixner Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210518093117.968251441@linutronix.de Reviewed-by: Shaokun Zhang Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- include/linux/interrupt.h | 35 ++--------------------------------- kernel/irq/manage.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index cf34dbaf2c11..569f87d699cc 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -301,39 +301,8 @@ struct irq_affinity_desc { extern cpumask_var_t irq_default_affinity; -/* Internal implementation. Use the helpers below */ -extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, - bool force); - -/** - * irq_set_affinity - Set the irq affinity of a given irq - * @irq: Interrupt to set affinity - * @cpumask: cpumask - * - * Fails if cpumask does not contain an online CPU - */ -static inline int -irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) -{ - return __irq_set_affinity(irq, cpumask, false); -} - -/** - * irq_force_affinity - Force the irq affinity of a given irq - * @irq: Interrupt to set affinity - * @cpumask: cpumask - * - * Same as irq_set_affinity, but without checking the mask against - * online cpus. - * - * Solely for low level cpu hotplug code, where we need to make per - * cpu interrupts affine before the cpu becomes online. - */ -static inline int -irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) -{ - return __irq_set_affinity(irq, cpumask, true); -} +extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); +extern int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 918fe0593386..39cee1fea51e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -332,7 +332,8 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, return ret; } -int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) +static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, + bool force) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -347,6 +348,36 @@ int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) return ret; } +/** + * irq_set_affinity - Set the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Fails if cpumask does not contain an online CPU + */ +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, false); +} +EXPORT_SYMBOL_GPL(irq_set_affinity); + +/** + * irq_force_affinity - Force the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Same as irq_set_affinity, but without checking the mask against + * online cpus. + * + * Solely for low level cpu hotplug code, where we need to make per + * cpu interrupts affine before the cpu becomes online. + */ +int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, true); +} +EXPORT_SYMBOL_GPL(irq_force_affinity); + int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) { unsigned long flags; -- Gitee From e7c6d3bb3d14e0bd18ae2586c83a23b1f86fe1d2 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 23 May 2023 14:44:29 +0800 Subject: [PATCH 34/39] drivers/perf: hisi: add NULL check for name When allocations fails that can be NULL now. If the name provided is NULL, then the initialization process of the PMU type and dev will be skipped in function perf_pmu_register(). Consequently, the PMU will not be able to register into the kernel. Moreover, in the case of unregister the PMU, the function device_del() will need to handle NULL pointers, which potentially can cause issues. So move this allocation above the cpuhp_state_add_instance() and directly return if it does fail. Signed-off-by: Junhao He Signed-off-by: huwentao Conflicts: drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c drivers/perf/hisilicon/hisi_uncore_hha_pmu.c drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 16 ++++++++++------ drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 7 +++++-- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 11 +++++------ 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index cb980cf673ef..dbc8f13e985d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -329,12 +329,6 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, - &ddrc_pmu->node); - if (ret) { - dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret); - return ret; - } name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); @@ -354,6 +348,16 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, + &ddrc_pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret); + return ret; + } + ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); if (ret) { dev_err(ddrc_pmu->dev, "DDRC PMU register failed!\n"); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 854817aa4404..9ae049270fe5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -339,6 +339,11 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) if (ret) return ret; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", + hha_pmu->sccl_id, hha_pmu->index_id); + if (!name) + return -ENOMEM; + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node); if (ret) { @@ -346,8 +351,6 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) return ret; } - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", - hha_pmu->sccl_id, hha_pmu->index_id); hha_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 0077df302e73..66227265de70 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -546,6 +546,11 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) if (ret) return ret; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", + l3c_pmu->sccl_id, l3c_pmu->ccl_id); + if (!name) + return -ENOMEM; + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node); if (ret) { @@ -553,12 +558,6 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) return ret; } - /* - * CCL_ID is used to identify the L3C in the same SCCL which was - * used _UID by mistake. - */ - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->ccl_id); l3c_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, -- Gitee From 16cc6ef57308253d74a61d8ce1210b6017ee1c4a Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 30 Jul 2021 15:44:07 +0800 Subject: [PATCH 35/39] drivers/perf: hisi: Update DDRC PMU for programmable counter DDRC PMU's events are useful for performance profiling, but the events are limited and counter is fixed. On HiSilicon Hip09 platform, PMU counters are the programmable and more events are supported. Let's add the DDRC PMU v2 driver. Bandwidth events are exposed directly in driver and some more events will listed in JSON file later. Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-7-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon Reviewed-by: Shaokun Zhang Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 207 ++++++++++++++++-- drivers/perf/hisilicon/hisi_uncore_pmu.h | 2 + 2 files changed, 196 insertions(+), 13 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index dbc8f13e985d..2dd726c16461 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -18,7 +18,7 @@ #include "hisi_uncore_pmu.h" -/* DDRC register definition */ +/* DDRC register definition in v1 */ #define DDRC_PERF_CTRL 0x010 #define DDRC_FLUX_WR 0x380 #define DDRC_FLUX_RD 0x384 @@ -34,13 +34,24 @@ #define DDRC_INT_CLEAR 0x6d0 #define DDRC_VERSION 0x710 +/* DDRC register definition in v2 */ +#define DDRC_V2_INT_MASK 0x528 +#define DDRC_V2_INT_STATUS 0x52c +#define DDRC_V2_INT_CLEAR 0x530 +#define DDRC_V2_EVENT_CNT 0xe00 +#define DDRC_V2_EVENT_CTRL 0xe70 +#define DDRC_V2_EVENT_TYPE 0xe74 +#define DDRC_V2_PERF_CTRL 0xeA0 + /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 #define DDRC_V1_PERF_CTRL_EN 0x2 +#define DDRC_V2_PERF_CTRL_EN 0x1 #define DDRC_V1_NR_EVENTS 0x7 +#define DDRC_V2_NR_EVENTS 0x90 /* - * For DDRC PMU, there are eight-events and every event has been mapped + * For PMU v1, there are eight-events and every event has been mapped * to fixed-purpose counters which register offset is not consistent. * Therefore there is no write event type and we assume that event * code (0 to 7) is equal to counter index in PMU driver. @@ -62,6 +73,11 @@ static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx) return ddrc_reg_off[cntr_idx]; } +static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx) +{ + return DDRC_V2_EVENT_CNT + cntr_idx * 8; +} + static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { @@ -76,13 +92,34 @@ static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu, ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); } +static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + return readq(ddrc_pmu->base + + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); +} + +static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, + ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); +} + /* - * For DDRC PMU, event has been mapped to fixed-purpose counter by hardware, - * so there is no need to write event type. + * For DDRC PMU v1, event has been mapped to fixed-purpose counter by hardware, + * so there is no need to write event type, while it is programmable counter in + * PMU v2. */ static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, u32 type) { + u32 offset; + + if (hha_pmu->identifier >= HISI_PMU_V2) { + offset = DDRC_V2_EVENT_TYPE + 4 * idx; + writel(type, hha_pmu->base + offset); + } } static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu) @@ -143,6 +180,49 @@ static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) return idx; } +static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event) +{ + return hisi_uncore_pmu_get_event_idx(event); +} + +static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val |= DDRC_V2_PERF_CTRL_EN; + writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); +} + +static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val &= ~DDRC_V2_PERF_CTRL_EN; + writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); +} + +static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + val |= 1 << hwc->idx; + writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); +} + +static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + val &= ~(1 << hwc->idx); + writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); +} + static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { @@ -150,7 +230,7 @@ static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, /* Write 0 to enable interrupt */ val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val &= ~(1 << GET_DDRC_EVENTID(hwc)); + val &= ~(1 << hwc->idx); writel(val, ddrc_pmu->base + DDRC_INT_MASK); } @@ -161,10 +241,30 @@ static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu, /* Write 1 to mask interrupt */ val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val |= (1 << GET_DDRC_EVENTID(hwc)); + val |= 1 << hwc->idx; writel(val, ddrc_pmu->base + DDRC_INT_MASK); } +static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); + val &= ~(1 << hwc->idx); + writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); +} + +static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); + val |= 1 << hwc->idx; + writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); +} + static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu) { return readl(ddrc_pmu->base + DDRC_INT_STATUS); @@ -176,9 +276,21 @@ static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu, writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); } +static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu) +{ + return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS); +} + +static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu, + int idx) +{ + writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR); +} + static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { { "HISI0233", }, - {}, + { "HISI0234", }, + {} }; MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); @@ -213,6 +325,13 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, } ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); + if (ddrc_pmu->identifier >= HISI_PMU_V2) { + if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", + &ddrc_pmu->sub_id)) { + dev_err(&pdev->dev, "Can not read sub-id!\n"); + return -EINVAL; + } + } return 0; } @@ -227,6 +346,16 @@ static const struct attribute_group hisi_ddrc_pmu_v1_format_group = { .attrs = hisi_ddrc_pmu_v1_format_attr, }; +static struct attribute *hisi_ddrc_pmu_v2_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL +}; + +static const struct attribute_group hisi_ddrc_pmu_v2_format_group = { + .name = "format", + .attrs = hisi_ddrc_pmu_v2_format_attr, +}; + static struct attribute *hisi_ddrc_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(flux_wr, 0x00), HISI_PMU_EVENT_ATTR(flux_rd, 0x01), @@ -244,6 +373,18 @@ static const struct attribute_group hisi_ddrc_pmu_v1_events_group = { .attrs = hisi_ddrc_pmu_v1_events_attr, }; +static struct attribute *hisi_ddrc_pmu_v2_events_attr[] = { + HISI_PMU_EVENT_ATTR(cycles, 0x00), + HISI_PMU_EVENT_ATTR(flux_wr, 0x83), + HISI_PMU_EVENT_ATTR(flux_rd, 0x84), + NULL +}; + +static const struct attribute_group hisi_ddrc_pmu_v2_events_group = { + .name = "events", + .attrs = hisi_ddrc_pmu_v2_events_attr, +}; + static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = { @@ -275,6 +416,14 @@ static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = { NULL, }; +static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { + &hisi_ddrc_pmu_v2_format_group, + &hisi_ddrc_pmu_v2_events_group, + &hisi_ddrc_pmu_cpumask_attr_group, + &hisi_ddrc_pmu_identifier_group, + NULL +}; + static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { .write_evtype = hisi_ddrc_pmu_write_evtype, .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx, @@ -290,6 +439,21 @@ static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status, }; +static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { + .write_evtype = hisi_ddrc_pmu_write_evtype, + .get_event_idx = hisi_ddrc_pmu_v2_get_event_idx, + .start_counters = hisi_ddrc_pmu_v2_start_counters, + .stop_counters = hisi_ddrc_pmu_v2_stop_counters, + .enable_counter = hisi_ddrc_pmu_v2_enable_counter, + .disable_counter = hisi_ddrc_pmu_v2_disable_counter, + .enable_counter_int = hisi_ddrc_pmu_v2_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_v2_disable_counter_int, + .write_counter = hisi_ddrc_pmu_v2_write_counter, + .read_counter = hisi_ddrc_pmu_v2_read_counter, + .get_int_status = hisi_ddrc_pmu_v2_get_int_status, + .clear_int_status = hisi_ddrc_pmu_v2_clear_int_status, +}; + static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { @@ -303,12 +467,21 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; + if (ddrc_pmu->identifier >= HISI_PMU_V2) { + ddrc_pmu->counter_bits = 48; + ddrc_pmu->check_event = DDRC_V2_NR_EVENTS; + ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups; + ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops; + } else { + ddrc_pmu->counter_bits = 32; + ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; + ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups; + ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; + } + ddrc_pmu->num_counters = DDRC_NR_COUNTERS; - ddrc_pmu->counter_bits = 32; - ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; ddrc_pmu->dev = &pdev->dev; ddrc_pmu->on_cpu = -1; - ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; return 0; } @@ -330,8 +503,16 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_ddrc%u", - ddrc_pmu->sccl_id, ddrc_pmu->index_id); + if (ddrc_pmu->identifier >= HISI_PMU_V2) + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "hisi_sccl%u_ddrc%u_%u", + ddrc_pmu->sccl_id, ddrc_pmu->index_id, + ddrc_pmu->sub_id); + else + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, + ddrc_pmu->index_id); + ddrc_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, @@ -344,7 +525,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_ddrc_pmu_v1_attr_groups, + .attr_groups = ddrc_pmu->pmu_events.attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 596b7914a38f..78c00d2d0ee6 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -84,6 +84,8 @@ struct hisi_pmu { void __iomem *base; /* the ID of the PMU modules */ u32 index_id; + /* For DDRC PMU v2: each DDRC has more than one DMC */ + u32 sub_id; int num_counters; int counter_bits; /* check event code range */ -- Gitee From 9671ebc660fb08d48436117c43e8eac9a12befd1 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 14 Aug 2023 20:40:12 +0800 Subject: [PATCH 36/39] perf/smmuv3: Enable HiSilicon Erratum 162001900 quirk for HIP08/09 Some HiSilicon SMMU PMCG suffers the erratum 162001900 that the PMU disable control sometimes fail to disable the counters. This will lead to error or inaccurate data since before we enable the counters the counter's still counting for the event used in last perf session. This patch tries to fix this by hardening the global disable process. Before disable the PMU, writing an invalid event type (0xffff) to focibly stop the counters. Correspondingly restore each events on pmu::pmu_enable(). Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20230814124012.58013-1-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: CaiJian Signed-off-by: huwentao Conflicts: Documentation/arm64/silicon-errata.rst --- Documentation/arm64/silicon-errata.rst | 3 ++ drivers/acpi/arm64/iort.c | 5 ++- drivers/perf/arm_smmuv3_pmu.c | 46 +++++++++++++++++++++++++- include/linux/acpi_iort.h | 1 + 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 13f595e6fe18..ccd187c55d1f 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -133,6 +133,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| Hisilicon | Hip08 SMMU PMCG | #162001900 | N/A | +| | Hip09 SMMU PMCG | | | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index bc95a5eebd13..8e60476c5c42 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1385,7 +1385,10 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res, static struct acpi_platform_list pmcg_plat_info[] __initdata = { /* HiSilicon Hip08 Platform */ {"HISI ", "HIP08 ", 0, ACPI_SIG_IORT, greater_than_or_equal, - "Erratum #162001800", IORT_SMMU_V3_PMCG_HISI_HIP08}, + "Erratum #162001800, Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP08}, + /* HiSilicon Hip09 Platform */ + {"HISI ", "HIP09 ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, { } }; diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 9cdd89b29334..6c570da7428d 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -95,6 +95,7 @@ #define SMMU_PMCG_PA_SHIFT 12 #define SMMU_PMCG_EVCNTR_RDONLY BIT(0) +#define SMMU_PMCG_HARDEN_DISABLE BIT(1) static int cpuhp_state_num; @@ -138,6 +139,20 @@ static inline void smmu_pmu_enable(struct pmu *pmu) writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR); } +static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu, + struct perf_event *event, int idx); + +static inline void smmu_pmu_enable_quirk_hip08_09(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + unsigned int idx; + + for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters) + smmu_pmu_apply_event_filter(smmu_pmu, smmu_pmu->events[idx], idx); + + smmu_pmu_enable(pmu); +} + static inline void smmu_pmu_disable(struct pmu *pmu) { struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); @@ -146,6 +161,22 @@ static inline void smmu_pmu_disable(struct pmu *pmu) writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL); } +static inline void smmu_pmu_disable_quirk_hip08_09(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + unsigned int idx; + + /* + * The global disable of PMU sometimes fail to stop the counting. + * Harden this by writing an invalid event type to each used counter + * to forcibly stop counting. + */ + for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters) + writel(0xffff, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx)); + + smmu_pmu_disable(pmu); +} + static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu, u32 idx, u64 value) { @@ -717,7 +748,10 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu) switch (model) { case IORT_SMMU_V3_PMCG_HISI_HIP08: /* HiSilicon Erratum 162001800 */ - smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY; + smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY | SMMU_PMCG_HARDEN_DISABLE; + break; + case IORT_SMMU_V3_PMCG_HISI_HIP09: + smmu_pmu->options |= SMMU_PMCG_HARDEN_DISABLE; break; } @@ -806,6 +840,16 @@ static int smmu_pmu_probe(struct platform_device *pdev) smmu_pmu_get_acpi_options(smmu_pmu); + /* + * For platforms suffer this quirk, the PMU disable sometimes fails to + * stop the counters. This will leads to inaccurate or error counting. + * Forcibly disable the counters with these quirk handler. + */ + if (smmu_pmu->options & SMMU_PMCG_HARDEN_DISABLE) { + smmu_pmu->pmu.pmu_enable = smmu_pmu_enable_quirk_hip08_09; + smmu_pmu->pmu.pmu_disable = smmu_pmu_disable_quirk_hip08_09; + } + /* Pick one CPU to be the preferred one to use */ smmu_pmu->on_cpu = raw_smp_processor_id(); WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 8e7e2ec37f1b..64f700254ca0 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -21,6 +21,7 @@ */ #define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */ #define IORT_SMMU_V3_PMCG_HISI_HIP08 0x00000001 /* HiSilicon HIP08 PMCG */ +#define IORT_SMMU_V3_PMCG_HISI_HIP09 0x00000002 /* HiSilicon HIP09 PMCG */ int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); -- Gitee From 89d5c54a6d5280f7e7bb4145364226a89c225732 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 14 Aug 2023 21:16:42 +0800 Subject: [PATCH 37/39] perf/smmuv3: Add MODULE_ALIAS for module auto loading On my ACPI based arm64 server, if the SMMUv3 PMU is configured as module it won't be loaded automatically after booting even if the device has already been scanned and added. It's because the module lacks a platform alias, the uevent mechanism and userspace tools like udevd make use of this to find the target driver module of the device. This patch adds the missing platform alias of the module, then module will be loaded automatically if device exists. Before this patch: [root@localhost tmp]# modinfo arm_smmuv3_pmu | grep alias alias: of:N*T*Carm,smmu-v3-pmcgC* alias: of:N*T*Carm,smmu-v3-pmcg After this patch: [root@localhost tmp]# modinfo arm_smmuv3_pmu | grep alias alias: platform:arm-smmu-v3-pmcg alias: of:N*T*Carm,smmu-v3-pmcgC* alias: of:N*T*Carm,smmu-v3-pmcg Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20230814131642.65263-1-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: CaiJian Signed-off-by: huwentao --- drivers/perf/arm_smmuv3_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 6c570da7428d..bba35145d143 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -933,6 +933,7 @@ static void __exit arm_smmu_pmu_exit(void) module_exit(arm_smmu_pmu_exit); +MODULE_ALIAS("platform:arm-smmu-v3-pmcg"); MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension"); MODULE_AUTHOR("Neil Leeder "); MODULE_AUTHOR("Shameer Kolothum "); -- Gitee From 298cd58b1cea1f2db2896a6aef110d197fdacfb8 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 31 Jul 2024 17:26:58 +0800 Subject: [PATCH 38/39] ACPI/IORT: Add PMCG platform information for HiSilicon HIP10/11 HiSilicon HIP10/11 platforms using the same SMMU PMCG with HIP09 and thus suffers the same erratum. List them in the PMCG platform information list without introducing a new SMMU PMCG Model. Update the silicon-errata.rst as well. Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240731092658.11012-1-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangshuowen96 Signed-off-by: huwentao --- Documentation/arm64/silicon-errata.rst | 4 ++-- drivers/acpi/arm64/iort.c | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index ccd187c55d1f..9133f4de8c29 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -133,8 +133,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | +----------------+-----------------+-----------------+-----------------------------+ -| Hisilicon | Hip08 SMMU PMCG | #162001900 | N/A | -| | Hip09 SMMU PMCG | | | +| Hisilicon | Hip{08,09,10,10C| #162001900 | N/A | +| | ,11} SMMU PMCG | | | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 8e60476c5c42..24c50a04217b 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1389,6 +1389,13 @@ static struct acpi_platform_list pmcg_plat_info[] __initdata = { /* HiSilicon Hip09 Platform */ {"HISI ", "HIP09 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + /* HiSilicon Hip10/11 Platform uses the same SMMU IP with Hip09 */ + {"HISI ", "HIP10 ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + {"HISI ", "HIP10C ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + {"HISI ", "HIP11 ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, { } }; -- Gitee From a94689d5a060ae8b54419d371757d2f3c2aef7e3 Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Thu, 5 Dec 2024 09:33:31 +0800 Subject: [PATCH 39/39] ACPI/IORT: Add PMCG platform information for HiSilicon HIP09A HiSilicon HIP09A platforms using the same SMMU PMCG with HIP09 and thus suffers the same erratum. List them in the PMCG platform information list without introducing a new SMMU PMCG Model. Update the silicon-errata.rst as well. Reviewed-by: Yicong Yang Acked-by: Hanjun Guo Signed-off-by: Qinxin Xia Link: https://lore.kernel.org/r/20241205013331.1484017-1-xiaqinxin@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: Qizhi Zhang Signed-off-by: huwentao --- Documentation/arm64/silicon-errata.rst | 5 +++-- drivers/acpi/arm64/iort.c | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 9133f4de8c29..8a9a05388931 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -133,8 +133,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | +----------------+-----------------+-----------------+-----------------------------+ -| Hisilicon | Hip{08,09,10,10C| #162001900 | N/A | -| | ,11} SMMU PMCG | | | +| Hisilicon | Hip{08,09,09A,10| #162001900 | N/A | +| | ,10C,11} | | | +| | SMMU PMCG | | | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 24c50a04217b..425b2821aee0 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1389,6 +1389,8 @@ static struct acpi_platform_list pmcg_plat_info[] __initdata = { /* HiSilicon Hip09 Platform */ {"HISI ", "HIP09 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + {"HISI ", "HIP09A ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, /* HiSilicon Hip10/11 Platform uses the same SMMU IP with Hip09 */ {"HISI ", "HIP10 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, -- Gitee