From a61bf6dd77b6030e05a0caf5cd18ec98f5499d9a Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 16 Nov 2020 19:08:47 +1300 Subject: [PATCH 1/6] dma-mapping: add benchmark support for streaming DMA APIs mainline inclusion commit id 65789daa8087e125927230ccb7e1eab13999b0cf from mainline-v5.11-rc1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I60AX5 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=65789daa8087e125927230ccb7e1eab13999b0cf ---------------------------------------------------------------------- Nowadays, there are increasing requirements to benchmark the performance of dma_map and dma_unmap particually while the device is attached to an IOMMU. This patch enables the support. Users can run specified number of threads to do dma_map_page and dma_unmap_page on a specific NUMA node with the specified duration. Then dma_map_benchmark will calculate the average latency for map and unmap. A difficulity for this benchmark is that dma_map/unmap APIs must run on a particular device. Each device might have different backend of IOMMU or non-IOMMU. So we use the driver_override to bind dma_map_benchmark to a particual device by: For platform devices: echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override echo xxx > /sys/bus/platform/drivers/xxx/unbind echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind For PCI devices: echo dma_map_benchmark > /sys/bus/pci/devices/0000:00:01.0/driver_override echo 0000:00:01.0 > /sys/bus/pci/drivers/xxx/unbind echo 0000:00:01.0 > /sys/bus/pci/drivers/dma_map_benchmark/bind Cc: Will Deacon Cc: Shuah Khan Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: Robin Murphy Signed-off-by: Barry Song [hch: folded in two fixes from Colin Ian King ] Signed-off-by: Christoph Hellwig Signed-off-by: Jie Liu --- kernel/dma/Kconfig | 9 + kernel/dma/Makefile | 1 + kernel/dma/map_benchmark.c | 361 +++++++++++++++++++++++++++++++++++++ 3 files changed, 371 insertions(+) create mode 100644 kernel/dma/map_benchmark.c diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c99de4a21458..07f30651b83d 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -225,3 +225,12 @@ config DMA_API_DEBUG_SG is technically out-of-spec. If unsure, say N. + +config DMA_MAP_BENCHMARK + bool "Enable benchmarking of streaming DMA mapping" + depends on DEBUG_FS + help + Provides /sys/kernel/debug/dma_map_benchmark that helps with testing + performance of dma_(un)map_page. + + See tools/testing/selftests/dma/dma_map_benchmark.c diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index dc755ab68aab..7aa6b26b1348 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o obj-$(CONFIG_DMA_REMAP) += remap.o +obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c new file mode 100644 index 000000000000..b1496e744c68 --- /dev/null +++ b/kernel/dma/map_benchmark.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Hisilicon Limited. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u64 expansion[10]; /* For future use */ +}; + +struct map_benchmark_data { + struct map_benchmark bparam; + struct device *dev; + struct dentry *debugfs; + enum dma_data_direction dir; + atomic64_t sum_map_100ns; + atomic64_t sum_unmap_100ns; + atomic64_t sum_sq_map; + atomic64_t sum_sq_unmap; + atomic64_t loops; +}; + +static int map_benchmark_thread(void *data) +{ + void *buf; + dma_addr_t dma_addr; + struct map_benchmark_data *map = data; + int ret = 0; + + buf = (void *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + + while (!kthread_should_stop()) { + u64 map_100ns, unmap_100ns, map_sq, unmap_sq; + ktime_t map_stime, map_etime, unmap_stime, unmap_etime; + ktime_t map_delta, unmap_delta; + + /* + * for a non-coherent device, if we don't stain them in the + * cache, this will give an underestimate of the real-world + * overhead of BIDIRECTIONAL or TO_DEVICE mappings; + * 66 means evertything goes well! 66 is lucky. + */ + if (map->dir != DMA_FROM_DEVICE) + memset(buf, 0x66, PAGE_SIZE); + + map_stime = ktime_get(); + dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir); + if (unlikely(dma_mapping_error(map->dev, dma_addr))) { + pr_err("dma_map_single failed on %s\n", + dev_name(map->dev)); + ret = -ENOMEM; + goto out; + } + map_etime = ktime_get(); + map_delta = ktime_sub(map_etime, map_stime); + + unmap_stime = ktime_get(); + dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir); + unmap_etime = ktime_get(); + unmap_delta = ktime_sub(unmap_etime, unmap_stime); + + /* calculate sum and sum of squares */ + + map_100ns = div64_ul(map_delta, 100); + unmap_100ns = div64_ul(unmap_delta, 100); + map_sq = map_100ns * map_100ns; + unmap_sq = unmap_100ns * unmap_100ns; + + atomic64_add(map_100ns, &map->sum_map_100ns); + atomic64_add(unmap_100ns, &map->sum_unmap_100ns); + atomic64_add(map_sq, &map->sum_sq_map); + atomic64_add(unmap_sq, &map->sum_sq_unmap); + atomic64_inc(&map->loops); + } + +out: + free_page((unsigned long)buf); + return ret; +} + +static int do_map_benchmark(struct map_benchmark_data *map) +{ + struct task_struct **tsk; + int threads = map->bparam.threads; + int node = map->bparam.node; + const cpumask_t *cpu_mask = cpumask_of_node(node); + u64 loops; + int ret = 0; + int i; + + tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL); + if (!tsk) + return -ENOMEM; + + get_device(map->dev); + + for (i = 0; i < threads; i++) { + tsk[i] = kthread_create_on_node(map_benchmark_thread, map, + map->bparam.node, "dma-map-benchmark/%d", i); + if (IS_ERR(tsk[i])) { + pr_err("create dma_map thread failed\n"); + ret = PTR_ERR(tsk[i]); + goto out; + } + + if (node != NUMA_NO_NODE) + kthread_bind_mask(tsk[i], cpu_mask); + } + + /* clear the old value in the previous benchmark */ + atomic64_set(&map->sum_map_100ns, 0); + atomic64_set(&map->sum_unmap_100ns, 0); + atomic64_set(&map->sum_sq_map, 0); + atomic64_set(&map->sum_sq_unmap, 0); + atomic64_set(&map->loops, 0); + + for (i = 0; i < threads; i++) + wake_up_process(tsk[i]); + + msleep_interruptible(map->bparam.seconds * 1000); + + /* wait for the completion of benchmark threads */ + for (i = 0; i < threads; i++) { + ret = kthread_stop(tsk[i]); + if (ret) + goto out; + } + + loops = atomic64_read(&map->loops); + if (likely(loops > 0)) { + u64 map_variance, unmap_variance; + u64 sum_map = atomic64_read(&map->sum_map_100ns); + u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns); + u64 sum_sq_map = atomic64_read(&map->sum_sq_map); + u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap); + + /* average latency */ + map->bparam.avg_map_100ns = div64_u64(sum_map, loops); + map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops); + + /* standard deviation of latency */ + map_variance = div64_u64(sum_sq_map, loops) - + map->bparam.avg_map_100ns * + map->bparam.avg_map_100ns; + unmap_variance = div64_u64(sum_sq_unmap, loops) - + map->bparam.avg_unmap_100ns * + map->bparam.avg_unmap_100ns; + map->bparam.map_stddev = int_sqrt64(map_variance); + map->bparam.unmap_stddev = int_sqrt64(unmap_variance); + } + +out: + put_device(map->dev); + kfree(tsk); + return ret; +} + +static long map_benchmark_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct map_benchmark_data *map = file->private_data; + void __user *argp = (void __user *)arg; + u64 old_dma_mask; + + int ret; + + if (copy_from_user(&map->bparam, argp, sizeof(map->bparam))) + return -EFAULT; + + switch (cmd) { + case DMA_MAP_BENCHMARK: + if (map->bparam.threads == 0 || + map->bparam.threads > DMA_MAP_MAX_THREADS) { + pr_err("invalid thread number\n"); + return -EINVAL; + } + + if (map->bparam.seconds == 0 || + map->bparam.seconds > DMA_MAP_MAX_SECONDS) { + pr_err("invalid duration seconds\n"); + return -EINVAL; + } + + if (map->bparam.node != NUMA_NO_NODE && + !node_possible(map->bparam.node)) { + pr_err("invalid numa node\n"); + return -EINVAL; + } + + switch (map->bparam.dma_dir) { + case DMA_MAP_BIDIRECTIONAL: + map->dir = DMA_BIDIRECTIONAL; + break; + case DMA_MAP_FROM_DEVICE: + map->dir = DMA_FROM_DEVICE; + break; + case DMA_MAP_TO_DEVICE: + map->dir = DMA_TO_DEVICE; + break; + default: + pr_err("invalid DMA direction\n"); + return -EINVAL; + } + + old_dma_mask = dma_get_mask(map->dev); + + ret = dma_set_mask(map->dev, + DMA_BIT_MASK(map->bparam.dma_bits)); + if (ret) { + pr_err("failed to set dma_mask on device %s\n", + dev_name(map->dev)); + return -EINVAL; + } + + ret = do_map_benchmark(map); + + /* + * restore the original dma_mask as many devices' dma_mask are + * set by architectures, acpi, busses. When we bind them back + * to their original drivers, those drivers shouldn't see + * dma_mask changed by benchmark + */ + dma_set_mask(map->dev, old_dma_mask); + break; + default: + return -EINVAL; + } + + if (copy_to_user(argp, &map->bparam, sizeof(map->bparam))) + return -EFAULT; + + return ret; +} + +static const struct file_operations map_benchmark_fops = { + .open = simple_open, + .unlocked_ioctl = map_benchmark_ioctl, +}; + +static void map_benchmark_remove_debugfs(void *data) +{ + struct map_benchmark_data *map = (struct map_benchmark_data *)data; + + debugfs_remove(map->debugfs); +} + +static int __map_benchmark_probe(struct device *dev) +{ + struct dentry *entry; + struct map_benchmark_data *map; + int ret; + + map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + map->dev = dev; + + ret = devm_add_action(dev, map_benchmark_remove_debugfs, map); + if (ret) { + pr_err("Can't add debugfs remove action\n"); + return ret; + } + + /* + * we only permit a device bound with this driver, 2nd probe + * will fail + */ + entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map, + &map_benchmark_fops); + if (IS_ERR(entry)) + return PTR_ERR(entry); + map->debugfs = entry; + + return 0; +} + +static int map_benchmark_platform_probe(struct platform_device *pdev) +{ + return __map_benchmark_probe(&pdev->dev); +} + +static struct platform_driver map_benchmark_platform_driver = { + .driver = { + .name = "dma_map_benchmark", + }, + .probe = map_benchmark_platform_probe, +}; + +static int +map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + return __map_benchmark_probe(&pdev->dev); +} + +static struct pci_driver map_benchmark_pci_driver = { + .name = "dma_map_benchmark", + .probe = map_benchmark_pci_probe, +}; + +static int __init map_benchmark_init(void) +{ + int ret; + + ret = pci_register_driver(&map_benchmark_pci_driver); + if (ret) + return ret; + + ret = platform_driver_register(&map_benchmark_platform_driver); + if (ret) { + pci_unregister_driver(&map_benchmark_pci_driver); + return ret; + } + + return 0; +} + +static void __exit map_benchmark_cleanup(void) +{ + platform_driver_unregister(&map_benchmark_platform_driver); + pci_unregister_driver(&map_benchmark_pci_driver); +} + +module_init(map_benchmark_init); +module_exit(map_benchmark_cleanup); + +MODULE_AUTHOR("Barry Song "); +MODULE_DESCRIPTION("dma_map benchmark driver"); +MODULE_LICENSE("GPL"); -- Gitee From 6a39a5aa30d18aa165a8244815295a45c9fbc318 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 16 Nov 2020 19:08:48 +1300 Subject: [PATCH 2/6] selftests/dma: add test application for DMA_MAP_BENCHMARK mainline inclusion commit id 7679325702c90aecd393cd7cde685576c14489c0 from mainline-v5.11-rc1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I60AX5 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7679325702c90aecd393cd7cde685576c14489c0 ---------------------------------------------------------------------- This patch provides the test application for DMA_MAP_BENCHMARK. Before running the test application, we need to bind a device to dma_map_ benchmark driver. For example, unbind "xxx" from its original driver and bind to dma_map_benchmark: echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override echo xxx > /sys/bus/platform/drivers/xxx/unbind echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind Another example for PCI devices: echo dma_map_benchmark > /sys/bus/pci/devices/0000:00:01.0/driver_override echo 0000:00:01.0 > /sys/bus/pci/drivers/xxx/unbind echo 0000:00:01.0 > /sys/bus/pci/drivers/dma_map_benchmark/bind The below command will run 16 threads on numa node 0 for 10 seconds on the device bound to dma_map_benchmark platform_driver or pci_driver: ./dma_map_benchmark -t 16 -s 10 -n 0 dma mapping benchmark: threads:16 seconds:10 average map latency(us):1.1 standard deviation:1.9 average unmap latency(us):0.5 standard deviation:0.8 Cc: Will Deacon Cc: Shuah Khan Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: Robin Murphy Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig Signed-off-by: Jie Liu --- MAINTAINERS | 6 + tools/testing/selftests/dma/Makefile | 6 + tools/testing/selftests/dma/config | 1 + .../testing/selftests/dma/dma_map_benchmark.c | 123 ++++++++++++++++++ 4 files changed, 136 insertions(+) create mode 100644 tools/testing/selftests/dma/Makefile create mode 100644 tools/testing/selftests/dma/config create mode 100644 tools/testing/selftests/dma/dma_map_benchmark.c diff --git a/MAINTAINERS b/MAINTAINERS index 5cbf5706da92..2540b56a4433 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5270,6 +5270,12 @@ F: include/linux/dma-mapping.h F: include/linux/dma-map-ops.h F: kernel/dma/ +DMA MAPPING BENCHMARK +M: Barry Song +L: iommu@lists.linux-foundation.org +F: kernel/dma/map_benchmark.c +F: tools/testing/selftests/dma/ + DMA-BUF HEAPS FRAMEWORK M: Sumit Semwal R: Benjamin Gaignard diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile new file mode 100644 index 000000000000..aa8e8b5b3864 --- /dev/null +++ b/tools/testing/selftests/dma/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -I../../../../usr/include/ + +TEST_GEN_PROGS := dma_map_benchmark + +include ../lib.mk diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config new file mode 100644 index 000000000000..6102ee3c43cd --- /dev/null +++ b/tools/testing/selftests/dma/config @@ -0,0 +1 @@ +CONFIG_DMA_MAP_BENCHMARK=y diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c new file mode 100644 index 000000000000..7065163a8388 --- /dev/null +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Hisilicon Limited. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +static char *directions[] = { + "BIDIRECTIONAL", + "TO_DEVICE", + "FROM_DEVICE", +}; + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u64 expansion[10]; /* For future use */ +}; + +int main(int argc, char **argv) +{ + struct map_benchmark map; + int fd, opt; + /* default single thread, run 20 seconds on NUMA_NO_NODE */ + int threads = 1, seconds = 20, node = -1; + /* default dma mask 32bit, bidirectional DMA */ + int bits = 32, dir = DMA_MAP_BIDIRECTIONAL; + + int cmd = DMA_MAP_BENCHMARK; + char *p; + + while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) { + switch (opt) { + case 't': + threads = atoi(optarg); + break; + case 's': + seconds = atoi(optarg); + break; + case 'n': + node = atoi(optarg); + break; + case 'b': + bits = atoi(optarg); + break; + case 'd': + dir = atoi(optarg); + break; + default: + return -1; + } + } + + if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) { + fprintf(stderr, "invalid number of threads, must be in 1-%d\n", + DMA_MAP_MAX_THREADS); + exit(1); + } + + if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) { + fprintf(stderr, "invalid number of seconds, must be in 1-%d\n", + DMA_MAP_MAX_SECONDS); + exit(1); + } + + /* suppose the mininum DMA zone is 1MB in the world */ + if (bits < 20 || bits > 64) { + fprintf(stderr, "invalid dma mask bit, must be in 20-64\n"); + exit(1); + } + + if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE && + dir != DMA_MAP_FROM_DEVICE) { + fprintf(stderr, "invalid dma direction\n"); + exit(1); + } + + fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR); + if (fd == -1) { + perror("open"); + exit(1); + } + + map.seconds = seconds; + map.threads = threads; + map.node = node; + map.dma_bits = bits; + map.dma_dir = dir; + if (ioctl(fd, cmd, &map)) { + perror("ioctl"); + exit(1); + } + + printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n", + threads, seconds, node, dir[directions]); + printf("average map latency(us):%.1f standard deviation:%.1f\n", + map.avg_map_100ns/10.0, map.map_stddev/10.0); + printf("average unmap latency(us):%.1f standard deviation:%.1f\n", + map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0); + + return 0; +} -- Gitee From 2a1c9632ee762090929001625ffb5ae9d044be28 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 25 Jan 2021 14:13:06 +1300 Subject: [PATCH 3/6] dma-mapping: benchmark: fix kernel crash when dma_map_single fails mainline inclusion commit id d17405d52bacd14fe7fdbb10c0434934ea496914 from mainline-v5.11-rc7 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I60AX5 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d17405d52bacd14fe7fdbb10c0434934ea496914 ---------------------------------------------------------------------- if dma_map_single() fails, kernel will give the below oops since task_struct has been destroyed and we are running into the memory corruption due to use-after-free in kthread_stop(): [ 48.095310] Unable to handle kernel paging request at virtual address 000000c473548040 [ 48.095736] Mem abort info: [ 48.095864] ESR = 0x96000004 [ 48.096025] EC = 0x25: DABT (current EL), IL = 32 bits [ 48.096268] SET = 0, FnV = 0 [ 48.096401] EA = 0, S1PTW = 0 [ 48.096538] Data abort info: [ 48.096659] ISV = 0, ISS = 0x00000004 [ 48.096820] CM = 0, WnR = 0 [ 48.097079] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000104639000 [ 48.098099] [000000c473548040] pgd=0000000000000000, p4d=0000000000000000 [ 48.098832] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 48.099232] Modules linked in: [ 48.099387] CPU: 0 PID: 2 Comm: kthreadd Tainted: G W [ 48.099887] Hardware name: linux,dummy-virt (DT) [ 48.100078] pstate: 60000005 (nZCv daif -PAN -UAO -TCO BTYPE=--) [ 48.100516] pc : __kmalloc_node+0x214/0x368 [ 48.100944] lr : __kmalloc_node+0x1f4/0x368 [ 48.101458] sp : ffff800011f0bb80 [ 48.101843] x29: ffff800011f0bb80 x28: ffff0000c0098ec0 [ 48.102330] x27: 0000000000000000 x26: 00000000001d4600 [ 48.102648] x25: ffff0000c0098ec0 x24: ffff800011b6a000 [ 48.102988] x23: 00000000ffffffff x22: ffff0000c0098ec0 [ 48.103333] x21: ffff8000101d7a54 x20: 0000000000000dc0 [ 48.103657] x19: ffff0000c0001e00 x18: 0000000000000000 [ 48.104069] x17: 0000000000000000 x16: 0000000000000000 [ 48.105449] x15: 000001aa0304e7b9 x14: 00000000000003b1 [ 48.106401] x13: ffff8000122d5000 x12: ffff80001228d000 [ 48.107296] x11: ffff0000c0154340 x10: 0000000000000000 [ 48.107862] x9 : ffff80000fffffff x8 : ffff0000c473527f [ 48.108326] x7 : ffff800011e62f58 x6 : ffff0000c01c8ed8 [ 48.108778] x5 : ffff0000c0098ec0 x4 : 0000000000000000 [ 48.109223] x3 : 00000000001d4600 x2 : 0000000000000040 [ 48.109656] x1 : 0000000000000001 x0 : ff0000c473548000 [ 48.110104] Call trace: [ 48.110287] __kmalloc_node+0x214/0x368 [ 48.110493] __vmalloc_node_range+0xc4/0x298 [ 48.110805] copy_process+0x2c8/0x15c8 [ 48.111133] kernel_clone+0x5c/0x3c0 [ 48.111373] kernel_thread+0x64/0x90 [ 48.111604] kthreadd+0x158/0x368 [ 48.111810] ret_from_fork+0x10/0x30 [ 48.112336] Code: 17ffffe9 b9402a62 b94008a1 11000421 (f8626802) [ 48.112884] ---[ end trace d4890e21e75419d5 ]--- Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig Signed-off-by: Jie Liu --- kernel/dma/map_benchmark.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index b1496e744c68..1b1b8ff875cb 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -147,8 +147,10 @@ static int do_map_benchmark(struct map_benchmark_data *map) atomic64_set(&map->sum_sq_unmap, 0); atomic64_set(&map->loops, 0); - for (i = 0; i < threads; i++) + for (i = 0; i < threads; i++) { + get_task_struct(tsk[i]); wake_up_process(tsk[i]); + } msleep_interruptible(map->bparam.seconds * 1000); @@ -183,6 +185,8 @@ static int do_map_benchmark(struct map_benchmark_data *map) } out: + for (i = 0; i < threads; i++) + put_task_struct(tsk[i]); put_device(map->dev); kfree(tsk); return ret; -- Gitee From 582fcc37069c4e92ecd15bf1e5ff5d074d92f363 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 6 Feb 2021 00:33:24 +1300 Subject: [PATCH 4/6] dma-mapping: benchmark: use u8 for reserved field in uAPI structure mainline inclusion commit id 9f5f8ec50165630cfc49897410b30997d4d677b5 from mainline-v5.11-rc7 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I60AX5 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9f5f8ec50165630cfc49897410b30997d4d677b5 ---------------------------------------------------------------------- The original code put five u32 before a u64 expansion[10] array. Five is odd, this will cause trouble in the extension of the structure by adding new features. This patch moves to use u8 for reserved field to avoid future alignment risk. Meanwhile, it also clears the memory of struct map_benchmark in tools, otherwise, if users use old version to run on newer kernel, the random expansion value will cause side effect on newer kernel. Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig Signed-off-by: Jie Liu --- kernel/dma/map_benchmark.c | 2 +- tools/testing/selftests/dma/dma_map_benchmark.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 1b1b8ff875cb..da95df381483 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -36,7 +36,7 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u64 expansion[10]; /* For future use */ + __u8 expansion[84]; /* For future use */ }; struct map_benchmark_data { diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 7065163a8388..537d65968c48 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,7 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u64 expansion[10]; /* For future use */ + __u8 expansion[84]; /* For future use */ }; int main(int argc, char **argv) @@ -102,6 +103,7 @@ int main(int argc, char **argv) exit(1); } + memset(&map, 0, sizeof(map)); map.seconds = seconds; map.threads = threads; map.node = node; -- Gitee From f18c6674a8b484a218121248ef649ceae83f9d2c Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 6 Feb 2021 00:33:25 +1300 Subject: [PATCH 5/6] dma-mapping: benchmark: pretend DMA is transmitting mainline inclusion commit id 9dc00b25eadf2908ae76ac0607b55a9f4e0e0cdc from mainline-v5.12-rc1-dontuse category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I60AX5 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9dc00b25eadf2908ae76ac0607b55a9f4e0e0cdc ---------------------------------------------------------------------- In a real dma mapping user case, after dma_map is done, data will be transmit. Thus, in multi-threaded user scenario, IOMMU contention should not be that severe. For example, if users enable multiple threads to send network packets through 1G/10G/100Gbps NIC, usually the steps will be: map -> transmission -> unmap. Transmission delay reduces the contention of IOMMU. Here a delay is added to simulate the transmission between map and unmap so that the tested result could be more accurate for TX and simple RX. A typical TX transmission for NIC would be like: map -> TX -> unmap since the socket buffers come from OS. Simple RX model eg. disk driver, is also map -> RX -> unmap, but real RX model in a NIC could be more complicated considering packets can come spontaneously and many drivers are using pre-mapped buffers pool. This is in the TBD list. Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig Signed-off-by: Jie Liu --- kernel/dma/map_benchmark.c | 12 ++++++++++- .../testing/selftests/dma/dma_map_benchmark.c | 21 ++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index da95df381483..e0e64f8b0739 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -21,6 +21,7 @@ #define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) #define DMA_MAP_MAX_THREADS 1024 #define DMA_MAP_MAX_SECONDS 300 +#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) #define DMA_MAP_BIDIRECTIONAL 0 #define DMA_MAP_TO_DEVICE 1 @@ -36,7 +37,8 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u8 expansion[84]; /* For future use */ + __u32 dma_trans_ns; /* time for DMA transmission in ns */ + __u8 expansion[80]; /* For future use */ }; struct map_benchmark_data { @@ -87,6 +89,9 @@ static int map_benchmark_thread(void *data) map_etime = ktime_get(); map_delta = ktime_sub(map_etime, map_stime); + /* Pretend DMA is transmitting */ + ndelay(map->bparam.dma_trans_ns); + unmap_stime = ktime_get(); dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir); unmap_etime = ktime_get(); @@ -218,6 +223,11 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, return -EINVAL; } + if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) { + pr_err("invalid transmission delay\n"); + return -EINVAL; + } + if (map->bparam.node != NUMA_NO_NODE && !node_possible(map->bparam.node)) { pr_err("invalid numa node\n"); diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 537d65968c48..fb23ce9617ea 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -12,9 +12,12 @@ #include #include +#define NSEC_PER_MSEC 1000000L + #define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) #define DMA_MAP_MAX_THREADS 1024 #define DMA_MAP_MAX_SECONDS 300 +#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) #define DMA_MAP_BIDIRECTIONAL 0 #define DMA_MAP_TO_DEVICE 1 @@ -36,7 +39,8 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u8 expansion[84]; /* For future use */ + __u32 dma_trans_ns; /* time for DMA transmission in ns */ + __u8 expansion[80]; /* For future use */ }; int main(int argc, char **argv) @@ -46,12 +50,12 @@ int main(int argc, char **argv) /* default single thread, run 20 seconds on NUMA_NO_NODE */ int threads = 1, seconds = 20, node = -1; /* default dma mask 32bit, bidirectional DMA */ - int bits = 32, dir = DMA_MAP_BIDIRECTIONAL; + int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; int cmd = DMA_MAP_BENCHMARK; char *p; - while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) { + while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) { switch (opt) { case 't': threads = atoi(optarg); @@ -68,6 +72,9 @@ int main(int argc, char **argv) case 'd': dir = atoi(optarg); break; + case 'x': + xdelay = atoi(optarg); + break; default: return -1; } @@ -85,6 +92,12 @@ int main(int argc, char **argv) exit(1); } + if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) { + fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n", + DMA_MAP_MAX_TRANS_DELAY); + exit(1); + } + /* suppose the mininum DMA zone is 1MB in the world */ if (bits < 20 || bits > 64) { fprintf(stderr, "invalid dma mask bit, must be in 20-64\n"); @@ -109,6 +122,8 @@ int main(int argc, char **argv) map.node = node; map.dma_bits = bits; map.dma_dir = dir; + map.dma_trans_ns = xdelay; + if (ioctl(fd, cmd, &map)) { perror("ioctl"); exit(1); -- Gitee From 145fdafddf13d37730ca0715f8afd5c981c23818 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 9 Nov 2022 16:33:11 +0800 Subject: [PATCH 6/6] dma-mapping:add a new configuration for map_benchmark. kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I60AX5 CVE: NA ---------------------------------------------------------------------- A new configuration needs to be added for map_benchmark. Signed-off-by: Jie Liu --- arch/arm64/configs/openeuler_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index ca5f0414b46d..3af240a257bb 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -6929,6 +6929,7 @@ CONFIG_DMA_REMAP=y CONFIG_DMA_DIRECT_REMAP=y CONFIG_DMA_CMA=y CONFIG_DMA_PERNUMA_CMA=y +CONFIG_DMA_MAP_BENCHMARK=y # # Default contiguous memory area size: -- Gitee