From ec1da19434203cd5a3668e74da8ebe97d1f2dd21 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 27 Oct 2020 10:34:35 -0700 Subject: [PATCH 001/173] dmaengine: idxd: Add shared workqueue support mainline inclusion from mainline-v5.11 commit 8e50d392652f20616a136165dff516b86baf5e49 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 8e50d392652f dmaengine: idxd: Add shared workqueue support. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add shared workqueue support that includes the support of Shared Virtual memory (SVM) or in similar terms On Demand Paging (ODP). The shared workqueue uses the enqcmds command in kernel and will respond with retry if the workqueue is full. Shared workqueue only works when there is PASID support from the IOMMU. Signed-off-by: Dave Jiang Reviewed-by: Tony Luck Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/160382007499.3911367.26043087963708134.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/Kconfig | 10 +++ drivers/dma/idxd/cdev.c | 49 +++++++++++++- drivers/dma/idxd/device.c | 90 +++++++++++++++++++++++-- drivers/dma/idxd/dma.c | 9 --- drivers/dma/idxd/idxd.h | 28 +++++++- drivers/dma/idxd/init.c | 91 +++++++++++++++++++------ drivers/dma/idxd/registers.h | 2 + drivers/dma/idxd/submit.c | 35 ++++++++-- drivers/dma/idxd/sysfs.c | 127 +++++++++++++++++++++++++++++++++++ 9 files changed, 398 insertions(+), 43 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 08013345d1f2..41898e32df2a 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -297,6 +297,16 @@ config INTEL_IDXD If unsure, say N. +# Config symbol that collects all the dependencies that's necessary to +# support shared virtual memory for the devices supported by idxd. 
+config INTEL_IDXD_SVM + bool "Accelerator Shared Virtual Memory Support" + depends on INTEL_IDXD + depends on INTEL_IOMMU_SVM + depends on PCI_PRI + depends on PCI_PASID + depends on PCI_IOV + config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86_64 && !UML diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index ae65eb90afab..cdafd941c06c 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "registers.h" #include "idxd.h" @@ -32,7 +33,9 @@ static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = { struct idxd_user_context { struct idxd_wq *wq; struct task_struct *task; + unsigned int pasid; unsigned int flags; + struct iommu_sva *sva; }; static void idxd_cdev_dev_release(struct device *dev) @@ -72,6 +75,8 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) struct idxd_wq *wq; struct device *dev; int rc = 0; + struct iommu_sva *sva; + unsigned int pasid; wq = inode_wq(inode); idxd = wq->idxd; @@ -92,6 +97,34 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) ctx->wq = wq; filp->private_data = ctx; + + if (device_pasid_enabled(idxd)) { + sva = iommu_sva_bind_device(dev, current->mm, NULL); + if (IS_ERR(sva)) { + rc = PTR_ERR(sva); + dev_err(dev, "pasid allocation failed: %d\n", rc); + goto failed; + } + + pasid = iommu_sva_get_pasid(sva); + if (pasid == IOMMU_PASID_INVALID) { + iommu_sva_unbind_device(sva); + goto failed; + } + + ctx->sva = sva; + ctx->pasid = pasid; + + if (wq_dedicated(wq)) { + rc = idxd_wq_set_pasid(wq, pasid); + if (rc < 0) { + iommu_sva_unbind_device(sva); + dev_err(dev, "wq set pasid failed: %d\n", rc); + goto failed; + } + } + } + idxd_wq_get(wq); mutex_unlock(&wq->wq_lock); return 0; @@ -108,13 +141,27 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) struct idxd_wq *wq = ctx->wq; struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; + int rc; dev_dbg(dev, "%s called\n", __func__); filep->private_data = NULL; /* Wait for in-flight operations to complete. 
*/ - idxd_wq_drain(wq); + if (wq_shared(wq)) { + idxd_device_drain_pasid(idxd, ctx->pasid); + } else { + if (device_pasid_enabled(idxd)) { + /* The wq disable in the disable pasid function will drain the wq */ + rc = idxd_wq_disable_pasid(wq); + if (rc < 0) + dev_err(dev, "wq disable pasid failed.\n"); + } else { + idxd_wq_drain(wq); + } + } + if (ctx->sva) + iommu_sva_unbind_device(ctx->sva); kfree(ctx); mutex_lock(&wq->wq_lock); idxd_wq_put(wq); diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 47aae5fe8273..d188f288d96f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -287,10 +287,9 @@ int idxd_wq_map_portal(struct idxd_wq *wq) start = pci_resource_start(pdev, IDXD_WQ_BAR); start += idxd_get_wq_portal_full_offset(wq->id, IDXD_PORTAL_LIMITED); - wq->dportal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE); - if (!wq->dportal) + wq->portal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE); + if (!wq->portal) return -ENOMEM; - dev_dbg(dev, "wq %d portal mapped at %p\n", wq->id, wq->dportal); return 0; } @@ -299,7 +298,61 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq) { struct device *dev = &wq->idxd->pdev->dev; - devm_iounmap(dev, wq->dportal); + devm_iounmap(dev, wq->portal); +} + +int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) +{ + struct idxd_device *idxd = wq->idxd; + int rc; + union wqcfg wqcfg; + unsigned int offset; + unsigned long flags; + + rc = idxd_wq_disable(wq); + if (rc < 0) + return rc; + + offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); + spin_lock_irqsave(&idxd->dev_lock, flags); + wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); + wqcfg.pasid_en = 1; + wqcfg.pasid = pasid; + iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); + spin_unlock_irqrestore(&idxd->dev_lock, flags); + + rc = idxd_wq_enable(wq); + if (rc < 0) + return rc; + + return 0; +} + +int idxd_wq_disable_pasid(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + int rc; + union wqcfg wqcfg; + unsigned int offset; + unsigned long flags; + + rc = idxd_wq_disable(wq); + if (rc < 0) + return rc; + + offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); + spin_lock_irqsave(&idxd->dev_lock, flags); + wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); + wqcfg.pasid_en = 0; + wqcfg.pasid = 0; + iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); + spin_unlock_irqrestore(&idxd->dev_lock, flags); + + rc = idxd_wq_enable(wq); + if (rc < 0) + return rc; + + return 0; } void idxd_wq_disable_cleanup(struct idxd_wq *wq) @@ -494,6 +547,17 @@ void idxd_device_reset(struct idxd_device *idxd) spin_unlock_irqrestore(&idxd->dev_lock, flags); } +void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid) +{ + struct device *dev = &idxd->pdev->dev; + u32 operand; + + operand = pasid; + dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_DRAIN_PASID, operand); + idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_PASID, operand, NULL); + dev_dbg(dev, "pasid %d drained\n", pasid); +} + /* Device configuration bits */ static void idxd_group_config_write(struct idxd_group *group) { @@ -587,9 +651,21 @@ static int idxd_wq_config_write(struct idxd_wq *wq) /* byte 8-11 */ wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); - wq->wqcfg->mode = 1; + if (wq_dedicated(wq)) + wq->wqcfg->mode = 1; + + if (device_pasid_enabled(idxd)) { + wq->wqcfg->pasid_en = 1; + if (wq->type == IDXD_WQT_KERNEL && wq_dedicated(wq)) + wq->wqcfg->pasid = idxd->pasid; + } + wq->wqcfg->priority = wq->priority; + if (idxd->hw.gen_cap.block_on_fault && + 
test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)) + wq->wqcfg->bof = 1; + /* bytes 12-15 */ wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size); @@ -697,8 +773,8 @@ static int idxd_wqs_setup(struct idxd_device *idxd) if (!wq->size) continue; - if (!wq_dedicated(wq)) { - dev_warn(dev, "No shared workqueue support.\n"); + if (wq_shared(wq) && !device_swq_supported(idxd)) { + dev_warn(dev, "No shared wq support but configured.\n"); return -EINVAL; } diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 09ad37bbd98b..e06e3cf7bf92 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -64,8 +64,6 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq, u64 addr_f1, u64 addr_f2, u64 len, u64 compl, u32 flags) { - struct idxd_device *idxd = wq->idxd; - hw->flags = flags; hw->opcode = opcode; hw->src_addr = addr_f1; @@ -73,13 +71,6 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq, hw->xfer_size = len; hw->priv = !!(wq->type == IDXD_WQT_KERNEL); hw->completion_addr = compl; - - /* - * Descriptor completion vectors are 1-8 for MSIX. We will round - * robin through the 8 vectors. - */ - wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1; - hw->int_handle = wq->vec_ptr; } static struct dma_async_tx_descriptor * diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index eef6996ecc59..5c9271203319 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -62,6 +62,7 @@ enum idxd_wq_state { enum idxd_wq_flag { WQ_FLAG_DEDICATED = 0, + WQ_FLAG_BLOCK_ON_FAULT, }; enum idxd_wq_type { @@ -89,6 +90,7 @@ enum idxd_op_type { enum idxd_complete_type { IDXD_COMPLETE_NORMAL = 0, IDXD_COMPLETE_ABORT, + IDXD_COMPLETE_DEV_FAIL, }; struct idxd_dma_chan { @@ -97,7 +99,7 @@ struct idxd_dma_chan { }; struct idxd_wq { - void __iomem *dportal; + void __iomem *portal; struct device conf_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; @@ -154,6 +156,7 @@ enum idxd_device_state { enum idxd_device_flag { IDXD_FLAG_CONFIGURABLE = 0, IDXD_FLAG_CMD_RUNNING, + IDXD_FLAG_PASID_ENABLED, }; struct idxd_dma_dev { @@ -181,6 +184,9 @@ struct idxd_device { struct idxd_wq *wqs; struct idxd_engine *engines; + struct iommu_sva *sva; + unsigned int pasid; + int num_groups; u32 msix_perm_offset; @@ -229,11 +235,28 @@ struct idxd_desc { extern struct bus_type dsa_bus_type; +extern bool support_enqcmd; + static inline bool wq_dedicated(struct idxd_wq *wq) { return test_bit(WQ_FLAG_DEDICATED, &wq->flags); } +static inline bool wq_shared(struct idxd_wq *wq) +{ + return !test_bit(WQ_FLAG_DEDICATED, &wq->flags); +} + +static inline bool device_pasid_enabled(struct idxd_device *idxd) +{ + return test_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); +} + +static inline bool device_swq_supported(struct idxd_device *idxd) +{ + return (support_enqcmd && device_pasid_enabled(idxd)); +} + enum idxd_portal_prot { IDXD_PORTAL_UNLIMITED = 0, IDXD_PORTAL_LIMITED, @@ -302,6 +325,7 @@ void idxd_device_reset(struct idxd_device *idxd); void idxd_device_cleanup(struct idxd_device *idxd); int idxd_device_config(struct idxd_device *idxd); void idxd_device_wqs_clear_state(struct idxd_device *idxd); +void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid); /* work queue control */ int idxd_wq_alloc_resources(struct idxd_wq *wq); @@ -313,6 +337,8 @@ void idxd_wq_reset(struct idxd_wq *wq); int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); void idxd_wq_disable_cleanup(struct idxd_wq 
*wq); +int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid); +int idxd_wq_disable_pasid(struct idxd_wq *wq); /* submission */ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 048a23018a3d..f4453721a73f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #include "../dmaengine.h" @@ -26,6 +28,8 @@ MODULE_AUTHOR("Intel Corporation"); #define DRV_NAME "idxd" +bool support_enqcmd; + static struct idr idxd_idrs[IDXD_TYPE_MAX]; static struct mutex idxd_idr_lock; @@ -53,6 +57,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) struct idxd_irq_entry *irq_entry; int i, msixcnt; int rc = 0; + union msix_perm mperm; msixcnt = pci_msix_vec_count(pdev); if (msixcnt < 0) { @@ -131,6 +136,13 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) idxd_unmask_error_interrupts(idxd); + /* Setup MSIX permission table */ + mperm.bits = 0; + mperm.pasid = idxd->pasid; + mperm.pasid_en = device_pasid_enabled(idxd); + for (i = 1; i < msixcnt; i++) + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); + return 0; err_no_irq: @@ -265,8 +277,7 @@ static void idxd_read_caps(struct idxd_device *idxd) } } -static struct idxd_device *idxd_alloc(struct pci_dev *pdev, - void __iomem * const *iomap) +static struct idxd_device *idxd_alloc(struct pci_dev *pdev) { struct device *dev = &pdev->dev; struct idxd_device *idxd; @@ -276,12 +287,45 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, return NULL; idxd->pdev = pdev; - idxd->reg_base = iomap[IDXD_MMIO_BAR]; spin_lock_init(&idxd->dev_lock); return idxd; } +static int idxd_enable_system_pasid(struct idxd_device *idxd) +{ + int flags; + unsigned int pasid; + struct iommu_sva *sva; + + flags = SVM_FLAG_SUPERVISOR_MODE; + + sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags); + if (IS_ERR(sva)) { + dev_warn(&idxd->pdev->dev, + "iommu sva bind failed: %ld\n", PTR_ERR(sva)); + return PTR_ERR(sva); + } + + pasid = iommu_sva_get_pasid(sva); + if (pasid == IOMMU_PASID_INVALID) { + iommu_sva_unbind_device(sva); + return -ENODEV; + } + + idxd->sva = sva; + idxd->pasid = pasid; + dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid); + return 0; +} + +static void idxd_disable_system_pasid(struct idxd_device *idxd) +{ + + iommu_sva_unbind_device(idxd->sva); + idxd->sva = NULL; +} + static int idxd_probe(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; @@ -295,6 +339,14 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); + if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM)) { + rc = idxd_enable_system_pasid(idxd); + if (rc < 0) + dev_warn(dev, "Failed to enable PASID. 
No SVA support: %d\n", rc); + else + set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } + idxd_read_caps(idxd); idxd_read_table_offsets(idxd); @@ -325,29 +377,29 @@ static int idxd_probe(struct idxd_device *idxd) idxd_mask_error_interrupts(idxd); idxd_mask_msix_vectors(idxd); err_setup: + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); return rc; } static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - void __iomem * const *iomap; struct device *dev = &pdev->dev; struct idxd_device *idxd; int rc; - unsigned int mask; rc = pcim_enable_device(pdev); if (rc) return rc; - dev_dbg(dev, "Mapping BARs\n"); - mask = (1 << IDXD_MMIO_BAR); - rc = pcim_iomap_regions(pdev, mask, DRV_NAME); - if (rc) - return rc; + dev_dbg(dev, "Alloc IDXD context\n"); + idxd = idxd_alloc(pdev); + if (!idxd) + return -ENOMEM; - iomap = pcim_iomap_table(pdev); - if (!iomap) + dev_dbg(dev, "Mapping BARs\n"); + idxd->reg_base = pcim_iomap(pdev, IDXD_MMIO_BAR, 0); + if (!idxd->reg_base) return -ENOMEM; dev_dbg(dev, "Set DMA masks\n"); @@ -363,11 +415,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - dev_dbg(dev, "Alloc IDXD context\n"); - idxd = idxd_alloc(pdev, iomap); - if (!idxd) - return -ENOMEM; - idxd_set_type(idxd); dev_dbg(dev, "Set PCI master\n"); @@ -455,6 +502,8 @@ static void idxd_remove(struct pci_dev *pdev) dev_dbg(&pdev->dev, "%s called\n", __func__); idxd_cleanup_sysfs(idxd); idxd_shutdown(pdev); + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); mutex_lock(&idxd_idr_lock); idr_remove(&idxd_idrs[idxd->type], idxd->id); mutex_unlock(&idxd_idr_lock); @@ -473,7 +522,7 @@ static int __init idxd_init_module(void) int err, i; /* - * If the CPU does not support write512, there's no point in + * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in * enumerating the device. We can not utilize it. */ if (!boot_cpu_has(X86_FEATURE_MOVDIR64B)) { @@ -481,8 +530,10 @@ static int __init idxd_init_module(void) return -ENODEV; } - pr_info("%s: Intel(R) Accelerator Devices Driver %s\n", - DRV_NAME, IDXD_DRIVER_VERSION); + if (!boot_cpu_has(X86_FEATURE_ENQCMD)) + pr_warn("Platform does not have ENQCMD(S) support.\n"); + else + support_enqcmd = true; mutex_init(&idxd_idr_lock); for (i = 0; i < IDXD_TYPE_MAX; i++) diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 54390334c243..f1a0c15587b3 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -336,6 +336,8 @@ union wqcfg { u32 bits[8]; } __packed; +#define WQCFG_PASID_IDX 2 + /* * This macro calculates the offset into the WQCFG register * idxd - struct idxd * diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 0368c5490788..77f4799520b8 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -11,11 +11,22 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) { struct idxd_desc *desc; + struct idxd_device *idxd = wq->idxd; desc = wq->descs[idx]; memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); memset(desc->completion, 0, sizeof(struct dsa_completion_record)); desc->cpu = cpu; + + if (device_pasid_enabled(idxd)) + desc->hw->pasid = idxd->pasid; + + /* + * Descriptor completion vectors are 1-8 for MSIX. We will round + * robin through the 8 vectors. 
+ */ + wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1; + desc->hw->int_handle = wq->vec_ptr; return desc; } @@ -70,18 +81,32 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) struct idxd_device *idxd = wq->idxd; int vec = desc->hw->int_handle; void __iomem *portal; + int rc; if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - portal = wq->dportal; + portal = wq->portal; + /* - * The wmb() flushes writes to coherent DMA data before possibly - * triggering a DMA read. The wmb() is necessary even on UP because - * the recipient is a device. + * The wmb() flushes writes to coherent DMA data before + * possibly triggering a DMA read. The wmb() is necessary + * even on UP because the recipient is a device. */ wmb(); - iosubmit_cmds512(portal, desc->hw, 1); + if (wq_dedicated(wq)) { + iosubmit_cmds512(portal, desc->hw, 1); + } else { + /* + * It's not likely that we would receive queue full rejection + * since the descriptor allocation gates at wq size. If we + * receive a -EAGAIN, that means something went wrong such as the + * device is not accepting descriptor at all. + */ + rc = enqcmds(portal, desc->hw); + if (rc < 0) + return rc; + } /* * Pending the descriptor to the lockless list for the irq_entry diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 51af0dfc3c63..44a0e790c43e 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -175,6 +175,30 @@ static int idxd_config_bus_probe(struct device *dev) return -EINVAL; } + /* Shared WQ checks */ + if (wq_shared(wq)) { + if (!device_swq_supported(idxd)) { + dev_warn(dev, + "PASID not enabled and shared WQ.\n"); + mutex_unlock(&wq->wq_lock); + return -ENXIO; + } + /* + * Shared wq with the threshold set to 0 means the user + * did not set the threshold or transitioned from a + * dedicated wq but did not set threshold. A value + * of 0 would effectively disable the shared wq. The + * driver does not allow a value of 0 to be set for + * threshold via sysfs. 
+ */ + if (wq->threshold == 0) { + dev_warn(dev, + "Shared WQ and threshold 0.\n"); + mutex_unlock(&wq->wq_lock); + return -EINVAL; + } + } + rc = idxd_wq_alloc_resources(wq); if (rc < 0) { mutex_unlock(&wq->wq_lock); @@ -870,6 +894,8 @@ static ssize_t wq_mode_store(struct device *dev, if (sysfs_streq(buf, "dedicated")) { set_bit(WQ_FLAG_DEDICATED, &wq->flags); wq->threshold = 0; + } else if (sysfs_streq(buf, "shared") && device_swq_supported(idxd)) { + clear_bit(WQ_FLAG_DEDICATED, &wq->flags); } else { return -EINVAL; } @@ -968,6 +994,87 @@ static ssize_t wq_priority_store(struct device *dev, static struct device_attribute dev_attr_wq_priority = __ATTR(priority, 0644, wq_priority_show, wq_priority_store); +static ssize_t wq_block_on_fault_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + + return sprintf(buf, "%u\n", + test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)); +} + +static ssize_t wq_block_on_fault_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_device *idxd = wq->idxd; + bool bof; + int rc; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (wq->state != IDXD_WQ_DISABLED) + return -ENXIO; + + rc = kstrtobool(buf, &bof); + if (rc < 0) + return rc; + + if (bof) + set_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + else + clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + + return count; +} + +static struct device_attribute dev_attr_wq_block_on_fault = + __ATTR(block_on_fault, 0644, wq_block_on_fault_show, + wq_block_on_fault_store); + +static ssize_t wq_threshold_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + + return sprintf(buf, "%u\n", wq->threshold); +} + +static ssize_t wq_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_device *idxd = wq->idxd; + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc < 0) + return -EINVAL; + + if (val > wq->size || val <= 0) + return -EINVAL; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (wq->state != IDXD_WQ_DISABLED) + return -ENXIO; + + if (test_bit(WQ_FLAG_DEDICATED, &wq->flags)) + return -EINVAL; + + wq->threshold = val; + + return count; +} + +static struct device_attribute dev_attr_wq_threshold = + __ATTR(threshold, 0644, wq_threshold_show, wq_threshold_store); + static ssize_t wq_type_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1039,6 +1146,13 @@ static ssize_t wq_name_store(struct device *dev, if (strlen(buf) > WQ_NAME_SIZE || strlen(buf) == 0) return -EINVAL; + /* + * This is temporarily placed here until we have SVM support for + * dmaengine. 
+ */ + if (wq->type == IDXD_WQT_KERNEL && device_pasid_enabled(wq->idxd)) + return -EOPNOTSUPP; + memset(wq->name, 0, WQ_NAME_SIZE + 1); strncpy(wq->name, buf, WQ_NAME_SIZE); strreplace(wq->name, '\n', '\0'); @@ -1163,6 +1277,8 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_mode.attr, &dev_attr_wq_size.attr, &dev_attr_wq_priority.attr, + &dev_attr_wq_block_on_fault.attr, + &dev_attr_wq_threshold.attr, &dev_attr_wq_type.attr, &dev_attr_wq_name.attr, &dev_attr_wq_cdev_minor.attr, @@ -1320,6 +1436,16 @@ static ssize_t clients_show(struct device *dev, } static DEVICE_ATTR_RO(clients); +static ssize_t pasid_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = + container_of(dev, struct idxd_device, conf_dev); + + return sprintf(buf, "%u\n", device_pasid_enabled(idxd)); +} +static DEVICE_ATTR_RO(pasid_enabled); + static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1439,6 +1565,7 @@ static struct attribute *idxd_device_attributes[] = { &dev_attr_gen_cap.attr, &dev_attr_configurable.attr, &dev_attr_clients.attr, + &dev_attr_pasid_enabled.attr, &dev_attr_state.attr, &dev_attr_errors.attr, &dev_attr_max_tokens.attr, -- Gitee From 7dc5e24dbe08391f408caf3ef775819490193ca0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 27 Oct 2020 10:34:40 -0700 Subject: [PATCH 002/173] dmaengine: idxd: Clean up descriptors with fault error mainline inclusion from mainline-v5.11 commit e4f4d8cdeb9a2fe746411c0b9a7538b5c0232c1e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit e4f4d8cdeb9a dmaengine: idxd: Clean up descriptors with fault error. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add code to "complete" a descriptor when the descriptor or its completion address hit a fault error when SVA mode is being used. This error can be triggered due to bad programming by the user. A lock is introduced in order to protect the descriptor completion lists since the fault handler will run from the system work queue after being scheduled in the interrupt handler. Signed-off-by: Dave Jiang Reviewed-by: Tony Luck Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/160382008092.3911367.12766483427643278985.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 5 ++ drivers/dma/idxd/init.c | 1 + drivers/dma/idxd/irq.c | 146 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 140 insertions(+), 12 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 5c9271203319..8d7a9fab4c3f 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -37,6 +37,11 @@ struct idxd_irq_entry { int id; struct llist_head pending_llist; struct list_head work_list; + /* + * Lock to protect access between irq thread process descriptor + * and irq thread processing error descriptor. 
+ */ + spinlock_t list_lock; }; struct idxd_group { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f4453721a73f..a42891e7f00f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -97,6 +97,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) for (i = 0; i < msixcnt; i++) { idxd->irq_entries[i].id = i; idxd->irq_entries[i].idxd = idxd; + spin_lock_init(&idxd->irq_entries[i].list_lock); } msix = &idxd->msix_entries[0]; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index fc9579180705..829ee6590a74 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -11,6 +11,24 @@ #include "idxd.h" #include "registers.h" +enum irq_work_type { + IRQ_WORK_NORMAL = 0, + IRQ_WORK_PROCESS_FAULT, +}; + +struct idxd_fault { + struct work_struct work; + u64 addr; + struct idxd_device *idxd; +}; + +static int irq_process_work_list(struct idxd_irq_entry *irq_entry, + enum irq_work_type wtype, + int *processed, u64 data); +static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, + enum irq_work_type wtype, + int *processed, u64 data); + static void idxd_device_reinit(struct work_struct *work) { struct idxd_device *idxd = container_of(work, struct idxd_device, work); @@ -44,6 +62,46 @@ static void idxd_device_reinit(struct work_struct *work) idxd_device_wqs_clear_state(idxd); } +static void idxd_device_fault_work(struct work_struct *work) +{ + struct idxd_fault *fault = container_of(work, struct idxd_fault, work); + struct idxd_irq_entry *ie; + int i; + int processed; + int irqcnt = fault->idxd->num_wq_irqs + 1; + + for (i = 1; i < irqcnt; i++) { + ie = &fault->idxd->irq_entries[i]; + irq_process_work_list(ie, IRQ_WORK_PROCESS_FAULT, + &processed, fault->addr); + if (processed) + break; + + irq_process_pending_llist(ie, IRQ_WORK_PROCESS_FAULT, + &processed, fault->addr); + if (processed) + break; + } + + kfree(fault); +} + +static int idxd_device_schedule_fault_process(struct idxd_device *idxd, + u64 fault_addr) +{ + struct idxd_fault *fault; + + fault = kmalloc(sizeof(*fault), GFP_ATOMIC); + if (!fault) + return -ENOMEM; + + fault->addr = fault_addr; + fault->idxd = idxd; + INIT_WORK(&fault->work, idxd_device_fault_work); + queue_work(idxd->wq, &fault->work); + return 0; +} + irqreturn_t idxd_irq_handler(int vec, void *data) { struct idxd_irq_entry *irq_entry = data; @@ -122,6 +180,15 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) if (!err) return 0; + /* + * This case should rarely happen and typically is due to software + * programming error by the driver. + */ + if (idxd->sw_err.valid && + idxd->sw_err.desc_valid && + idxd->sw_err.fault_addr) + idxd_device_schedule_fault_process(idxd, idxd->sw_err.fault_addr); + gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); if (gensts.state == IDXD_DEVICE_STATE_HALT) { idxd->state = IDXD_DEV_HALTED; @@ -172,57 +239,110 @@ irqreturn_t idxd_misc_thread(int vec, void *data) return IRQ_HANDLED; } +static bool process_fault(struct idxd_desc *desc, u64 fault_addr) +{ + /* + * Completion address can be bad as well. Check fault address match for descriptor + * and completion address. 
+ */ + if ((u64)desc->hw == fault_addr || + (u64)desc->completion == fault_addr) { + idxd_dma_complete_txd(desc, IDXD_COMPLETE_DEV_FAIL); + return true; + } + + return false; +} + +static bool complete_desc(struct idxd_desc *desc) +{ + if (desc->completion->status) { + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL); + return true; + } + + return false; +} + static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, - int *processed) + enum irq_work_type wtype, + int *processed, u64 data) { struct idxd_desc *desc, *t; struct llist_node *head; int queued = 0; + bool completed = false; + unsigned long flags; *processed = 0; head = llist_del_all(&irq_entry->pending_llist); if (!head) - return 0; + goto out; llist_for_each_entry_safe(desc, t, head, llnode) { - if (desc->completion->status) { - idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL); + if (wtype == IRQ_WORK_NORMAL) + completed = complete_desc(desc); + else if (wtype == IRQ_WORK_PROCESS_FAULT) + completed = process_fault(desc, data); + + if (completed) { idxd_free_desc(desc->wq, desc); (*processed)++; + if (wtype == IRQ_WORK_PROCESS_FAULT) + break; } else { - list_add_tail(&desc->list, &irq_entry->work_list); + spin_lock_irqsave(&irq_entry->list_lock, flags); + list_add_tail(&desc->list, + &irq_entry->work_list); + spin_unlock_irqrestore(&irq_entry->list_lock, flags); queued++; } } + out: return queued; } static int irq_process_work_list(struct idxd_irq_entry *irq_entry, - int *processed) + enum irq_work_type wtype, + int *processed, u64 data) { struct list_head *node, *next; int queued = 0; + bool completed = false; + unsigned long flags; *processed = 0; + spin_lock_irqsave(&irq_entry->list_lock, flags); if (list_empty(&irq_entry->work_list)) - return 0; + goto out; list_for_each_safe(node, next, &irq_entry->work_list) { struct idxd_desc *desc = container_of(node, struct idxd_desc, list); - if (desc->completion->status) { + spin_unlock_irqrestore(&irq_entry->list_lock, flags); + if (wtype == IRQ_WORK_NORMAL) + completed = complete_desc(desc); + else if (wtype == IRQ_WORK_PROCESS_FAULT) + completed = process_fault(desc, data); + + if (completed) { + spin_lock_irqsave(&irq_entry->list_lock, flags); list_del(&desc->list); - /* process and callback */ - idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL); + spin_unlock_irqrestore(&irq_entry->list_lock, flags); idxd_free_desc(desc->wq, desc); (*processed)++; + if (wtype == IRQ_WORK_PROCESS_FAULT) + return queued; } else { queued++; } + spin_lock_irqsave(&irq_entry->list_lock, flags); } + out: + spin_unlock_irqrestore(&irq_entry->list_lock, flags); return queued; } @@ -250,12 +370,14 @@ static int idxd_desc_process(struct idxd_irq_entry *irq_entry) * 5. Repeat until no more descriptors. 
*/ do { - rc = irq_process_work_list(irq_entry, &processed); + rc = irq_process_work_list(irq_entry, IRQ_WORK_NORMAL, + &processed, 0); total += processed; if (rc != 0) continue; - rc = irq_process_pending_llist(irq_entry, &processed); + rc = irq_process_pending_llist(irq_entry, IRQ_WORK_NORMAL, + &processed, 0); total += processed; } while (rc != 0); -- Gitee From ac3236a6407af662ed17cc2a528e3e65686e16f0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 27 Oct 2020 10:34:46 -0700 Subject: [PATCH 003/173] dmaengine: idxd: Add ABI documentation for shared wq mainline inclusion from mainline-v5.11 commit 4749f51ddd8aee5c7aa11e6593a54f96374a77ad category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 4749f51ddd8a dmaengine: idxd: Add ABI documentation for shared wq. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add the sysfs attribute bits in ABI/stable for shared wq support. Signed-off-by: Jing Lin Signed-off-by: Dave Jiang Reviewed-by: Tony Luck Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/160382008649.3911367.10851752182908509837.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- Documentation/ABI/stable/sysfs-driver-dma-idxd | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index b44183880935..5ea81ffd3c1a 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -77,6 +77,13 @@ Contact: dmaengine@vger.kernel.org Description: The operation capability bit mask specify the operation types supported by the this device. +What: /sys/bus/dsa/devices/dsa/pasid_enabled +Date: Oct 27, 2020 +KernelVersion: 5.11.0 +Contact: dmaengine@vger.kernel.org +Description: To indicate if PASID (process address space identifier) is + enabled or not for this device. + What: /sys/bus/dsa/devices/dsa/state Date: Oct 25, 2019 KernelVersion: 5.6.0 @@ -122,6 +129,13 @@ KernelVersion: 5.10.0 Contact: dmaengine@vger.kernel.org Description: The last executed device administrative command's status/error. +What: /sys/bus/dsa/devices/wq./block_on_fault +Date: Oct 27, 2020 +KernelVersion: 5.11.0 +Contact: dmaengine@vger.kernel.org +Description: To indicate block on fault is allowed or not for the work queue + to support on demand paging. + What: /sys/bus/dsa/devices/wq./group_id Date: Oct 25, 2019 KernelVersion: 5.6.0 -- Gitee From 37e2e56d9927c59e468a95374f4b5f245155e9b9 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 30 Oct 2020 08:49:06 -0700 Subject: [PATCH 004/173] dmaengine: idxd: Update calculation of group offset to be more readable mainline inclusion from mainline-v5.11 commit 5a71270197f39ff3e526cf130bc5d6e84db8f2d7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 5a71270197f3 dmaengine: idxd: Update calculation of group offset to be more readable. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Create helper macros to make group offset calculation more readable. 
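For illustration (not part of the patch itself), the layout the new macros encode: each
group owns a 64-byte GRPCFG block starting at the device's grpcfg_offset, with the WQ
bitmap words at byte 0, the engine bitmap at byte 32, and the flags at byte 40. A
standalone sketch of the same arithmetic, using an example base offset:

    #include <stdio.h>
    #include <stdint.h>

    #define GRPCFG_SIZE 64

    /* Same math as the GRPWQCFG/GRPENGCFG/GRPFLGCFG macros added below,
     * with the device's grpcfg_offset passed in explicitly. */
    static uint32_t grpwqcfg_offset(uint32_t base, int id, int ofs)
    {
            return base + id * GRPCFG_SIZE + sizeof(uint64_t) * ofs;
    }

    static uint32_t grpengcfg_offset(uint32_t base, int id)
    {
            return base + id * GRPCFG_SIZE + 32;
    }

    static uint32_t grpflgcfg_offset(uint32_t base, int id)
    {
            return base + id * GRPCFG_SIZE + 40;
    }

    int main(void)
    {
            /* group 2, third 64-bit WQ-bitmap word, example base 0x400 */
            printf("wq:  %#x\n", (unsigned)grpwqcfg_offset(0x400, 2, 2)); /* 0x490 */
            printf("eng: %#x\n", (unsigned)grpengcfg_offset(0x400, 2));   /* 0x4a0 */
            printf("flg: %#x\n", (unsigned)grpflgcfg_offset(0x400, 2));   /* 0x4a8 */
            return 0;
    }
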
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/160407294683.839093.10740868559754142070.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 12 +++++------- drivers/dma/idxd/registers.h | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index d188f288d96f..e7b533ce1b97 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -569,24 +569,22 @@ static void idxd_group_config_write(struct idxd_group *group) dev_dbg(dev, "Writing group %d cfg registers\n", group->id); /* setup GRPWQCFG */ - for (i = 0; i < 4; i++) { - grpcfg_offset = idxd->grpcfg_offset + - group->id * 64 + i * sizeof(u64); - iowrite64(group->grpcfg.wqs[i], - idxd->reg_base + grpcfg_offset); + for (i = 0; i < GRPWQCFG_STRIDES; i++) { + grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i); + iowrite64(group->grpcfg.wqs[i], idxd->reg_base + grpcfg_offset); dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n", group->id, i, grpcfg_offset, ioread64(idxd->reg_base + grpcfg_offset)); } /* setup GRPENGCFG */ - grpcfg_offset = idxd->grpcfg_offset + group->id * 64 + 32; + grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id); iowrite64(group->grpcfg.engines, idxd->reg_base + grpcfg_offset); dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id, grpcfg_offset, ioread64(idxd->reg_base + grpcfg_offset)); /* setup GRPFLAGS */ - grpcfg_offset = idxd->grpcfg_offset + group->id * 64 + 40; + grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); iowrite32(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset); dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", group->id, grpcfg_offset, diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index f1a0c15587b3..3451af2f6572 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -356,4 +356,22 @@ union wqcfg { #define WQCFG_STRIDES(_idxd_dev) ((_idxd_dev)->wqcfg_size / sizeof(u32)) +#define GRPCFG_SIZE 64 +#define GRPWQCFG_STRIDES 4 + +/* + * This macro calculates the offset into the GRPCFG register + * idxd - struct idxd * + * n - wq id + * ofs - the index of the 32b dword for the config register + * + * The WQCFG register block is divided into groups per each wq. The n index + * allows us to move to the register group that's for that particular wq. + * Each register is 32bits. The ofs gives us the number of register to access. + */ +#define GRPWQCFG_OFFSET(idxd_dev, n, ofs) ((idxd_dev)->grpcfg_offset +\ + (n) * GRPCFG_SIZE + sizeof(u64) * (ofs)) +#define GRPENGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 32) +#define GRPFLGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 40) + #endif -- Gitee From 469c1614704b50255f4be596dbf21c99262d18e7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 30 Oct 2020 08:51:56 -0700 Subject: [PATCH 005/173] dmaengine: idxd: define table offset multiplier mainline inclusion from mainline-v5.11 commit 2f8417a967d571bf8fb81cba95d7acf508ed334f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 2f8417a967d5 dmaengine: idxd: define table offset multiplier. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Convert table offset multiplier magic number to a define. 
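The fields read from IDXD_TABLE_OFFSET are expressed in 0x100-byte units, which is what
the new define captures. A tiny standalone sketch of the conversion (the raw field value
is illustrative):

    #include <stdio.h>

    #define IDXD_TABLE_MULT 0x100

    int main(void)
    {
            unsigned int grpcfg = 0x4; /* raw field from IDXD_TABLE_OFFSET */

            /* same scaling the driver applies to every table offset */
            printf("grpcfg byte offset: %#x\n", grpcfg * IDXD_TABLE_MULT); /* 0x400 */
            return 0;
    }
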
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/160407311690.839435.6941865731867828234.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 17 +++++++---------- drivers/dma/idxd/registers.h | 2 ++ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index a42891e7f00f..09eab828fbba 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -214,17 +214,14 @@ static void idxd_read_table_offsets(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; offsets.bits[0] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET); - offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET - + sizeof(u64)); - idxd->grpcfg_offset = offsets.grpcfg * 0x100; + offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET + sizeof(u64)); + idxd->grpcfg_offset = offsets.grpcfg * IDXD_TABLE_MULT; dev_dbg(dev, "IDXD Group Config Offset: %#x\n", idxd->grpcfg_offset); - idxd->wqcfg_offset = offsets.wqcfg * 0x100; - dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n", - idxd->wqcfg_offset); - idxd->msix_perm_offset = offsets.msix_perm * 0x100; - dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n", - idxd->msix_perm_offset); - idxd->perfmon_offset = offsets.perfmon * 0x100; + idxd->wqcfg_offset = offsets.wqcfg * IDXD_TABLE_MULT; + dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n", idxd->wqcfg_offset); + idxd->msix_perm_offset = offsets.msix_perm * IDXD_TABLE_MULT; + dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n", idxd->msix_perm_offset); + idxd->perfmon_offset = offsets.perfmon * IDXD_TABLE_MULT; dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset); } diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 3451af2f6572..cb9ce3ee397e 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -102,6 +102,8 @@ union offsets_reg { u64 bits[2]; } __packed; +#define IDXD_TABLE_MULT 0x100 + #define IDXD_GENCFG_OFFSET 0x80 union gencfg_reg { struct { -- Gitee From 9509fb797deb7a9f75fc1d02a1b0a5aec611f9c6 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 13 Nov 2020 15:55:05 -0700 Subject: [PATCH 006/173] dmaengine: idxd: add ATS disable knob for work queues mainline inclusion from mainline-v5.11 commit 92de5fa2dc39c3fba0704f7ac914e7f02eb732f2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 92de5fa2dc39 dmaengine: idxd: add ATS disable knob for work queues. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- With the DSA spec 1.1 update, a knob to disable ATS for individually is introduced. Add enabling code to allow a system admin to make the configuration through sysfs. 
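For illustration, a userspace sketch of toggling the knob through sysfs. The workqueue
instance name here is hypothetical, and per the store handler below the write only
succeeds while the wq is disabled on a device that reports wq_ats_support:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* "wq0.0" is a hypothetical workqueue instance name */
            int fd = open("/sys/bus/dsa/devices/wq0.0/ats_disable", O_WRONLY);

            if (fd < 0)
                    return 1;
            /* rejected with -EPERM if the wq is enabled, and with
             * -EOPNOTSUPP if the device lacks wq_ats_support */
            if (write(fd, "1", 1) != 1)
                    perror("write");
            close(fd);
            return 0;
    }
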
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/160530810593.1288392.2561048329116529566.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- .../ABI/stable/sysfs-driver-dma-idxd | 7 ++++ drivers/dma/idxd/device.c | 4 +++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/registers.h | 5 +-- drivers/dma/idxd/sysfs.c | 34 +++++++++++++++++++ 5 files changed, 49 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 5ea81ffd3c1a..55285c136cf0 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -204,6 +204,13 @@ Contact: dmaengine@vger.kernel.org Description: The max batch size for this workqueue. Cannot exceed device max batch size. Configurable parameter. +What: /sys/bus/dsa/devices/wq./ats_disable +Date: Nov 13, 2020 +KernelVersion: 5.11.0 +Contact: dmaengine@vger.kernel.org +Description: Indicate whether ATS disable is turned on for the workqueue. + 0 indicates ATS is on, and 1 indicates ATS is off for the workqueue. + What: /sys/bus/dsa/devices/engine./group_id Date: Oct 25, 2019 KernelVersion: 5.6.0 diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index e7b533ce1b97..1ffd070e2257 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -366,6 +366,7 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->group = NULL; wq->threshold = 0; wq->priority = 0; + wq->ats_dis = 0; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); } @@ -664,6 +665,9 @@ static int idxd_wq_config_write(struct idxd_wq *wq) test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)) wq->wqcfg->bof = 1; + if (idxd->hw.wq_cap.wq_ats_support) + wq->wqcfg->wq_ats_disable = wq->ats_dis; + /* bytes 12-15 */ wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 8d7a9fab4c3f..81d71b5b69ec 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -132,6 +132,7 @@ struct idxd_wq { char name[WQ_NAME_SIZE + 1]; u64 max_xfer_bytes; u32 max_batch_size; + bool ats_dis; }; struct idxd_engine { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index cb9ce3ee397e..5f52584b4510 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -47,7 +47,7 @@ union wq_cap_reg { u64 rsvd:20; u64 shared_mode:1; u64 dedicated_mode:1; - u64 rsvd2:1; + u64 wq_ats_support:1; u64 priority:1; u64 occupancy:1; u64 occupancy_int:1; @@ -303,7 +303,8 @@ union wqcfg { /* bytes 8-11 */ u32 mode:1; /* shared or dedicated */ u32 bof:1; /* block on fault */ - u32 rsvd2:2; + u32 wq_ats_disable:1; + u32 rsvd2:1; u32 priority:4; u32 pasid:20; u32 pasid_en:1; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 44a0e790c43e..7aeb08b9cf6c 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1270,6 +1270,39 @@ static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribu static struct device_attribute dev_attr_wq_max_batch_size = __ATTR(max_batch_size, 0644, wq_max_batch_size_show, wq_max_batch_size_store); +static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + + return sprintf(buf, "%u\n", wq->ats_dis); +} + +static ssize_t wq_ats_disable_store(struct device *dev, struct 
device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_device *idxd = wq->idxd; + bool ats_dis; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (!idxd->hw.wq_cap.wq_ats_support) + return -EOPNOTSUPP; + + rc = kstrtobool(buf, &ats_dis); + if (rc < 0) + return rc; + + wq->ats_dis = ats_dis; + + return count; +} + +static struct device_attribute dev_attr_wq_ats_disable = + __ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -1284,6 +1317,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_cdev_minor.attr, &dev_attr_wq_max_transfer_size.attr, &dev_attr_wq_max_batch_size.attr, + &dev_attr_wq_ats_disable.attr, NULL, }; -- Gitee From 65dcd5eaaa0ca8c030922ea976f05505e508ed8a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 17 Nov 2020 13:39:14 -0700 Subject: [PATCH 007/173] dmaengine: idxd: add IAX configuration support in the IDXD driver mainline inclusion from mainline-v5.11 commit f25b463883a8a2d1b7303a63339c0d589fc94f1e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit f25b463883a8 dmaengine: idxd: add IAX configuration support in the IDXD driver. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add support to allow configuration of Intel Analytics Accelerator (IAX) in addition to the Intel Data Streaming Accelerator (DSA). The IAX hardware has the same configuration interface as DSA. The main difference is the type of operations it performs. We can support the DSA and IAX devices on the same driver with some tweaks. IAX has a 64B completion record that needs to be 64B aligned, as opposed to a 32B completion record that is 32B aligned for DSA. IAX also does not support token management. 
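A minimal standalone sketch of the power-of-two round-up the patch uses to place IAX
completion records on a 64-byte boundary inside the over-allocated DMA buffer:

    #include <stdio.h>
    #include <stdint.h>

    /* Round 'addr' up to the next multiple of 'align' (a power of two) -
     * the same arithmetic applied to wq->compls_raw below. */
    static uint64_t align_up(uint64_t addr, uint64_t align)
    {
            return (addr + (align - 1)) & ~(align - 1);
    }

    int main(void)
    {
            /* example: an address 8 bytes past a 64B boundary */
            printf("%#llx\n", (unsigned long long)align_up(0x1008, 64)); /* 0x1040 */
            return 0;
    }
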
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/160564555488.1834439.4261958859935360473.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 1 + drivers/dma/idxd/device.c | 37 +++++++++++++---- drivers/dma/idxd/idxd.h | 24 +++++++++-- drivers/dma/idxd/init.c | 14 +++++++ drivers/dma/idxd/registers.h | 1 + drivers/dma/idxd/submit.c | 2 +- drivers/dma/idxd/sysfs.c | 46 +++++++++++++++++++-- include/uapi/linux/idxd.h | 79 ++++++++++++++++++++++++++++++++++++ 8 files changed, 187 insertions(+), 17 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index cdafd941c06c..3b8f05605db6 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -28,6 +28,7 @@ struct idxd_cdev_context { */ static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = { { .name = "dsa" }, + { .name = "iax" } }; struct idxd_user_context { diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 1ffd070e2257..ad839432ea54 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -131,6 +131,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; int rc, num_descs, i; + int align; + u64 tmp; if (wq->type != IDXD_WQT_KERNEL) return 0; @@ -142,14 +144,27 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) if (rc < 0) return rc; - wq->compls_size = num_descs * sizeof(struct dsa_completion_record); - wq->compls = dma_alloc_coherent(dev, wq->compls_size, - &wq->compls_addr, GFP_KERNEL); - if (!wq->compls) { + if (idxd->type == IDXD_TYPE_DSA) + align = 32; + else if (idxd->type == IDXD_TYPE_IAX) + align = 64; + else + return -ENODEV; + + wq->compls_size = num_descs * idxd->compl_size + align; + wq->compls_raw = dma_alloc_coherent(dev, wq->compls_size, + &wq->compls_addr_raw, GFP_KERNEL); + if (!wq->compls_raw) { rc = -ENOMEM; goto fail_alloc_compls; } + /* Adjust alignment */ + wq->compls_addr = (wq->compls_addr_raw + (align - 1)) & ~(align - 1); + tmp = (u64)wq->compls_raw; + tmp = (tmp + (align - 1)) & ~(align - 1); + wq->compls = (struct dsa_completion_record *)tmp; + rc = alloc_descs(wq, num_descs); if (rc < 0) goto fail_alloc_descs; @@ -163,9 +178,11 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) struct idxd_desc *desc = wq->descs[i]; desc->hw = wq->hw_descs[i]; - desc->completion = &wq->compls[i]; - desc->compl_dma = wq->compls_addr + - sizeof(struct dsa_completion_record) * i; + if (idxd->type == IDXD_TYPE_DSA) + desc->completion = &wq->compls[i]; + else if (idxd->type == IDXD_TYPE_IAX) + desc->iax_completion = &wq->iax_compls[i]; + desc->compl_dma = wq->compls_addr + idxd->compl_size * i; desc->id = i; desc->wq = wq; desc->cpu = -1; @@ -176,7 +193,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) fail_sbitmap_init: free_descs(wq); fail_alloc_descs: - dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); + dma_free_coherent(dev, wq->compls_size, wq->compls_raw, + wq->compls_addr_raw); fail_alloc_compls: free_hw_descs(wq); return rc; @@ -191,7 +209,8 @@ void idxd_wq_free_resources(struct idxd_wq *wq) free_hw_descs(wq); free_descs(wq); - dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); + dma_free_coherent(dev, wq->compls_size, wq->compls_raw, + wq->compls_addr_raw); sbitmap_queue_free(&wq->sbq); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 81d71b5b69ec..580f8aade7bc 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -23,7 +23,8 @@ 
struct idxd_wq; enum idxd_type { IDXD_TYPE_UNKNOWN = -1, IDXD_TYPE_DSA = 0, - IDXD_TYPE_MAX + IDXD_TYPE_IAX, + IDXD_TYPE_MAX, }; #define IDXD_NAME_SIZE 128 @@ -123,8 +124,13 @@ struct idxd_wq { u32 vec_ptr; /* interrupt steering */ struct dsa_hw_desc **hw_descs; int num_descs; - struct dsa_completion_record *compls; + union { + struct dsa_completion_record *compls; + struct iax_completion_record *iax_compls; + }; + void *compls_raw; dma_addr_t compls_addr; + dma_addr_t compls_addr_raw; int compls_size; struct idxd_desc **descs; struct sbitmap_queue sbq; @@ -210,6 +216,7 @@ struct idxd_device { int token_limit; int nr_tokens; /* non-reserved tokens */ unsigned int wqcfg_size; + int compl_size; union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; @@ -224,9 +231,15 @@ struct idxd_device { /* IDXD software descriptor */ struct idxd_desc { - struct dsa_hw_desc *hw; + union { + struct dsa_hw_desc *hw; + struct iax_hw_desc *iax_hw; + }; dma_addr_t desc_dma; - struct dsa_completion_record *completion; + union { + struct dsa_completion_record *completion; + struct iax_completion_record *iax_completion; + }; dma_addr_t compl_dma; struct dma_async_tx_descriptor txd; struct llist_node llnode; @@ -240,6 +253,7 @@ struct idxd_desc { #define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev) extern struct bus_type dsa_bus_type; +extern struct bus_type iax_bus_type; extern bool support_enqcmd; @@ -285,6 +299,8 @@ static inline void idxd_set_type(struct idxd_device *idxd) if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0) idxd->type = IDXD_TYPE_DSA; + else if (pdev->device == PCI_DEVICE_ID_INTEL_IAX_SPR0) + idxd->type = IDXD_TYPE_IAX; else idxd->type = IDXD_TYPE_UNKNOWN; } diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 09eab828fbba..8ba2869ce17a 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -36,12 +36,16 @@ static struct mutex idxd_idr_lock; static struct pci_device_id idxd_pci_tbl[] = { /* DSA ver 1.0 platforms */ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_DSA_SPR0) }, + + /* IAX ver 1.0 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IAX_SPR0) }, { 0, } }; MODULE_DEVICE_TABLE(pci, idxd_pci_tbl); static char *idxd_name[] = { "dsa", + "iax" }; const char *idxd_get_dev_name(struct idxd_device *idxd) @@ -380,6 +384,14 @@ static int idxd_probe(struct idxd_device *idxd) return rc; } +static void idxd_type_init(struct idxd_device *idxd) +{ + if (idxd->type == IDXD_TYPE_DSA) + idxd->compl_size = sizeof(struct dsa_completion_record); + else if (idxd->type == IDXD_TYPE_IAX) + idxd->compl_size = sizeof(struct iax_completion_record); +} + static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; @@ -415,6 +427,8 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) idxd_set_type(idxd); + idxd_type_init(idxd); + dev_dbg(dev, "Set PCI master\n"); pci_set_master(pdev); pci_set_drvdata(pdev, idxd); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 5f52584b4510..751ecb4f9f81 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -5,6 +5,7 @@ /* PCI Config */ #define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 +#define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe #define IDXD_MMIO_BAR 0 #define IDXD_WQ_BAR 2 diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 77f4799520b8..10b411c22512 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -15,7 +15,7 @@ static struct idxd_desc *__get_desc(struct 
idxd_wq *wq, int idx, int cpu) desc = wq->descs[idx]; memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); - memset(desc->completion, 0, sizeof(struct dsa_completion_record)); + memset(desc->completion, 0, idxd->compl_size); desc->cpu = cpu; if (device_pasid_enabled(idxd)) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 7aeb08b9cf6c..601dbeb7d79d 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -41,14 +41,24 @@ static struct device_type dsa_device_type = { .release = idxd_conf_device_release, }; +static struct device_type iax_device_type = { + .name = "iax", + .release = idxd_conf_device_release, +}; + static inline bool is_dsa_dev(struct device *dev) { return dev ? dev->type == &dsa_device_type : false; } +static inline bool is_iax_dev(struct device *dev) +{ + return dev ? dev->type == &iax_device_type : false; +} + static inline bool is_idxd_dev(struct device *dev) { - return is_dsa_dev(dev); + return is_dsa_dev(dev) || is_iax_dev(dev); } static inline bool is_idxd_wq_dev(struct device *dev) @@ -354,8 +364,17 @@ struct bus_type dsa_bus_type = { .shutdown = idxd_config_bus_shutdown, }; +struct bus_type iax_bus_type = { + .name = "iax", + .match = idxd_config_bus_match, + .probe = idxd_config_bus_probe, + .remove = idxd_config_bus_remove, + .shutdown = idxd_config_bus_shutdown, +}; + static struct bus_type *idxd_bus_types[] = { - &dsa_bus_type + &dsa_bus_type, + &iax_bus_type }; static struct idxd_device_driver dsa_drv = { @@ -367,8 +386,18 @@ static struct idxd_device_driver dsa_drv = { }, }; +static struct idxd_device_driver iax_drv = { + .drv = { + .name = "iax", + .bus = &iax_bus_type, + .owner = THIS_MODULE, + .mod_name = KBUILD_MODNAME, + }, +}; + static struct idxd_device_driver *idxd_drvs[] = { - &dsa_drv + &dsa_drv, + &iax_drv }; struct bus_type *idxd_get_bus_type(struct idxd_device *idxd) @@ -380,6 +409,8 @@ static struct device_type *idxd_get_device_type(struct idxd_device *idxd) { if (idxd->type == IDXD_TYPE_DSA) return &dsa_device_type; + else if (idxd->type == IDXD_TYPE_IAX) + return &iax_device_type; else return NULL; } @@ -520,6 +551,9 @@ static ssize_t group_tokens_reserved_store(struct device *dev, if (rc < 0) return -EINVAL; + if (idxd->type == IDXD_TYPE_IAX) + return -EOPNOTSUPP; + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; @@ -565,6 +599,9 @@ static ssize_t group_tokens_allowed_store(struct device *dev, if (rc < 0) return -EINVAL; + if (idxd->type == IDXD_TYPE_IAX) + return -EOPNOTSUPP; + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; @@ -607,6 +644,9 @@ static ssize_t group_use_token_limit_store(struct device *dev, if (rc < 0) return -EINVAL; + if (idxd->type == IDXD_TYPE_IAX) + return -EOPNOTSUPP; + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 9d9ecc0f4c38..e33997b4d750 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -26,6 +26,9 @@ #define IDXD_OP_FLAG_DRDBK 0x4000 #define IDXD_OP_FLAG_DSTS 0x8000 +/* IAX */ +#define IDXD_OP_FLAG_RD_SRC2_AECS 0x010000 + /* Opcode */ enum dsa_opcode { DSA_OPCODE_NOOP = 0, @@ -47,6 +50,14 @@ enum dsa_opcode { DSA_OPCODE_CFLUSH = 0x20, }; +enum iax_opcode { + IAX_OPCODE_NOOP = 0, + IAX_OPCODE_DRAIN = 2, + IAX_OPCODE_MEMMOVE, + IAX_OPCODE_DECOMPRESS = 0x42, + IAX_OPCODE_COMPRESS, +}; + /* Completion record status */ enum dsa_completion_status { DSA_COMP_NONE = 0, @@ -80,6 +91,33 @@ enum dsa_completion_status { 
DSA_COMP_TRANSLATION_FAIL, }; +enum iax_completion_status { + IAX_COMP_NONE = 0, + IAX_COMP_SUCCESS, + IAX_COMP_PAGE_FAULT_IR = 0x04, + IAX_COMP_OUTBUF_OVERFLOW, + IAX_COMP_BAD_OPCODE = 0x10, + IAX_COMP_INVALID_FLAGS, + IAX_COMP_NOZERO_RESERVE, + IAX_COMP_INVALID_SIZE, + IAX_COMP_OVERLAP_BUFFERS = 0x16, + IAX_COMP_INT_HANDLE_INVAL = 0x19, + IAX_COMP_CRA_XLAT, + IAX_COMP_CRA_ALIGN, + IAX_COMP_ADDR_ALIGN, + IAX_COMP_PRIV_BAD, + IAX_COMP_TRAFFIC_CLASS_CONF, + IAX_COMP_PFAULT_RDBA, + IAX_COMP_HW_ERR1, + IAX_COMP_HW_ERR_DRB, + IAX_COMP_TRANSLATION_FAIL, + IAX_COMP_PRS_TIMEOUT, + IAX_COMP_WATCHDOG, + IAX_COMP_INVALID_COMP_FLAG = 0x30, + IAX_COMP_INVALID_FILTER_FLAG, + IAX_COMP_INVALID_NUM_ELEMS = 0x33, +}; + #define DSA_COMP_STATUS_MASK 0x7f #define DSA_COMP_STATUS_WRITE 0x80 @@ -163,6 +201,28 @@ struct dsa_hw_desc { }; } __attribute__((packed)); +struct iax_hw_desc { + uint32_t pasid:20; + uint32_t rsvd:11; + uint32_t priv:1; + uint32_t flags:24; + uint32_t opcode:8; + uint64_t completion_addr; + uint64_t src1_addr; + uint64_t dst_addr; + uint32_t src1_size; + uint16_t int_handle; + union { + uint16_t compr_flags; + uint16_t decompr_flags; + }; + uint64_t src2_addr; + uint32_t max_dst_size; + uint32_t src2_size; + uint32_t filter_flags; + uint32_t num_inputs; +} __attribute__((packed)); + struct dsa_raw_desc { uint64_t field[8]; } __attribute__((packed)); @@ -223,4 +283,23 @@ struct dsa_raw_completion_record { uint64_t field[4]; } __attribute__((packed)); +struct iax_completion_record { + volatile uint8_t status; + uint8_t error_code; + uint16_t rsvd; + uint32_t bytes_completed; + uint64_t fault_addr; + uint32_t invalid_flags; + uint32_t rsvd2; + uint32_t output_size; + uint8_t output_bits; + uint8_t rsvd3; + uint16_t rsvd4; + uint64_t rsvd5[4]; +} __attribute__((packed)); + +struct iax_raw_completion_record { + uint64_t field[8]; +} __attribute__((packed)); + #endif -- Gitee From ad6290f0ff975748c5250131d2532a7562e7047e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 15 Jan 2021 14:52:52 -0700 Subject: [PATCH 008/173] dmaengine: idxd: Fix list corruption in description completion mainline inclusion from mainline-v5.11 commit 16e19e11228ba660d9e322035635e7dcf160d5c2 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 16e19e11228b dmaengine: idxd: Fix list corruption in description completion. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Sanjay reported the following kernel splat after running dmatest for stress testing. The current code is giving up the spinlock in the middle of a completion list walk, and that opens up opportunity for list corruption if another thread touches the list at the same time. In order to make sure the list is always protected, the hardware completed descriptors will be put on a local list to be completed with callbacks from the outside of the list lock. 
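An illustrative userspace sketch of the pattern the fix adopts (names are not the
driver's): finished entries are unlinked onto a private list while the lock is held, and
their callbacks run only after the lock is dropped, so a concurrent thread never observes
a half-walked list. The reported splat follows.

    #include <pthread.h>
    #include <stdlib.h>

    struct desc {
            struct desc *next;
            int done;
            void (*complete)(struct desc *);
    };

    static pthread_spinlock_t list_lock;
    static struct desc *work_list;

    static void process_work_list(void)
    {
            struct desc *completed = NULL, **pp, *d;

            pthread_spin_lock(&list_lock);
            pp = &work_list;
            while ((d = *pp)) {
                    if (d->done) {
                            *pp = d->next;       /* unlink under the lock */
                            d->next = completed;
                            completed = d;       /* park on a private list */
                    } else {
                            pp = &d->next;
                    }
            }
            pthread_spin_unlock(&list_lock);

            while ((d = completed)) {            /* lock already dropped */
                    completed = d->next;
                    d->complete(d);
                    free(d);
            }
    }

    int main(void)
    {
            pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);
            process_work_list(); /* empty list: nothing to do */
            return 0;
    }
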
kernel: general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] SMP NOPTI kernel: CPU: 62 PID: 1814 Comm: irq/89-idxd-por Tainted: G W 5.10.0-intel-next_10_16+ #1 kernel: Hardware name: Intel Corporation ArcherCity/ArcherCity, BIOS EGSDCRB1.SBT.4915.D02.2012070418 12/07/2020 kernel: RIP: 0010:irq_process_work_list+0xcd/0x170 [idxd] kernel: Code: e8 18 65 5c d3 8b 45 d4 85 c0 75 b3 4c 89 f7 e8 b9 fe ff ff 84 c0 74 bf 4c 89 e7 e8 dd 6b 5c d3 49 8b 3f 49 8b 4f 08 48 89 c6 <48> 89 4f 08 48 89 39 4c 89 e7 48 b9 00 01 00 00 00 00 ad de 49 89 kernel: RSP: 0018:ff256768c4353df8 EFLAGS: 00010046 kernel: RAX: 0000000000000202 RBX: dead000000000100 RCX: dead000000000122 kernel: RDX: 0000000000000001 RSI: 0000000000000202 RDI: dead000000000100 kernel: RBP: ff256768c4353e40 R08: 0000000000000001 R09: 0000000000000000 kernel: R10: 0000000000000000 R11: 00000000ffffffff R12: ff1fdf9fd06b3e48 kernel: R13: 0000000000000005 R14: ff1fdf9fc4275980 R15: ff1fdf9fc4275a00 kernel: FS: 0000000000000000(0000) GS:ff1fdfa32f880000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007f87f24012a0 CR3: 000000010f630004 CR4: 0000000003771ee0 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kernel: DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 kernel: PKRU: 55555554 kernel: Call Trace: kernel: ? irq_thread+0xa9/0x1b0 kernel: idxd_wq_thread+0x34/0x90 [idxd] kernel: irq_thread_fn+0x24/0x60 kernel: irq_thread+0x10f/0x1b0 kernel: ? irq_forced_thread_fn+0x80/0x80 kernel: ? wake_threads_waitq+0x30/0x30 kernel: ? irq_thread_check_affinity+0xe0/0xe0 kernel: kthread+0x142/0x160 kernel: ? kthread_park+0x90/0x90 kernel: ret_from_fork+0x1f/0x30 kernel: Modules linked in: idxd_ktest dmatest intel_rapl_msr idxd_mdev iTCO_wdt vfio_pci vfio_virqfd iTCO_vendor_support intel_rapl_common i10nm_edac nfit x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper rapl msr pcspkr snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_dspcfg ofpart snd_hda_codec snd_hda_core snd_hwdep cmdlinepart snd_seq snd_seq_device idxd snd_pcm intel_spi_pci intel_spi snd_timer spi_nor input_leds joydev snd i2c_i801 mtd soundcore i2c_smbus mei_me mei i2c_ismt ipmi_ssif acpi_ipmi ipmi_si ipmi_devintf ipmi_msghandler mac_hid sunrpc nls_iso8859_1 sch_fq_codel ip_tables x_tables xfs libcrc32c ast drm_vram_helper drm_ttm_helper ttm igc wmi pinctrl_sunrisepoint hid_generic usbmouse usbkbd usbhid hid kernel: ---[ end trace cd5ca950ef0db25f ]--- Reported-by: Sanjay Kumar Signed-off-by: Dave Jiang Tested-by: Sanjay Kumar Fixes: e4f4d8cdeb9a ("dmaengine: idxd: Clean up descriptors with fault error") Link: https://lore.kernel.org/r/161074757267.2183951.17912830858060607266.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 86 +++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 829ee6590a74..f287233fff65 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -239,29 +239,27 @@ irqreturn_t idxd_misc_thread(int vec, void *data) return IRQ_HANDLED; } -static bool process_fault(struct idxd_desc *desc, u64 fault_addr) +static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr) { /* * Completion address can be bad as 
well. Check fault address match for descriptor * and completion address. */ - if ((u64)desc->hw == fault_addr || - (u64)desc->completion == fault_addr) { - idxd_dma_complete_txd(desc, IDXD_COMPLETE_DEV_FAIL); + if ((u64)desc->hw == fault_addr || (u64)desc->completion == fault_addr) { + struct idxd_device *idxd = desc->wq->idxd; + struct device *dev = &idxd->pdev->dev; + + dev_warn(dev, "desc with fault address: %#llx\n", fault_addr); return true; } return false; } -static bool complete_desc(struct idxd_desc *desc) +static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) { - if (desc->completion->status) { - idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL); - return true; - } - - return false; + idxd_dma_complete_txd(desc, reason); + idxd_free_desc(desc->wq, desc); } static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, @@ -271,25 +269,25 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, struct idxd_desc *desc, *t; struct llist_node *head; int queued = 0; - bool completed = false; unsigned long flags; + enum idxd_complete_type reason; *processed = 0; head = llist_del_all(&irq_entry->pending_llist); if (!head) goto out; - llist_for_each_entry_safe(desc, t, head, llnode) { - if (wtype == IRQ_WORK_NORMAL) - completed = complete_desc(desc); - else if (wtype == IRQ_WORK_PROCESS_FAULT) - completed = process_fault(desc, data); + if (wtype == IRQ_WORK_NORMAL) + reason = IDXD_COMPLETE_NORMAL; + else + reason = IDXD_COMPLETE_DEV_FAIL; - if (completed) { - idxd_free_desc(desc->wq, desc); + llist_for_each_entry_safe(desc, t, head, llnode) { + if (desc->completion->status) { + if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + match_fault(desc, data); + complete_desc(desc, reason); (*processed)++; - if (wtype == IRQ_WORK_PROCESS_FAULT) - break; } else { spin_lock_irqsave(&irq_entry->list_lock, flags); list_add_tail(&desc->list, @@ -307,42 +305,46 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, enum irq_work_type wtype, int *processed, u64 data) { - struct list_head *node, *next; int queued = 0; - bool completed = false; unsigned long flags; + LIST_HEAD(flist); + struct idxd_desc *desc, *n; + enum idxd_complete_type reason; *processed = 0; - spin_lock_irqsave(&irq_entry->list_lock, flags); - if (list_empty(&irq_entry->work_list)) - goto out; - - list_for_each_safe(node, next, &irq_entry->work_list) { - struct idxd_desc *desc = - container_of(node, struct idxd_desc, list); + if (wtype == IRQ_WORK_NORMAL) + reason = IDXD_COMPLETE_NORMAL; + else + reason = IDXD_COMPLETE_DEV_FAIL; + /* + * This lock protects list corruption from access of list outside of the irq handler + * thread. 
+ */ + spin_lock_irqsave(&irq_entry->list_lock, flags); + if (list_empty(&irq_entry->work_list)) { spin_unlock_irqrestore(&irq_entry->list_lock, flags); - if (wtype == IRQ_WORK_NORMAL) - completed = complete_desc(desc); - else if (wtype == IRQ_WORK_PROCESS_FAULT) - completed = process_fault(desc, data); + return 0; + } - if (completed) { - spin_lock_irqsave(&irq_entry->list_lock, flags); + list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) { + if (desc->completion->status) { list_del(&desc->list); - spin_unlock_irqrestore(&irq_entry->list_lock, flags); - idxd_free_desc(desc->wq, desc); (*processed)++; - if (wtype == IRQ_WORK_PROCESS_FAULT) - return queued; + list_add_tail(&desc->list, &flist); } else { queued++; } - spin_lock_irqsave(&irq_entry->list_lock, flags); } - out: spin_unlock_irqrestore(&irq_entry->list_lock, flags); + + list_for_each_entry(desc, &flist, list) { + if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + match_fault(desc, data); + complete_desc(desc, reason); + } + return queued; } -- Gitee From 4c931f1adda007dcdd1673bbff1f794670cfff0f Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 24 Dec 2020 21:22:54 +0800 Subject: [PATCH 009/173] dma: idxd: use DEFINE_MUTEX() for mutex lock mainline inclusion from mainline-v5.12 commit e2fcd6e427c2fae92b366b24759f95d77b6f7bc7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit e2fcd6e427c2 dma: idxd: use DEFINE_MUTEX() for mutex lock. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- mutex lock can be initialized automatically with DEFINE_MUTEX() rather than explicitly calling mutex_init(). Signed-off-by: Zheng Yongjun Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20201224132254.30961-1-zhengyongjun3@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 8ba2869ce17a..e54e6471263b 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -31,7 +31,7 @@ MODULE_AUTHOR("Intel Corporation"); bool support_enqcmd; static struct idr idxd_idrs[IDXD_TYPE_MAX]; -static struct mutex idxd_idr_lock; +static DEFINE_MUTEX(idxd_idr_lock); static struct pci_device_id idxd_pci_tbl[] = { /* DSA ver 1.0 platforms */ @@ -547,7 +547,6 @@ static int __init idxd_init_module(void) else support_enqcmd = true; - mutex_init(&idxd_idr_lock); for (i = 0; i < IDXD_TYPE_MAX; i++) idr_init(&idxd_idrs[i]); -- Gitee From 9a5f58fadb688d65a8b863d8a9b71c3186492ef3 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 7 Jan 2021 09:44:51 -0700 Subject: [PATCH 010/173] x86/asm: Annotate movdir64b()'s dst argument with __iomem mainline inclusion from mainline-v5.12 commit 6ae58d871319dc22ef780baaacd393f8543a1e74 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 6ae58d871319 x86/asm: Annotate movdir64b()'s dst argument with __iomem. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add a missing __iomem annotation to address a sparse warning. The caller is expected to pass an __iomem annotated pointer to this function. The current usages send a 64-bytes command descriptor to an MMIO location (portal) on a device for consumption. 
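As a hedged illustration of that usage (the BAR index and descriptor setup here are hypothetical; the call shape is what matters), the destination is an ioremapped portal, so the pointer naturally carries __iomem:

	struct dsa_hw_desc desc = { .opcode = DSA_OPCODE_NOOP };
	void __iomem *portal;

	portal = pci_iomap(pdev, 2, 0);		/* example portal BAR */
	if (portal)
		movdir64b(portal, &desc);	/* one 64-byte posted MMIO write */

Annotating the prototype lets sparse verify exactly this pattern.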
When future usages for the MOVDIR64B instruction warrant a separate variant of a memory to memory operation, the argument annotation can be revisited. Also, from the comment in movdir64b() @__dst must be supplied as an lvalue because this tells the compiler what the object is (its size) the instruction accesses. I.e., not the pointers but what they point to, thus the deref'ing '*'." The actual sparse warning is: sparse warnings: (new ones prefixed by >>) drivers/dma/idxd/submit.c: note: in included file (through include/linux/io.h, include/linux/pci.h): >> arch/x86/include/asm/io.h:422:27: sparse: sparse: incorrect type in \ argument 1 (different address spaces) @@ expected void *dst @@ got void [noderef] __iomem *dst @@ arch/x86/include/asm/io.h:422:27: sparse: expected void *dst arch/x86/include/asm/io.h:422:27: sparse: got void [noderef] __iomem *dst [ bp: Massage commit message. ] Fixes: 0888e1030d3e ("x86/asm: Carve out a generic movdir64b() helper for general usage") Reported-by: kernel test robot Signed-off-by: Dave Jiang Signed-off-by: Borislav Petkov Reviewed-by: Ben Widawsky Reviewed-by: Dan Williams Link: https://lkml.kernel.org/r/161003787823.4062451.6564503265464317197.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Xiaochen Shen --- arch/x86/include/asm/special_insns.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 940959a40443..70cb4e3ab303 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -231,10 +231,10 @@ static inline void serialize(void) } /* The dst parameter must be 64-bytes aligned */ -static inline void movdir64b(void *dst, const void *src) +static inline void movdir64b(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; - struct { char _[64]; } *__dst = dst; + struct { char _[64]; } __iomem *__dst = dst; /* * MOVDIR64B %(rdx), rax. -- Gitee From 4e041b7a6be5fbaa0f5fa15b6dd8bd30510df0bb Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 22 Jan 2021 11:46:00 -0700 Subject: [PATCH 011/173] dmaengine: idxd: add module parameter to force disable of SVA mainline inclusion from mainline-v5.12 commit 03d939c7e3d8800a9feb54808929c5776ac510eb category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 03d939c7e3d8 dmaengine: idxd: add module parameter to force disable of SVA. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add a module parameter that overrides the SVA feature enabling. This keeps the driver in legacy mode even when intel_iommu=sm_on is set. In this mode, the descriptor fields must be programmed with dma_addr_t from the Linux DMA API for source, destination, and completion descriptors. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161134110457.4005461.13171197785259115852.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ drivers/dma/idxd/init.c | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8cf9dc031937..ebc997e6903a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1747,6 +1747,12 @@ In such case C2/C3 won't be used again. 
idle=nomwait: Disable mwait for CPU C-states + idxd.sva= [HW] + Format: + Allow force disabling of Shared Virtual Memory (SVA) + support for the idxd driver. By default it is set to + true (1). + ieee754= [MIPS] Select IEEE Std 754 conformance mode Format: { strict | legacy | 2008 | relaxed } Default: strict diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index e54e6471263b..6c74544ad6bb 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -26,6 +26,10 @@ MODULE_VERSION(IDXD_DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Intel Corporation"); +static bool sva = true; +module_param(sva, bool, 0644); +MODULE_PARM_DESC(sva, "Toggle SVA support on/off"); + #define DRV_NAME "idxd" bool support_enqcmd; @@ -341,12 +345,14 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); - if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM)) { + if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { rc = idxd_enable_system_pasid(idxd); if (rc < 0) dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc); else set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } else if (!sva) { + dev_warn(dev, "User forced SVA off via module param.\n"); } idxd_read_caps(idxd); -- Gitee From ad8893492609e2eddfe0f757c14600d9020115ad Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 12 Apr 2021 09:23:27 -0700 Subject: [PATCH 012/173] dmaengine: idxd: clear MSIX permission entry on shutdown mainline inclusion from mainline-v5.12 commit 6df0e6c57dfc064af330071f372f11aa8c584997 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 6df0e6c57dfc dmaengine: idxd: clear MSIX permission entry on shutdown. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add disabling/clearing of MSIX permission entries on device shutdown to mirror the enabling of the MSIX entries on probe. Current code left the MSIX enabled and the pasid entries still programmed at device shutdown. 
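For reference, the permission entries are driven through a register overlay union (a sketch only; the authoritative layout lives in drivers/dma/idxd/registers.h and the field order shown here is illustrative):

	union msix_perm {
		struct {
			u32 rsvd:2;
			u32 ignore:1;
			u32 pasid_en:1;
			u32 rsvd2:8;
			u32 pasid:20;
		};
		u32 bits;
	} __packed;

Zeroing mperm.bits therefore clears the PASID value and its enable bit in a single 32-bit store, which is what the new idxd_msix_perm_clear() below relies on.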
Fixes: 8e50d392652f ("dmaengine: idxd: Add shared workqueue support") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161824457969.882533.6020239898682672311.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 30 ++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 11 ++--------- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index ad839432ea54..d255bb016c4d 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -579,6 +579,36 @@ void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid) } /* Device configuration bits */ +void idxd_msix_perm_setup(struct idxd_device *idxd) +{ + union msix_perm mperm; + int i, msixcnt; + + msixcnt = pci_msix_vec_count(idxd->pdev); + if (msixcnt < 0) + return; + + mperm.bits = 0; + mperm.pasid = idxd->pasid; + mperm.pasid_en = device_pasid_enabled(idxd); + for (i = 1; i < msixcnt; i++) + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); +} + +void idxd_msix_perm_clear(struct idxd_device *idxd) +{ + union msix_perm mperm; + int i, msixcnt; + + msixcnt = pci_msix_vec_count(idxd->pdev); + if (msixcnt < 0) + return; + + mperm.bits = 0; + for (i = 1; i < msixcnt; i++) + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); +} + static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 580f8aade7bc..a0371035abd7 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -330,6 +330,8 @@ void idxd_unregister_driver(void); struct bus_type *idxd_get_bus_type(struct idxd_device *idxd); /* device interrupt control */ +void idxd_msix_perm_setup(struct idxd_device *idxd); +void idxd_msix_perm_clear(struct idxd_device *idxd); irqreturn_t idxd_irq_handler(int vec, void *data); irqreturn_t idxd_misc_thread(int vec, void *data); irqreturn_t idxd_wq_thread(int irq, void *data); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 6c74544ad6bb..e689481e1a4c 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -65,7 +65,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) struct idxd_irq_entry *irq_entry; int i, msixcnt; int rc = 0; - union msix_perm mperm; msixcnt = pci_msix_vec_count(pdev); if (msixcnt < 0) { @@ -144,14 +143,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } idxd_unmask_error_interrupts(idxd); - - /* Setup MSIX permission table */ - mperm.bits = 0; - mperm.pasid = idxd->pasid; - mperm.pasid_en = device_pasid_enabled(idxd); - for (i = 1; i < msixcnt; i++) - iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); - + idxd_msix_perm_setup(idxd); return 0; err_no_irq: @@ -510,6 +502,7 @@ static void idxd_shutdown(struct pci_dev *pdev) idxd_flush_work_list(irq_entry); } + idxd_msix_perm_clear(idxd); destroy_workqueue(idxd->wq); } -- Gitee From 057ef6e1bb4b0b3e4fd246fe930182461cd34e1a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Apr 2021 16:37:15 -0700 Subject: [PATCH 013/173] dmaengine: idxd: cleanup pci interrupt vector allocation management mainline inclusion from mainline-v5.13 commit 5fc8e85ff12ce0530ac658686902a0ee64600f56 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 5fc8e85ff12c dmaengine: idxd: cleanup pci interrupt vector allocation 
management. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The devm managed lifetime is incompatible with 'struct device' objects that resides in idxd context. This is one of the series that clean up the idxd driver 'struct device' lifetime. Remove devm managed pci interrupt vectors and replace with unmanged allocators. Reported-by: Jason Gunthorpe Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Signed-off-by: Dave Jiang Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/161852983563.2203940.8116028229124776669.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 4 +-- drivers/dma/idxd/idxd.h | 2 +- drivers/dma/idxd/init.c | 64 +++++++++++++++++---------------------- 3 files changed, 30 insertions(+), 40 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index d255bb016c4d..3f696abd74ac 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -19,7 +19,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, /* Interrupt control bits */ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector); + struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); pci_msi_mask_irq(data); } @@ -36,7 +36,7 @@ void idxd_mask_msix_vectors(struct idxd_device *idxd) void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector); + struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); pci_msi_unmask_irq(data); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index a0371035abd7..786597812cd8 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -36,6 +36,7 @@ struct idxd_device_driver { struct idxd_irq_entry { struct idxd_device *idxd; int id; + int vector; struct llist_head pending_llist; struct list_head work_list; /* @@ -220,7 +221,6 @@ struct idxd_device { union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; - struct msix_entry *msix_entries; int num_wq_irqs; struct idxd_irq_entry *irq_entries; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index e689481e1a4c..61c8dfc42edf 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -61,7 +61,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct device *dev = &pdev->dev; - struct msix_entry *msix; struct idxd_irq_entry *irq_entry; int i, msixcnt; int rc = 0; @@ -69,23 +68,13 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) msixcnt = pci_msix_vec_count(pdev); if (msixcnt < 0) { dev_err(dev, "Not MSI-X interrupt capable.\n"); - goto err_no_irq; + return -ENOSPC; } - idxd->msix_entries = devm_kzalloc(dev, sizeof(struct msix_entry) * - msixcnt, GFP_KERNEL); - if (!idxd->msix_entries) { - rc = -ENOMEM; - goto err_no_irq; - } - - for (i = 0; i < msixcnt; i++) - idxd->msix_entries[i].entry = i; - - rc = pci_enable_msix_exact(pdev, idxd->msix_entries, msixcnt); - if (rc) { - dev_err(dev, "Failed enabling %d MSIX entries.\n", msixcnt); - goto err_no_irq; + rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX); + if (rc != msixcnt) { + dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc); + return -ENOSPC; } dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt); @@ -98,58 +87,57 
@@ static int idxd_setup_interrupts(struct idxd_device *idxd) GFP_KERNEL); if (!idxd->irq_entries) { rc = -ENOMEM; - goto err_no_irq; + goto err_irq_entries; } for (i = 0; i < msixcnt; i++) { idxd->irq_entries[i].id = i; idxd->irq_entries[i].idxd = idxd; + idxd->irq_entries[i].vector = pci_irq_vector(pdev, i); spin_lock_init(&idxd->irq_entries[i].list_lock); } - msix = &idxd->msix_entries[0]; irq_entry = &idxd->irq_entries[0]; - rc = devm_request_threaded_irq(dev, msix->vector, idxd_irq_handler, - idxd_misc_thread, 0, "idxd-misc", - irq_entry); + rc = request_threaded_irq(irq_entry->vector, idxd_irq_handler, idxd_misc_thread, + 0, "idxd-misc", irq_entry); if (rc < 0) { dev_err(dev, "Failed to allocate misc interrupt.\n"); - goto err_no_irq; + goto err_misc_irq; } - dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", - msix->vector); + dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector); /* first MSI-X entry is not for wq interrupts */ idxd->num_wq_irqs = msixcnt - 1; for (i = 1; i < msixcnt; i++) { - msix = &idxd->msix_entries[i]; irq_entry = &idxd->irq_entries[i]; init_llist_head(&idxd->irq_entries[i].pending_llist); INIT_LIST_HEAD(&idxd->irq_entries[i].work_list); - rc = devm_request_threaded_irq(dev, msix->vector, - idxd_irq_handler, - idxd_wq_thread, 0, - "idxd-portal", irq_entry); + rc = request_threaded_irq(irq_entry->vector, idxd_irq_handler, + idxd_wq_thread, 0, "idxd-portal", irq_entry); if (rc < 0) { - dev_err(dev, "Failed to allocate irq %d.\n", - msix->vector); - goto err_no_irq; + dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector); + goto err_wq_irqs; } - dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", - i, msix->vector); + dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector); } idxd_unmask_error_interrupts(idxd); idxd_msix_perm_setup(idxd); return 0; - err_no_irq: + err_wq_irqs: + while (--i >= 0) { + irq_entry = &idxd->irq_entries[i]; + free_irq(irq_entry->vector, irq_entry); + } + err_misc_irq: /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); - pci_disable_msix(pdev); + err_irq_entries: + pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); return rc; } @@ -495,7 +483,8 @@ static void idxd_shutdown(struct pci_dev *pdev) for (i = 0; i < msixcnt; i++) { irq_entry = &idxd->irq_entries[i]; - synchronize_irq(idxd->msix_entries[i].vector); + synchronize_irq(irq_entry->vector); + free_irq(irq_entry->vector, irq_entry); if (i == 0) continue; idxd_flush_pending_llist(irq_entry); @@ -503,6 +492,7 @@ static void idxd_shutdown(struct pci_dev *pdev) } idxd_msix_perm_clear(idxd); + pci_free_irq_vectors(pdev); destroy_workqueue(idxd->wq); } -- Gitee From c5dac2b2c4bfc5e25e15bba3c2f793d19eed25f5 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Apr 2021 16:37:21 -0700 Subject: [PATCH 014/173] dmaengine: idxd: removal of pcim managed mmio mapping mainline inclusion from mainline-v5.13 commit a39c7cd0438ee2f0b859ee1eb86cdc52217d2223 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit a39c7cd0438e dmaengine: idxd: removal of pcim managed mmio mapping. Incremental backporting patches for DSA/IAX on Intel Xeon platform. -------------------------------- The devm managed lifetime is incompatible with 'struct device' objects that resides in idxd context. This is one of the series that clean up the idxd driver 'struct device' lifetime. 
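The incompatibility can be sketched as follows (illustrative pseudo-flow, not driver code): devm-managed memory is released at driver unbind, while a refcounted conf_dev may legitimately outlive the unbind.

	/* freed automatically when the driver unbinds ... */
	obj = devm_kzalloc(&pdev->dev, sizeof(*obj), GFP_KERNEL);
	device_initialize(&obj->conf_dev);

	/*
	 * ... but an open sysfs file or any get_device() caller can still
	 * hold a reference after unbind, and its eventual put_device()
	 * would then operate on freed memory.
	 */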
Remove pcim_* management of the PCI device and the ioremap of MMIO BAR and replace with unmanaged versions. This is for consistency of removing all the pcim/devm based calls. Reported-by: Jason Gunthorpe Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Signed-off-by: Dave Jiang Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/161852984150.2203940.8043988289748519056.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 61c8dfc42edf..9aa7c5b1ad27 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -384,32 +384,36 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct idxd_device *idxd; int rc; - rc = pcim_enable_device(pdev); + rc = pci_enable_device(pdev); if (rc) return rc; dev_dbg(dev, "Alloc IDXD context\n"); idxd = idxd_alloc(pdev); - if (!idxd) - return -ENOMEM; + if (!idxd) { + rc = -ENOMEM; + goto err_idxd_alloc; + } dev_dbg(dev, "Mapping BARs\n"); - idxd->reg_base = pcim_iomap(pdev, IDXD_MMIO_BAR, 0); - if (!idxd->reg_base) - return -ENOMEM; + idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0); + if (!idxd->reg_base) { + rc = -ENOMEM; + goto err_iomap; + } dev_dbg(dev, "Set DMA masks\n"); rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (rc) rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (rc) - return rc; + goto err; rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (rc) rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (rc) - return rc; + goto err; idxd_set_type(idxd); @@ -423,13 +427,13 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) rc = idxd_probe(idxd); if (rc) { dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n"); - return -ENODEV; + goto err; } rc = idxd_setup_sysfs(idxd); if (rc) { dev_err(dev, "IDXD sysfs setup failed\n"); - return -ENODEV; + goto err; } idxd->state = IDXD_DEV_CONF_READY; @@ -438,6 +442,13 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) idxd->hw.version); return 0; + + err: + pci_iounmap(pdev, idxd->reg_base); + err_iomap: + err_idxd_alloc: + pci_disable_device(pdev); + return rc; } static void idxd_flush_pending_llist(struct idxd_irq_entry *ie) @@ -493,6 +504,8 @@ static void idxd_shutdown(struct pci_dev *pdev) idxd_msix_perm_clear(idxd); pci_free_irq_vectors(pdev); + pci_iounmap(pdev, idxd->reg_base); + pci_disable_device(pdev); destroy_workqueue(idxd->wq); } -- Gitee From 1b7e2b940ce7f18d259b6d21de1f23206624f11d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Apr 2021 16:37:27 -0700 Subject: [PATCH 015/173] dmaengine: idxd: use ida for device instance enumeration mainline inclusion from mainline-v5.13 commit f7f7739847bd68b3c3103fd1b50d943038bd14c7 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit f7f7739847bd dmaengine: idxd: use ida for device instance enumeration. Incremental backporting patches for DSA/IAX on Intel Xeon platform. -------------------------------- The idr is only used for an device id, never to lookup context from that id. Switch to plain ida. 
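A minimal sketch of the replacement pattern (generic names): an ida hands out small unique integers and, unlike the idr usage being removed, needs no external mutex because it synchronizes internally.

	static DEFINE_IDA(example_ida);

	int id = ida_alloc(&example_ida, GFP_KERNEL);
	if (id < 0)
		return id;			/* negative errno on failure */
	/* ... use id purely as a name, e.g. "dsa%d" ... */
	ida_free(&example_ida, id);

This is also why the idxd_idr_lock mutex disappears in the diff below.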
Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Reported-by: Jason Gunthorpe Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161852984730.2203940.15032482460902003819.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 9aa7c5b1ad27..abbe64e4f104 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -34,8 +34,7 @@ MODULE_PARM_DESC(sva, "Toggle SVA support on/off"); bool support_enqcmd; -static struct idr idxd_idrs[IDXD_TYPE_MAX]; -static DEFINE_MUTEX(idxd_idr_lock); +static struct ida idxd_idas[IDXD_TYPE_MAX]; static struct pci_device_id idxd_pci_tbl[] = { /* DSA ver 1.0 platforms */ @@ -348,12 +347,10 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD interrupt setup complete.\n"); - mutex_lock(&idxd_idr_lock); - idxd->id = idr_alloc(&idxd_idrs[idxd->type], idxd, 0, 0, GFP_KERNEL); - mutex_unlock(&idxd_idr_lock); + idxd->id = ida_alloc(&idxd_idas[idxd->type], GFP_KERNEL); if (idxd->id < 0) { rc = -ENOMEM; - goto err_idr_fail; + goto err_ida_fail; } idxd->major = idxd_cdev_get_major(idxd); @@ -361,7 +358,7 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id); return 0; - err_idr_fail: + err_ida_fail: idxd_mask_error_interrupts(idxd); idxd_mask_msix_vectors(idxd); err_setup: @@ -518,9 +515,7 @@ static void idxd_remove(struct pci_dev *pdev) idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); - mutex_lock(&idxd_idr_lock); - idr_remove(&idxd_idrs[idxd->type], idxd->id); - mutex_unlock(&idxd_idr_lock); + ida_free(&idxd_idas[idxd->type], idxd->id); } static struct pci_driver idxd_pci_driver = { @@ -550,7 +545,7 @@ static int __init idxd_init_module(void) support_enqcmd = true; for (i = 0; i < IDXD_TYPE_MAX; i++) - idr_init(&idxd_idrs[i]); + ida_init(&idxd_idas[i]); err = idxd_register_bus_type(); if (err < 0) -- Gitee From c9e9ba4aa3c2d2421d439535625080b099bb5f32 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Apr 2021 16:37:33 -0700 Subject: [PATCH 016/173] dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime mainline inclusion from mainline-v5.13 commit 47c16ac27d4cb664cee53ee0b9b7e2f907923fb3 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 47c16ac27d4c dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The devm managed lifetime is incompatible with 'struct device' objects that resides in idxd context. This is one of the series that clean up the idxd driver 'struct device' lifetime. Fix idxd->conf_dev 'struct device' lifetime. Address issues flagged by CONFIG_DEBUG_KOBJECT_RELEASE. Add release functions in order to free the allocated memory at the appropriate time. 
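The ownership rule adopted below can be summarized with a short sketch (condensed from the diff): after device_initialize(), the object belongs to the refcount, every error path becomes a put_device(), and the device type's release() callback performs the final kfree().

	device_initialize(&idxd->conf_dev);	/* refcount = 1, release() armed */
	rc = dev_set_name(&idxd->conf_dev, "%s%d",
			  idxd_get_dev_name(idxd), idxd->id);
	if (rc < 0) {
		put_device(&idxd->conf_dev);	/* frees idxd via release() */
		return NULL;
	}
	/* device_add() happens later, in idxd_register_devices() */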
Reported-by: Jason Gunthorpe Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161852985319.2203940.4650791514462735368.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 36 ++++++++++------ drivers/dma/idxd/init.c | 56 ++++++++++++++++-------- drivers/dma/idxd/sysfs.c | 92 ++++++++++++++-------------------------- 3 files changed, 94 insertions(+), 90 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 786597812cd8..849b8a59aef2 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "registers.h" #define IDXD_DRIVER_VERSION "1.00" @@ -256,6 +257,23 @@ extern struct bus_type dsa_bus_type; extern struct bus_type iax_bus_type; extern bool support_enqcmd; +extern struct device_type dsa_device_type; +extern struct device_type iax_device_type; + +static inline bool is_dsa_dev(struct device *dev) +{ + return dev->type == &dsa_device_type; +} + +static inline bool is_iax_dev(struct device *dev) +{ + return dev->type == &iax_device_type; +} + +static inline bool is_idxd_dev(struct device *dev) +{ + return is_dsa_dev(dev) || is_iax_dev(dev); +} static inline bool wq_dedicated(struct idxd_wq *wq) { @@ -293,18 +311,6 @@ static inline int idxd_get_wq_portal_full_offset(int wq_id, return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot); } -static inline void idxd_set_type(struct idxd_device *idxd) -{ - struct pci_dev *pdev = idxd->pdev; - - if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0) - idxd->type = IDXD_TYPE_DSA; - else if (pdev->device == PCI_DEVICE_ID_INTEL_IAX_SPR0) - idxd->type = IDXD_TYPE_IAX; - else - idxd->type = IDXD_TYPE_UNKNOWN; -} - static inline void idxd_wq_get(struct idxd_wq *wq) { wq->client_count++; @@ -320,14 +326,16 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq) return wq->client_count; }; +struct ida *idxd_ida(struct idxd_device *idxd); const char *idxd_get_dev_name(struct idxd_device *idxd); int idxd_register_bus_type(void); void idxd_unregister_bus_type(void); -int idxd_setup_sysfs(struct idxd_device *idxd); -void idxd_cleanup_sysfs(struct idxd_device *idxd); +int idxd_register_devices(struct idxd_device *idxd); +void idxd_unregister_devices(struct idxd_device *idxd); int idxd_register_driver(void); void idxd_unregister_driver(void); struct bus_type *idxd_get_bus_type(struct idxd_device *idxd); +struct device_type *idxd_get_device_type(struct idxd_device *idxd); /* device interrupt control */ void idxd_msix_perm_setup(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index abbe64e4f104..c0710893f8e6 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -51,6 +51,11 @@ static char *idxd_name[] = { "iax" }; +struct ida *idxd_ida(struct idxd_device *idxd) +{ + return &idxd_idas[idxd->type]; +} + const char *idxd_get_dev_name(struct idxd_device *idxd) { return idxd_name[idxd->type]; @@ -81,9 +86,8 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) * We implement 1 completion list per MSI-X entry except for * entry 0, which is for errors and others. 
*/ - idxd->irq_entries = devm_kcalloc(dev, msixcnt, - sizeof(struct idxd_irq_entry), - GFP_KERNEL); + idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry), + GFP_KERNEL, dev_to_node(dev)); if (!idxd->irq_entries) { rc = -ENOMEM; goto err_irq_entries; @@ -262,16 +266,44 @@ static void idxd_read_caps(struct idxd_device *idxd) } } +static inline void idxd_set_type(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + + if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0) + idxd->type = IDXD_TYPE_DSA; + else if (pdev->device == PCI_DEVICE_ID_INTEL_IAX_SPR0) + idxd->type = IDXD_TYPE_IAX; + else + idxd->type = IDXD_TYPE_UNKNOWN; +} + static struct idxd_device *idxd_alloc(struct pci_dev *pdev) { struct device *dev = &pdev->dev; struct idxd_device *idxd; + int rc; - idxd = devm_kzalloc(dev, sizeof(struct idxd_device), GFP_KERNEL); + idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev)); if (!idxd) return NULL; idxd->pdev = pdev; + idxd_set_type(idxd); + idxd->id = ida_alloc(idxd_ida(idxd), GFP_KERNEL); + if (idxd->id < 0) + return NULL; + + device_initialize(&idxd->conf_dev); + idxd->conf_dev.parent = dev; + idxd->conf_dev.bus = idxd_get_bus_type(idxd); + idxd->conf_dev.type = idxd_get_device_type(idxd); + rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd_get_dev_name(idxd), idxd->id); + if (rc < 0) { + put_device(&idxd->conf_dev); + return NULL; + } + spin_lock_init(&idxd->dev_lock); return idxd; @@ -347,20 +379,11 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD interrupt setup complete.\n"); - idxd->id = ida_alloc(&idxd_idas[idxd->type], GFP_KERNEL); - if (idxd->id < 0) { - rc = -ENOMEM; - goto err_ida_fail; - } - idxd->major = idxd_cdev_get_major(idxd); dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id); return 0; - err_ida_fail: - idxd_mask_error_interrupts(idxd); - idxd_mask_msix_vectors(idxd); err_setup: if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); @@ -412,7 +435,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) goto err; - idxd_set_type(idxd); idxd_type_init(idxd); @@ -427,7 +449,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err; } - rc = idxd_setup_sysfs(idxd); + rc = idxd_register_devices(idxd); if (rc) { dev_err(dev, "IDXD sysfs setup failed\n"); goto err; @@ -443,6 +465,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) err: pci_iounmap(pdev, idxd->reg_base); err_iomap: + put_device(&idxd->conf_dev); err_idxd_alloc: pci_disable_device(pdev); return rc; @@ -511,11 +534,10 @@ static void idxd_remove(struct pci_dev *pdev) struct idxd_device *idxd = pci_get_drvdata(pdev); dev_dbg(&pdev->dev, "%s called\n", __func__); - idxd_cleanup_sysfs(idxd); idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); - ida_free(&idxd_idas[idxd->type], idxd->id); + idxd_unregister_devices(idxd); } static struct pci_driver idxd_pci_driver = { diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 601dbeb7d79d..43e4e8a8e862 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -16,51 +16,26 @@ static char *idxd_wq_type_names[] = { [IDXD_WQT_USER] = "user", }; -static void idxd_conf_device_release(struct device *dev) +static void idxd_conf_sub_device_release(struct device *dev) { dev_dbg(dev, "%s for %s\n", __func__, dev_name(dev)); } static struct device_type idxd_group_device_type = { .name = "group", - .release = 
idxd_conf_device_release, + .release = idxd_conf_sub_device_release, }; static struct device_type idxd_wq_device_type = { .name = "wq", - .release = idxd_conf_device_release, + .release = idxd_conf_sub_device_release, }; static struct device_type idxd_engine_device_type = { .name = "engine", - .release = idxd_conf_device_release, + .release = idxd_conf_sub_device_release, }; -static struct device_type dsa_device_type = { - .name = "dsa", - .release = idxd_conf_device_release, -}; - -static struct device_type iax_device_type = { - .name = "iax", - .release = idxd_conf_device_release, -}; - -static inline bool is_dsa_dev(struct device *dev) -{ - return dev ? dev->type == &dsa_device_type : false; -} - -static inline bool is_iax_dev(struct device *dev) -{ - return dev ? dev->type == &iax_device_type : false; -} - -static inline bool is_idxd_dev(struct device *dev) -{ - return is_dsa_dev(dev) || is_iax_dev(dev); -} - static inline bool is_idxd_wq_dev(struct device *dev) { return dev ? dev->type == &idxd_wq_device_type : false; @@ -405,7 +380,7 @@ struct bus_type *idxd_get_bus_type(struct idxd_device *idxd) return idxd_bus_types[idxd->type]; } -static struct device_type *idxd_get_device_type(struct idxd_device *idxd) +struct device_type *idxd_get_device_type(struct idxd_device *idxd) { if (idxd->type == IDXD_TYPE_DSA) return &dsa_device_type; @@ -1658,6 +1633,30 @@ static const struct attribute_group *idxd_attribute_groups[] = { NULL, }; +static void idxd_conf_device_release(struct device *dev) +{ + struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); + + kfree(idxd->groups); + kfree(idxd->wqs); + kfree(idxd->engines); + kfree(idxd->irq_entries); + ida_free(idxd_ida(idxd), idxd->id); + kfree(idxd); +} + +struct device_type dsa_device_type = { + .name = "dsa", + .release = idxd_conf_device_release, + .groups = idxd_attribute_groups, +}; + +struct device_type iax_device_type = { + .name = "iax", + .release = idxd_conf_device_release, + .groups = idxd_attribute_groups, +}; + static int idxd_setup_engine_sysfs(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -1759,39 +1758,14 @@ static int idxd_setup_wq_sysfs(struct idxd_device *idxd) return rc; } -static int idxd_setup_device_sysfs(struct idxd_device *idxd) +int idxd_register_devices(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; int rc; - char devname[IDXD_NAME_SIZE]; - sprintf(devname, "%s%d", idxd_get_dev_name(idxd), idxd->id); - idxd->conf_dev.parent = dev; - dev_set_name(&idxd->conf_dev, "%s", devname); - idxd->conf_dev.bus = idxd_get_bus_type(idxd); - idxd->conf_dev.groups = idxd_attribute_groups; - idxd->conf_dev.type = idxd_get_device_type(idxd); - - dev_dbg(dev, "IDXD device register: %s\n", dev_name(&idxd->conf_dev)); - rc = device_register(&idxd->conf_dev); - if (rc < 0) { - put_device(&idxd->conf_dev); - return rc; - } - - return 0; -} - -int idxd_setup_sysfs(struct idxd_device *idxd) -{ - struct device *dev = &idxd->pdev->dev; - int rc; - - rc = idxd_setup_device_sysfs(idxd); - if (rc < 0) { - dev_dbg(dev, "Device sysfs registering failed: %d\n", rc); + rc = device_add(&idxd->conf_dev); + if (rc < 0) return rc; - } rc = idxd_setup_wq_sysfs(idxd); if (rc < 0) { @@ -1817,7 +1791,7 @@ int idxd_setup_sysfs(struct idxd_device *idxd) return 0; } -void idxd_cleanup_sysfs(struct idxd_device *idxd) +void idxd_unregister_devices(struct idxd_device *idxd) { int i; -- Gitee From dd36acdc1f1c4dac86fdbb1659ef59a16956218c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Apr 2021 
16:37:39 -0700 Subject: [PATCH 017/173] dmaengine: idxd: fix wq conf_dev 'struct device' lifetime mainline inclusion from mainline-v5.13 commit 7c5dd23e57c14cf7177b8a5e0fd08916e0c60005 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7c5dd23e57c1 dmaengine: idxd: fix wq conf_dev 'struct device' lifetime. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Remove devm_* allocation and fix wq->conf_dev 'struct device' lifetime. Address issues flagged by CONFIG_DEBUG_KOBJECT_RELEASE. Add release functions in order to free the allocated memory for the wq context at device destruction time. Reported-by: Jason Gunthorpe Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161852985907.2203940.6840120734115043753.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 6 +-- drivers/dma/idxd/idxd.h | 20 +++++++- drivers/dma/idxd/init.c | 105 ++++++++++++++++++++++++++++---------- drivers/dma/idxd/irq.c | 6 +-- drivers/dma/idxd/sysfs.c | 100 ++++++++++++++++-------------------- 5 files changed, 146 insertions(+), 91 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 3f696abd74ac..c4183294a704 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -520,7 +520,7 @@ void idxd_device_wqs_clear_state(struct idxd_device *idxd) lockdep_assert_held(&idxd->dev_lock); for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (wq->state == IDXD_WQ_ENABLED) { idxd_wq_disable_cleanup(wq); @@ -738,7 +738,7 @@ static int idxd_wqs_config_write(struct idxd_device *idxd) int i, rc; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; rc = idxd_wq_config_write(wq); if (rc < 0) @@ -816,7 +816,7 @@ static int idxd_wqs_setup(struct idxd_device *idxd) } for (i = 0; i < idxd->max_wqs; i++) { - wq = &idxd->wqs[i]; + wq = idxd->wqs[i]; group = wq->group; if (!wq->group) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 849b8a59aef2..7b5bbde1db9f 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -195,7 +195,7 @@ struct idxd_device { spinlock_t dev_lock; /* spinlock for device */ struct completion *cmd_done; struct idxd_group *groups; - struct idxd_wq *wqs; + struct idxd_wq **wqs; struct idxd_engine *engines; struct iommu_sva *sva; @@ -259,6 +259,7 @@ extern struct bus_type iax_bus_type; extern bool support_enqcmd; extern struct device_type dsa_device_type; extern struct device_type iax_device_type; +extern struct device_type idxd_wq_device_type; static inline bool is_dsa_dev(struct device *dev) { @@ -275,6 +276,23 @@ static inline bool is_idxd_dev(struct device *dev) return is_dsa_dev(dev) || is_iax_dev(dev); } +static inline bool is_idxd_wq_dev(struct device *dev) +{ + return dev->type == &idxd_wq_device_type; +} + +static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq) +{ + if (wq->type == IDXD_WQT_KERNEL && strcmp(wq->name, "dmaengine") == 0) + return true; + return false; +} + +static inline bool is_idxd_wq_cdev(struct idxd_wq *wq) +{ + return wq->type == IDXD_WQT_USER; +} + static inline bool wq_dedicated(struct idxd_wq *wq) { return test_bit(WQ_FLAG_DEDICATED, &wq->flags); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 
c0710893f8e6..c9cc742ba2b0 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -145,16 +145,74 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) return rc; } +static int idxd_setup_wqs(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + struct idxd_wq *wq; + int i, rc; + + idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *), + GFP_KERNEL, dev_to_node(dev)); + if (!idxd->wqs) + return -ENOMEM; + + for (i = 0; i < idxd->max_wqs; i++) { + wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev)); + if (!wq) { + rc = -ENOMEM; + goto err; + } + + wq->id = i; + wq->idxd = idxd; + device_initialize(&wq->conf_dev); + wq->conf_dev.parent = &idxd->conf_dev; + wq->conf_dev.bus = idxd_get_bus_type(idxd); + wq->conf_dev.type = &idxd_wq_device_type; + rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id); + if (rc < 0) { + put_device(&wq->conf_dev); + goto err; + } + + mutex_init(&wq->wq_lock); + init_waitqueue_head(&wq->err_queue); + wq->max_xfer_bytes = idxd->max_xfer_bytes; + wq->max_batch_size = idxd->max_batch_size; + wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); + if (!wq->wqcfg) { + put_device(&wq->conf_dev); + rc = -ENOMEM; + goto err; + } + idxd->wqs[i] = wq; + } + + return 0; + + err: + while (--i >= 0) + put_device(&idxd->wqs[i]->conf_dev); + return rc; +} + static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; - int i; + int i, rc; init_waitqueue_head(&idxd->cmd_waitq); + + rc = idxd_setup_wqs(idxd); + if (rc < 0) + return rc; + idxd->groups = devm_kcalloc(dev, idxd->max_groups, sizeof(struct idxd_group), GFP_KERNEL); - if (!idxd->groups) - return -ENOMEM; + if (!idxd->groups) { + rc = -ENOMEM; + goto err; + } for (i = 0; i < idxd->max_groups; i++) { idxd->groups[i].idxd = idxd; @@ -163,40 +221,31 @@ static int idxd_setup_internals(struct idxd_device *idxd) idxd->groups[i].tc_b = -1; } - idxd->wqs = devm_kcalloc(dev, idxd->max_wqs, sizeof(struct idxd_wq), - GFP_KERNEL); - if (!idxd->wqs) - return -ENOMEM; - idxd->engines = devm_kcalloc(dev, idxd->max_engines, sizeof(struct idxd_engine), GFP_KERNEL); - if (!idxd->engines) - return -ENOMEM; - - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; - - wq->id = i; - wq->idxd = idxd; - mutex_init(&wq->wq_lock); - init_waitqueue_head(&wq->err_queue); - wq->max_xfer_bytes = idxd->max_xfer_bytes; - wq->max_batch_size = idxd->max_batch_size; - wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL); - if (!wq->wqcfg) - return -ENOMEM; + if (!idxd->engines) { + rc = -ENOMEM; + goto err; } + for (i = 0; i < idxd->max_engines; i++) { idxd->engines[i].idxd = idxd; idxd->engines[i].id = i; } idxd->wq = create_workqueue(dev_name(dev)); - if (!idxd->wq) - return -ENOMEM; + if (!idxd->wq) { + rc = -ENOMEM; + goto err; + } return 0; + + err: + for (i = 0; i < idxd->max_wqs; i++) + put_device(&idxd->wqs[i]->conf_dev); + return rc; } static void idxd_read_table_offsets(struct idxd_device *idxd) @@ -371,11 +420,11 @@ static int idxd_probe(struct idxd_device *idxd) rc = idxd_setup_internals(idxd); if (rc) - goto err_setup; + goto err; rc = idxd_setup_interrupts(idxd); if (rc) - goto err_setup; + goto err; dev_dbg(dev, "IDXD interrupt setup complete.\n"); @@ -384,7 +433,7 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id); return 0; - err_setup: + err: if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); return rc; diff 
--git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index f287233fff65..fc0781e3f36d 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -45,7 +45,7 @@ static void idxd_device_reinit(struct work_struct *work) goto out; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (wq->state == IDXD_WQ_ENABLED) { rc = idxd_wq_enable(wq); @@ -130,7 +130,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) { int id = idxd->sw_err.wq_idx; - struct idxd_wq *wq = &idxd->wqs[id]; + struct idxd_wq *wq = idxd->wqs[id]; if (wq->type == IDXD_WQT_USER) wake_up_interruptible(&wq->err_queue); @@ -138,7 +138,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) int i; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (wq->type == IDXD_WQT_USER) wake_up_interruptible(&wq->err_queue); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 43e4e8a8e862..b4937b718c24 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -26,34 +26,11 @@ static struct device_type idxd_group_device_type = { .release = idxd_conf_sub_device_release, }; -static struct device_type idxd_wq_device_type = { - .name = "wq", - .release = idxd_conf_sub_device_release, -}; - static struct device_type idxd_engine_device_type = { .name = "engine", .release = idxd_conf_sub_device_release, }; -static inline bool is_idxd_wq_dev(struct device *dev) -{ - return dev ? dev->type == &idxd_wq_device_type : false; -} - -static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq) -{ - if (wq->type == IDXD_WQT_KERNEL && - strcmp(wq->name, "dmaengine") == 0) - return true; - return false; -} - -static inline bool is_idxd_wq_cdev(struct idxd_wq *wq) -{ - return wq->type == IDXD_WQT_USER; -} - static int idxd_config_bus_match(struct device *dev, struct device_driver *drv) { @@ -297,7 +274,7 @@ static int idxd_config_bus_remove(struct device *dev) dev_dbg(dev, "%s removing dev %s\n", __func__, dev_name(&idxd->conf_dev)); for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (wq->state == IDXD_WQ_DISABLED) continue; @@ -309,7 +286,7 @@ static int idxd_config_bus_remove(struct device *dev) idxd_unregister_dma_device(idxd); rc = idxd_device_disable(idxd); for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; mutex_lock(&wq->wq_lock); idxd_wq_disable_cleanup(wq); @@ -678,7 +655,7 @@ static ssize_t group_work_queues_show(struct device *dev, struct idxd_device *idxd = group->idxd; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (!wq->group) continue; @@ -935,7 +912,7 @@ static int total_claimed_wq_size(struct idxd_device *idxd) int wq_size = 0; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; wq_size += wq->size; } @@ -1345,6 +1322,20 @@ static const struct attribute_group *idxd_wq_attribute_groups[] = { NULL, }; +static void idxd_conf_wq_release(struct device *dev) +{ + struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + + kfree(wq->wqcfg); + kfree(wq); +} + +struct device_type idxd_wq_device_type = { + .name = "wq", + .release = idxd_conf_wq_release, + .groups = idxd_wq_attribute_groups, +}; + /* IDXD device attribs */ static 
ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1475,7 +1466,7 @@ static ssize_t clients_show(struct device *dev, spin_lock_irqsave(&idxd->dev_lock, flags); for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; count += wq->client_count; } @@ -1725,70 +1716,67 @@ static int idxd_setup_group_sysfs(struct idxd_device *idxd) return rc; } -static int idxd_setup_wq_sysfs(struct idxd_device *idxd) +static int idxd_register_wq_devices(struct idxd_device *idxd) { - struct device *dev = &idxd->pdev->dev; - int i, rc; + int i, rc, j; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; - - wq->conf_dev.parent = &idxd->conf_dev; - dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id); - wq->conf_dev.bus = idxd_get_bus_type(idxd); - wq->conf_dev.groups = idxd_wq_attribute_groups; - wq->conf_dev.type = &idxd_wq_device_type; - dev_dbg(dev, "WQ device register: %s\n", - dev_name(&wq->conf_dev)); - rc = device_register(&wq->conf_dev); - if (rc < 0) { - put_device(&wq->conf_dev); + struct idxd_wq *wq = idxd->wqs[i]; + + rc = device_add(&wq->conf_dev); + if (rc < 0) goto cleanup; - } } return 0; cleanup: - while (i--) { - struct idxd_wq *wq = &idxd->wqs[i]; + j = i - 1; + for (; i < idxd->max_wqs; i++) + put_device(&idxd->wqs[i]->conf_dev); - device_unregister(&wq->conf_dev); - } + while (j--) + device_unregister(&idxd->wqs[j]->conf_dev); return rc; } int idxd_register_devices(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; - int rc; + int rc, i; rc = device_add(&idxd->conf_dev); if (rc < 0) return rc; - rc = idxd_setup_wq_sysfs(idxd); + rc = idxd_register_wq_devices(idxd); if (rc < 0) { - /* unregister conf dev */ - dev_dbg(dev, "Work Queue sysfs registering failed: %d\n", rc); - return rc; + dev_dbg(dev, "WQ devices registering failed: %d\n", rc); + goto err_wq; } rc = idxd_setup_group_sysfs(idxd); if (rc < 0) { /* unregister conf dev */ dev_dbg(dev, "Group sysfs registering failed: %d\n", rc); - return rc; + goto err; } rc = idxd_setup_engine_sysfs(idxd); if (rc < 0) { /* unregister conf dev */ dev_dbg(dev, "Engine sysfs registering failed: %d\n", rc); - return rc; + goto err; } return 0; + + err: + for (i = 0; i < idxd->max_wqs; i++) + device_unregister(&idxd->wqs[i]->conf_dev); + err_wq: + device_del(&idxd->conf_dev); + return rc; } void idxd_unregister_devices(struct idxd_device *idxd) @@ -1796,7 +1784,7 @@ void idxd_unregister_devices(struct idxd_device *idxd) int i; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; device_unregister(&wq->conf_dev); } -- Gitee From f6ad9857b814a7be101a3dce3d0df4aacad81506 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Apr 2021 16:37:44 -0700 Subject: [PATCH 018/173] dmaengine: idxd: fix engine conf_dev lifetime mainline inclusion from mainline-v5.13 commit 75b911309060f42ba94bbbf46f5f497d35d5cd02 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 75b911309060 dmaengine: idxd: fix engine conf_dev lifetime. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Remove devm_* allocation and fix engine->conf_dev 'struct device' lifetime. Address issues flagged by CONFIG_DEBUG_KOBJECT_RELEASE. Add release functions in order to free the allocated memory at the engine conf_dev destruction time. 
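A general sketch of the unwind rule these registration loops follow (generic names, simplified from the driver): a conf_dev that has only been device_initialize()d is dropped with put_device(), while one that made it through device_add() needs the full device_unregister(), i.e. device_del() plus put_device().

	int i, j, rc;

	for (i = 0; i < count; i++) {
		rc = device_add(&objs[i]->conf_dev);
		if (rc < 0)
			goto cleanup;
	}
	return 0;

cleanup:
	for (j = i; j < count; j++)
		put_device(&objs[j]->conf_dev);		/* initialized, never added */
	while (--i >= 0)
		device_unregister(&objs[i]->conf_dev);	/* added: del + put */
	return rc;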
Reported-by: Jason Gunthorpe Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161852986460.2203940.16603218225412118431.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 2 +- drivers/dma/idxd/idxd.h | 3 +- drivers/dma/idxd/init.c | 60 +++++++++++++++++++++++++------- drivers/dma/idxd/sysfs.c | 72 +++++++++++++++++++-------------------- 4 files changed, 86 insertions(+), 51 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index c4183294a704..be1dcddfe3c4 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -786,7 +786,7 @@ static int idxd_engines_setup(struct idxd_device *idxd) } for (i = 0; i < idxd->max_engines; i++) { - eng = &idxd->engines[i]; + eng = idxd->engines[i]; group = eng->group; if (!group) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 7b5bbde1db9f..67ebcaa8aafe 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -196,7 +196,7 @@ struct idxd_device { struct completion *cmd_done; struct idxd_group *groups; struct idxd_wq **wqs; - struct idxd_engine *engines; + struct idxd_engine **engines; struct iommu_sva *sva; unsigned int pasid; @@ -260,6 +260,7 @@ extern bool support_enqcmd; extern struct device_type dsa_device_type; extern struct device_type iax_device_type; extern struct device_type idxd_wq_device_type; +extern struct device_type idxd_engine_device_type; static inline bool is_dsa_dev(struct device *dev) { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c9cc742ba2b0..8b3c0c1b9139 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -196,6 +196,46 @@ static int idxd_setup_wqs(struct idxd_device *idxd) return rc; } +static int idxd_setup_engines(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + struct device *dev = &idxd->pdev->dev; + int i, rc; + + idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *), + GFP_KERNEL, dev_to_node(dev)); + if (!idxd->engines) + return -ENOMEM; + + for (i = 0; i < idxd->max_engines; i++) { + engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev)); + if (!engine) { + rc = -ENOMEM; + goto err; + } + + engine->id = i; + engine->idxd = idxd; + device_initialize(&engine->conf_dev); + engine->conf_dev.parent = &idxd->conf_dev; + engine->conf_dev.type = &idxd_engine_device_type; + rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id); + if (rc < 0) { + put_device(&engine->conf_dev); + goto err; + } + + idxd->engines[i] = engine; + } + + return 0; + + err: + while (--i >= 0) + put_device(&idxd->engines[i]->conf_dev); + return rc; +} + static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -207,6 +247,10 @@ static int idxd_setup_internals(struct idxd_device *idxd) if (rc < 0) return rc; + rc = idxd_setup_engines(idxd); + if (rc < 0) + goto err_engine; + idxd->groups = devm_kcalloc(dev, idxd->max_groups, sizeof(struct idxd_group), GFP_KERNEL); if (!idxd->groups) { @@ -221,19 +265,6 @@ static int idxd_setup_internals(struct idxd_device *idxd) idxd->groups[i].tc_b = -1; } - idxd->engines = devm_kcalloc(dev, idxd->max_engines, - sizeof(struct idxd_engine), GFP_KERNEL); - if (!idxd->engines) { - rc = -ENOMEM; - goto err; - } - - - for (i = 0; i < idxd->max_engines; i++) { - idxd->engines[i].idxd = idxd; - idxd->engines[i].id = i; - } - idxd->wq = 
create_workqueue(dev_name(dev)); if (!idxd->wq) { rc = -ENOMEM; @@ -243,6 +274,9 @@ static int idxd_setup_internals(struct idxd_device *idxd) return 0; err: + for (i = 0; i < idxd->max_engines; i++) + put_device(&idxd->engines[i]->conf_dev); + err_engine: for (i = 0; i < idxd->max_wqs; i++) put_device(&idxd->wqs[i]->conf_dev); return rc; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index b4937b718c24..980f15ed8909 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -26,11 +26,6 @@ static struct device_type idxd_group_device_type = { .release = idxd_conf_sub_device_release, }; -static struct device_type idxd_engine_device_type = { - .name = "engine", - .release = idxd_conf_sub_device_release, -}; - static int idxd_config_bus_match(struct device *dev, struct device_driver *drv) { @@ -464,6 +459,19 @@ static const struct attribute_group *idxd_engine_attribute_groups[] = { NULL, }; +static void idxd_conf_engine_release(struct device *dev) +{ + struct idxd_engine *engine = container_of(dev, struct idxd_engine, conf_dev); + + kfree(engine); +} + +struct device_type idxd_engine_device_type = { + .name = "engine", + .release = idxd_conf_engine_release, + .groups = idxd_engine_attribute_groups, +}; + /* Group attributes */ static void idxd_set_free_tokens(struct idxd_device *idxd) @@ -626,7 +634,7 @@ static ssize_t group_engines_show(struct device *dev, struct idxd_device *idxd = group->idxd; for (i = 0; i < idxd->max_engines; i++) { - struct idxd_engine *engine = &idxd->engines[i]; + struct idxd_engine *engine = idxd->engines[i]; if (!engine->group) continue; @@ -1648,37 +1656,27 @@ struct device_type iax_device_type = { .groups = idxd_attribute_groups, }; -static int idxd_setup_engine_sysfs(struct idxd_device *idxd) +static int idxd_register_engine_devices(struct idxd_device *idxd) { - struct device *dev = &idxd->pdev->dev; - int i, rc; + int i, j, rc; for (i = 0; i < idxd->max_engines; i++) { - struct idxd_engine *engine = &idxd->engines[i]; - - engine->conf_dev.parent = &idxd->conf_dev; - dev_set_name(&engine->conf_dev, "engine%d.%d", - idxd->id, engine->id); - engine->conf_dev.bus = idxd_get_bus_type(idxd); - engine->conf_dev.groups = idxd_engine_attribute_groups; - engine->conf_dev.type = &idxd_engine_device_type; - dev_dbg(dev, "Engine device register: %s\n", - dev_name(&engine->conf_dev)); - rc = device_register(&engine->conf_dev); - if (rc < 0) { - put_device(&engine->conf_dev); + struct idxd_engine *engine = idxd->engines[i]; + + rc = device_add(&engine->conf_dev); + if (rc < 0) goto cleanup; - } } return 0; cleanup: - while (i--) { - struct idxd_engine *engine = &idxd->engines[i]; + j = i - 1; + for (; i < idxd->max_engines; i++) + put_device(&idxd->engines[i]->conf_dev); - device_unregister(&engine->conf_dev); - } + while (j--) + device_unregister(&idxd->engines[j]->conf_dev); return rc; } @@ -1755,23 +1753,25 @@ int idxd_register_devices(struct idxd_device *idxd) goto err_wq; } - rc = idxd_setup_group_sysfs(idxd); + rc = idxd_register_engine_devices(idxd); if (rc < 0) { - /* unregister conf dev */ - dev_dbg(dev, "Group sysfs registering failed: %d\n", rc); - goto err; + dev_dbg(dev, "Engine devices registering failed: %d\n", rc); + goto err_engine; } - rc = idxd_setup_engine_sysfs(idxd); + rc = idxd_setup_group_sysfs(idxd); if (rc < 0) { /* unregister conf dev */ - dev_dbg(dev, "Engine sysfs registering failed: %d\n", rc); - goto err; + dev_dbg(dev, "Group sysfs registering failed: %d\n", rc); + goto err_group; } return 0; - err: + err_group: + 
for (i = 0; i < idxd->max_engines; i++) + device_unregister(&idxd->engines[i]->conf_dev); + err_engine: for (i = 0; i < idxd->max_wqs; i++) device_unregister(&idxd->wqs[i]->conf_dev); err_wq: @@ -1790,7 +1790,7 @@ void idxd_unregister_devices(struct idxd_device *idxd) } for (i = 0; i < idxd->max_engines; i++) { - struct idxd_engine *engine = &idxd->engines[i]; + struct idxd_engine *engine = idxd->engines[i]; device_unregister(&engine->conf_dev); } -- Gitee From de368ac606d2f4d0d08bac20d7e0c996b2992406 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Apr 2021 16:37:51 -0700 Subject: [PATCH 019/173] dmaengine: idxd: fix group conf_dev lifetime mainline inclusion from mainline-v5.13 commit defe49f96012ca91e8e673cb95b5c30b4a3735e8 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit defe49f96012 dmaengine: idxd: fix group conf_dev lifetime. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Remove devm_* allocation and fix group->conf_dev 'struct device' lifetime. Address issues flagged by CONFIG_DEBUG_KOBJECT_RELEASE. Add release functions in order to free the allocated memory at the group->conf_dev destruction time. Reported-by: Jason Gunthorpe Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161852987144.2203940.8830315575880047.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 8 ++--- drivers/dma/idxd/idxd.h | 3 +- drivers/dma/idxd/init.c | 68 ++++++++++++++++++++++++++++++--------- drivers/dma/idxd/sysfs.c | 68 +++++++++++++++++---------------------- 4 files changed, 88 insertions(+), 59 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index be1dcddfe3c4..4fef57717049 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -659,7 +659,7 @@ static int idxd_groups_config_write(struct idxd_device *idxd) ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET)); for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = &idxd->groups[i]; + struct idxd_group *group = idxd->groups[i]; idxd_group_config_write(group); } @@ -754,7 +754,7 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) /* TC-A 0 and TC-B 1 should be defaults */ for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = &idxd->groups[i]; + struct idxd_group *group = idxd->groups[i]; if (group->tc_a == -1) group->tc_a = group->grpcfg.flags.tc_a = 0; @@ -781,7 +781,7 @@ static int idxd_engines_setup(struct idxd_device *idxd) struct idxd_group *group; for (i = 0; i < idxd->max_groups; i++) { - group = &idxd->groups[i]; + group = idxd->groups[i]; group->grpcfg.engines = 0; } @@ -810,7 +810,7 @@ static int idxd_wqs_setup(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; for (i = 0; i < idxd->max_groups; i++) { - group = &idxd->groups[i]; + group = idxd->groups[i]; for (j = 0; j < 4; j++) group->grpcfg.wqs[j] = 0; } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 67ebcaa8aafe..89daf746d121 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -194,7 +194,7 @@ struct idxd_device { spinlock_t dev_lock; /* spinlock for device */ struct completion *cmd_done; - struct idxd_group *groups; + struct idxd_group **groups; struct idxd_wq **wqs; struct idxd_engine **engines; @@ -261,6 +261,7 @@ extern struct device_type dsa_device_type; extern struct 
device_type iax_device_type; extern struct device_type idxd_wq_device_type; extern struct device_type idxd_engine_device_type; +extern struct device_type idxd_group_device_type; static inline bool is_dsa_dev(struct device *dev) { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 8b3c0c1b9139..7f50977388f0 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -236,11 +236,54 @@ static int idxd_setup_engines(struct idxd_device *idxd) return rc; } -static int idxd_setup_internals(struct idxd_device *idxd) +static int idxd_setup_groups(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; + struct idxd_group *group; int i, rc; + idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *), + GFP_KERNEL, dev_to_node(dev)); + if (!idxd->groups) + return -ENOMEM; + + for (i = 0; i < idxd->max_groups; i++) { + group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev)); + if (!group) { + rc = -ENOMEM; + goto err; + } + + group->id = i; + group->idxd = idxd; + device_initialize(&group->conf_dev); + group->conf_dev.parent = &idxd->conf_dev; + group->conf_dev.bus = idxd_get_bus_type(idxd); + group->conf_dev.type = &idxd_group_device_type; + rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id); + if (rc < 0) { + put_device(&group->conf_dev); + goto err; + } + + idxd->groups[i] = group; + group->tc_a = -1; + group->tc_b = -1; + } + + return 0; + + err: + while (--i >= 0) + put_device(&idxd->groups[i]->conf_dev); + return rc; +} + +static int idxd_setup_internals(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + int rc, i; + init_waitqueue_head(&idxd->cmd_waitq); rc = idxd_setup_wqs(idxd); @@ -251,29 +294,22 @@ static int idxd_setup_internals(struct idxd_device *idxd) if (rc < 0) goto err_engine; - idxd->groups = devm_kcalloc(dev, idxd->max_groups, - sizeof(struct idxd_group), GFP_KERNEL); - if (!idxd->groups) { - rc = -ENOMEM; - goto err; - } - - for (i = 0; i < idxd->max_groups; i++) { - idxd->groups[i].idxd = idxd; - idxd->groups[i].id = i; - idxd->groups[i].tc_a = -1; - idxd->groups[i].tc_b = -1; - } + rc = idxd_setup_groups(idxd); + if (rc < 0) + goto err_group; idxd->wq = create_workqueue(dev_name(dev)); if (!idxd->wq) { rc = -ENOMEM; - goto err; + goto err_wkq_create; } return 0; - err: + err_wkq_create: + for (i = 0; i < idxd->max_groups; i++) + put_device(&idxd->groups[i]->conf_dev); + err_group: for (i = 0; i < idxd->max_engines; i++) put_device(&idxd->engines[i]->conf_dev); err_engine: diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 980f15ed8909..c0e5bb6142ed 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -16,16 +16,6 @@ static char *idxd_wq_type_names[] = { [IDXD_WQT_USER] = "user", }; -static void idxd_conf_sub_device_release(struct device *dev) -{ - dev_dbg(dev, "%s for %s\n", __func__, dev_name(dev)); -} - -static struct device_type idxd_group_device_type = { - .name = "group", - .release = idxd_conf_sub_device_release, -}; - static int idxd_config_bus_match(struct device *dev, struct device_driver *drv) { @@ -435,7 +425,7 @@ static ssize_t engine_group_id_store(struct device *dev, if (prevg) prevg->num_engines--; - engine->group = &idxd->groups[id]; + engine->group = idxd->groups[id]; engine->group->num_engines++; return count; @@ -479,7 +469,7 @@ static void idxd_set_free_tokens(struct idxd_device *idxd) int i, tokens; for (i = 0, tokens = 0; i < idxd->max_groups; i++) { - struct idxd_group *g = &idxd->groups[i]; + struct idxd_group 
*g = idxd->groups[i]; tokens += g->tokens_reserved; } @@ -784,6 +774,19 @@ static const struct attribute_group *idxd_group_attribute_groups[] = { NULL, }; +static void idxd_conf_group_release(struct device *dev) +{ + struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); + + kfree(group); +} + +struct device_type idxd_group_device_type = { + .name = "group", + .release = idxd_conf_group_release, + .groups = idxd_group_attribute_groups, +}; + /* IDXD work queue attribs */ static ssize_t wq_clients_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -856,7 +859,7 @@ static ssize_t wq_group_id_store(struct device *dev, return count; } - group = &idxd->groups[id]; + group = idxd->groups[id]; prevg = wq->group; if (prevg) @@ -1680,37 +1683,27 @@ static int idxd_register_engine_devices(struct idxd_device *idxd) return rc; } -static int idxd_setup_group_sysfs(struct idxd_device *idxd) +static int idxd_register_group_devices(struct idxd_device *idxd) { - struct device *dev = &idxd->pdev->dev; - int i, rc; + int i, j, rc; for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = &idxd->groups[i]; - - group->conf_dev.parent = &idxd->conf_dev; - dev_set_name(&group->conf_dev, "group%d.%d", - idxd->id, group->id); - group->conf_dev.bus = idxd_get_bus_type(idxd); - group->conf_dev.groups = idxd_group_attribute_groups; - group->conf_dev.type = &idxd_group_device_type; - dev_dbg(dev, "Group device register: %s\n", - dev_name(&group->conf_dev)); - rc = device_register(&group->conf_dev); - if (rc < 0) { - put_device(&group->conf_dev); + struct idxd_group *group = idxd->groups[i]; + + rc = device_add(&group->conf_dev); + if (rc < 0) goto cleanup; - } } return 0; cleanup: - while (i--) { - struct idxd_group *group = &idxd->groups[i]; + j = i - 1; + for (; i < idxd->max_groups; i++) + put_device(&idxd->groups[i]->conf_dev); - device_unregister(&group->conf_dev); - } + while (j--) + device_unregister(&idxd->groups[j]->conf_dev); return rc; } @@ -1759,10 +1752,9 @@ int idxd_register_devices(struct idxd_device *idxd) goto err_engine; } - rc = idxd_setup_group_sysfs(idxd); + rc = idxd_register_group_devices(idxd); if (rc < 0) { - /* unregister conf dev */ - dev_dbg(dev, "Group sysfs registering failed: %d\n", rc); + dev_dbg(dev, "Group device registering failed: %d\n", rc); goto err_group; } @@ -1796,7 +1788,7 @@ void idxd_unregister_devices(struct idxd_device *idxd) } for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = &idxd->groups[i]; + struct idxd_group *group = idxd->groups[i]; device_unregister(&group->conf_dev); } -- Gitee From 60063623827ee1d720a22129e672023ef1ad4c3e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Apr 2021 16:38:03 -0700 Subject: [PATCH 020/173] dmaengine: idxd: iax bus removal mainline inclusion from mainline-v5.13 commit 4b73e4ebd43ce48101a4c09bf13d439a954d61c5 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 4b73e4ebd43c dmaengine: idxd: iax bus removal. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- There is no need to have an additional bus for the IAX device. The removal of IAX will change user ABI as /sys/bus/iax will no longer exist. The iax device will be moved to the dsa bus. The device id for dsa and iax will now be combined rather than unique for each device type in order to accommodate the iax devices. This is in preparation for fixing the sub-driver code for idxd. 
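To illustrate the combined-ID behavior described above, a sketch mirroring the init.c hunk below (the exact interleaving shown assumes a machine that enumerates both device types):

/*
 * One IDA is shared by both device types, so instance numbers interleave
 * across DSA and IAX, e.g. dsa0, iax1, dsa2 -- an IAX device no longer
 * necessarily enumerates as iax0.
 */
DEFINE_IDA(idxd_ida);

	idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
	rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd_get_dev_name(idxd), idxd->id);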
There's no hardware deployment for Sapphire Rapids platform yet, which means that users have no reason to have developed scripts against this ABI. There is some exposure to released versions of accel-config, but those are being fixed up and an accel-config upgrade is reasonable to get IAX support. As far as accel-config is concerned IAX support starts when these devices appear under /sys/bus/dsa, and old accel-config just assumes that an empty / missing /sys/bus/iax just means a lack of platform support. Fixes: f25b463883a8 ("dmaengine: idxd: add IAX configuration support in the IDXD driver") Suggested-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161852988298.2203940.4529909758034944428.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 2 +- drivers/dma/idxd/idxd.h | 3 +- drivers/dma/idxd/init.c | 21 ++++-------- drivers/dma/idxd/sysfs.c | 74 +++------------------------------------- 4 files changed, 13 insertions(+), 87 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 3b8f05605db6..46172492a602 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -268,7 +268,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) device_initialize(dev); dev->parent = &wq->conf_dev; - dev->bus = idxd_get_bus_type(idxd); + dev->bus = &dsa_bus_type; dev->type = &idxd_cdev_device_type; dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 89daf746d121..b17415aa42bd 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -257,6 +257,7 @@ extern struct bus_type dsa_bus_type; extern struct bus_type iax_bus_type; extern bool support_enqcmd; +extern struct ida idxd_ida; extern struct device_type dsa_device_type; extern struct device_type iax_device_type; extern struct device_type idxd_wq_device_type; @@ -346,7 +347,6 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq) return wq->client_count; }; -struct ida *idxd_ida(struct idxd_device *idxd); const char *idxd_get_dev_name(struct idxd_device *idxd); int idxd_register_bus_type(void); void idxd_unregister_bus_type(void); @@ -354,7 +354,6 @@ int idxd_register_devices(struct idxd_device *idxd); void idxd_unregister_devices(struct idxd_device *idxd); int idxd_register_driver(void); void idxd_unregister_driver(void); -struct bus_type *idxd_get_bus_type(struct idxd_device *idxd); struct device_type *idxd_get_device_type(struct idxd_device *idxd); /* device interrupt control */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7f50977388f0..2ea8c1880d42 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -33,8 +33,7 @@ MODULE_PARM_DESC(sva, "Toggle SVA support on/off"); #define DRV_NAME "idxd" bool support_enqcmd; - -static struct ida idxd_idas[IDXD_TYPE_MAX]; +DEFINE_IDA(idxd_ida); static struct pci_device_id idxd_pci_tbl[] = { /* DSA ver 1.0 platforms */ @@ -51,11 +50,6 @@ static char *idxd_name[] = { "iax" }; -struct ida *idxd_ida(struct idxd_device *idxd) -{ - return &idxd_idas[idxd->type]; -} - const char *idxd_get_dev_name(struct idxd_device *idxd) { return idxd_name[idxd->type]; @@ -167,7 +161,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd) wq->idxd = idxd; device_initialize(&wq->conf_dev); wq->conf_dev.parent = &idxd->conf_dev; - wq->conf_dev.bus = idxd_get_bus_type(idxd); + wq->conf_dev.bus = &dsa_bus_type; wq->conf_dev.type = &idxd_wq_device_type; rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, 
wq->id); if (rc < 0) { @@ -258,7 +252,7 @@ static int idxd_setup_groups(struct idxd_device *idxd) group->idxd = idxd; device_initialize(&group->conf_dev); group->conf_dev.parent = &idxd->conf_dev; - group->conf_dev.bus = idxd_get_bus_type(idxd); + group->conf_dev.bus = &dsa_bus_type; group->conf_dev.type = &idxd_group_device_type; rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id); if (rc < 0) { @@ -409,13 +403,13 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev) idxd->pdev = pdev; idxd_set_type(idxd); - idxd->id = ida_alloc(idxd_ida(idxd), GFP_KERNEL); + idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL); if (idxd->id < 0) return NULL; device_initialize(&idxd->conf_dev); idxd->conf_dev.parent = dev; - idxd->conf_dev.bus = idxd_get_bus_type(idxd); + idxd->conf_dev.bus = &dsa_bus_type; idxd->conf_dev.type = idxd_get_device_type(idxd); rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd_get_dev_name(idxd), idxd->id); if (rc < 0) { @@ -669,7 +663,7 @@ static struct pci_driver idxd_pci_driver = { static int __init idxd_init_module(void) { - int err, i; + int err; /* * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in @@ -685,9 +679,6 @@ static int __init idxd_init_module(void) else support_enqcmd = true; - for (i = 0; i < IDXD_TYPE_MAX; i++) - ida_init(&idxd_idas[i]); - err = idxd_register_bus_type(); if (err < 0) return err; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index c0e5bb6142ed..9877e147b4b0 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -301,19 +301,6 @@ struct bus_type dsa_bus_type = { .shutdown = idxd_config_bus_shutdown, }; -struct bus_type iax_bus_type = { - .name = "iax", - .match = idxd_config_bus_match, - .probe = idxd_config_bus_probe, - .remove = idxd_config_bus_remove, - .shutdown = idxd_config_bus_shutdown, -}; - -static struct bus_type *idxd_bus_types[] = { - &dsa_bus_type, - &iax_bus_type -}; - static struct idxd_device_driver dsa_drv = { .drv = { .name = "dsa", @@ -323,25 +310,6 @@ static struct idxd_device_driver dsa_drv = { }, }; -static struct idxd_device_driver iax_drv = { - .drv = { - .name = "iax", - .bus = &iax_bus_type, - .owner = THIS_MODULE, - .mod_name = KBUILD_MODNAME, - }, -}; - -static struct idxd_device_driver *idxd_drvs[] = { - &dsa_drv, - &iax_drv -}; - -struct bus_type *idxd_get_bus_type(struct idxd_device *idxd) -{ - return idxd_bus_types[idxd->type]; -} - struct device_type *idxd_get_device_type(struct idxd_device *idxd) { if (idxd->type == IDXD_TYPE_DSA) @@ -355,28 +323,12 @@ struct device_type *idxd_get_device_type(struct idxd_device *idxd) /* IDXD generic driver setup */ int idxd_register_driver(void) { - int i, rc; - - for (i = 0; i < IDXD_TYPE_MAX; i++) { - rc = driver_register(&idxd_drvs[i]->drv); - if (rc < 0) - goto drv_fail; - } - - return 0; - -drv_fail: - while (--i >= 0) - driver_unregister(&idxd_drvs[i]->drv); - return rc; + return driver_register(&dsa_drv.drv); } void idxd_unregister_driver(void) { - int i; - - for (i = 0; i < IDXD_TYPE_MAX; i++) - driver_unregister(&idxd_drvs[i]->drv); + driver_unregister(&dsa_drv.drv); } /* IDXD engine attributes */ @@ -1643,7 +1595,7 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->wqs); kfree(idxd->engines); kfree(idxd->irq_entries); - ida_free(idxd_ida(idxd), idxd->id); + ida_free(&idxd_ida, idxd->id); kfree(idxd); } @@ -1798,26 +1750,10 @@ void idxd_unregister_devices(struct idxd_device *idxd) int idxd_register_bus_type(void) { - int i, rc; - - for (i = 0; i < IDXD_TYPE_MAX; i++) { - rc 
= bus_register(idxd_bus_types[i]); - if (rc < 0) - goto bus_err; - } - - return 0; - -bus_err: - while (--i >= 0) - bus_unregister(idxd_bus_types[i]); - return rc; + return bus_register(&dsa_bus_type); } void idxd_unregister_bus_type(void) { - int i; - - for (i = 0; i < IDXD_TYPE_MAX; i++) - bus_unregister(idxd_bus_types[i]); + bus_unregister(&dsa_bus_type); } -- Gitee From 9acb514baaaa4400cbf6be86ae7558679589c6b8 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Apr 2021 16:38:09 -0700 Subject: [PATCH 021/173] dmaengine: idxd: remove detection of device type mainline inclusion from mainline-v5.13 commit 435b512dbc0dac42b34348393049b386bb1a19bd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 435b512dbc0d dmaengine: idxd: remove detection of device type. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Move all static data type for per device type to an idxd_driver_data data structure. The data can be attached to the pci_device_id and provided by the pci probe function. This removes a lot of unnecessary type detection and setup code. Suggested-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161852988924.2203940.2787590808682466398.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 11 +++---- drivers/dma/idxd/device.c | 16 +++------- drivers/dma/idxd/idxd.h | 13 +++++--- drivers/dma/idxd/init.c | 65 +++++++++++++++------------------------ drivers/dma/idxd/submit.c | 2 +- drivers/dma/idxd/sysfs.c | 16 ++-------- 6 files changed, 48 insertions(+), 75 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 46172492a602..afb3f624b5a4 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -45,7 +45,7 @@ static void idxd_cdev_dev_release(struct device *dev) struct idxd_cdev_context *cdev_ctx; struct idxd_wq *wq = idxd_cdev->wq; - cdev_ctx = &ictx[wq->idxd->type]; + cdev_ctx = &ictx[wq->idxd->data->type]; ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor); kfree(idxd_cdev); } @@ -239,7 +239,7 @@ static const struct file_operations idxd_cdev_fops = { int idxd_cdev_get_major(struct idxd_device *idxd) { - return MAJOR(ictx[idxd->type].devt); + return MAJOR(ictx[idxd->data->type].devt); } int idxd_wq_add_cdev(struct idxd_wq *wq) @@ -258,7 +258,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) idxd_cdev->wq = wq; cdev = &idxd_cdev->cdev; dev = &idxd_cdev->dev; - cdev_ctx = &ictx[wq->idxd->type]; + cdev_ctx = &ictx[wq->idxd->data->type]; minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL); if (minor < 0) { kfree(idxd_cdev); @@ -272,8 +272,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) dev->type = &idxd_cdev_device_type; dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor); - rc = dev_set_name(dev, "%s/wq%u.%u", idxd_get_dev_name(idxd), - idxd->id, wq->id); + rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id); if (rc < 0) goto err; @@ -298,7 +297,7 @@ void idxd_wq_del_cdev(struct idxd_wq *wq) struct idxd_cdev *idxd_cdev; struct idxd_cdev_context *cdev_ctx; - cdev_ctx = &ictx[wq->idxd->type]; + cdev_ctx = &ictx[wq->idxd->data->type]; idxd_cdev = wq->idxd_cdev; wq->idxd_cdev = NULL; cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev); diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 4fef57717049..016df87cf5c5 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -144,14 
+144,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) if (rc < 0) return rc; - if (idxd->type == IDXD_TYPE_DSA) - align = 32; - else if (idxd->type == IDXD_TYPE_IAX) - align = 64; - else - return -ENODEV; - - wq->compls_size = num_descs * idxd->compl_size + align; + align = idxd->data->align; + wq->compls_size = num_descs * idxd->data->compl_size + align; wq->compls_raw = dma_alloc_coherent(dev, wq->compls_size, &wq->compls_addr_raw, GFP_KERNEL); if (!wq->compls_raw) { @@ -178,11 +172,11 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) struct idxd_desc *desc = wq->descs[i]; desc->hw = wq->hw_descs[i]; - if (idxd->type == IDXD_TYPE_DSA) + if (idxd->data->type == IDXD_TYPE_DSA) desc->completion = &wq->compls[i]; - else if (idxd->type == IDXD_TYPE_IAX) + else if (idxd->data->type == IDXD_TYPE_IAX) desc->iax_completion = &wq->iax_compls[i]; - desc->compl_dma = wq->compls_addr + idxd->compl_size * i; + desc->compl_dma = wq->compls_addr + idxd->data->compl_size * i; desc->id = i; desc->wq = wq; desc->cpu = -1; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index b17415aa42bd..8055e872953c 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -178,9 +178,17 @@ struct idxd_dma_dev { struct dma_device dma; }; -struct idxd_device { +struct idxd_driver_data { + const char *name_prefix; enum idxd_type type; + struct device_type *dev_type; + int compl_size; + int align; +}; + +struct idxd_device { struct device conf_dev; + struct idxd_driver_data *data; struct list_head list; struct idxd_hw hw; enum idxd_device_state state; @@ -218,7 +226,6 @@ struct idxd_device { int token_limit; int nr_tokens; /* non-reserved tokens */ unsigned int wqcfg_size; - int compl_size; union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; @@ -347,14 +354,12 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq) return wq->client_count; }; -const char *idxd_get_dev_name(struct idxd_device *idxd); int idxd_register_bus_type(void); void idxd_unregister_bus_type(void); int idxd_register_devices(struct idxd_device *idxd); void idxd_unregister_devices(struct idxd_device *idxd); int idxd_register_driver(void); void idxd_unregister_driver(void); -struct device_type *idxd_get_device_type(struct idxd_device *idxd); /* device interrupt control */ void idxd_msix_perm_setup(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 2ea8c1880d42..c88dd207f0c5 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -35,26 +35,33 @@ MODULE_PARM_DESC(sva, "Toggle SVA support on/off"); bool support_enqcmd; DEFINE_IDA(idxd_ida); +static struct idxd_driver_data idxd_driver_data[] = { + [IDXD_TYPE_DSA] = { + .name_prefix = "dsa", + .type = IDXD_TYPE_DSA, + .compl_size = sizeof(struct dsa_completion_record), + .align = 32, + .dev_type = &dsa_device_type, + }, + [IDXD_TYPE_IAX] = { + .name_prefix = "iax", + .type = IDXD_TYPE_IAX, + .compl_size = sizeof(struct iax_completion_record), + .align = 64, + .dev_type = &iax_device_type, + }, +}; + static struct pci_device_id idxd_pci_tbl[] = { /* DSA ver 1.0 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_DSA_SPR0) }, + { PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) }, /* IAX ver 1.0 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IAX_SPR0) }, + { PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) }, { 0, } }; MODULE_DEVICE_TABLE(pci, idxd_pci_tbl); -static char *idxd_name[] = { - "dsa", - "iax" -}; - -const char *idxd_get_dev_name(struct idxd_device *idxd) -{ - 
return idxd_name[idxd->type]; -} - static int idxd_setup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; @@ -379,19 +386,7 @@ static void idxd_read_caps(struct idxd_device *idxd) } } -static inline void idxd_set_type(struct idxd_device *idxd) -{ - struct pci_dev *pdev = idxd->pdev; - - if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0) - idxd->type = IDXD_TYPE_DSA; - else if (pdev->device == PCI_DEVICE_ID_INTEL_IAX_SPR0) - idxd->type = IDXD_TYPE_IAX; - else - idxd->type = IDXD_TYPE_UNKNOWN; -} - -static struct idxd_device *idxd_alloc(struct pci_dev *pdev) +static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) { struct device *dev = &pdev->dev; struct idxd_device *idxd; @@ -402,7 +397,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev) return NULL; idxd->pdev = pdev; - idxd_set_type(idxd); + idxd->data = data; idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL); if (idxd->id < 0) return NULL; @@ -410,8 +405,8 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev) device_initialize(&idxd->conf_dev); idxd->conf_dev.parent = dev; idxd->conf_dev.bus = &dsa_bus_type; - idxd->conf_dev.type = idxd_get_device_type(idxd); - rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd_get_dev_name(idxd), idxd->id); + idxd->conf_dev.type = idxd->data->dev_type; + rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id); if (rc < 0) { put_device(&idxd->conf_dev); return NULL; @@ -503,18 +498,11 @@ static int idxd_probe(struct idxd_device *idxd) return rc; } -static void idxd_type_init(struct idxd_device *idxd) -{ - if (idxd->type == IDXD_TYPE_DSA) - idxd->compl_size = sizeof(struct dsa_completion_record); - else if (idxd->type == IDXD_TYPE_IAX) - idxd->compl_size = sizeof(struct iax_completion_record); -} - static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; struct idxd_device *idxd; + struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data; int rc; rc = pci_enable_device(pdev); @@ -522,7 +510,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; dev_dbg(dev, "Alloc IDXD context\n"); - idxd = idxd_alloc(pdev); + idxd = idxd_alloc(pdev, data); if (!idxd) { rc = -ENOMEM; goto err_idxd_alloc; @@ -548,9 +536,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) goto err; - - idxd_type_init(idxd); - dev_dbg(dev, "Set PCI master\n"); pci_set_master(pdev); pci_set_drvdata(pdev, idxd); diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 10b411c22512..1f5e539fd8a5 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -15,7 +15,7 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) desc = wq->descs[idx]; memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); - memset(desc->completion, 0, idxd->compl_size); + memset(desc->completion, 0, idxd->data->compl_size); desc->cpu = cpu; if (device_pasid_enabled(idxd)) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 9877e147b4b0..0f7f199936b9 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -310,16 +310,6 @@ static struct idxd_device_driver dsa_drv = { }, }; -struct device_type *idxd_get_device_type(struct idxd_device *idxd) -{ - if (idxd->type == IDXD_TYPE_DSA) - return &dsa_device_type; - else if (idxd->type == IDXD_TYPE_IAX) - return &iax_device_type; - else - return NULL; -} - /* IDXD generic driver setup */ int 
idxd_register_driver(void) { @@ -453,7 +443,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev, if (rc < 0) return -EINVAL; - if (idxd->type == IDXD_TYPE_IAX) + if (idxd->data->type == IDXD_TYPE_IAX) return -EOPNOTSUPP; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -501,7 +491,7 @@ static ssize_t group_tokens_allowed_store(struct device *dev, if (rc < 0) return -EINVAL; - if (idxd->type == IDXD_TYPE_IAX) + if (idxd->data->type == IDXD_TYPE_IAX) return -EOPNOTSUPP; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -546,7 +536,7 @@ static ssize_t group_use_token_limit_store(struct device *dev, if (rc < 0) return -EINVAL; - if (idxd->type == IDXD_TYPE_IAX) + if (idxd->data->type == IDXD_TYPE_IAX) return -EOPNOTSUPP; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) -- Gitee From a092b7eb7d1a21b50c4aeb7f04b2de6c5c32ac65 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Apr 2021 11:46:22 -0700 Subject: [PATCH 022/173] dmaengine: idxd: add percpu_ref to descriptor submission path mainline inclusion from mainline-v5.13 commit 93a40a6d7428921897bb7fed5ffb4ce83df05432 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 93a40a6d7428 dmaengine: idxd: add percpu_ref to descriptor submission path. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The current submission path has no way to stop the submitter from submitting on the shutdown path or wq disable path. This provides a way to quiesce the submission path. Modeled after 'struct request_queue' usage of percpu_ref. One of the abilities of percpu reference counting is the ability to stop new references from being taken while awaiting outstanding references to be dropped. On wq shutdown, we want to block any new submissions to the kernel workqueue and quiesce before disabling. The percpu_ref allows us to block any new submissions and wait for any current submission calls to finish submitting to the workqueue. A percpu_ref is embedded in each idxd_wq context to allow control for an individual wq. The wq->wq_active counter is elevated before calling movdir64b() or enqcmds() to submit a descriptor to the wq and dropped once the submission call completes. The function is gated by percpu_ref_tryget_live(). On shutdown, with percpu_ref_kill() called, any new submission is blocked from acquiring a ref and fails. Once all references are dropped for the wq, shutdown can continue.
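Condensed from the diff below, the quiesce pattern looks roughly like this (an illustrative sketch, not the literal driver code; sketch_submit() and sketch_quiesce() are hypothetical stand-ins for idxd_submit_desc() and idxd_wq_quiesce(), and error handling is elided):

static void idxd_wq_ref_release(struct percpu_ref *ref)
{
	struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active);

	complete(&wq->wq_dead);			/* the last reference is gone */
}

static int sketch_submit(struct idxd_wq *wq)
{
	if (!percpu_ref_tryget_live(&wq->wq_active))
		return -ENXIO;			/* wq is shutting down */
	/* ... movdir64b()/enqcmds() descriptor submission ... */
	percpu_ref_put(&wq->wq_active);
	return 0;
}

static void sketch_quiesce(struct idxd_wq *wq)
{
	percpu_ref_kill(&wq->wq_active);	/* new tryget_live() calls fail */
	wait_for_completion(&wq->wq_dead);	/* drain in-flight submitters */
}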
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161894438293.3202472.14894701611500822232.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 26 +++++ drivers/dma/idxd/idxd.h | 4 + drivers/dma/idxd/init.c | 1 + drivers/dma/idxd/submit.c | 5 + drivers/dma/idxd/sysfs.c | 233 ++++++++++++++++++++------------------ 5 files changed, 161 insertions(+), 108 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 016df87cf5c5..6d674fdedb4d 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -384,6 +384,32 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) memset(wq->name, 0, WQ_NAME_SIZE); } +static void idxd_wq_ref_release(struct percpu_ref *ref) +{ + struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active); + + complete(&wq->wq_dead); +} + +int idxd_wq_init_percpu_ref(struct idxd_wq *wq) +{ + int rc; + + memset(&wq->wq_active, 0, sizeof(wq->wq_active)); + rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL); + if (rc < 0) + return rc; + reinit_completion(&wq->wq_dead); + return 0; +} + +void idxd_wq_quiesce(struct idxd_wq *wq) +{ + percpu_ref_kill(&wq->wq_active); + wait_for_completion(&wq->wq_dead); + percpu_ref_exit(&wq->wq_active); +} + /* Device control bits */ static inline bool idxd_is_enabled(struct idxd_device *idxd) { diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 8055e872953c..1b539cbf3c14 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -108,6 +108,8 @@ struct idxd_dma_chan { struct idxd_wq { void __iomem *portal; + struct percpu_ref wq_active; + struct completion wq_dead; struct device conf_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; @@ -395,6 +397,8 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq); void idxd_wq_disable_cleanup(struct idxd_wq *wq); int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid); int idxd_wq_disable_pasid(struct idxd_wq *wq); +void idxd_wq_quiesce(struct idxd_wq *wq); +int idxd_wq_init_percpu_ref(struct idxd_wq *wq); /* submission */ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c88dd207f0c5..23b23411dcdb 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -178,6 +178,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd) mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); + init_completion(&wq->wq_dead); wq->max_xfer_bytes = idxd->max_xfer_bytes; wq->max_batch_size = idxd->max_batch_size; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 1f5e539fd8a5..4bb09f7be647 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -86,6 +86,9 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) if (idxd->state != IDXD_DEV_ENABLED) return -EIO; + if (!percpu_ref_tryget_live(&wq->wq_active)) + return -ENXIO; + portal = wq->portal; /* @@ -108,6 +111,8 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) return rc; } + percpu_ref_put(&wq->wq_active); + /* * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. 
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 0f7f199936b9..9070505b90d4 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -47,6 +47,127 @@ static int idxd_config_bus_match(struct device *dev, return matched; } +static int enable_wq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + unsigned long flags; + int rc; + + mutex_lock(&wq->wq_lock); + + if (idxd->state != IDXD_DEV_ENABLED) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "Enabling while device not enabled.\n"); + return -EPERM; + } + + if (wq->state != IDXD_WQ_DISABLED) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ %d already enabled.\n", wq->id); + return -EBUSY; + } + + if (!wq->group) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ not attached to group.\n"); + return -EINVAL; + } + + if (strlen(wq->name) == 0) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ name not set.\n"); + return -EINVAL; + } + + /* Shared WQ checks */ + if (wq_shared(wq)) { + if (!device_swq_supported(idxd)) { + dev_warn(dev, "PASID not enabled and shared WQ.\n"); + mutex_unlock(&wq->wq_lock); + return -ENXIO; + } + /* + * Shared wq with the threshold set to 0 means the user + * did not set the threshold or transitioned from a + * dedicated wq but did not set threshold. A value + * of 0 would effectively disable the shared wq. The + * driver does not allow a value of 0 to be set for + * threshold via sysfs. + */ + if (wq->threshold == 0) { + dev_warn(dev, "Shared WQ and threshold 0.\n"); + mutex_unlock(&wq->wq_lock); + return -EINVAL; + } + } + + rc = idxd_wq_alloc_resources(wq); + if (rc < 0) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ resource alloc failed\n"); + return rc; + } + + spin_lock_irqsave(&idxd->dev_lock, flags); + rc = idxd_device_config(idxd); + spin_unlock_irqrestore(&idxd->dev_lock, flags); + if (rc < 0) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "Writing WQ %d config failed: %d\n", wq->id, rc); + return rc; + } + + rc = idxd_wq_enable(wq); + if (rc < 0) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ %d enabling failed: %d\n", wq->id, rc); + return rc; + } + + rc = idxd_wq_map_portal(wq); + if (rc < 0) { + dev_warn(dev, "wq portal mapping failed: %d\n", rc); + rc = idxd_wq_disable(wq); + if (rc < 0) + dev_warn(dev, "IDXD wq disable failed\n"); + mutex_unlock(&wq->wq_lock); + return rc; + } + + wq->client_count = 0; + + if (wq->type == IDXD_WQT_KERNEL) { + rc = idxd_wq_init_percpu_ref(wq); + if (rc < 0) { + dev_dbg(dev, "percpu_ref setup failed\n"); + mutex_unlock(&wq->wq_lock); + return rc; + } + } + + if (is_idxd_wq_dmaengine(wq)) { + rc = idxd_register_dma_channel(wq); + if (rc < 0) { + dev_dbg(dev, "DMA channel register failed\n"); + mutex_unlock(&wq->wq_lock); + return rc; + } + } else if (is_idxd_wq_cdev(wq)) { + rc = idxd_wq_add_cdev(wq); + if (rc < 0) { + dev_dbg(dev, "Cdev creation failed\n"); + mutex_unlock(&wq->wq_lock); + return rc; + } + } + + mutex_unlock(&wq->wq_lock); + dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev)); + + return 0; +} + static int idxd_config_bus_probe(struct device *dev) { int rc; @@ -94,115 +215,8 @@ static int idxd_config_bus_probe(struct device *dev) return 0; } else if (is_idxd_wq_dev(dev)) { struct idxd_wq *wq = confdev_to_wq(dev); - struct idxd_device *idxd = wq->idxd; - - mutex_lock(&wq->wq_lock); - - if (idxd->state != IDXD_DEV_ENABLED) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "Enabling while device not enabled.\n"); - return -EPERM; - } - - if 
(wq->state != IDXD_WQ_DISABLED) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ %d already enabled.\n", wq->id); - return -EBUSY; - } - - if (!wq->group) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ not attached to group.\n"); - return -EINVAL; - } - - if (strlen(wq->name) == 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ name not set.\n"); - return -EINVAL; - } - - /* Shared WQ checks */ - if (wq_shared(wq)) { - if (!device_swq_supported(idxd)) { - dev_warn(dev, - "PASID not enabled and shared WQ.\n"); - mutex_unlock(&wq->wq_lock); - return -ENXIO; - } - /* - * Shared wq with the threshold set to 0 means the user - * did not set the threshold or transitioned from a - * dedicated wq but did not set threshold. A value - * of 0 would effectively disable the shared wq. The - * driver does not allow a value of 0 to be set for - * threshold via sysfs. - */ - if (wq->threshold == 0) { - dev_warn(dev, - "Shared WQ and threshold 0.\n"); - mutex_unlock(&wq->wq_lock); - return -EINVAL; - } - } - - rc = idxd_wq_alloc_resources(wq); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ resource alloc failed\n"); - return rc; - } - - spin_lock_irqsave(&idxd->dev_lock, flags); - rc = idxd_device_config(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "Writing WQ %d config failed: %d\n", - wq->id, rc); - return rc; - } - rc = idxd_wq_enable(wq); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ %d enabling failed: %d\n", - wq->id, rc); - return rc; - } - - rc = idxd_wq_map_portal(wq); - if (rc < 0) { - dev_warn(dev, "wq portal mapping failed: %d\n", rc); - rc = idxd_wq_disable(wq); - if (rc < 0) - dev_warn(dev, "IDXD wq disable failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - - wq->client_count = 0; - - dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev)); - - if (is_idxd_wq_dmaengine(wq)) { - rc = idxd_register_dma_channel(wq); - if (rc < 0) { - dev_dbg(dev, "DMA channel register failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - } else if (is_idxd_wq_cdev(wq)) { - rc = idxd_wq_add_cdev(wq); - if (rc < 0) { - dev_dbg(dev, "Cdev creation failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - } - - mutex_unlock(&wq->wq_lock); - return 0; + return enable_wq(wq); } return -ENODEV; @@ -220,6 +234,9 @@ static void disable_wq(struct idxd_wq *wq) return; } + if (wq->type == IDXD_WQT_KERNEL) + idxd_wq_quiesce(wq); + if (is_idxd_wq_dmaengine(wq)) idxd_unregister_dma_channel(wq); else if (is_idxd_wq_cdev(wq)) -- Gitee From b99d489ac7419e5235afecf11d52a00f7bc0a433 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Apr 2021 11:46:28 -0700 Subject: [PATCH 023/173] dmaengine: idxd: add support for readonly config mode mainline inclusion from mainline-v5.13 commit 8c66bbdc4fbf3c297ebc8edf71f359e4a132c9db category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 8c66bbdc4fbf dmaengine: idxd: add support for readonly config mode. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The read-only configuration mode is defined by the DSA spec as a mode of the device WQ configuration. When GENCAP register bit 31 is set to 0, the device is in RO mode and group configuration and some fields of the workqueue configuration registers are read-only and reflect the fixed configuration of the device. Add support for RO mode. 
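For context, the configurability flag this mode keys off of comes from GENCAP bit 31, roughly as read in the pre-existing idxd_read_caps() probe path (a sketch, not part of this diff; the config_en field name follows the driver's registers.h):

	idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
	if (idxd->hw.gen_cap.config_en)		/* GENCAP bit 31 */
		set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags);
	/* bit 31 clear: RO mode; the fixed configuration must be read back */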
The driver will load the values from the registers directly and set up all the internally cached data structures based on the device configuration. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161894438847.3202472.6317563824045432727.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 116 ++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 8 +++ drivers/dma/idxd/sysfs.c | 22 +++++--- 4 files changed, 138 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 6d674fdedb4d..3ddb1c731080 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -884,3 +884,119 @@ int idxd_device_config(struct idxd_device *idxd) return 0; } + +static int idxd_wq_load_config(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + int wqcfg_offset; + int i; + + wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, 0); + memcpy_fromio(wq->wqcfg, idxd->reg_base + wqcfg_offset, idxd->wqcfg_size); + + wq->size = wq->wqcfg->wq_size; + wq->threshold = wq->wqcfg->wq_thresh; + if (wq->wqcfg->priv) + wq->type = IDXD_WQT_KERNEL; + + /* The driver does not support shared WQ mode in read-only config yet */ + if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en) + return -EOPNOTSUPP; + + set_bit(WQ_FLAG_DEDICATED, &wq->flags); + + wq->priority = wq->wqcfg->priority; + + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i); + dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wqcfg_offset, wq->wqcfg->bits[i]); + } + + return 0; +} + +static void idxd_group_load_config(struct idxd_group *group) +{ + struct idxd_device *idxd = group->idxd; + struct device *dev = &idxd->pdev->dev; + int i, j, grpcfg_offset; + + /* + * Load WQS bit fields + * Iterate through all 256 bits 64 bits at a time + */ + for (i = 0; i < GRPWQCFG_STRIDES; i++) { + struct idxd_wq *wq; + + grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i); + group->grpcfg.wqs[i] = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n", + group->id, i, grpcfg_offset, group->grpcfg.wqs[i]); + + if (i * 64 >= idxd->max_wqs) + break; + + /* Iterate through all 64 bits and check for wq set */ + for (j = 0; j < 64; j++) { + int id = i * 64 + j; + + /* No need to check beyond max wqs */ + if (id >= idxd->max_wqs) + break; + + /* Set group assignment for wq if wq bit is set */ + if (group->grpcfg.wqs[i] & BIT(j)) { + wq = idxd->wqs[id]; + wq->group = group; + } + } + } + + grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id); + group->grpcfg.engines = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id, + grpcfg_offset, group->grpcfg.engines); + + /* Iterate through all 64 bits to check engines set */ + for (i = 0; i < 64; i++) { + if (i >= idxd->max_engines) + break; + + if (group->grpcfg.engines & BIT(i)) { + struct idxd_engine *engine = idxd->engines[i]; + + engine->group = group; + } + } + + grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); + group->grpcfg.flags.bits = ioread32(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", + group->id, grpcfg_offset, group->grpcfg.flags.bits); +} + +int idxd_device_load_config(struct idxd_device *idxd) +{ + union gencfg_reg reg; + int i, rc; + + reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + idxd->token_limit = reg.token_limit; + + for (i = 0; i < idxd->max_groups; 
i++) { + struct idxd_group *group = idxd->groups[i]; + + idxd_group_load_config(group); + } + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + rc = idxd_wq_load_config(wq); + if (rc < 0) + return rc; + } + + return 0; +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 1b539cbf3c14..940a2e1ddf12 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -384,6 +384,7 @@ void idxd_device_cleanup(struct idxd_device *idxd); int idxd_device_config(struct idxd_device *idxd); void idxd_device_wqs_clear_state(struct idxd_device *idxd); void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid); +int idxd_device_load_config(struct idxd_device *idxd); /* work queue control */ int idxd_wq_alloc_resources(struct idxd_wq *wq); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 23b23411dcdb..c8ae978faaf9 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -482,6 +482,14 @@ static int idxd_probe(struct idxd_device *idxd) if (rc) goto err; + /* If the configs are readonly, then load them from device */ + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + dev_dbg(dev, "Loading RO device config\n"); + rc = idxd_device_load_config(idxd); + if (rc < 0) + goto err; + } + rc = idxd_setup_interrupts(idxd); if (rc) goto err; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 9070505b90d4..989265c8aeb8 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -110,7 +110,8 @@ static int enable_wq(struct idxd_wq *wq) } spin_lock_irqsave(&idxd->dev_lock, flags); - rc = idxd_device_config(idxd); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); spin_unlock_irqrestore(&idxd->dev_lock, flags); if (rc < 0) { mutex_unlock(&wq->wq_lock); @@ -170,7 +171,7 @@ static int enable_wq(struct idxd_wq *wq) static int idxd_config_bus_probe(struct device *dev) { - int rc; + int rc = 0; unsigned long flags; dev_dbg(dev, "%s called\n", __func__); @@ -188,7 +189,8 @@ static int idxd_config_bus_probe(struct device *dev) /* Perform IDXD configuration and enabling */ spin_lock_irqsave(&idxd->dev_lock, flags); - rc = idxd_device_config(idxd); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); spin_unlock_irqrestore(&idxd->dev_lock, flags); if (rc < 0) { module_put(THIS_MODULE); @@ -287,12 +289,14 @@ static int idxd_config_bus_remove(struct device *dev) idxd_unregister_dma_device(idxd); rc = idxd_device_disable(idxd); - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = idxd->wqs[i]; - - mutex_lock(&wq->wq_lock); - idxd_wq_disable_cleanup(wq); - mutex_unlock(&wq->wq_lock); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + mutex_lock(&wq->wq_lock); + idxd_wq_disable_cleanup(wq); + mutex_unlock(&wq->wq_lock); + } } module_put(THIS_MODULE); if (rc < 0) -- Gitee From a42c47f9df1f0f377a803d08d5942e0b5755c60b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Apr 2021 11:46:34 -0700 Subject: [PATCH 024/173] dmaengine: idxd: add interrupt handle request and release support mainline inclusion from mainline-v5.13 commit eb15e7154fbfa3e61c777704b2ff28eb3a0d4796 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit eb15e7154fbf dmaengine: idxd: add interrupt handle request and release support. Incremental backporting patches for DSA/IAA on Intel Xeon platform. 
-------------------------------- DSA spec states that when Request Interrupt Handle and Release Interrupt Handle command bits are set in the CMDCAP register, these device commands must be supported by the driver. The interrupt handle is programmed in a descriptor. When Request Interrupt Handle is not supported, the interrupt handle is the index of the desired entry in the MSI-X table. When the command is supported, driver must use the command to obtain a handle to be programmed in the submitted descriptor. A requested handle may be revoked. After the handle is revoked, any use of the handle will result in Invalid Interrupt Handle error. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161894439422.3202472.17579543737810265471.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 71 ++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 13 +++++++ drivers/dma/idxd/init.c | 56 +++++++++++++++++++++++++++- drivers/dma/idxd/registers.h | 9 ++++- drivers/dma/idxd/submit.c | 35 ++++++++++++++---- drivers/dma/idxd/sysfs.c | 1 + 6 files changed, 176 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 3ddb1c731080..54d5afec81cf 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -598,6 +598,77 @@ void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid) dev_dbg(dev, "pasid %d drained\n", pasid); } +int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle, + enum idxd_interrupt_type irq_type) +{ + struct device *dev = &idxd->pdev->dev; + u32 operand, status; + + if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))) + return -EOPNOTSUPP; + + dev_dbg(dev, "get int handle, idx %d\n", idx); + + operand = idx & GENMASK(15, 0); + if (irq_type == IDXD_IRQ_IMS) + operand |= CMD_INT_HANDLE_IMS; + + dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_REQUEST_INT_HANDLE, operand); + + idxd_cmd_exec(idxd, IDXD_CMD_REQUEST_INT_HANDLE, operand, &status); + + if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) { + dev_dbg(dev, "request int handle failed: %#x\n", status); + return -ENXIO; + } + + *handle = (status >> IDXD_CMDSTS_RES_SHIFT) & GENMASK(15, 0); + + dev_dbg(dev, "int handle acquired: %u\n", *handle); + return 0; +} + +int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, + enum idxd_interrupt_type irq_type) +{ + struct device *dev = &idxd->pdev->dev; + u32 operand, status; + union idxd_command_reg cmd; + unsigned long flags; + + if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))) + return -EOPNOTSUPP; + + dev_dbg(dev, "release int handle, handle %d\n", handle); + + memset(&cmd, 0, sizeof(cmd)); + operand = handle & GENMASK(15, 0); + + if (irq_type == IDXD_IRQ_IMS) + operand |= CMD_INT_HANDLE_IMS; + + cmd.cmd = IDXD_CMD_RELEASE_INT_HANDLE; + cmd.operand = operand; + + dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand); + + spin_lock_irqsave(&idxd->dev_lock, flags); + iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); + + while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE) + cpu_relax(); + status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); + spin_unlock_irqrestore(&idxd->dev_lock, flags); + + if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) { + dev_dbg(dev, "release int handle failed: %#x\n", status); + return -ENXIO; + } + + dev_dbg(dev, "int handle released.\n"); + return 0; +} + /* Device configuration bits */ 
void idxd_msix_perm_setup(struct idxd_device *idxd) { diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 940a2e1ddf12..c1d4a1976206 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -160,6 +160,7 @@ struct idxd_hw { union group_cap_reg group_cap; union engine_cap_reg engine_cap; struct opcap opcap; + u32 cmd_cap; }; enum idxd_device_state { @@ -237,6 +238,8 @@ struct idxd_device { struct idxd_dma_dev *idxd_dma; struct workqueue_struct *wq; struct work_struct work; + + int *int_handles; }; /* IDXD software descriptor */ @@ -256,6 +259,7 @@ struct idxd_desc { struct list_head list; int id; int cpu; + unsigned int vector; struct idxd_wq *wq; }; @@ -330,6 +334,11 @@ enum idxd_portal_prot { IDXD_PORTAL_LIMITED, }; +enum idxd_interrupt_type { + IDXD_IRQ_MSIX = 0, + IDXD_IRQ_IMS, +}; + static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot) { return prot * 0x1000; @@ -385,6 +394,10 @@ int idxd_device_config(struct idxd_device *idxd); void idxd_device_wqs_clear_state(struct idxd_device *idxd); void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid); int idxd_device_load_config(struct idxd_device *idxd); +int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle, + enum idxd_interrupt_type irq_type); +int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, + enum idxd_interrupt_type irq_type); /* work queue control */ int idxd_wq_alloc_resources(struct idxd_wq *wq); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c8ae978faaf9..124f8bc7ac04 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -125,7 +125,25 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector); goto err_wq_irqs; } + dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector); + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { + /* + * The MSIX vector enumeration starts at 1 with vector 0 being the + * misc interrupt that handles non I/O completion events. The + * interrupt handles are for IMS enumeration on guest. The misc + * interrupt vector does not require a handle and therefore we start + * the int_handles at index 0. Since 'i' starts at 1, the first + * int_handles index will be 0. 
+ */ + rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1], + IDXD_IRQ_MSIX); + if (rc < 0) { + free_irq(irq_entry->vector, irq_entry); + goto err_wq_irqs; + } + dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]); + } } idxd_unmask_error_interrupts(idxd); @@ -136,6 +154,9 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) while (--i >= 0) { irq_entry = &idxd->irq_entries[i]; free_irq(irq_entry->vector, irq_entry); + if (i != 0) + idxd_device_release_int_handle(idxd, + idxd->int_handles[i], IDXD_IRQ_MSIX); } err_misc_irq: /* Disable error interrupt generation */ @@ -288,9 +309,15 @@ static int idxd_setup_internals(struct idxd_device *idxd) init_waitqueue_head(&idxd->cmd_waitq); + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { + idxd->int_handles = devm_kcalloc(dev, idxd->max_wqs, sizeof(int), GFP_KERNEL); + if (!idxd->int_handles) + return -ENOMEM; + } + rc = idxd_setup_wqs(idxd); if (rc < 0) - return rc; + goto err_wqs; rc = idxd_setup_engines(idxd); if (rc < 0) @@ -317,6 +344,8 @@ static int idxd_setup_internals(struct idxd_device *idxd) err_engine: for (i = 0; i < idxd->max_wqs; i++) put_device(&idxd->wqs[i]->conf_dev); + err_wqs: + kfree(idxd->int_handles); return rc; } @@ -345,6 +374,12 @@ static void idxd_read_caps(struct idxd_device *idxd) /* reading generic capabilities */ idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET); dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits); + + if (idxd->hw.gen_cap.cmd_cap) { + idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET); + dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap); + } + idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift; dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes); idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift; @@ -604,6 +639,24 @@ static void idxd_flush_work_list(struct idxd_irq_entry *ie) } } +static void idxd_release_int_handles(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + int i, rc; + + for (i = 0; i < idxd->num_wq_irqs; i++) { + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) { + rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i], + IDXD_IRQ_MSIX); + if (rc < 0) + dev_warn(dev, "irq handle %d release failed\n", + idxd->int_handles[i]); + else + dev_dbg(dev, "int handle released: %u\n", idxd->int_handles[i]); + } + } +} + static void idxd_shutdown(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); @@ -630,6 +683,7 @@ static void idxd_shutdown(struct pci_dev *pdev) } idxd_msix_perm_clear(idxd); + idxd_release_int_handles(idxd); pci_free_irq_vectors(pdev); pci_iounmap(pdev, idxd->reg_base); pci_disable_device(pdev); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 751ecb4f9f81..5cbf368c7367 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -24,8 +24,8 @@ union gen_cap_reg { u64 overlap_copy:1; u64 cache_control_mem:1; u64 cache_control_cache:1; + u64 cmd_cap:1; u64 rsvd:3; - u64 int_handle_req:1; u64 dest_readback:1; u64 drain_readback:1; u64 rsvd2:6; @@ -180,8 +180,11 @@ enum idxd_cmd { IDXD_CMD_DRAIN_PASID, IDXD_CMD_ABORT_PASID, IDXD_CMD_REQUEST_INT_HANDLE, + IDXD_CMD_RELEASE_INT_HANDLE, }; +#define CMD_INT_HANDLE_IMS 0x10000 + #define IDXD_CMDSTS_OFFSET 0xa8 union cmdsts_reg { struct { @@ -193,6 +196,8 @@ union cmdsts_reg { u32 bits; } __packed; #define IDXD_CMDSTS_ACTIVE 0x80000000 +#define IDXD_CMDSTS_ERR_MASK 0xff +#define IDXD_CMDSTS_RES_SHIFT 8
enum idxd_cmdsts_err { IDXD_CMDSTS_SUCCESS = 0, @@ -228,6 +233,8 @@ enum idxd_cmdsts_err { IDXD_CMDSTS_ERR_NO_HANDLE, }; +#define IDXD_CMDCAP_OFFSET 0xb0 + #define IDXD_SWERR_OFFSET 0xc0 #define IDXD_SWERR_VALID 0x00000001 #define IDXD_SWERR_OVERFLOW 0x00000002 diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 4bb09f7be647..0f15ed852daa 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -22,11 +22,23 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) desc->hw->pasid = idxd->pasid; /* - * Descriptor completion vectors are 1-8 for MSIX. We will round - robin through the 8 vectors. + * Descriptor completion vectors are 1...N for MSIX. We will round + * robin through the N vectors. */ wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1; - desc->hw->int_handle = wq->vec_ptr; + if (!idxd->int_handles) { + desc->hw->int_handle = wq->vec_ptr; + } else { + desc->vector = wq->vec_ptr; + /* + * int_handles are only for descriptor completion. However for device + * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even + * though we are rotating through 1...N for descriptor interrupts, we + * need to acquire the int_handles from 0..N-1. + */ + desc->hw->int_handle = idxd->int_handles[desc->vector - 1]; + } + return desc; } @@ -79,7 +91,6 @@ void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; - int vec = desc->hw->int_handle; void __iomem *portal; int rc; @@ -117,9 +128,19 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) - llist_add(&desc->llnode, - &idxd->irq_entries[vec].pending_llist); + if (desc->hw->flags & IDXD_OP_FLAG_RCI) { + int vec; + + /* + * If the driver is on the host kernel, it would be the value + * assigned to the interrupt handle, which is the index for the + * MSIX vector. If it's a guest, then it can't use the int_handle + * since that is the index to IMS for the entire device. The guest + * device local index will be used. + */ + vec = !idxd->int_handles ? desc->hw->int_handle : desc->vector; + llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist); + } return 0; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 989265c8aeb8..6fedf97dbf37 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1606,6 +1606,7 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->wqs); kfree(idxd->engines); kfree(idxd->irq_entries); + kfree(idxd->int_handles); ida_free(&idxd_ida, idxd->id); kfree(idxd); } -- Gitee From 70fb14320cdd98548a27417102a15c116a5a7569 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Apr 2021 11:46:40 -0700 Subject: [PATCH 025/173] dmaengine: idxd: convert sprintf() to sysfs_emit() for all usages mainline inclusion from mainline-v5.13 commit 8241571fac9eeb7f3424ad343369eaa411919da3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 8241571fac9e dmaengine: idxd: convert sprintf() to sysfs_emit() for all usages. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Convert sprintf() to sysfs_emit() in order to check for buffer overrun on sysfs outputs.
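For reference, a minimal sketch of the conversion pattern; example_show() and val are illustrative stand-ins, not code from this patch:

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	unsigned int val = 42;	/* stand-in for a real attribute value */

	/* Before: return sprintf(buf, "%u\n", val); no bounds checking. */
	/* sysfs_emit() checks that buf is the page-aligned sysfs buffer
	 * and caps the formatted output at PAGE_SIZE.
	 */
	return sysfs_emit(buf, "%u\n", val);
}

Attributes that build their output incrementally use sysfs_emit_at(buf, offset, ...) instead, which applies the same bounds checking from a given offset, as the group_engines_show() and group_work_queues_show() conversions below illustrate.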
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161894440044.3202472.13926639619695319753.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/sysfs.c | 116 +++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 61 deletions(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 6fedf97dbf37..4d8d9dac4702 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -350,9 +350,9 @@ static ssize_t engine_group_id_show(struct device *dev, container_of(dev, struct idxd_engine, conf_dev); if (engine->group) - return sprintf(buf, "%d\n", engine->group->id); + return sysfs_emit(buf, "%d\n", engine->group->id); else - return sprintf(buf, "%d\n", -1); + return sysfs_emit(buf, "%d\n", -1); } static ssize_t engine_group_id_store(struct device *dev, @@ -447,7 +447,7 @@ static ssize_t group_tokens_reserved_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%u\n", group->tokens_reserved); + return sysfs_emit(buf, "%u\n", group->tokens_reserved); } static ssize_t group_tokens_reserved_store(struct device *dev, @@ -495,7 +495,7 @@ static ssize_t group_tokens_allowed_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%u\n", group->tokens_allowed); + return sysfs_emit(buf, "%u\n", group->tokens_allowed); } static ssize_t group_tokens_allowed_store(struct device *dev, @@ -540,7 +540,7 @@ static ssize_t group_use_token_limit_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%u\n", group->use_token_limit); + return sysfs_emit(buf, "%u\n", group->use_token_limit); } static ssize_t group_use_token_limit_store(struct device *dev, @@ -583,7 +583,6 @@ static ssize_t group_engines_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); int i, rc = 0; - char *tmp = buf; struct idxd_device *idxd = group->idxd; for (i = 0; i < idxd->max_engines; i++) { @@ -593,12 +592,13 @@ static ssize_t group_engines_show(struct device *dev, continue; if (engine->group->id == group->id) - rc += sprintf(tmp + rc, "engine%d.%d ", - idxd->id, engine->id); + rc += sysfs_emit_at(buf, rc, "engine%d.%d ", idxd->id, engine->id); } + if (!rc) + return 0; rc--; - rc += sprintf(tmp + rc, "\n"); + rc += sysfs_emit_at(buf, rc, "\n"); return rc; } @@ -612,7 +612,6 @@ static ssize_t group_work_queues_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); int i, rc = 0; - char *tmp = buf; struct idxd_device *idxd = group->idxd; for (i = 0; i < idxd->max_wqs; i++) { @@ -622,12 +621,13 @@ static ssize_t group_work_queues_show(struct device *dev, continue; if (wq->group->id == group->id) - rc += sprintf(tmp + rc, "wq%d.%d ", - idxd->id, wq->id); + rc += sysfs_emit_at(buf, rc, "wq%d.%d ", idxd->id, wq->id); } + if (!rc) + return 0; rc--; - rc += sprintf(tmp + rc, "\n"); + rc += sysfs_emit_at(buf, rc, "\n"); return rc; } @@ -642,7 +642,7 @@ static ssize_t group_traffic_class_a_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%d\n", group->tc_a); + return sysfs_emit(buf, "%d\n", group->tc_a); } static ssize_t group_traffic_class_a_store(struct device *dev, @@ -683,7 +683,7 @@ static ssize_t group_traffic_class_b_show(struct device *dev, struct idxd_group *group = 
container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%d\n", group->tc_b); + return sysfs_emit(buf, "%d\n", group->tc_b); } static ssize_t group_traffic_class_b_store(struct device *dev, @@ -756,7 +756,7 @@ static ssize_t wq_clients_show(struct device *dev, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%d\n", wq->client_count); + return sysfs_emit(buf, "%d\n", wq->client_count); } static struct device_attribute dev_attr_wq_clients = @@ -769,12 +769,12 @@ static ssize_t wq_state_show(struct device *dev, switch (wq->state) { case IDXD_WQ_DISABLED: - return sprintf(buf, "disabled\n"); + return sysfs_emit(buf, "disabled\n"); case IDXD_WQ_ENABLED: - return sprintf(buf, "enabled\n"); + return sysfs_emit(buf, "enabled\n"); } - return sprintf(buf, "unknown\n"); + return sysfs_emit(buf, "unknown\n"); } static struct device_attribute dev_attr_wq_state = @@ -786,9 +786,9 @@ static ssize_t wq_group_id_show(struct device *dev, struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); if (wq->group) - return sprintf(buf, "%u\n", wq->group->id); + return sysfs_emit(buf, "%u\n", wq->group->id); else - return sprintf(buf, "-1\n"); + return sysfs_emit(buf, "-1\n"); } static ssize_t wq_group_id_store(struct device *dev, @@ -840,8 +840,7 @@ static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%s\n", - wq_dedicated(wq) ? "dedicated" : "shared"); + return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? "dedicated" : "shared"); } static ssize_t wq_mode_store(struct device *dev, @@ -877,7 +876,7 @@ static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->size); + return sysfs_emit(buf, "%u\n", wq->size); } static int total_claimed_wq_size(struct idxd_device *idxd) @@ -928,7 +927,7 @@ static ssize_t wq_priority_show(struct device *dev, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->priority); + return sysfs_emit(buf, "%u\n", wq->priority); } static ssize_t wq_priority_store(struct device *dev, @@ -965,8 +964,7 @@ static ssize_t wq_block_on_fault_show(struct device *dev, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", - test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)); + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)); } static ssize_t wq_block_on_fault_store(struct device *dev, @@ -1005,7 +1003,7 @@ static ssize_t wq_threshold_show(struct device *dev, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->threshold); + return sysfs_emit(buf, "%u\n", wq->threshold); } static ssize_t wq_threshold_store(struct device *dev, @@ -1048,15 +1046,12 @@ static ssize_t wq_type_show(struct device *dev, switch (wq->type) { case IDXD_WQT_KERNEL: - return sprintf(buf, "%s\n", - idxd_wq_type_names[IDXD_WQT_KERNEL]); + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_KERNEL]); case IDXD_WQT_USER: - return sprintf(buf, "%s\n", - idxd_wq_type_names[IDXD_WQT_USER]); + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_USER]); case IDXD_WQT_NONE: default: - return sprintf(buf, "%s\n", - idxd_wq_type_names[IDXD_WQT_NONE]); + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_NONE]); } return -EINVAL; @@ -1097,7 +1092,7 @@ static 
ssize_t wq_name_show(struct device *dev, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%s\n", wq->name); + return sysfs_emit(buf, "%s\n", wq->name); } static ssize_t wq_name_store(struct device *dev, @@ -1167,7 +1162,7 @@ static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attri { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%llu\n", wq->max_xfer_bytes); + return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes); } static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr, @@ -1204,7 +1199,7 @@ static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribut { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->max_batch_size); + return sysfs_emit(buf, "%u\n", wq->max_batch_size); } static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr, @@ -1240,7 +1235,7 @@ static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute * { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->ats_dis); + return sysfs_emit(buf, "%u\n", wq->ats_dis); } static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr, @@ -1317,7 +1312,7 @@ static ssize_t version_show(struct device *dev, struct device_attribute *attr, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%#x\n", idxd->hw.version); + return sysfs_emit(buf, "%#x\n", idxd->hw.version); } static DEVICE_ATTR_RO(version); @@ -1328,7 +1323,7 @@ static ssize_t max_work_queues_size_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_wq_size); + return sysfs_emit(buf, "%u\n", idxd->max_wq_size); } static DEVICE_ATTR_RO(max_work_queues_size); @@ -1338,7 +1333,7 @@ static ssize_t max_groups_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_groups); + return sysfs_emit(buf, "%u\n", idxd->max_groups); } static DEVICE_ATTR_RO(max_groups); @@ -1348,7 +1343,7 @@ static ssize_t max_work_queues_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_wqs); + return sysfs_emit(buf, "%u\n", idxd->max_wqs); } static DEVICE_ATTR_RO(max_work_queues); @@ -1358,7 +1353,7 @@ static ssize_t max_engines_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_engines); + return sysfs_emit(buf, "%u\n", idxd->max_engines); } static DEVICE_ATTR_RO(max_engines); @@ -1368,7 +1363,7 @@ static ssize_t numa_node_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%d\n", dev_to_node(&idxd->pdev->dev)); + return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev)); } static DEVICE_ATTR_RO(numa_node); @@ -1378,7 +1373,7 @@ static ssize_t max_batch_size_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_batch_size); + return sysfs_emit(buf, "%u\n", idxd->max_batch_size); } static DEVICE_ATTR_RO(max_batch_size); @@ -1389,7 +1384,7 @@ static ssize_t max_transfer_size_show(struct device *dev, struct idxd_device *idxd = 
container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%llu\n", idxd->max_xfer_bytes); + return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes); } static DEVICE_ATTR_RO(max_transfer_size); @@ -1415,7 +1410,7 @@ static ssize_t gen_cap_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%#llx\n", idxd->hw.gen_cap.bits); + return sysfs_emit(buf, "%#llx\n", idxd->hw.gen_cap.bits); } static DEVICE_ATTR_RO(gen_cap); @@ -1425,8 +1420,7 @@ static ssize_t configurable_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", - test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)); + return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)); } static DEVICE_ATTR_RO(configurable); @@ -1446,7 +1440,7 @@ static ssize_t clients_show(struct device *dev, } spin_unlock_irqrestore(&idxd->dev_lock, flags); - return sprintf(buf, "%d\n", count); + return sysfs_emit(buf, "%d\n", count); } static DEVICE_ATTR_RO(clients); @@ -1456,7 +1450,7 @@ static ssize_t pasid_enabled_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", device_pasid_enabled(idxd)); + return sysfs_emit(buf, "%u\n", device_pasid_enabled(idxd)); } static DEVICE_ATTR_RO(pasid_enabled); @@ -1469,14 +1463,14 @@ static ssize_t state_show(struct device *dev, switch (idxd->state) { case IDXD_DEV_DISABLED: case IDXD_DEV_CONF_READY: - return sprintf(buf, "disabled\n"); + return sysfs_emit(buf, "disabled\n"); case IDXD_DEV_ENABLED: - return sprintf(buf, "enabled\n"); + return sysfs_emit(buf, "enabled\n"); case IDXD_DEV_HALTED: - return sprintf(buf, "halted\n"); + return sysfs_emit(buf, "halted\n"); } - return sprintf(buf, "unknown\n"); + return sysfs_emit(buf, "unknown\n"); } static DEVICE_ATTR_RO(state); @@ -1490,10 +1484,10 @@ static ssize_t errors_show(struct device *dev, spin_lock_irqsave(&idxd->dev_lock, flags); for (i = 0; i < 4; i++) - out += sprintf(buf + out, "%#018llx ", idxd->sw_err.bits[i]); + out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]); spin_unlock_irqrestore(&idxd->dev_lock, flags); out--; - out += sprintf(buf + out, "\n"); + out += sysfs_emit_at(buf, out, "\n"); return out; } static DEVICE_ATTR_RO(errors); @@ -1504,7 +1498,7 @@ static ssize_t max_tokens_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_tokens); + return sysfs_emit(buf, "%u\n", idxd->max_tokens); } static DEVICE_ATTR_RO(max_tokens); @@ -1514,7 +1508,7 @@ static ssize_t token_limit_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->token_limit); + return sysfs_emit(buf, "%u\n", idxd->token_limit); } static ssize_t token_limit_store(struct device *dev, @@ -1553,7 +1547,7 @@ static ssize_t cdev_major_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->major); + return sysfs_emit(buf, "%u\n", idxd->major); } static DEVICE_ATTR_RO(cdev_major); @@ -1562,7 +1556,7 @@ static ssize_t cmd_status_show(struct device *dev, { struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%#x\n", idxd->cmd_status); + return sysfs_emit(buf, "%#x\n", idxd->cmd_status); } static DEVICE_ATTR_RO(cmd_status); -- Gitee From 
80106f738b7ff842aa7fdc859e3cdd9f8f56b0f4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Apr 2021 11:46:46 -0700 Subject: [PATCH 026/173] dmaengine: idxd: enable SVA feature for IOMMU mainline inclusion from mainline-v5.13 commit cf5f86a7d47df149857ba2fb72f9c6c9da46af2e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit cf5f86a7d47d dmaengine: idxd: enable SVA feature for IOMMU. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Enable IOMMU_DEV_FEAT_SVA before attempting to bind a pasid. This is needed according to the iommu_sva_bind_device() comment. Currently the Intel IOMMU code does this before the bind call. It really needs to be controlled by the driver. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161894440621.3202472.17644507396206848134.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 124f8bc7ac04..f113e95c5d7d 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -501,11 +501,18 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { - rc = idxd_enable_system_pasid(idxd); - if (rc < 0) - dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc); - else - set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA); + if (rc == 0) { + rc = idxd_enable_system_pasid(idxd); + if (rc < 0) { + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc); + } else { + set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } + } else { + dev_warn(dev, "Unable to turn on SVA feature.\n"); + } } else if (!sva) { dev_warn(dev, "User forced SVA off via module param.\n"); } @@ -539,6 +546,7 @@ static int idxd_probe(struct idxd_device *idxd) err: if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); return rc; } @@ -699,6 +707,7 @@ static void idxd_remove(struct pci_dev *pdev) if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); idxd_unregister_devices(idxd); + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); } static struct pci_driver idxd_pci_driver = { -- Gitee From f222d9d7185e1f530621740b1fb906d330bc4326 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Apr 2021 11:46:51 -0700 Subject: [PATCH 027/173] dmaengine: idxd: support reporting of halt interrupt mainline inclusion from mainline-v5.13 commit 5b0c68c473a131c2acb21abad44b0047b200e185 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 5b0c68c473a1 dmaengine: idxd: support reporting of halt interrupt. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Unmask the halt error interrupt so it gets reported to the interrupt handler. When the halt state interrupt is received, quiesce the kernel WQs and unmap the portals to stop submission.
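A simplified sketch of the resulting halt path in process_misc_interrupts(), condensed from the irq.c hunk below (the read of the general status (GENSTS) register and the error reporting are omitted; gensts stands for the decoded register value):

if (gensts.state == IDXD_DEVICE_STATE_HALT) {
	idxd->state = IDXD_DEV_HALTED;
	if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) {
		/* Recoverable: reinitialize the device from process context. */
		queue_work(idxd->wq, &idxd->work);
	} else {
		/* Fatal: stop all kernel submission paths immediately. */
		spin_lock_bh(&idxd->dev_lock);
		idxd_wqs_quiesce(idxd);
		idxd_wqs_unmap_portal(idxd);
		idxd_device_wqs_clear_state(idxd);
		spin_unlock_bh(&idxd->dev_lock);
	}
}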
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161894441167.3202472.9485946398140619501.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 15 +++++++++++++++ drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 12 ++++++++++++ drivers/dma/idxd/irq.c | 2 ++ drivers/dma/idxd/registers.h | 3 ++- 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 54d5afec81cf..3934e660d951 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -47,6 +47,7 @@ void idxd_unmask_error_interrupts(struct idxd_device *idxd) genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); genctrl.softerr_int_en = 1; + genctrl.halt_int_en = 1; iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); } @@ -56,6 +57,7 @@ void idxd_mask_error_interrupts(struct idxd_device *idxd) genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); genctrl.softerr_int_en = 0; + genctrl.halt_int_en = 0; iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); } @@ -312,6 +314,19 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq) struct device *dev = &wq->idxd->pdev->dev; devm_iounmap(dev, wq->portal); + wq->portal = NULL; +} + +void idxd_wqs_unmap_portal(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + if (wq->portal) + idxd_wq_unmap_portal(wq); + } } int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index c1d4a1976206..d7185c6bfade 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -371,6 +371,7 @@ int idxd_register_devices(struct idxd_device *idxd); void idxd_unregister_devices(struct idxd_device *idxd); int idxd_register_driver(void); void idxd_unregister_driver(void); +void idxd_wqs_quiesce(struct idxd_device *idxd); /* device interrupt control */ void idxd_msix_perm_setup(struct idxd_device *idxd); @@ -400,6 +401,7 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, enum idxd_interrupt_type irq_type); /* work queue control */ +void idxd_wqs_unmap_portal(struct idxd_device *idxd); int idxd_wq_alloc_resources(struct idxd_wq *wq); void idxd_wq_free_resources(struct idxd_wq *wq); int idxd_wq_enable(struct idxd_wq *wq); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f113e95c5d7d..17e8f5acf3fe 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -647,6 +647,18 @@ static void idxd_flush_work_list(struct idxd_irq_entry *ie) } } +void idxd_wqs_quiesce(struct idxd_device *idxd) +{ + struct idxd_wq *wq; + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL) + idxd_wq_quiesce(wq); + } +} + static void idxd_release_int_handles(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index fc0781e3f36d..43eea5c9cbd4 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -202,6 +202,8 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) queue_work(idxd->wq, &idxd->work); } else { spin_lock_bh(&idxd->dev_lock); + idxd_wqs_quiesce(idxd); + idxd_wqs_unmap_portal(idxd); idxd_device_wqs_clear_state(idxd); dev_err(&idxd->pdev->dev, "idxd halted, need %s.\n", diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 5cbf368c7367..6c11375cc56a 
100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -120,7 +120,8 @@ union gencfg_reg { union genctrl_reg { struct { u32 softerr_int_en:1; - u32 rsvd:31; + u32 halt_int_en:1; + u32 rsvd:30; }; u32 bits; } __packed; -- Gitee From 973aa11216ce2ad6e576bc117e87367f2c8f6356 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Apr 2021 12:00:56 -0700 Subject: [PATCH 028/173] dmaengine: idxd: device cmd should use dedicated lock mainline inclusion from mainline-v5.13 commit 53b2ee7f637c4f1fa2f50dbdb210088e30c11d2b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 53b2ee7f637c dmaengine: idxd: device cmd should use dedicated lock. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Create a dedicated lock for device command operations. Put the device command operation under finer grained locking instead of using the idxd->dev_lock. Suggested-by: Sanjay Kumar Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161894525685.3210132.16160045731436382560.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 18 +++++++++--------- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 3934e660d951..420b93fe5feb 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -465,13 +465,13 @@ int idxd_device_init_reset(struct idxd_device *idxd) memset(&cmd, 0, sizeof(cmd)); cmd.cmd = IDXD_CMD_RESET_DEVICE; dev_dbg(dev, "%s: sending reset for init.\n", __func__); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock_irqsave(&idxd->cmd_lock, flags); iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE) cpu_relax(); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock_irqrestore(&idxd->cmd_lock, flags); return 0; } @@ -494,10 +494,10 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, cmd.operand = operand; cmd.int_req = 1; - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock_irqsave(&idxd->cmd_lock, flags); wait_event_lock_irq(idxd->cmd_waitq, !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags), - idxd->dev_lock); + idxd->cmd_lock); dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n", __func__, cmd_code, operand); @@ -511,9 +511,9 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, * After command submitted, release lock and go to sleep until * the command completes via interrupt. 
*/ - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock_irqrestore(&idxd->cmd_lock, flags); wait_for_completion(&done); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock_irqsave(&idxd->cmd_lock, flags); if (status) { *status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); idxd->cmd_status = *status & GENMASK(7, 0); @@ -522,7 +522,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, __clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags); /* Wake up other pending commands */ wake_up(&idxd->cmd_waitq); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock_irqrestore(&idxd->cmd_lock, flags); } int idxd_device_enable(struct idxd_device *idxd) @@ -667,13 +667,13 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock_irqsave(&idxd->cmd_lock, flags); iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE) cpu_relax(); status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock_irqrestore(&idxd->cmd_lock, flags); if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) { dev_dbg(dev, "release int handle failed: %#x\n", status); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index d7185c6bfade..87330b6940fa 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -204,6 +204,7 @@ struct idxd_device { void __iomem *reg_base; spinlock_t dev_lock; /* spinlock for device */ + spinlock_t cmd_lock; /* spinlock for device commands */ struct completion *cmd_done; struct idxd_group **groups; struct idxd_wq **wqs; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 17e8f5acf3fe..5da8609e63b8 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -449,6 +449,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d } spin_lock_init(&idxd->dev_lock); + spin_lock_init(&idxd->cmd_lock); return idxd; } -- Gitee From e7fd44b562b0da76eff20c08cb744f6060095d9f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Apr 2021 12:00:34 -0700 Subject: [PATCH 029/173] dmaengine: idxd: remove MSIX masking for interrupt handlers mainline inclusion from mainline-v5.13 commit a16104617d212d4b482568847b25172972b87e60 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit a16104617d21 dmaengine: idxd: remove MSIX masking for interrupt handlers. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Remove interrupt masking and just let the hard irq handler keep firing for new events. This is less of a performance impact vs the MMIO readback inside the pci_msi_{mask,unmask}_irq(). Especially on a loaded system, those flushes can be stuck behind large amounts of MMIO writes. When a guest kernel is running on top of VFIO mdev, mask/unmask causes a vmexit each time, which is not desirable.
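The resulting registration, for reference (matching the init.c hunks below); with a NULL primary handler, the genirq core's default hard-irq handler simply wakes the irq thread:

	/* PCI MSI-X is treated as one-shot safe by the irq core, so a
	 * NULL primary handler is accepted here without IRQF_ONESHOT;
	 * no per-interrupt mask/unmask MMIO is issued, the thread is
	 * just woken.
	 */
	rc = request_threaded_irq(irq_entry->vector, NULL, idxd_wq_thread,
				  0, "idxd-portal", irq_entry);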
Suggested-by: Dan Williams Signed-off-by: Dave Jiang Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/161894523436.3210025.1834640110556139277.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 1 - drivers/dma/idxd/init.c | 4 ++-- drivers/dma/idxd/irq.c | 12 ------------ 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 87330b6940fa..97c96ca6ab70 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -377,7 +377,6 @@ void idxd_wqs_quiesce(struct idxd_device *idxd); /* device interrupt control */ void idxd_msix_perm_setup(struct idxd_device *idxd); void idxd_msix_perm_clear(struct idxd_device *idxd); -irqreturn_t idxd_irq_handler(int vec, void *data); irqreturn_t idxd_misc_thread(int vec, void *data); irqreturn_t idxd_wq_thread(int irq, void *data); void idxd_mask_error_interrupts(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 5da8609e63b8..d250b243bdff 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -102,7 +102,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } irq_entry = &idxd->irq_entries[0]; - rc = request_threaded_irq(irq_entry->vector, idxd_irq_handler, idxd_misc_thread, + rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread, 0, "idxd-misc", irq_entry); if (rc < 0) { dev_err(dev, "Failed to allocate misc interrupt.\n"); @@ -119,7 +119,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) init_llist_head(&idxd->irq_entries[i].pending_llist); INIT_LIST_HEAD(&idxd->irq_entries[i].work_list); - rc = request_threaded_irq(irq_entry->vector, idxd_irq_handler, + rc = request_threaded_irq(irq_entry->vector, NULL, idxd_wq_thread, 0, "idxd-portal", irq_entry); if (rc < 0) { dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 43eea5c9cbd4..afee571e0194 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -102,15 +102,6 @@ static int idxd_device_schedule_fault_process(struct idxd_device *idxd, return 0; } -irqreturn_t idxd_irq_handler(int vec, void *data) -{ - struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = irq_entry->idxd; - - idxd_mask_msix_vector(idxd, irq_entry->id); - return IRQ_WAKE_THREAD; -} - static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { struct device *dev = &idxd->pdev->dev; @@ -237,7 +228,6 @@ irqreturn_t idxd_misc_thread(int vec, void *data) iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); } - idxd_unmask_msix_vector(idxd, irq_entry->id); return IRQ_HANDLED; } @@ -394,8 +384,6 @@ irqreturn_t idxd_wq_thread(int irq, void *data) int processed; processed = idxd_desc_process(irq_entry); - idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id); - if (processed == 0) return IRQ_NONE; -- Gitee From bceacf8315c0518375a30a5f85eff25bbcdb380a Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sat, 24 Apr 2021 10:04:15 -0500 Subject: [PATCH 030/173] dmaengine: idxd: Add IDXD performance monitor support mainline inclusion from mainline-v5.13 commit 81dd4d4d6178306ab31db91bdc7353d485bdafce category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 81dd4d4d6178 dmaengine: idxd: Add IDXD performance monitor support. Incremental backporting patches for DSA/IAA on Intel Xeon platform. 
-------------------------------- Implement the IDXD performance monitor capability (named 'perfmon' in the DSA (Data Streaming Accelerator) spec [1]), which supports the collection of information about key events occurring during DSA and IAX (Intel Analytics Accelerator) device execution, to assist in performance tuning and debugging. The idxd perfmon support is implemented as part of the IDXD driver and interfaces with the Linux perf framework. It has several features in common with the existing uncore pmu support: - it does not support sampling - does not support per-thread counting However it also has some unique features not present in the core and uncore support: - all general-purpose counters are identical, thus no event constraints - operation is always system-wide While the core perf subsystem assumes that all counters are by default per-cpu, the uncore pmus are socket-scoped and use a cpu mask to restrict counting to one cpu from each socket. IDXD counters use a similar strategy but expand the scope even further; since IDXD counters are system-wide and can be read from any cpu, the IDXD perf driver picks a single cpu to do the work (with cpu hotplug notifiers to choose a different cpu if the chosen one is taken off-line). More specifically, the perf userspace tool by default opens a counter for each cpu for an event. However, if it finds a cpumask file associated with the pmu under sysfs, as is the case with the uncore pmus, it will open counters only on the cpus specified by the cpumask. Since perfmon only needs to open a single counter per event for a given IDXD device, the perfmon driver will create a sysfs cpumask file for the device and insert the first cpu of the system into it. When a user uses perf to open an event, perf will open a single counter on the cpu specified by the cpu mask. This amounts to the default system-wide rather than per-cpu counting mentioned previously for perfmon pmu events. In order to keep the cpu mask up-to-date, the driver implements cpu hotplug support for multiple devices, as IDXD usually enumerates and registers more than one idxd device. The perfmon driver implements basic perfmon hardware capability discovery and configuration, and is initialized by the IDXD driver's probe function. During initialization, the driver retrieves the total number of supported performance counters, the pmu ID, and the device type from idxd device, and registers itself under the Linux perf framework. The perf userspace tool can be used to monitor single or multiple events depending on the given configuration, as well as event groups, which are also supported by the perfmon driver. The user configures events using the perf tool command-line interface by specifying the event and corresponding event category, along with an optional set of filters that can be used to restrict counting to specific work queues, traffic classes, page and transfer sizes, and engines (See [1] for specifics). With the configuration specified by the user, the perf tool issues a system call passing that information to the kernel, which uses it to initialize the specified event(s). The event(s) are opened and started, and following termination of the perf command, they're stopped. At that point, the perfmon driver will read the latest count for the event(s), calculate the difference between the latest counter values and previously tracked counter values, and display the final incremental count as the event count for the cycle. 
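To make the update arithmetic concrete, here is a worked sketch of the shift trick used by perfmon_pmu_event_update() later in this patch (counter_width, new_raw, and prev_raw stand in for the driver's variables):

	/* With counter_width = 32, shift = 32: moving both samples into
	 * the upper bits makes the 64-bit subtraction wrap correctly even
	 * if the hardware counter rolled over once between reads; shifting
	 * back down recovers the true unsigned delta.
	 */
	int shift = 64 - counter_width;
	u64 delta = ((new_raw << shift) - (prev_raw << shift)) >> shift;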
An overflow handler registered on the IDXD irq path is used to account for counter overflows, which are signaled by an overflow interrupt. Below are a couple of examples of perf usage for monitoring DSA events. The following monitors all events in the 'engine' category. Because no filters are specified, this captures all engine events for the workload, which in this case is 19 iterations of the work generated by the kernel dmatest module. Details describing the events can be found in Appendix D of [1], Performance Monitoring Events, but briefly they are: event 0x1: total input data processed, in 32-byte units event 0x2: total data written, in 32-byte units event 0x4: number of work descriptors that read the source event 0x8: number of work descriptors that write the destination event 0x10: number of work descriptors dispatched from batch descriptors event 0x20: number of work descriptors dispatched from work queues # perf stat -e dsa0/event=0x1,event_category=0x1/, dsa0/event=0x2,event_category=0x1/, dsa0/event=0x4,event_category=0x1/, dsa0/event=0x8,event_category=0x1/, dsa0/event=0x10,event_category=0x1/, dsa0/event=0x20,event_category=0x1/ modprobe dmatest channel=dma0chan0 timeout=2000 iterations=19 run=1 wait=1 Performance counter stats for 'system wide': 5,332 dsa0/event=0x1,event_category=0x1/ 5,327 dsa0/event=0x2,event_category=0x1/ 19 dsa0/event=0x4,event_category=0x1/ 19 dsa0/event=0x8,event_category=0x1/ 0 dsa0/event=0x10,event_category=0x1/ 19 dsa0/event=0x20,event_category=0x1/ 21.977436186 seconds time elapsed The command below illustrates filter usage with a simple example. It specifies that MEM_MOVE operations should be counted for the DSA device dsa0 (event 0x8 corresponds to the EV_MEM_MOVE event - Number of Memory Move Descriptors, which is part of event category 0x3 - Operations. The detailed category and event IDs are available in Appendix D, Performance Monitoring Events, of [1]). In addition to the event and event category, a number of filters are also specified (the detailed filter values are available in Chapter 6.4 (Filter Support) of [1]), which will restrict counting to only those events that meet all of the filter criteria. In this case, the filters specify that only MEM_MOVE operations that are serviced by work queue wq0 and specifically engine number engine0 and traffic class tc0, having sizes between 0 and 4k and a page size between 0 and 1G, result in a counter hit; anything else will be filtered out and not appear in the final count. Note that filters are optional - any filter not specified is assumed to be all ones and will pass anything. # perf stat -e dsa0/filter_wq=0x1,filter_tc=0x1,filter_sz=0x7, filter_eng=0x1,event=0x8,event_category=0x3/ modprobe dmatest channel=dma0chan0 timeout=2000 iterations=19 run=1 wait=1 Performance counter stats for 'system wide': 19 dsa0/filter_wq=0x1,filter_tc=0x1,filter_sz=0x7, filter_eng=0x1,event=0x8,event_category=0x3/ 21.865914091 seconds time elapsed The output above reflects that the unspecified workload resulted in the counting of 19 MEM_MOVE operation events that met the filter criteria. [1]: https://software.intel.com/content/www/us/en/develop/download/intel-data-streaming-accelerator-preliminary-architecture-specification.html [ Based on work originally by Jing Lin.
] Reviewed-by: Dave Jiang Reviewed-by: Kan Liang Signed-off-by: Tom Zanussi Link: https://lore.kernel.org/r/0c5080a7d541904c4ad42b848c76a1ce056ddac7.1619276133.git.zanussi@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- .../sysfs-bus-event_source-devices-dsa | 30 + drivers/dma/Kconfig | 12 + drivers/dma/idxd/Makefile | 2 + drivers/dma/idxd/idxd.h | 45 ++ drivers/dma/idxd/perfmon.c | 662 ++++++++++++++++++ drivers/dma/idxd/perfmon.h | 119 ++++ drivers/dma/idxd/registers.h | 108 +++ include/linux/cpuhotplug.h | 1 + 8 files changed, 979 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa create mode 100644 drivers/dma/idxd/perfmon.c create mode 100644 drivers/dma/idxd/perfmon.h diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa b/Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa new file mode 100644 index 000000000000..3c7d132281b0 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-dsa @@ -0,0 +1,30 @@ +What: /sys/bus/event_source/devices/dsa*/format +Date: April 2021 +KernelVersion: 5.13 +Contact: Tom Zanussi +Description: Read-only. Attribute group to describe the magic bits + that go into perf_event_attr.config or + perf_event_attr.config1 for the IDXD DSA pmu. (See also + ABI/testing/sysfs-bus-event_source-devices-format). + + Each attribute in this group defines a bit range in + perf_event_attr.config or perf_event_attr.config1. + All supported attributes are listed below (See the + IDXD DSA Spec for possible attribute values):: + + event_category = "config:0-3" - event category + event = "config:4-31" - event ID + + filter_wq = "config1:0-31" - workqueue filter + filter_tc = "config1:32-39" - traffic class filter + filter_pgsz = "config1:40-43" - page size filter + filter_sz = "config1:44-51" - transfer size filter + filter_eng = "config1:52-59" - engine filter + +What: /sys/bus/event_source/devices/dsa*/cpumask +Date: April 2021 +KernelVersion: 5.13 +Contact: Tom Zanussi +Description: Read-only. This file always returns the cpu to which the + IDXD DSA pmu is bound for access to all dsa pmu + performance monitoring events. diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 41898e32df2a..8df68dd4dd06 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -307,6 +307,18 @@ config INTEL_IDXD_SVM depends on PCI_PASID depends on PCI_IOV +config INTEL_IDXD_PERFMON + bool "Intel Data Accelerators performance monitor support" + depends on INTEL_IDXD + help + Enable performance monitor (pmu) support for the Intel(R) + data accelerators present in Intel Xeon CPU. With this + enabled, perf can be used to monitor the DSA (Intel Data + Streaming Accelerator) events described in the Intel DSA + spec. + + If unsure, say N. 
+ config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86_64 && !UML diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index 8978b898d777..6d11558756f8 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -1,2 +1,4 @@ obj-$(CONFIG_INTEL_IDXD) += idxd.o idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o + +idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 97c96ca6ab70..26482c7d4c3a 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "registers.h" #define IDXD_DRIVER_VERSION "1.00" @@ -29,6 +31,7 @@ enum idxd_type { }; #define IDXD_NAME_SIZE 128 +#define IDXD_PMU_EVENT_MAX 64 struct idxd_device_driver { struct device_driver drv; @@ -61,6 +64,31 @@ struct idxd_group { int tc_b; }; +struct idxd_pmu { + struct idxd_device *idxd; + + struct perf_event *event_list[IDXD_PMU_EVENT_MAX]; + int n_events; + + DECLARE_BITMAP(used_mask, IDXD_PMU_EVENT_MAX); + + struct pmu pmu; + char name[IDXD_NAME_SIZE]; + int cpu; + + int n_counters; + int counter_width; + int n_event_categories; + + bool per_counter_caps_supported; + unsigned long supported_event_categories; + + unsigned long supported_filters; + int n_filters; + + struct hlist_node cpuhp_node; +}; + #define IDXD_MAX_PRIORITY 0xf enum idxd_wq_state { @@ -241,6 +269,8 @@ struct idxd_device { struct work_struct work; int *int_handles; + + struct idxd_pmu *idxd_pmu; }; /* IDXD software descriptor */ @@ -437,4 +467,19 @@ int idxd_cdev_get_major(struct idxd_device *idxd); int idxd_wq_add_cdev(struct idxd_wq *wq); void idxd_wq_del_cdev(struct idxd_wq *wq); +/* perfmon */ +#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) +int perfmon_pmu_init(struct idxd_device *idxd); +void perfmon_pmu_remove(struct idxd_device *idxd); +void perfmon_counter_overflow(struct idxd_device *idxd); +void perfmon_init(void); +void perfmon_exit(void); +#else +static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; } +static inline void perfmon_pmu_remove(struct idxd_device *idxd) {} +static inline void perfmon_counter_overflow(struct idxd_device *idxd) {} +static inline void perfmon_init(void) {} +static inline void perfmon_exit(void) {} +#endif + #endif diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c new file mode 100644 index 000000000000..d73004f47cf4 --- /dev/null +++ b/drivers/dma/idxd/perfmon.c @@ -0,0 +1,662 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */ + +#include +#include +#include "idxd.h" +#include "perfmon.h" + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf); + +static cpumask_t perfmon_dsa_cpu_mask; +static bool cpuhp_set_up; +static enum cpuhp_state cpuhp_slot; + +/* + * perf userspace reads this attribute to determine which cpus to open + * counters on. It's connected to perfmon_dsa_cpu_mask, which is + * maintained by the cpu hotplug handlers. + */ +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *perfmon_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = perfmon_cpumask_attrs, +}; + +/* + * These attributes specify the bits in the config word that the perf + * syscall uses to pass the event ids and categories to perfmon. 
+ */ +DEFINE_PERFMON_FORMAT_ATTR(event_category, "config:0-3"); +DEFINE_PERFMON_FORMAT_ATTR(event, "config:4-31"); + +/* + * These attributes specify the bits in the config1 word that the perf + * syscall uses to pass filter data to perfmon. + */ +DEFINE_PERFMON_FORMAT_ATTR(filter_wq, "config1:0-31"); +DEFINE_PERFMON_FORMAT_ATTR(filter_tc, "config1:32-39"); +DEFINE_PERFMON_FORMAT_ATTR(filter_pgsz, "config1:40-43"); +DEFINE_PERFMON_FORMAT_ATTR(filter_sz, "config1:44-51"); +DEFINE_PERFMON_FORMAT_ATTR(filter_eng, "config1:52-59"); + +#define PERFMON_FILTERS_START 2 +#define PERFMON_FILTERS_MAX 5 + +static struct attribute *perfmon_format_attrs[] = { + &format_attr_idxd_event_category.attr, + &format_attr_idxd_event.attr, + &format_attr_idxd_filter_wq.attr, + &format_attr_idxd_filter_tc.attr, + &format_attr_idxd_filter_pgsz.attr, + &format_attr_idxd_filter_sz.attr, + &format_attr_idxd_filter_eng.attr, + NULL, +}; + +static struct attribute_group perfmon_format_attr_group = { + .name = "format", + .attrs = perfmon_format_attrs, +}; + +static const struct attribute_group *perfmon_attr_groups[] = { + &perfmon_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask); +} + +static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event) +{ + return &idxd_pmu->pmu == event->pmu; +} + +static int perfmon_collect_events(struct idxd_pmu *idxd_pmu, + struct perf_event *leader, + bool do_grp) +{ + struct perf_event *event; + int n, max_count; + + max_count = idxd_pmu->n_counters; + n = idxd_pmu->n_events; + + if (n >= max_count) + return -EINVAL; + + if (is_idxd_event(idxd_pmu, leader)) { + idxd_pmu->event_list[n] = leader; + idxd_pmu->event_list[n]->hw.idx = n; + n++; + } + + if (!do_grp) + return n; + + for_each_sibling_event(event, leader) { + if (!is_idxd_event(idxd_pmu, event) || + event->state <= PERF_EVENT_STATE_OFF) + continue; + + if (n >= max_count) + return -EINVAL; + + idxd_pmu->event_list[n] = event; + idxd_pmu->event_list[n]->hw.idx = n; + n++; + } + + return n; +} + +static void perfmon_assign_hw_event(struct idxd_pmu *idxd_pmu, + struct perf_event *event, int idx) +{ + struct idxd_device *idxd = idxd_pmu->idxd; + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = idx; + hwc->config_base = ioread64(CNTRCFG_REG(idxd, idx)); + hwc->event_base = ioread64(CNTRCFG_REG(idxd, idx)); +} + +static int perfmon_assign_event(struct idxd_pmu *idxd_pmu, + struct perf_event *event) +{ + int i; + + for (i = 0; i < IDXD_PMU_EVENT_MAX; i++) + if (!test_and_set_bit(i, idxd_pmu->used_mask)) + return i; + + return -EINVAL; +} + +/* + * Check whether there are enough counters to satisfy that all the + * events in the group can actually be scheduled at the same time. + * + * To do this, create a fake idxd_pmu object so the event collection + * and assignment functions can be used without affecting the internal + * state of the real idxd_pmu object. 
+ */ +static int perfmon_validate_group(struct idxd_pmu *pmu, + struct perf_event *event) +{ + struct perf_event *leader = event->group_leader; + struct idxd_pmu *fake_pmu; + int i, ret = 0, n, idx; + + fake_pmu = kzalloc(sizeof(*fake_pmu), GFP_KERNEL); + if (!fake_pmu) + return -ENOMEM; + + fake_pmu->pmu.name = pmu->pmu.name; + fake_pmu->n_counters = pmu->n_counters; + + n = perfmon_collect_events(fake_pmu, leader, true); + if (n < 0) { + ret = n; + goto out; + } + + fake_pmu->n_events = n; + n = perfmon_collect_events(fake_pmu, event, false); + if (n < 0) { + ret = n; + goto out; + } + + fake_pmu->n_events = n; + + for (i = 0; i < n; i++) { + event = fake_pmu->event_list[i]; + + idx = perfmon_assign_event(fake_pmu, event); + if (idx < 0) { + ret = idx; + goto out; + } + } +out: + kfree(fake_pmu); + + return ret; +} + +static int perfmon_pmu_event_init(struct perf_event *event) +{ + struct idxd_device *idxd; + int ret = 0; + + idxd = event_to_idxd(event); + event->hw.idx = -1; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* sampling not supported */ + if (event->attr.sample_period) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (event->pmu != &idxd->idxd_pmu->pmu) + return -EINVAL; + + event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd)); + event->cpu = idxd->idxd_pmu->cpu; + event->hw.config = event->attr.config; + + if (event->group_leader != event) + /* non-group events have themselves as leader */ + ret = perfmon_validate_group(idxd->idxd_pmu, event); + + return ret; +} + +static inline u64 perfmon_pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct idxd_device *idxd; + int cntr = hwc->idx; + + idxd = event_to_idxd(event); + + return ioread64(CNTRDATA_REG(idxd, cntr)); +} + +static void perfmon_pmu_event_update(struct perf_event *event) +{ + struct idxd_device *idxd = event_to_idxd(event); + u64 prev_raw_count, new_raw_count, delta, p, n; + int shift = 64 - idxd->idxd_pmu->counter_width; + struct hw_perf_event *hwc = &event->hw; + + do { + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = perfmon_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count); + + n = (new_raw_count << shift); + p = (prev_raw_count << shift); + + delta = ((n - p) >> shift); + + local64_add(delta, &event->count); +} + +void perfmon_counter_overflow(struct idxd_device *idxd) +{ + int i, n_counters, max_loop = OVERFLOW_SIZE; + struct perf_event *event; + unsigned long ovfstatus; + + n_counters = min(idxd->idxd_pmu->n_counters, OVERFLOW_SIZE); + + ovfstatus = ioread32(OVFSTATUS_REG(idxd)); + + /* + * While updating overflowed counters, other counters behind + * them could overflow and be missed in a given pass. + * Normally this could happen at most n_counters times, but in + * theory a tiny counter width could result in continual + * overflows and endless looping. max_loop provides a + * failsafe in that highly unlikely case. + */ + while (ovfstatus && max_loop--) { + /* Figure out which counter(s) overflowed */ + for_each_set_bit(i, &ovfstatus, n_counters) { + unsigned long ovfstatus_clear = 0; + + /* Update event->count for overflowed counter */ + event = idxd->idxd_pmu->event_list[i]; + perfmon_pmu_event_update(event); + /* Writing 1 to OVFSTATUS bit clears it */ + set_bit(i, &ovfstatus_clear); + iowrite32(ovfstatus_clear, OVFSTATUS_REG(idxd)); + } + + ovfstatus = ioread32(OVFSTATUS_REG(idxd)); + } + + /* + * Should never happen. 
If so, it means a counter(s) looped + * around twice while this handler was running. + */ + WARN_ON_ONCE(ovfstatus); +} + +static inline void perfmon_reset_config(struct idxd_device *idxd) +{ + iowrite32(CONFIG_RESET, PERFRST_REG(idxd)); + iowrite32(0, OVFSTATUS_REG(idxd)); + iowrite32(0, PERFFRZ_REG(idxd)); +} + +static inline void perfmon_reset_counters(struct idxd_device *idxd) +{ + iowrite32(CNTR_RESET, PERFRST_REG(idxd)); +} + +static inline void perfmon_reset(struct idxd_device *idxd) +{ + perfmon_reset_config(idxd); + perfmon_reset_counters(idxd); +} + +static void perfmon_pmu_event_start(struct perf_event *event, int mode) +{ + u32 flt_wq, flt_tc, flt_pg_sz, flt_xfer_sz, flt_eng = 0; + u64 cntr_cfg, cntrdata, event_enc, event_cat = 0; + struct hw_perf_event *hwc = &event->hw; + union filter_cfg flt_cfg; + union event_cfg event_cfg; + struct idxd_device *idxd; + int cntr; + + idxd = event_to_idxd(event); + + event->hw.idx = hwc->idx; + cntr = hwc->idx; + + /* Obtain event category and event value from user space */ + event_cfg.val = event->attr.config; + flt_cfg.val = event->attr.config1; + event_cat = event_cfg.event_cat; + event_enc = event_cfg.event_enc; + + /* Obtain filter configuration from user space */ + flt_wq = flt_cfg.wq; + flt_tc = flt_cfg.tc; + flt_pg_sz = flt_cfg.pg_sz; + flt_xfer_sz = flt_cfg.xfer_sz; + flt_eng = flt_cfg.eng; + + if (flt_wq && test_bit(FLT_WQ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_wq, FLTCFG_REG(idxd, cntr, FLT_WQ)); + if (flt_tc && test_bit(FLT_TC, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_tc, FLTCFG_REG(idxd, cntr, FLT_TC)); + if (flt_pg_sz && test_bit(FLT_PG_SZ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_pg_sz, FLTCFG_REG(idxd, cntr, FLT_PG_SZ)); + if (flt_xfer_sz && test_bit(FLT_XFER_SZ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_xfer_sz, FLTCFG_REG(idxd, cntr, FLT_XFER_SZ)); + if (flt_eng && test_bit(FLT_ENG, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_eng, FLTCFG_REG(idxd, cntr, FLT_ENG)); + + /* Read the start value */ + cntrdata = ioread64(CNTRDATA_REG(idxd, cntr)); + local64_set(&event->hw.prev_count, cntrdata); + + /* Set counter to event/category */ + cntr_cfg = event_cat << CNTRCFG_CATEGORY_SHIFT; + cntr_cfg |= event_enc << CNTRCFG_EVENT_SHIFT; + /* Set interrupt on overflow and counter enable bits */ + cntr_cfg |= (CNTRCFG_IRQ_OVERFLOW | CNTRCFG_ENABLE); + + iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr)); +} + +static void perfmon_pmu_event_stop(struct perf_event *event, int mode) +{ + struct hw_perf_event *hwc = &event->hw; + struct idxd_device *idxd; + int i, cntr = hwc->idx; + u64 cntr_cfg; + + idxd = event_to_idxd(event); + + /* remove this event from event list */ + for (i = 0; i < idxd->idxd_pmu->n_events; i++) { + if (event != idxd->idxd_pmu->event_list[i]) + continue; + + for (++i; i < idxd->idxd_pmu->n_events; i++) + idxd->idxd_pmu->event_list[i - 1] = idxd->idxd_pmu->event_list[i]; + --idxd->idxd_pmu->n_events; + break; + } + + cntr_cfg = ioread64(CNTRCFG_REG(idxd, cntr)); + cntr_cfg &= ~CNTRCFG_ENABLE; + iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr)); + + if (mode == PERF_EF_UPDATE) + perfmon_pmu_event_update(event); + + event->hw.idx = -1; + clear_bit(cntr, idxd->idxd_pmu->used_mask); +} + +static void perfmon_pmu_event_del(struct perf_event *event, int mode) +{ + perfmon_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int perfmon_pmu_event_add(struct perf_event *event, int flags) +{ + struct idxd_device *idxd = event_to_idxd(event); + struct idxd_pmu *idxd_pmu = 
idxd->idxd_pmu; + struct hw_perf_event *hwc = &event->hw; + int idx, n; + + n = perfmon_collect_events(idxd_pmu, event, false); + if (n < 0) + return n; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + idx = perfmon_assign_event(idxd_pmu, event); + if (idx < 0) + return idx; + + perfmon_assign_hw_event(idxd_pmu, event, idx); + + if (flags & PERF_EF_START) + perfmon_pmu_event_start(event, 0); + + idxd_pmu->n_events = n; + + return 0; +} + +static void enable_perfmon_pmu(struct idxd_device *idxd) +{ + iowrite32(COUNTER_UNFREEZE, PERFFRZ_REG(idxd)); +} + +static void disable_perfmon_pmu(struct idxd_device *idxd) +{ + iowrite32(COUNTER_FREEZE, PERFFRZ_REG(idxd)); +} + +static void perfmon_pmu_enable(struct pmu *pmu) +{ + struct idxd_device *idxd = pmu_to_idxd(pmu); + + enable_perfmon_pmu(idxd); +} + +static void perfmon_pmu_disable(struct pmu *pmu) +{ + struct idxd_device *idxd = pmu_to_idxd(pmu); + + disable_perfmon_pmu(idxd); +} + +static void skip_filter(int i) +{ + int j; + + for (j = i; j < PERFMON_FILTERS_MAX; j++) + perfmon_format_attrs[PERFMON_FILTERS_START + j] = + perfmon_format_attrs[PERFMON_FILTERS_START + j + 1]; +} + +static void idxd_pmu_init(struct idxd_pmu *idxd_pmu) +{ + int i; + + for (i = 0 ; i < PERFMON_FILTERS_MAX; i++) { + if (!test_bit(i, &idxd_pmu->supported_filters)) + skip_filter(i); + } + + idxd_pmu->pmu.name = idxd_pmu->name; + idxd_pmu->pmu.attr_groups = perfmon_attr_groups; + idxd_pmu->pmu.task_ctx_nr = perf_invalid_context; + idxd_pmu->pmu.event_init = perfmon_pmu_event_init; + idxd_pmu->pmu.pmu_enable = perfmon_pmu_enable, + idxd_pmu->pmu.pmu_disable = perfmon_pmu_disable, + idxd_pmu->pmu.add = perfmon_pmu_event_add; + idxd_pmu->pmu.del = perfmon_pmu_event_del; + idxd_pmu->pmu.start = perfmon_pmu_event_start; + idxd_pmu->pmu.stop = perfmon_pmu_event_stop; + idxd_pmu->pmu.read = perfmon_pmu_event_update; + idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + idxd_pmu->pmu.module = THIS_MODULE; +} + +void perfmon_pmu_remove(struct idxd_device *idxd) +{ + if (!idxd->idxd_pmu) + return; + + cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node); + perf_pmu_unregister(&idxd->idxd_pmu->pmu); + kfree(idxd->idxd_pmu); + idxd->idxd_pmu = NULL; +} + +static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct idxd_pmu *idxd_pmu; + + idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); + + /* select the first online CPU as the designated reader */ + if (cpumask_empty(&perfmon_dsa_cpu_mask)) { + cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask); + idxd_pmu->cpu = cpu; + } + + return 0; +} + +static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct idxd_pmu *idxd_pmu; + unsigned int target; + + idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); + + if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask)) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + + /* migrate events if there is a valid target */ + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); + else + target = -1; + + perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + + return 0; +} + +int perfmon_pmu_init(struct idxd_device *idxd) +{ + union idxd_perfcap perfcap; + struct idxd_pmu *idxd_pmu; + int rc = -ENODEV; + + /* + * perfmon module initialization failed, nothing to do + */ + if (!cpuhp_set_up) + return -ENODEV; + + /* + * If perfmon_offset or num_counters is 0, it means perfmon is + * not 
supported on this hardware. + */ + if (idxd->perfmon_offset == 0) + return -ENODEV; + + idxd_pmu = kzalloc(sizeof(*idxd_pmu), GFP_KERNEL); + if (!idxd_pmu) + return -ENOMEM; + + idxd_pmu->idxd = idxd; + idxd->idxd_pmu = idxd_pmu; + + if (idxd->data->type == IDXD_TYPE_DSA) { + rc = sprintf(idxd_pmu->name, "dsa%d", idxd->id); + if (rc < 0) + goto free; + } else if (idxd->data->type == IDXD_TYPE_IAX) { + rc = sprintf(idxd_pmu->name, "iax%d", idxd->id); + if (rc < 0) + goto free; + } else { + goto free; + } + + perfmon_reset(idxd); + + perfcap.bits = ioread64(PERFCAP_REG(idxd)); + + /* + * If total perf counter is 0, stop further registration. + * This is necessary in order to support driver running on + * guest which does not have pmon support. + */ + if (perfcap.num_perf_counter == 0) + goto free; + + /* A counter width of 0 means it can't count */ + if (perfcap.counter_width == 0) + goto free; + + /* Overflow interrupt and counter freeze support must be available */ + if (!perfcap.overflow_interrupt || !perfcap.counter_freeze) + goto free; + + /* Number of event categories cannot be 0 */ + if (perfcap.num_event_category == 0) + goto free; + + /* + * We don't support per-counter capabilities for now. + */ + if (perfcap.cap_per_counter) + goto free; + + idxd_pmu->n_event_categories = perfcap.num_event_category; + idxd_pmu->supported_event_categories = perfcap.global_event_category; + idxd_pmu->per_counter_caps_supported = perfcap.cap_per_counter; + + /* check filter capability. If 0, then filters are not supported */ + idxd_pmu->supported_filters = perfcap.filter; + if (perfcap.filter) + idxd_pmu->n_filters = hweight8(perfcap.filter); + + /* Store the total number of counters categories, and counter width */ + idxd_pmu->n_counters = perfcap.num_perf_counter; + idxd_pmu->counter_width = perfcap.counter_width; + + idxd_pmu_init(idxd_pmu); + + rc = perf_pmu_register(&idxd_pmu->pmu, idxd_pmu->name, -1); + if (rc) + goto free; + + rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node); + if (rc) { + perf_pmu_unregister(&idxd->idxd_pmu->pmu); + goto free; + } +out: + return rc; +free: + kfree(idxd_pmu); + idxd->idxd_pmu = NULL; + + goto out; +} + +void __init perfmon_init(void) +{ + int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "driver/dma/idxd/perf:online", + perf_event_cpu_online, + perf_event_cpu_offline); + if (WARN_ON(rc < 0)) + return; + + cpuhp_slot = rc; + cpuhp_set_up = true; +} + +void __exit perfmon_exit(void) +{ + if (cpuhp_set_up) + cpuhp_remove_multi_state(cpuhp_slot); +} diff --git a/drivers/dma/idxd/perfmon.h b/drivers/dma/idxd/perfmon.h new file mode 100644 index 000000000000..9a081a1bc605 --- /dev/null +++ b/drivers/dma/idxd/perfmon.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. All rights rsvd. 
*/ + +#ifndef _PERFMON_H_ +#define _PERFMON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "registers.h" + +static inline struct idxd_pmu *event_to_pmu(struct perf_event *event) +{ + struct idxd_pmu *idxd_pmu; + struct pmu *pmu; + + pmu = event->pmu; + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu; +} + +static inline struct idxd_device *event_to_idxd(struct perf_event *event) +{ + struct idxd_pmu *idxd_pmu; + struct pmu *pmu; + + pmu = event->pmu; + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu->idxd; +} + +static inline struct idxd_device *pmu_to_idxd(struct pmu *pmu) +{ + struct idxd_pmu *idxd_pmu; + + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu->idxd; +} + +enum dsa_perf_events { + DSA_PERF_EVENT_WQ = 0, + DSA_PERF_EVENT_ENGINE, + DSA_PERF_EVENT_ADDR_TRANS, + DSA_PERF_EVENT_OP, + DSA_PERF_EVENT_COMPL, + DSA_PERF_EVENT_MAX, +}; + +enum filter_enc { + FLT_WQ = 0, + FLT_TC, + FLT_PG_SZ, + FLT_XFER_SZ, + FLT_ENG, + FLT_MAX, +}; + +#define CONFIG_RESET 0x0000000000000001 +#define CNTR_RESET 0x0000000000000002 +#define CNTR_ENABLE 0x0000000000000001 +#define INTR_OVFL 0x0000000000000002 + +#define COUNTER_FREEZE 0x00000000FFFFFFFF +#define COUNTER_UNFREEZE 0x0000000000000000 +#define OVERFLOW_SIZE 32 + +#define CNTRCFG_ENABLE BIT(0) +#define CNTRCFG_IRQ_OVERFLOW BIT(1) +#define CNTRCFG_CATEGORY_SHIFT 8 +#define CNTRCFG_EVENT_SHIFT 32 + +#define PERFMON_TABLE_OFFSET(_idxd) \ +({ \ + typeof(_idxd) __idxd = (_idxd); \ + ((__idxd)->reg_base + (__idxd)->perfmon_offset); \ +}) +#define PERFMON_REG_OFFSET(idxd, offset) \ + (PERFMON_TABLE_OFFSET(idxd) + (offset)) + +#define PERFCAP_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFCAP_OFFSET)) +#define PERFRST_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFRST_OFFSET)) +#define OVFSTATUS_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_OVFSTATUS_OFFSET)) +#define PERFFRZ_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFFRZ_OFFSET)) + +#define FLTCFG_REG(idxd, cntr, flt) \ + (PERFMON_REG_OFFSET(idxd, IDXD_FLTCFG_OFFSET) + ((cntr) * 32) + ((flt) * 4)) + +#define CNTRCFG_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCFG_OFFSET) + ((cntr) * 8)) +#define CNTRDATA_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRDATA_OFFSET) + ((cntr) * 8)) +#define CNTRCAP_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCAP_OFFSET) + ((cntr) * 8)) + +#define EVNTCAP_REG(idxd, category) \ + (PERFMON_REG_OFFSET(idxd, IDXD_EVNTCAP_OFFSET) + ((category) * 8)) + +#define DEFINE_PERFMON_FORMAT_ATTR(_name, _format) \ +static ssize_t __perfmon_idxd_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_idxd_##_name = \ + __ATTR(_name, 0444, __perfmon_idxd_##_name##_show, NULL) + +#endif diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 6c11375cc56a..c970c3f025f0 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -386,4 +386,112 @@ union wqcfg { #define GRPENGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 32) #define GRPFLGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 40) +/* Following is performance monitor registers */ +#define IDXD_PERFCAP_OFFSET 0x0 +union idxd_perfcap { + struct { + u64 num_perf_counter:6; + u64 rsvd1:2; + u64 counter_width:8; + 
u64 num_event_category:4; + u64 global_event_category:16; + u64 filter:8; + u64 rsvd2:8; + u64 cap_per_counter:1; + u64 writeable_counter:1; + u64 counter_freeze:1; + u64 overflow_interrupt:1; + u64 rsvd3:8; + }; + u64 bits; +} __packed; + +#define IDXD_EVNTCAP_OFFSET 0x80 +union idxd_evntcap { + struct { + u64 events:28; + u64 rsvd:36; + }; + u64 bits; +} __packed; + +struct idxd_event { + union { + struct { + u32 event_category:4; + u32 events:28; + }; + u32 val; + }; +} __packed; + +#define IDXD_CNTRCAP_OFFSET 0x800 +struct idxd_cntrcap { + union { + struct { + u32 counter_width:8; + u32 rsvd:20; + u32 num_events:4; + }; + u32 val; + }; + struct idxd_event events[]; +} __packed; + +#define IDXD_PERFRST_OFFSET 0x10 +union idxd_perfrst { + struct { + u32 perfrst_config:1; + u32 perfrst_counter:1; + u32 rsvd:30; + }; + u32 val; +} __packed; + +#define IDXD_OVFSTATUS_OFFSET 0x30 +#define IDXD_PERFFRZ_OFFSET 0x20 +#define IDXD_CNTRCFG_OFFSET 0x100 +union idxd_cntrcfg { + struct { + u64 enable:1; + u64 interrupt_ovf:1; + u64 global_freeze_ovf:1; + u64 rsvd1:5; + u64 event_category:4; + u64 rsvd2:20; + u64 events:28; + u64 rsvd3:4; + }; + u64 val; +} __packed; + +#define IDXD_FLTCFG_OFFSET 0x300 + +#define IDXD_CNTRDATA_OFFSET 0x200 +union idxd_cntrdata { + struct { + u64 event_count_value; + }; + u64 val; +} __packed; + +union event_cfg { + struct { + u64 event_cat:4; + u64 event_enc:28; + }; + u64 val; +} __packed; + +union filter_cfg { + struct { + u64 wq:32; + u64 tc:8; + u64 pg_sz:4; + u64 xfer_sz:8; + u64 eng:8; + }; + u64 val; +} __packed; + #endif diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index b98b9eb7d5f8..3c7e775daece 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -169,6 +169,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_RAPL_ONLINE, CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, + CPUHP_AP_PERF_X86_IDXD_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, -- Gitee From 7865b0ce6cd7eeb94f109a48b4cfde1a6f7ed6c5 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sat, 24 Apr 2021 10:04:16 -0500 Subject: [PATCH 031/173] dmaengine: idxd: Enable IDXD performance monitor support mainline inclusion from mainline-v5.13 commit 0bde4444ec44b8e64bbd4af72fcaef58bcdbd4ce category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 0bde4444ec44 dmaengine: idxd: Enable IDXD performance monitor support. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add the code needed in the main IDXD driver to interface with the IDXD perfmon implementation. [ Based on work originally by Jing Lin. 
] Reviewed-by: Dave Jiang Reviewed-by: Kan Liang Signed-off-by: Tom Zanussi Link: https://lore.kernel.org/r/a5564a5583911565d31c2af9234218c5166c4b2c.1619276133.git.zanussi@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 9 +++++++++ drivers/dma/idxd/irq.c | 5 +---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index d250b243bdff..3883ab0adc95 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -21,6 +21,7 @@ #include "../dmaengine.h" #include "registers.h" #include "idxd.h" +#include "perfmon.h" MODULE_VERSION(IDXD_DRIVER_VERSION); MODULE_LICENSE("GPL v2"); @@ -541,6 +542,10 @@ static int idxd_probe(struct idxd_device *idxd) idxd->major = idxd_cdev_get_major(idxd); + rc = perfmon_pmu_init(idxd); + if (rc < 0) + dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc); + dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id); return 0; @@ -720,6 +725,7 @@ static void idxd_remove(struct pci_dev *pdev) if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); idxd_unregister_devices(idxd); + perfmon_pmu_remove(idxd); iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); } @@ -749,6 +755,8 @@ static int __init idxd_init_module(void) else support_enqcmd = true; + perfmon_init(); + err = idxd_register_bus_type(); if (err < 0) return err; @@ -783,5 +791,6 @@ static void __exit idxd_exit_module(void) pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); idxd_unregister_bus_type(); + perfmon_exit(); } module_exit(idxd_exit_module); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index afee571e0194..ae68e1e5487a 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -156,11 +156,8 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) } if (cause & IDXD_INTC_PERFMON_OVFL) { - /* - * Driver does not utilize perfmon counter overflow interrupt - * yet. - */ val |= IDXD_INTC_PERFMON_OVFL; + perfmon_counter_overflow(idxd); } val ^= cause; -- Gitee From ecfe010efc86fd794642fbc0415d7efafc131f5b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 Jun 2021 12:07:52 +0200 Subject: [PATCH 032/173] dmaengine: idxd: Use cpu_feature_enabled() mainline inclusion from mainline-v5.13 commit 74b2fc882d380d8fafc2a26f01d401c2a7beeadb category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 74b2fc882d38 dmaengine: idxd: Use cpu_feature_enabled(). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- When testing x86 feature bits, use cpu_feature_enabled() so that build-disabled features can remain off, regardless of what CPUID says. Fixes: 8e50d392652f ("dmaengine: idxd: Add shared workqueue support") Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Acked-By: Vinod Koul Cc: Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 3883ab0adc95..de2d8cee063b 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -745,12 +745,12 @@ static int __init idxd_init_module(void) * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in * enumerating the device. We can not utilize it. 
*/ - if (!boot_cpu_has(X86_FEATURE_MOVDIR64B)) { + if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) { pr_warn("idxd driver failed to load without MOVDIR64B.\n"); return -ENODEV; } - if (!boot_cpu_has(X86_FEATURE_ENQCMD)) + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) pr_warn("Platform does not have ENQCMD(S) support.\n"); else support_enqcmd = true; -- Gitee From 6b00828d5e48b0a0bf132deacf14a0b5f324fce9 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 26 Apr 2021 16:09:19 -0700 Subject: [PATCH 033/173] dmaengine: idxd: add engine 'struct device' missing bus type assignment mainline inclusion from mainline-v5.13 commit 1c4841ccbd2b185587010d6178aac11953f61d4c category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 1c4841ccbd2b dmaengine: idxd: add engine 'struct device' missing bus type assignment. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- engine 'struct device' setup is missing assigning the bus type. Add it to dsa_bus_type. Fixes: 75b911309060 ("dmaengine: idxd: fix engine conf_dev lifetime") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161947841562.984844.17505646725993659651.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index de2d8cee063b..eaee1acbf002 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -242,6 +242,7 @@ static int idxd_setup_engines(struct idxd_device *idxd) engine->idxd = idxd; device_initialize(&engine->conf_dev); engine->conf_dev.parent = &idxd->conf_dev; + engine->conf_dev.bus = &dsa_bus_type; engine->conf_dev.type = &idxd_engine_device_type; rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id); if (rc < 0) { -- Gitee From 0fbb746e63f1f6513f6e82f510344bc9296efcea Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 25 May 2021 12:23:37 -0700 Subject: [PATCH 034/173] dmaengine: idxd: Add missing cleanup for early error out in probe call mainline inclusion from mainline-v5.13 commit ddf742d4f3f12a6ba1b8e6ecbbf3ae736942f970 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit ddf742d4f3f1 dmaengine: idxd: Add missing cleanup for early error out in probe call. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The probe call stack is missing some cleanup when things fail in the middle. Add the appropriate cleanup routines to make sure we exit gracefully. 
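As a sketch of the idiom this change restores (the function names here are invented for illustration, not the driver's), kernel probe paths typically unwind with goto labels so each failure undoes exactly the steps that already succeeded, in reverse order:

	static int example_probe(struct device *dev)
	{
		int rc;

		rc = setup_a(dev);
		if (rc)
			return rc;

		rc = setup_b(dev);
		if (rc)
			goto err_b;

		rc = setup_c(dev);
		if (rc)
			goto err_c;

		return 0;

	err_c:
		teardown_b(dev);	/* undo in reverse order */
	err_b:
		teardown_a(dev);
		return rc;
	}

Every step that can fail jumps to a label that tears down only what was already set up.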
Fixes: a39c7cd0438e ("dmaengine: idxd: removal of pcim managed mmio mapping") Reported-by: Nikhil Rao Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162197061707.392656.15760573520817310791.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 61 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index eaee1acbf002..442d55c11a5f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -168,6 +168,32 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) return rc; } +static void idxd_cleanup_interrupts(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + struct idxd_irq_entry *irq_entry; + int i, msixcnt; + + msixcnt = pci_msix_vec_count(pdev); + if (msixcnt <= 0) + return; + + irq_entry = &idxd->irq_entries[0]; + free_irq(irq_entry->vector, irq_entry); + + for (i = 1; i < msixcnt; i++) { + + irq_entry = &idxd->irq_entries[i]; + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) + idxd_device_release_int_handle(idxd, idxd->int_handles[i], + IDXD_IRQ_MSIX); + free_irq(irq_entry->vector, irq_entry); + } + + idxd_mask_error_interrupts(idxd); + pci_free_irq_vectors(pdev); +} + static int idxd_setup_wqs(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -304,6 +330,19 @@ static int idxd_setup_groups(struct idxd_device *idxd) return rc; } +static void idxd_cleanup_internals(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_groups; i++) + put_device(&idxd->groups[i]->conf_dev); + for (i = 0; i < idxd->max_engines; i++) + put_device(&idxd->engines[i]->conf_dev); + for (i = 0; i < idxd->max_wqs; i++) + put_device(&idxd->wqs[i]->conf_dev); + destroy_workqueue(idxd->wq); +} + static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -532,12 +571,12 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "Loading RO device config\n"); rc = idxd_device_load_config(idxd); if (rc < 0) - goto err; + goto err_config; } rc = idxd_setup_interrupts(idxd); if (rc) - goto err; + goto err_config; dev_dbg(dev, "IDXD interrupt setup complete.\n"); @@ -550,6 +589,8 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id); return 0; + err_config: + idxd_cleanup_internals(idxd); err: if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); @@ -557,6 +598,18 @@ static int idxd_probe(struct idxd_device *idxd) return rc; } +static void idxd_cleanup(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + + perfmon_pmu_remove(idxd); + idxd_cleanup_interrupts(idxd); + idxd_cleanup_internals(idxd); + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); +} + static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; @@ -609,7 +662,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) rc = idxd_register_devices(idxd); if (rc) { dev_err(dev, "IDXD sysfs setup failed\n"); - goto err; + goto err_dev_register; } idxd->state = IDXD_DEV_CONF_READY; @@ -619,6 +672,8 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; + err_dev_register: + idxd_cleanup(idxd); err: pci_iounmap(pdev, idxd->reg_base); err_iomap: -- Gitee From 
3327b546a9278aecc2524e03b66c191027b00844 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 2 Jun 2021 18:07:26 +0800 Subject: [PATCH 035/173] dmaengine: idxd: Fix missing error code in idxd_cdev_open() mainline inclusion from mainline-v5.13 commit 99b18e88a1cf737ae924123d63b46d9a3d17b1af category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 99b18e88a1cf dmaengine: idxd: Fix missing error code in idxd_cdev_open(). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The error code is missing in this code scenario; add the error code '-EINVAL' to the return value 'rc'. Eliminate the following smatch warning: drivers/dma/idxd/cdev.c:113 idxd_cdev_open() warn: missing error code 'rc'.
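A minimal schematic of the bug class smatch flags here (names are illustrative, not the driver's): an error path jumps to the exit label while 'rc' still holds its initial success value.

	int rc = 0;
	...
	if (bad_condition) {
		undo_partial_setup();
		/* without rc = -EINVAL here, the caller sees success */
		goto failed;
	}
	...
 failed:
	return rc;

Assigning -EINVAL before the goto makes the failure visible to the caller.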
Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Acked-by: Dave Jiang Link: https://lore.kernel.org/r/1620298847-33127-1-git-send-email-jiapeng.chong@linux.alibaba.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index fe9c52e22596..6ecf0e5f3285 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -296,9 +296,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) void idxd_wq_del_cdev(struct idxd_wq *wq) { struct idxd_cdev *idxd_cdev; - struct idxd_cdev_context *cdev_ctx; - cdev_ctx = &ictx[wq->idxd->data->type]; idxd_cdev = wq->idxd_cdev; wq->idxd_cdev = NULL; cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev); -- Gitee From c3a5a2c2ac28445b46a434e33205d0d75ebec2ca Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sun, 9 May 2021 17:38:25 -0700 Subject: [PATCH 037/173] dmaengine: idxd: remove devm allocation for idxd->int_handles mainline inclusion from mainline-v5.14 commit 33f9f3c33e9336e5f49501c9632584c3d1f4f3a5 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 33f9f3c33e93 dmaengine: idxd: remove devm allocation for idxd->int_handles. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Allocation of idxd->int_handles was merged incorrectly for the 5.13 merge window. The devm_kcalloc should've been regular kcalloc due to devm_* removal series for the driver. Fixes: eb15e7154fbf ("dmaengine: idxd: add interrupt handle request and release support") Reported-by: Dan Carpenter Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162060710518.130816.11349798049329202863.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 442d55c11a5f..c8ae41d36040 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -351,7 +351,8 @@ static int idxd_setup_internals(struct idxd_device *idxd) init_waitqueue_head(&idxd->cmd_waitq); if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { - idxd->int_handles = devm_kcalloc(dev, idxd->max_wqs, sizeof(int), GFP_KERNEL); + idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL, + dev_to_node(dev)); if (!idxd->int_handles) return -ENOMEM; } -- Gitee From 3673eb89648d32549534a98db87cfd4022ce4eed Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 21 Jul 2021 12:25:20 -0700 Subject: [PATCH 038/173] dmaengine: idxd: Change license on idxd.h to LGPL mainline inclusion from mainline-v5.14 commit 25905f602fdb0cfa147017056636768a7aa1ff6f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 25905f602fdb dmaengine: idxd: Change license on idxd.h to LGPL. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- This file was given GPL-2.0 license. But LGPL-2.1 makes more sense as it needs to be used by libraries outside of the kernel source tree. 
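Schematically (hypothetical names), a dead store is a value that is computed and assigned but never read again:

	struct foo *ctx;

	ctx = get_context(x);	/* stored here ... */
	do_work();		/* ... but never read afterwards */

Removing the assignment, and the variable if it is otherwise unused, changes no behavior.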
Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds Signed-off-by: Xiaochen Shen --- include/uapi/linux/idxd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e33997b4d750..edc346a77c91 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ #ifndef _USR_IDXD_H_ #define _USR_IDXD_H_ -- Gitee From 6a863aa2d70af03aab5f6a6e81b1c6595d996746 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 24 Jun 2021 12:08:21 -0700 Subject: [PATCH 039/173] dmaengine: idxd: add missing percpu ref put on failure mainline inclusion from mainline-v5.15 commit ac24a2dc06cd773895d2fba0378c2538b8176565 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit ac24a2dc06cd dmaengine: idxd: add missing percpu ref put on failure. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- When enqcmds() fails, exit path is missing a percpu_ref_put(). This can cause failure on shutdown path when the driver is attempting to quiesce the wq. Add missing percpu_ref_put() call on the error exit path. Fixes: 93a40a6d7428 ("dmaengine: idxd: add percpu_ref to descriptor submission path") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162456170168.1121236.7240941044089212312.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/submit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 0f15ed852daa..cb2d4713724d 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -118,8 +118,10 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * device is not accepting descriptor at all. */ rc = enqcmds(portal, desc->hw); - if (rc < 0) + if (rc < 0) { + percpu_ref_put(&wq->wq_active); return rc; + } } percpu_ref_put(&wq->wq_active); -- Gitee From d5b16bae02bcbabaee6e4d547c88f028e4deec1d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 11:38:41 -0700 Subject: [PATCH 040/173] dmaengine: idxd: fix desc->vector that isn't being updated mainline inclusion from mainline-v5.14 commit 8ba89a3c7967808f33478a8573277cf6a7412c4c category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 8ba89a3c7967 dmaengine: idxd: fix desc->vector that isn't being updated. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Missing update for desc->vector when the wq vector gets updated. This causes the desc->vector to always be at 0. 
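Schematically, the fix below chains the assignment so the wq cursor and the descriptor pick up the same freshly computed vector in one statement:

	/* round-robin through 1..N; both fields updated together */
	wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1;

rather than advancing wq->vec_ptr alone and leaving desc->vector at its zero-initialized value.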
Fixes: da435aedb00a ("dmaengine: idxd: fix array index when int_handles are being used") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162628784374.353761.4736602409627820431.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/submit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index cb2d4713724d..ad7112feebe5 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -25,11 +25,10 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) * Descriptor completion vectors are 1...N for MSIX. We will round * robin through the N vectors. */ - wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1; + wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1; if (!idxd->int_handles) { desc->hw->int_handle = wq->vec_ptr; } else { - desc->vector = wq->vec_ptr; /* * int_handles are only for descriptor completion. However for device * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even -- Gitee From 759456f52277513474b6704cfd5b60bd08249f31 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 24 Jun 2021 12:09:29 -0700 Subject: [PATCH 041/173] dmaengine: idxd: fix array index when int_handles are being used mainline inclusion from mainline-v5.14 commit da435aedb00a4ef61019ff11ae0c08ffb9b1fb18 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit da435aedb00a dmaengine: idxd: fix array index when int_handles are being used. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The index to the irq vector should be local and has no relation to the assigned interrupt handle. Assign the MSIX interrupt index that is programmed for the descriptor. The interrupt handle only matters when it comes to hardware descriptor programming. Fixes: eb15e7154fbf ("dmaengine: idxd: add interrupt handle request and release support") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162456176939.1121476.3366256009925001897.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/submit.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index ad7112feebe5..2eaac6bb55f3 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -129,19 +129,8 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) { - int vec; - - /* - * If the driver is on host kernel, it would be the value - * assigned to interrupt handle, which is index for MSIX - * vector. If it's guest then can't use the int_handle since - * that is the index to IMS for the entire device. The guest - * device local index will be used. - */ - vec = !idxd->int_handles ? 
desc->hw->int_handle : desc->vector; - llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist); - } + if (desc->hw->flags & IDXD_OP_FLAG_RCI) + llist_add(&desc->llnode, &idxd->irq_entries[desc->vector].pending_llist); return 0; } -- Gitee From 4396ac9a1e1b6ea461340b45cbb4658fc1786818 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 24 Jun 2021 13:39:33 -0700 Subject: [PATCH 042/173] dmaengine: idxd: assign MSIX vectors to each WQ rather than roundrobin mainline inclusion from mainline-v5.15 commit 6cfd9e62e3297993f9f9d2d15f3acb14a0e8abbf category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 6cfd9e62e329 dmaengine: idxd: assign MSIX vectors to each WQ rather than roundrobin. Incremental backporting patches for DSA/IAA on Intel Xeon platform. Deviation from upstream: Merge commit da435aedb00a dmaengine: idxd: fix array index when int_handles are being used. -------------------------------- IOPS increased when changing the MSIX vector assignment to per-WQ from round-robin. Allowing the descriptor to be completed by the submitter improves cache locality. Suggested-by: Konstantin Ananyev Signed-off-by: Dave Jiang Acked-by: Konstantin Ananyev Link: https://lore.kernel.org/r/162456717326.1130457.15258077196523268356.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 2 -- drivers/dma/idxd/submit.c | 22 +++++++--------------- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 26482c7d4c3a..de61bf98110c 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -153,7 +153,6 @@ struct idxd_wq { enum idxd_wq_state state; unsigned long flags; union wqcfg *wqcfg; - u32 vec_ptr; /* interrupt steering */ struct dsa_hw_desc **hw_descs; int num_descs; union { @@ -290,7 +289,6 @@ struct idxd_desc { struct list_head list; int id; int cpu; - unsigned int vector; struct idxd_wq *wq; }; diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 2eaac6bb55f3..a36f81c68844 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -22,21 +22,13 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) desc->hw->pasid = idxd->pasid; /* - * Descriptor completion vectors are 1...N for MSIX. We will round - * robin through the N vectors. + * On host, MSIX vector 0 is used for misc interrupts. Therefore when we match + * vector 1:1 to the WQ id, we need to add 1 */ - wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1; - if (!idxd->int_handles) { - desc->hw->int_handle = wq->vec_ptr; - } else { - /* - * int_handles are only for descriptor completion. However for device - * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even - * though we are rotating through 1...N for descriptor interrupts, we - * need to acqurie the int_handles from 0..N-1. - */ - desc->hw->int_handle = idxd->int_handles[desc->vector - 1]; - } + if (!idxd->int_handles) + desc->hw->int_handle = wq->id + 1; + else + desc->hw->int_handle = idxd->int_handles[wq->id]; return desc; } @@ -130,7 +122,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to.
*/ if (desc->hw->flags & IDXD_OP_FLAG_RCI) - llist_add(&desc->llnode, &idxd->irq_entries[desc->vector].pending_llist); + llist_add(&desc->llnode, &idxd->irq_entries[wq->id + 1].pending_llist); return 0; } -- Gitee From 1cbca217697e71c442c5daa11bcf9167a8653113 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 24 Jun 2021 13:43:32 -0700 Subject: [PATCH 043/173] dmaengine: idxd: fix setup sequence for MSIXPERM table mainline inclusion from mainline-v5.14 commit d5c10e0fc8645342fe5c9796b00c84ab078cd713 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit d5c10e0fc864 dmaengine: idxd: fix setup sequence for MSIXPERM table. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The MSIX permission table should be programmed BEFORE request_irq() happens. This prevents any possibility of an interrupt happening before the MSIX perm table is set up, however slight. Fixes: 6df0e6c57dfc ("dmaengine: idxd: clear MSIX permission entry on shutdown") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162456741222.1138073.1298447364671237896.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c8ae41d36040..4e32a4dcc3ab 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -102,6 +102,8 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) spin_lock_init(&idxd->irq_entries[i].list_lock); } + idxd_msix_perm_setup(idxd); + irq_entry = &idxd->irq_entries[0]; rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread, 0, "idxd-misc", irq_entry); @@ -148,7 +150,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } idxd_unmask_error_interrupts(idxd); - idxd_msix_perm_setup(idxd); return 0; err_wq_irqs: @@ -162,6 +163,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) err_misc_irq: /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); + idxd_msix_perm_clear(idxd); err_irq_entries: pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); -- Gitee From eea99f168b2d731a324e55889ce803f123a29ddc Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 14:57:19 -0700 Subject: [PATCH 044/173] dmaengine: idxd: fix sequence for pci driver remove() and shutdown() mainline inclusion from mainline-v5.14 commit 7eb25da161befbc9a80e94e1bd90d6c06aa645cf category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7eb25da161be dmaengine: idxd: fix sequence for pci driver remove() and shutdown(). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The ->shutdown() call should only be responsible for quiescing the device. Currently it is doing PCI device teardown. This causes issues when things like the MMIO mapping are removed while idxd_unregister_devices() triggers removal of the idxd device sub-drivers, which still initiate MMIO writes to the device. Another issue is that when the idxd 'struct device' is unregistered, its memory context gets freed, so the teardown calls access freed memory and can cause a kernel oops. Move all the teardown bits that don't belong in shutdown to the ->remove() call. Move the unregistering of the idxd conf_dev 'struct device' to after all the teardown is done, freeing all the memory that's no longer needed.
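In outline (simplified from the diff below), the division of labor becomes:

	static void idxd_shutdown(struct pci_dev *pdev)
	{
		/* quiesce only: disable device, sync IRQs, flush pending work */
	}

	static void idxd_remove(struct pci_dev *pdev)
	{
		idxd_shutdown(pdev);	/* quiesce first */
		/* then free IRQs, unmap MMIO, destroy the workqueue, ... */
		device_unregister(&idxd->conf_dev);	/* memory released last */
	}

->shutdown() can run on reboot paths where the device must stop but nothing may be freed; ->remove() owns the actual teardown.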
Fixes: 47c16ac27d4c ("dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162629983901.395844.17964803190905549615.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 26 +++++++++++++++++--------- drivers/dma/idxd/sysfs.c | 2 -- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 4e32a4dcc3ab..c0f4c0422f32 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -760,32 +760,40 @@ static void idxd_shutdown(struct pci_dev *pdev) for (i = 0; i < msixcnt; i++) { irq_entry = &idxd->irq_entries[i]; synchronize_irq(irq_entry->vector); - free_irq(irq_entry->vector, irq_entry); if (i == 0) continue; idxd_flush_pending_llist(irq_entry); idxd_flush_work_list(irq_entry); } - - idxd_msix_perm_clear(idxd); - idxd_release_int_handles(idxd); - pci_free_irq_vectors(pdev); - pci_iounmap(pdev, idxd->reg_base); - pci_disable_device(pdev); - destroy_workqueue(idxd->wq); + flush_workqueue(idxd->wq); } static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); + struct idxd_irq_entry *irq_entry; + int msixcnt = pci_msix_vec_count(pdev); + int i; dev_dbg(&pdev->dev, "%s called\n", __func__); idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); idxd_unregister_devices(idxd); - perfmon_pmu_remove(idxd); + + for (i = 0; i < msixcnt; i++) { + irq_entry = &idxd->irq_entries[i]; + free_irq(irq_entry->vector, irq_entry); + } + idxd_msix_perm_clear(idxd); + idxd_release_int_handles(idxd); + pci_free_irq_vectors(pdev); + pci_iounmap(pdev, idxd->reg_base); iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + pci_disable_device(pdev); + destroy_workqueue(idxd->wq); + perfmon_pmu_remove(idxd); + device_unregister(&idxd->conf_dev); } static struct pci_driver idxd_pci_driver = { diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 4d8d9dac4702..7df388a1b806 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1750,8 +1750,6 @@ void idxd_unregister_devices(struct idxd_device *idxd) device_unregister(&group->conf_dev); } - - device_unregister(&idxd->conf_dev); } int idxd_register_bus_type(void) -- Gitee From 0a1f856f97796242cbc240f2ee16d937b1e48c5b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 11:50:06 -0700 Subject: [PATCH 045/173] dmaengine: idxd: fix submission race window mainline inclusion from mainline-v5.14 commit 6b4b87f2c31ac1af4f244990a7cbfb50d3f3e33f category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 6b4b87f2c31a dmaengine: idxd: fix submission race window. Incremental backporting patches for DSA/IAA on Intel Xeon platform. Deviation from upstream: Merge commit 88c5d0a2b9b0 Merge branch 'fixes' into next. -------------------------------- Konstantin observed that when descriptors are submitted, the descriptor is added to the pending list after the submission. This creates a race window with the slight possibility that the descriptor can complete before it gets added to the pending list and this window would cause the completion handler to miss processing the descriptor. To address the issue, the addition of the descriptor to the pending list must be done before it gets submitted to the hardware. 
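In schematic form, the reordering is:

	/* before (racy): the completion can arrive while the desc is on no list */
	iosubmit_cmds512(portal, desc->hw, 1);
	llist_add(&desc->llnode, &ie->pending_llist);

	/*
	 * after: the desc is findable by the interrupt handler before the
	 * hardware has any chance to complete it
	 */
	llist_add(&desc->llnode, &ie->pending_llist);
	iosubmit_cmds512(portal, desc->hw, 1);
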
However, submitting to the swq with the ENQCMDS instruction can fail when either the wq is full or the wq is not "active". With the descriptor allocation being the gate to the wq capacity, it is not possible to hit a retry with ENQCMDS submission to the swq. The only possible failure is when the wq is no longer "active" due to a hw error, and therefore we are moving towards taking down the portal. Given that this is a rare condition and I/O performance is no longer a concern, the driver can walk the completion lists in order to retrieve and abort the descriptor. The error path will set the descriptor to aborted status. It will take the work list lock to prevent further processing of the work list. It will do a delete_all on the pending llist to retrieve all descriptors on it. The delete_all action does not require a lock. It will walk through the acquired llist to find the aborted descriptor while adding all remaining descriptors to the work list, since it holds the lock. If it does not find the aborted descriptor on the llist, it will walk through the work list. And if it still does not find the descriptor, it means the interrupt handler has removed the desc from the llist but is pending on the work list lock, and will process it once the error path releases the lock. Fixes: eb15e7154fbf ("dmaengine: idxd: add interrupt handle request and release support") Reported-by: Konstantin Ananyev Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162628855747.360485.10101925573082466530.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 14 ++++++++ drivers/dma/idxd/irq.c | 27 +++++++++----- drivers/dma/idxd/submit.c | 75 ++++++++++++++++++++++++++++++++++----- 3 files changed, 99 insertions(+), 17 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index de61bf98110c..551413eb0ffb 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -292,6 +292,14 @@ struct idxd_desc { struct idxd_wq *wq; }; +/* + * This is a software defined error for the completion status. We overload an error + * code that will never appear in the completion status, only in the SWERR register.
+ */ +enum idxd_completion_status { + IDXD_COMP_DESC_ABORT = 0xff, +}; + #define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev) #define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev) @@ -480,4 +488,10 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif +static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) +{ + idxd_dma_complete_txd(desc, reason); + idxd_free_desc(desc->wq, desc); +} + #endif diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index ae68e1e5487a..4e3a7198c0ca 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -245,12 +245,6 @@ static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr) return false; } -static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) -{ - idxd_dma_complete_txd(desc, reason); - idxd_free_desc(desc->wq, desc); -} - static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, enum irq_work_type wtype, int *processed, u64 data) @@ -272,8 +266,16 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, reason = IDXD_COMPLETE_DEV_FAIL; llist_for_each_entry_safe(desc, t, head, llnode) { - if (desc->completion->status) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; + + if (status) { + if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + (*processed)++; + continue; + } + + if (unlikely(status != DSA_COMP_SUCCESS)) match_fault(desc, data); complete_desc(desc, reason); (*processed)++; @@ -329,7 +331,14 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, spin_unlock_irqrestore(&irq_entry->list_lock, flags); list_for_each_entry(desc, &flist, list) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; + + if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + continue; + } + + if (unlikely(status != DSA_COMP_SUCCESS)) match_fault(desc, data); complete_desc(desc, reason); } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index a36f81c68844..65b0130ab2db 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -79,9 +79,64 @@ void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) sbitmap_queue_clear(&wq->sbq, desc->id, cpu); } +static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *n; + + lockdep_assert_held(&ie->list_lock); + list_for_each_entry_safe(d, n, &ie->work_list, list) { + if (d == desc) { + list_del(&d->list); + return d; + } + } + + /* + * At this point, the desc that needs to be aborted is held by the completion + * handler, which has taken it off the pending list but has not yet added it to + * the work list. It will be cleaned up by the interrupt handler when it sees the + * IDXD_COMP_DESC_ABORT for completion status. + */ + return NULL; +} + +static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *t, *found = NULL; + struct llist_node *head; + unsigned long flags; + + desc->completion->status = IDXD_COMP_DESC_ABORT; + /* + * Grab the list lock so it will block the irq thread handler. This allows the + * abort code to locate the descriptor that needs to be aborted.
+ */ + spin_lock_irqsave(&ie->list_lock, flags); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) { + if (d == desc) { + found = desc; + continue; + } + list_add_tail(&desc->list, &ie->work_list); + } + } + + if (!found) + found = list_abort_desc(wq, ie, desc); + spin_unlock_irqrestore(&ie->list_lock, flags); + + if (found) + complete_desc(found, IDXD_COMPLETE_ABORT); +} + int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = NULL; void __iomem *portal; int rc; @@ -99,6 +154,16 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * even on UP because the recipient is a device. */ wmb(); + + /* + * Pending the descriptor to the lockless list for the irq_entry + * that we designated the descriptor to. + */ + if (desc->hw->flags & IDXD_OP_FLAG_RCI) { + ie = &idxd->irq_entries[wq->id + 1]; + llist_add(&desc->llnode, &ie->pending_llist); + } + if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { @@ -111,18 +176,12 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) rc = enqcmds(portal, desc->hw); if (rc < 0) { percpu_ref_put(&wq->wq_active); + if (ie) + llist_abort_desc(wq, ie, desc); return rc; } } percpu_ref_put(&wq->wq_active); - - /* - * Pending the descriptor to the lockless list for the irq_entry - * that we designated the descriptor to. - */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) - llist_add(&desc->llnode, &idxd->irq_entries[wq->id + 1].pending_llist); - return 0; } -- Gitee From 09795bc66d2da5ec5cd47bc715b6dab9352e1212 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 8 Jul 2021 07:08:26 +0200 Subject: [PATCH 046/173] dmaengine: idxd: Simplify code and axe the use of a deprecated API mainline inclusion from mainline-v5.15 commit 53b50458110d829c8fc45e7803a335a515698fd8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 53b50458110d dmaengine: idxd: Simplify code and axe the use of a deprecated API. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The wrappers in include/linux/pci-dma-compat.h should go away. Replace 'pci_set_dma_mask/pci_set_consistent_dma_mask' by an equivalent and less verbose 'dma_set_mask_and_coherent()' call. Even if the code may look different, it should have exactly the same run-time behavior. If pci_set_dma_mask(64) fails and pci_set_dma_mask(32) succeeds, then pci_set_consistent_dma_mask(64) will also fail. 
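For reference, the deprecated wrappers are (approximately) thin shims over the dma_* API, so the substitution folds the two mask setups into one call:

	/* from include/linux/pci-dma-compat.h, roughly: */
	static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask)
	{
		return dma_set_mask(&dev->dev, mask);
	}

	static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask)
	{
		return dma_set_coherent_mask(&dev->dev, mask);
	}

dma_set_mask_and_coherent() sets both the streaming and the coherent mask with a single call.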
Signed-off-by: Christophe JAILLET Acked-by: Dave Jiang Link: https://lore.kernel.org/r/70c8a3bc67e41c5fefb526ecd64c5174c1e2dc76.1625720835.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c0f4c0422f32..75ac6a4bc9d1 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -639,15 +639,9 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } dev_dbg(dev, "Set DMA masks\n"); - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (rc) - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) - goto err; - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (rc) - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (rc) goto err; -- Gitee From 47b8ff615529e629fea3fa4fda75a304c49f53da Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 4 Jun 2021 17:06:21 -0700 Subject: [PATCH 047/173] dmaengine: idxd: cleanup all device related bits after disabling device mainline inclusion from mainline-v5.15 commit 0dcfe41e9a4ca759ccc87a48e3bb9cc3b08ff1e8 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 0dcfe41e9a4c dmaengine: idxd: cleanup all device related bits after disabling device. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The previous state cleanup patch only performed wq state cleanups. This does not go far enough: when the device is disabled or reset, the state for groups and engines must also be cleaned up. Add additional state cleanup beyond wq cleanup. Tie those cleanups directly to device disable and reset, and wq disable and reset.
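The resulting call structure, in outline (drawn from the diff below):

	idxd_device_disable() / idxd_device_reset()
	  -> idxd_device_clear_state()
	       -> idxd_groups_clear_state()
	       -> idxd_engines_clear_state()
	       -> idxd_device_wqs_clear_state()

	idxd_wq_disable(wq, true) / idxd_wq_reset(wq)
	  -> idxd_wq_disable_cleanup(wq)

so every disable or reset path returns groups, engines, and wqs to their default configuration.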
Fixes: da32b28c95a7 ("dmaengine: idxd: cleanup workqueue config after disabling") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162285154108.2096632.5572805472362321307.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 88 +++++++++++++++++++++++++++++---------- drivers/dma/idxd/idxd.h | 6 +-- drivers/dma/idxd/irq.c | 4 +- drivers/dma/idxd/sysfs.c | 22 +++------- 4 files changed, 74 insertions(+), 46 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 420b93fe5feb..928c2c8817f0 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -15,6 +15,8 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, u32 *status); +static void idxd_device_wqs_clear_state(struct idxd_device *idxd); +static void idxd_wq_disable_cleanup(struct idxd_wq *wq); /* Interrupt control bits */ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) @@ -234,7 +236,7 @@ int idxd_wq_enable(struct idxd_wq *wq) return 0; } -int idxd_wq_disable(struct idxd_wq *wq) +int idxd_wq_disable(struct idxd_wq *wq, bool reset_config) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; @@ -255,6 +257,8 @@ int idxd_wq_disable(struct idxd_wq *wq) return -ENXIO; } + if (reset_config) + idxd_wq_disable_cleanup(wq); wq->state = IDXD_WQ_DISABLED; dev_dbg(dev, "WQ %d disabled\n", wq->id); return 0; @@ -289,6 +293,7 @@ void idxd_wq_reset(struct idxd_wq *wq) operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL); + idxd_wq_disable_cleanup(wq); wq->state = IDXD_WQ_DISABLED; } @@ -337,7 +342,7 @@ int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) unsigned int offset; unsigned long flags; - rc = idxd_wq_disable(wq); + rc = idxd_wq_disable(wq, false); if (rc < 0) return rc; @@ -364,7 +369,7 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq) unsigned int offset; unsigned long flags; - rc = idxd_wq_disable(wq); + rc = idxd_wq_disable(wq, false); if (rc < 0) return rc; @@ -383,11 +388,11 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq) return 0; } -void idxd_wq_disable_cleanup(struct idxd_wq *wq) +static void idxd_wq_disable_cleanup(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; - lockdep_assert_held(&idxd->dev_lock); + lockdep_assert_held(&wq->wq_lock); memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; wq->size = 0; @@ -548,22 +553,6 @@ int idxd_device_enable(struct idxd_device *idxd) return 0; } -void idxd_device_wqs_clear_state(struct idxd_device *idxd) -{ - int i; - - lockdep_assert_held(&idxd->dev_lock); - - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = idxd->wqs[i]; - - if (wq->state == IDXD_WQ_ENABLED) { - idxd_wq_disable_cleanup(wq); - wq->state = IDXD_WQ_DISABLED; - } - } -} - int idxd_device_disable(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -585,7 +574,7 @@ int idxd_device_disable(struct idxd_device *idxd) } spin_lock_irqsave(&idxd->dev_lock, flags); - idxd_device_wqs_clear_state(idxd); + idxd_device_clear_state(idxd); idxd->state = IDXD_DEV_CONF_READY; spin_unlock_irqrestore(&idxd->dev_lock, flags); return 0; @@ -597,7 +586,7 @@ void idxd_device_reset(struct idxd_device *idxd) idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL); spin_lock_irqsave(&idxd->dev_lock, flags); - idxd_device_wqs_clear_state(idxd); + idxd_device_clear_state(idxd); idxd->state = IDXD_DEV_CONF_READY; spin_unlock_irqrestore(&idxd->dev_lock, flags); } @@ 
-685,6 +674,59 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, } /* Device configuration bits */ +static void idxd_engines_clear_state(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + int i; + + lockdep_assert_held(&idxd->dev_lock); + for (i = 0; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + engine->group = NULL; + } +} + +static void idxd_groups_clear_state(struct idxd_device *idxd) +{ + struct idxd_group *group; + int i; + + lockdep_assert_held(&idxd->dev_lock); + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + memset(&group->grpcfg, 0, sizeof(group->grpcfg)); + group->num_engines = 0; + group->num_wqs = 0; + group->use_token_limit = false; + group->tokens_allowed = 0; + group->tokens_reserved = 0; + group->tc_a = -1; + group->tc_b = -1; + } +} + +static void idxd_device_wqs_clear_state(struct idxd_device *idxd) +{ + int i; + + lockdep_assert_held(&idxd->dev_lock); + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + if (wq->state == IDXD_WQ_ENABLED) { + idxd_wq_disable_cleanup(wq); + wq->state = IDXD_WQ_DISABLED; + } + } +} + +void idxd_device_clear_state(struct idxd_device *idxd) +{ + idxd_groups_clear_state(idxd); + idxd_engines_clear_state(idxd); + idxd_device_wqs_clear_state(idxd); +} + void idxd_msix_perm_setup(struct idxd_device *idxd) { union msix_perm mperm; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 551413eb0ffb..d875b3d41ed2 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -426,9 +426,8 @@ int idxd_device_init_reset(struct idxd_device *idxd); int idxd_device_enable(struct idxd_device *idxd); int idxd_device_disable(struct idxd_device *idxd); void idxd_device_reset(struct idxd_device *idxd); -void idxd_device_cleanup(struct idxd_device *idxd); +void idxd_device_clear_state(struct idxd_device *idxd); int idxd_device_config(struct idxd_device *idxd); -void idxd_device_wqs_clear_state(struct idxd_device *idxd); void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid); int idxd_device_load_config(struct idxd_device *idxd); int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle, @@ -441,12 +440,11 @@ void idxd_wqs_unmap_portal(struct idxd_device *idxd); int idxd_wq_alloc_resources(struct idxd_wq *wq); void idxd_wq_free_resources(struct idxd_wq *wq); int idxd_wq_enable(struct idxd_wq *wq); -int idxd_wq_disable(struct idxd_wq *wq); +int idxd_wq_disable(struct idxd_wq *wq, bool reset_config); void idxd_wq_drain(struct idxd_wq *wq); void idxd_wq_reset(struct idxd_wq *wq); int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); -void idxd_wq_disable_cleanup(struct idxd_wq *wq); int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid); int idxd_wq_disable_pasid(struct idxd_wq *wq); void idxd_wq_quiesce(struct idxd_wq *wq); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 4e3a7198c0ca..2924819ca8f3 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -59,7 +59,7 @@ static void idxd_device_reinit(struct work_struct *work) return; out: - idxd_device_wqs_clear_state(idxd); + idxd_device_clear_state(idxd); } static void idxd_device_fault_work(struct work_struct *work) @@ -192,7 +192,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) spin_lock_bh(&idxd->dev_lock); idxd_wqs_quiesce(idxd); idxd_wqs_unmap_portal(idxd); - idxd_device_wqs_clear_state(idxd); + idxd_device_clear_state(idxd); dev_err(&idxd->pdev->dev, "idxd 
halted, need %s.\n", gensts.reset_type == IDXD_DEVICE_RESET_FLR ? diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 7df388a1b806..0cd511d873e7 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -129,7 +129,7 @@ static int enable_wq(struct idxd_wq *wq) rc = idxd_wq_map_portal(wq); if (rc < 0) { dev_warn(dev, "wq portal mapping failed: %d\n", rc); - rc = idxd_wq_disable(wq); + rc = idxd_wq_disable(wq, false); if (rc < 0) dev_warn(dev, "IDXD wq disable failed\n"); mutex_unlock(&wq->wq_lock); @@ -262,8 +262,6 @@ static void disable_wq(struct idxd_wq *wq) static int idxd_config_bus_remove(struct device *dev) { - int rc; - dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev)); /* disable workqueue here */ @@ -288,22 +286,12 @@ static int idxd_config_bus_remove(struct device *dev) } idxd_unregister_dma_device(idxd); - rc = idxd_device_disable(idxd); - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = idxd->wqs[i]; - - mutex_lock(&wq->wq_lock); - idxd_wq_disable_cleanup(wq); - mutex_unlock(&wq->wq_lock); - } - } + idxd_device_disable(idxd); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + idxd_device_reset(idxd); module_put(THIS_MODULE); - if (rc < 0) - dev_warn(dev, "Device disable failed\n"); - else - dev_info(dev, "Device %s disabled\n", dev_name(dev)); + dev_info(dev, "Device %s disabled\n", dev_name(dev)); } return 0; -- Gitee From 1e24687fdca80239a600c85edfe59e46d1aeaad4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 3 Jun 2021 14:57:35 -0700 Subject: [PATCH 048/173] dmaengine: idxd: Add wq occupancy information to sysfs attribute mainline inclusion from mainline-v5.15 commit e753a64bee753136087dfd70b37fdd199e942ea9 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit e753a64bee75 dmaengine: idxd: Add wq occupancy information to sysfs attribute. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add occupancy information to wq sysfs attribute. The attribute will show wq occupancy data if the "WQ Occupancy Support" field in WQCAP is 1. It displays the number of entries currently in this WQ. This is provided as an estimate and should not be relied on to determine whether there is space in the WQ. The data is provided to give user apps information for flow control. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162275745546.1857062.8765615879420582018.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- .../ABI/stable/sysfs-driver-dma-idxd | 7 +++++++ drivers/dma/idxd/registers.h | 3 +++ drivers/dma/idxd/sysfs.c | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 55285c136cf0..ebd521314803 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -211,6 +211,13 @@ Contact: dmaengine@vger.kernel.org Description: Indicate whether ATS disable is turned on for the workqueue. 0 indicates ATS is on, and 1 indicates ATS is off for the workqueue. +What: /sys/bus/dsa/devices/wq<device>.<wq>/occupancy +Date: May 25, 2021 +KernelVersion: 5.14.0 +Contact: dmaengine@vger.kernel.org +Description: Show the current number of entries in this WQ if WQ Occupancy + Support bit in WQ capabilities is 1. 
+ What: /sys/bus/dsa/devices/engine<device>.<engine>/group_id Date: Oct 25, 2019 KernelVersion: 5.6.0 diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index c970c3f025f0..7343a8f48819 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -349,6 +349,9 @@ union wqcfg { } __packed; #define WQCFG_PASID_IDX 2 +#define WQCFG_OCCUP_IDX 6 + +#define WQCFG_OCCUP_MASK 0xffff /* * This macro calculates the offset into the WQCFG register diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 0cd511d873e7..fbfcee5c3bfa 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1252,6 +1252,24 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute static struct device_attribute dev_attr_wq_ats_disable = __ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store); +static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + u32 occup, offset; + + if (!idxd->hw.wq_cap.occupancy) + return -EOPNOTSUPP; + + offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_OCCUP_IDX); + occup = ioread32(idxd->reg_base + offset) & WQCFG_OCCUP_MASK; + + return sysfs_emit(buf, "%u\n", occup); +} + +static struct device_attribute dev_attr_wq_occupancy = + __ATTR(occupancy, 0444, wq_occupancy_show, NULL); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -1267,6 +1285,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_max_transfer_size.attr, &dev_attr_wq_max_batch_size.attr, &dev_attr_wq_ats_disable.attr, + &dev_attr_wq_occupancy.attr, NULL, }; -- Gitee From 44e82552e65abd7e696d582db0a28d601e60b7b0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 3 Jun 2021 11:01:37 -0700 Subject: [PATCH 049/173] dmaengine: idxd: have command status always set mainline inclusion from mainline-v5.15 commit 53499d1fc11267e078557fc68ce943c1eb3b7a37 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 53499d1fc112 dmaengine: idxd: have command status always set. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The cached command status is only set when the write back status is passed in. Move the variable assignment outside of the check so it is always set. 
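In sketch form (simplified from the hunk below), the register read is hoisted above the conditional so the cached value no longer depends on the caller:

	stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
	spin_lock_irqsave(&idxd->cmd_lock, flags);
	if (status)
		*status = stat;				/* report to the caller only if requested */
	idxd->cmd_status = stat & GENMASK(7, 0);	/* always cache the low status byte */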
Fixes: 0d5c10b4c84d ("dmaengine: idxd: add work queue drain support") Reported-by: Ramesh Thomas Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162274329740.1822314.3443875665504707588.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 928c2c8817f0..c8cf1de72176 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -486,6 +486,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, union idxd_command_reg cmd; DECLARE_COMPLETION_ONSTACK(done); unsigned long flags; + u32 stat; if (idxd_device_is_halted(idxd)) { dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); @@ -518,11 +519,11 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, */ spin_unlock_irqrestore(&idxd->cmd_lock, flags); wait_for_completion(&done); + stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); spin_lock_irqsave(&idxd->cmd_lock, flags); - if (status) { - *status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); - idxd->cmd_status = *status & GENMASK(7, 0); - } + if (status) + *status = stat; + idxd->cmd_status = stat & GENMASK(7, 0); __clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags); /* Wake up other pending commands */ -- Gitee From e87014afcb1b9580254aadcee558c5429abb35d8 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:43:09 -0700 Subject: [PATCH 050/173] dmaengine: idxd: add driver register helper mainline inclusion from mainline-v5.15 commit 3ecfc9135e6c82183d121c5578ed5d6f07a53ec8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 3ecfc9135e6c dmaengine: idxd: add driver register helper. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add helper functions for dsa-driver registration similar to other bus-types. In particular, do not require dsa-drivers to open-code the bus, owner, and mod_name fields. Let registration and unregistration operate on the 'struct idxd_device_driver' instead of the raw / embedded 'struct device_driver'. 
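With the helper in place, a sub-driver registration reduces to something like the following sketch (the driver name and init/exit symbols are hypothetical, not part of this patch):

	static struct idxd_device_driver example_idxd_drv = {
		.drv = {
			.name = "example-idxd",
		},
	};

	static int __init example_init(void)
	{
		/* bus, owner and mod_name are filled in by the helper */
		return idxd_driver_register(&example_idxd_drv);
	}

	static void __exit example_exit(void)
	{
		idxd_driver_unregister(&example_idxd_drv);
	}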
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637458949.744545.14996726325385482050.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 7 +++++++ drivers/dma/idxd/init.c | 17 +++++++++++++++++ drivers/dma/idxd/sysfs.c | 7 ++----- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index d875b3d41ed2..8db19b899709 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -402,6 +402,13 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq) return wq->client_count; }; +int __must_check __idxd_driver_register(struct idxd_device_driver *idxd_drv, + struct module *module, const char *mod_name); +#define idxd_driver_register(driver) \ + __idxd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) + +void idxd_driver_unregister(struct idxd_device_driver *idxd_drv); + int idxd_register_bus_type(void); void idxd_unregister_bus_type(void); int idxd_register_devices(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 75ac6a4bc9d1..b15817751d5f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -855,3 +855,20 @@ static void __exit idxd_exit_module(void) perfmon_exit(); } module_exit(idxd_exit_module); + +int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *owner, + const char *mod_name) +{ + struct device_driver *drv = &idxd_drv->drv; + + drv->bus = &dsa_bus_type; + drv->owner = owner; + drv->mod_name = mod_name; + + return driver_register(drv); +} + +void idxd_driver_unregister(struct idxd_device_driver *idxd_drv) +{ + driver_unregister(&idxd_drv->drv); +} diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index fbfcee5c3bfa..a6e4aeac43bb 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -313,21 +313,18 @@ struct bus_type dsa_bus_type = { static struct idxd_device_driver dsa_drv = { .drv = { .name = "dsa", - .bus = &dsa_bus_type, - .owner = THIS_MODULE, - .mod_name = KBUILD_MODNAME, }, }; /* IDXD generic driver setup */ int idxd_register_driver(void) { - return driver_register(&dsa_drv.drv); + return idxd_driver_register(&dsa_drv); } void idxd_unregister_driver(void) { - driver_unregister(&dsa_drv.drv); + idxd_driver_unregister(&dsa_drv); } /* IDXD engine attributes */ -- Gitee From 3e8563d50124522d99cb157e79fa275dad761341 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:43:15 -0700 Subject: [PATCH 051/173] dmaengine: idxd: add driver name mainline inclusion from mainline-v5.15 commit da5a11d75d6837c9c5ef40810f66ce9d2db6ca5e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit da5a11d75d68 dmaengine: idxd: add driver name. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add name field in idxd_device_driver so we don't have to touch the 'struct device_driver' during declaration. 
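Continuing the hypothetical sub-driver sketch from the previous patch, a declaration no longer touches the embedded 'struct device_driver' at all:

	static struct idxd_device_driver example_idxd_drv = {
		.name = "example-idxd",
	};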
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637459517.744545.7572915135318813722.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 1 + drivers/dma/idxd/sysfs.c | 4 +--- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 8db19b899709..e8721ff028c2 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -34,6 +34,7 @@ enum idxd_type { #define IDXD_PMU_EVENT_MAX 64 struct idxd_device_driver { + const char *name; struct device_driver drv; }; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index b15817751d5f..6403d55c7ff7 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -861,6 +861,7 @@ int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *o { struct device_driver *drv = &idxd_drv->drv; + drv->name = idxd_drv->name; drv->bus = &dsa_bus_type; drv->owner = owner; drv->mod_name = mod_name; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index a6e4aeac43bb..a0da67fa84cd 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -311,9 +311,7 @@ struct bus_type dsa_bus_type = { }; static struct idxd_device_driver dsa_drv = { - .drv = { - .name = "dsa", - }, + .name = "dsa", }; /* IDXD generic driver setup */ -- Gitee From 5a262dcea0ba6e73968f42588bb8594c09e6d5eb Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:43:20 -0700 Subject: [PATCH 052/173] dmaengine: idxd: add 'struct idxd_dev' as wrapper for conf_dev mainline inclusion from mainline-v5.15 commit 700af3a0a26cbac87e4a0ae1dfa79597d0056d5f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 700af3a0a26c dmaengine: idxd: add 'struct idxd_dev' as wrapper for conf_dev. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add a 'struct idxd_dev' that wraps the 'struct device' for idxd conf_dev that registers with the dsa bus. This is introduced in order to deal with multiple different types of 'devices' that are registered on the dsa_bus when the compat driver needs to route them to the correct driver to attach. The bind() call now can determine the type of device and then do the appropriate driver matching. 
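As a sketch of how a bus-level match path might use the new type field (illustrative only; the actual compat-driver wiring arrives in later patches):

	static int example_match(struct device *dev, struct device_driver *drv)
	{
		struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);

		switch (idxd_dev->type) {
		case IDXD_DEV_WQ:
			return 1;	/* route to a wq sub-driver */
		case IDXD_DEV_DSA:
		case IDXD_DEV_IAX:
			return 1;	/* route to a device sub-driver */
		default:
			return 0;	/* nothing binds to this device type */
		}
	}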
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637460065.744545.584492831446090984.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 11 +- drivers/dma/idxd/dma.c | 4 +- drivers/dma/idxd/idxd.h | 82 ++++++++++++-- drivers/dma/idxd/init.c | 98 +++++++++------- drivers/dma/idxd/irq.c | 2 +- drivers/dma/idxd/sysfs.c | 239 ++++++++++++++++++--------------------- 6 files changed, 251 insertions(+), 185 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 6ecf0e5f3285..3a8c1f0fc14b 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -41,7 +41,7 @@ struct idxd_user_context { static void idxd_cdev_dev_release(struct device *dev) { - struct idxd_cdev *idxd_cdev = container_of(dev, struct idxd_cdev, dev); + struct idxd_cdev *idxd_cdev = dev_to_cdev(dev); struct idxd_cdev_context *cdev_ctx; struct idxd_wq *wq = idxd_cdev->wq; @@ -256,9 +256,10 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) if (!idxd_cdev) return -ENOMEM; + idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV; idxd_cdev->wq = wq; cdev = &idxd_cdev->cdev; - dev = &idxd_cdev->dev; + dev = cdev_dev(idxd_cdev); cdev_ctx = &ictx[wq->idxd->data->type]; minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL); if (minor < 0) { @@ -268,7 +269,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) idxd_cdev->minor = minor; device_initialize(dev); - dev->parent = &wq->conf_dev; + dev->parent = wq_confdev(wq); dev->bus = &dsa_bus_type; dev->type = &idxd_cdev_device_type; dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor); @@ -299,8 +300,8 @@ void idxd_wq_del_cdev(struct idxd_wq *wq) idxd_cdev = wq->idxd_cdev; wq->idxd_cdev = NULL; - cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev); - put_device(&idxd_cdev->dev); + cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev)); + put_device(cdev_dev(idxd_cdev)); } int idxd_cdev_register(void) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index e06e3cf7bf92..89239569e310 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -268,7 +268,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq) wq->idxd_chan = idxd_chan; idxd_chan->wq = wq; - get_device(&wq->conf_dev); + get_device(wq_confdev(wq)); return 0; } @@ -283,5 +283,5 @@ void idxd_unregister_dma_channel(struct idxd_wq *wq) list_del(&chan->device_node); kfree(wq->idxd_chan); wq->idxd_chan = NULL; - put_device(&wq->conf_dev); + put_device(wq_confdev(wq)); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index e8721ff028c2..ae60fcc7b625 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -17,8 +17,24 @@ extern struct kmem_cache *idxd_desc_pool; -struct idxd_device; struct idxd_wq; +struct idxd_dev; + +enum idxd_dev_type { + IDXD_DEV_NONE = -1, + IDXD_DEV_DSA = 0, + IDXD_DEV_IAX, + IDXD_DEV_WQ, + IDXD_DEV_GROUP, + IDXD_DEV_ENGINE, + IDXD_DEV_CDEV, + IDXD_DEV_MAX_TYPE, +}; + +struct idxd_dev { + struct device conf_dev; + enum idxd_dev_type type; +}; #define IDXD_REG_TIMEOUT 50 #define IDXD_DRAIN_TIMEOUT 5000 @@ -52,7 +68,7 @@ struct idxd_irq_entry { }; struct idxd_group { - struct device conf_dev; + struct idxd_dev idxd_dev; struct idxd_device *idxd; struct grpcfg grpcfg; int id; @@ -111,7 +127,7 @@ enum idxd_wq_type { struct idxd_cdev { struct idxd_wq *wq; struct cdev cdev; - struct device dev; + struct idxd_dev idxd_dev; int minor; }; @@ -139,7 +155,7 @@ struct idxd_wq { void __iomem *portal; struct percpu_ref wq_active; struct completion wq_dead; - 
struct device conf_dev; + struct idxd_dev idxd_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; struct idxd_device *idxd; @@ -174,7 +190,7 @@ struct idxd_wq { }; struct idxd_engine { - struct device conf_dev; + struct idxd_dev idxd_dev; int id; struct idxd_group *group; struct idxd_device *idxd; @@ -218,7 +234,7 @@ struct idxd_driver_data { }; struct idxd_device { - struct device conf_dev; + struct idxd_dev idxd_dev; struct idxd_driver_data *data; struct list_head list; struct idxd_hw hw; @@ -301,8 +317,58 @@ enum idxd_completion_status { IDXD_COMP_DESC_ABORT = 0xff, }; -#define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev) -#define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev) +#define idxd_confdev(idxd) &idxd->idxd_dev.conf_dev +#define wq_confdev(wq) &wq->idxd_dev.conf_dev +#define engine_confdev(engine) &engine->idxd_dev.conf_dev +#define group_confdev(group) &group->idxd_dev.conf_dev +#define cdev_dev(cdev) &cdev->idxd_dev.conf_dev + +#define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev) + +static inline struct idxd_device *confdev_to_idxd(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_device, idxd_dev); +} + +static inline struct idxd_wq *confdev_to_wq(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_wq, idxd_dev); +} + +static inline struct idxd_engine *confdev_to_engine(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_engine, idxd_dev); +} + +static inline struct idxd_group *confdev_to_group(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_group, idxd_dev); +} + +static inline struct idxd_cdev *dev_to_cdev(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_cdev, idxd_dev); +} + +static inline void idxd_dev_set_type(struct idxd_dev *idev, int type) +{ + if (type >= IDXD_DEV_MAX_TYPE) { + idev->type = IDXD_DEV_NONE; + return; + } + + idev->type = type; +} extern struct bus_type dsa_bus_type; extern struct bus_type iax_bus_type; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 6403d55c7ff7..f500076882d2 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -200,6 +200,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; struct idxd_wq *wq; + struct device *conf_dev; int i, rc; idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *), @@ -214,15 +215,17 @@ static int idxd_setup_wqs(struct idxd_device *idxd) goto err; } + idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ); + conf_dev = wq_confdev(wq); wq->id = i; wq->idxd = idxd; - device_initialize(&wq->conf_dev); - wq->conf_dev.parent = &idxd->conf_dev; - wq->conf_dev.bus = &dsa_bus_type; - wq->conf_dev.type = &idxd_wq_device_type; - rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id); + device_initialize(wq_confdev(wq)); + conf_dev->parent = idxd_confdev(idxd); + conf_dev->bus = &dsa_bus_type; + conf_dev->type = &idxd_wq_device_type; + rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id); if (rc < 0) { - put_device(&wq->conf_dev); + put_device(conf_dev); goto err; } @@ -233,7 +236,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd) wq->max_batch_size = idxd->max_batch_size; 
wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { - put_device(&wq->conf_dev); + put_device(conf_dev); rc = -ENOMEM; goto err; } @@ -243,8 +246,11 @@ static int idxd_setup_wqs(struct idxd_device *idxd) return 0; err: - while (--i >= 0) - put_device(&idxd->wqs[i]->conf_dev); + while (--i >= 0) { + wq = idxd->wqs[i]; + conf_dev = wq_confdev(wq); + put_device(conf_dev); + } return rc; } @@ -252,6 +258,7 @@ static int idxd_setup_engines(struct idxd_device *idxd) { struct idxd_engine *engine; struct device *dev = &idxd->pdev->dev; + struct device *conf_dev; int i, rc; idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *), @@ -266,15 +273,17 @@ static int idxd_setup_engines(struct idxd_device *idxd) goto err; } + idxd_dev_set_type(&engine->idxd_dev, IDXD_DEV_ENGINE); + conf_dev = engine_confdev(engine); engine->id = i; engine->idxd = idxd; - device_initialize(&engine->conf_dev); - engine->conf_dev.parent = &idxd->conf_dev; - engine->conf_dev.bus = &dsa_bus_type; - engine->conf_dev.type = &idxd_engine_device_type; - rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id); + device_initialize(conf_dev); + conf_dev->parent = idxd_confdev(idxd); + conf_dev->bus = &dsa_bus_type; + conf_dev->type = &idxd_engine_device_type; + rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id); if (rc < 0) { - put_device(&engine->conf_dev); + put_device(conf_dev); goto err; } @@ -284,14 +293,18 @@ static int idxd_setup_engines(struct idxd_device *idxd) return 0; err: - while (--i >= 0) - put_device(&idxd->engines[i]->conf_dev); + while (--i >= 0) { + engine = idxd->engines[i]; + conf_dev = engine_confdev(engine); + put_device(conf_dev); + } return rc; } static int idxd_setup_groups(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; + struct device *conf_dev; struct idxd_group *group; int i, rc; @@ -307,15 +320,17 @@ static int idxd_setup_groups(struct idxd_device *idxd) goto err; } + idxd_dev_set_type(&group->idxd_dev, IDXD_DEV_GROUP); + conf_dev = group_confdev(group); group->id = i; group->idxd = idxd; - device_initialize(&group->conf_dev); - group->conf_dev.parent = &idxd->conf_dev; - group->conf_dev.bus = &dsa_bus_type; - group->conf_dev.type = &idxd_group_device_type; - rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id); + device_initialize(conf_dev); + conf_dev->parent = idxd_confdev(idxd); + conf_dev->bus = &dsa_bus_type; + conf_dev->type = &idxd_group_device_type; + rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id); if (rc < 0) { - put_device(&group->conf_dev); + put_device(conf_dev); goto err; } @@ -327,8 +342,10 @@ static int idxd_setup_groups(struct idxd_device *idxd) return 0; err: - while (--i >= 0) - put_device(&idxd->groups[i]->conf_dev); + while (--i >= 0) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + } return rc; } @@ -337,11 +354,11 @@ static void idxd_cleanup_internals(struct idxd_device *idxd) int i; for (i = 0; i < idxd->max_groups; i++) - put_device(&idxd->groups[i]->conf_dev); + put_device(group_confdev(idxd->groups[i])); for (i = 0; i < idxd->max_engines; i++) - put_device(&idxd->engines[i]->conf_dev); + put_device(engine_confdev(idxd->engines[i])); for (i = 0; i < idxd->max_wqs; i++) - put_device(&idxd->wqs[i]->conf_dev); + put_device(wq_confdev(idxd->wqs[i])); destroy_workqueue(idxd->wq); } @@ -381,13 +398,13 @@ static int idxd_setup_internals(struct idxd_device *idxd) err_wkq_create: for (i = 0; i < idxd->max_groups; i++) - 
put_device(&idxd->groups[i]->conf_dev); + put_device(group_confdev(idxd->groups[i])); err_group: for (i = 0; i < idxd->max_engines; i++) - put_device(&idxd->engines[i]->conf_dev); + put_device(engine_confdev(idxd->engines[i])); err_engine: for (i = 0; i < idxd->max_wqs; i++) - put_device(&idxd->wqs[i]->conf_dev); + put_device(wq_confdev(idxd->wqs[i])); err_wqs: kfree(idxd->int_handles); return rc; @@ -469,6 +486,7 @@ static void idxd_read_caps(struct idxd_device *idxd) static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) { struct device *dev = &pdev->dev; + struct device *conf_dev; struct idxd_device *idxd; int rc; @@ -476,19 +494,21 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d if (!idxd) return NULL; + conf_dev = idxd_confdev(idxd); idxd->pdev = pdev; idxd->data = data; + idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type); idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL); if (idxd->id < 0) return NULL; - device_initialize(&idxd->conf_dev); - idxd->conf_dev.parent = dev; - idxd->conf_dev.bus = &dsa_bus_type; - idxd->conf_dev.type = idxd->data->dev_type; - rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id); + device_initialize(conf_dev); + conf_dev->parent = dev; + conf_dev->bus = &dsa_bus_type; + conf_dev->type = idxd->data->dev_type; + rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id); if (rc < 0) { - put_device(&idxd->conf_dev); + put_device(conf_dev); return NULL; } @@ -674,7 +694,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) err: pci_iounmap(pdev, idxd->reg_base); err_iomap: - put_device(&idxd->conf_dev); + put_device(idxd_confdev(idxd)); err_idxd_alloc: pci_disable_device(pdev); return rc; @@ -787,7 +807,7 @@ static void idxd_remove(struct pci_dev *pdev) pci_disable_device(pdev); destroy_workqueue(idxd->wq); perfmon_pmu_remove(idxd); - device_unregister(&idxd->conf_dev); + device_unregister(idxd_confdev(idxd)); } static struct pci_driver idxd_pci_driver = { diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 2924819ca8f3..be65d55e1fc4 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -51,7 +51,7 @@ static void idxd_device_reinit(struct work_struct *work) rc = idxd_wq_enable(wq); if (rc < 0) { dev_warn(dev, "Unable to re-enable wq %s\n", - dev_name(&wq->conf_dev)); + dev_name(wq_confdev(wq))); } } } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index a0da67fa84cd..dcb8b35418fb 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -164,7 +164,7 @@ static int enable_wq(struct idxd_wq *wq) } mutex_unlock(&wq->wq_lock); - dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev)); + dev_info(dev, "wq %s enabled\n", dev_name(wq_confdev(wq))); return 0; } @@ -230,7 +230,7 @@ static void disable_wq(struct idxd_wq *wq) struct device *dev = &idxd->pdev->dev; mutex_lock(&wq->wq_lock); - dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev)); + dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(wq_confdev(wq))); if (wq->state == IDXD_WQ_DISABLED) { mutex_unlock(&wq->wq_lock); return; @@ -257,7 +257,7 @@ static void disable_wq(struct idxd_wq *wq) wq->client_count = 0; mutex_unlock(&wq->wq_lock); - dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev)); + dev_info(dev, "wq %s disabled\n", dev_name(wq_confdev(wq))); } static int idxd_config_bus_remove(struct device *dev) @@ -274,15 +274,15 @@ static int idxd_config_bus_remove(struct device *dev) int 
i; dev_dbg(dev, "%s removing dev %s\n", __func__, - dev_name(&idxd->conf_dev)); + dev_name(idxd_confdev(idxd))); for (i = 0; i < idxd->max_wqs; i++) { struct idxd_wq *wq = idxd->wqs[i]; if (wq->state == IDXD_WQ_DISABLED) continue; dev_warn(dev, "Active wq %d on disable %s.\n", i, - dev_name(&idxd->conf_dev)); - device_release_driver(&wq->conf_dev); + dev_name(wq_confdev(wq))); + device_release_driver(wq_confdev(wq)); } idxd_unregister_dma_device(idxd); @@ -329,8 +329,7 @@ void idxd_unregister_driver(void) static ssize_t engine_group_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_engine *engine = - container_of(dev, struct idxd_engine, conf_dev); + struct idxd_engine *engine = confdev_to_engine(dev); if (engine->group) return sysfs_emit(buf, "%d\n", engine->group->id); @@ -342,8 +341,7 @@ static ssize_t engine_group_id_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_engine *engine = - container_of(dev, struct idxd_engine, conf_dev); + struct idxd_engine *engine = confdev_to_engine(dev); struct idxd_device *idxd = engine->idxd; long id; int rc; @@ -397,7 +395,7 @@ static const struct attribute_group *idxd_engine_attribute_groups[] = { static void idxd_conf_engine_release(struct device *dev) { - struct idxd_engine *engine = container_of(dev, struct idxd_engine, conf_dev); + struct idxd_engine *engine = confdev_to_engine(dev); kfree(engine); } @@ -427,8 +425,7 @@ static ssize_t group_tokens_reserved_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%u\n", group->tokens_reserved); } @@ -437,8 +434,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; unsigned long val; int rc; @@ -475,8 +471,7 @@ static ssize_t group_tokens_allowed_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%u\n", group->tokens_allowed); } @@ -485,8 +480,7 @@ static ssize_t group_tokens_allowed_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; unsigned long val; int rc; @@ -520,8 +514,7 @@ static ssize_t group_use_token_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%u\n", group->use_token_limit); } @@ -530,8 +523,7 @@ static ssize_t group_use_token_limit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; unsigned long val; int rc; @@ -563,8 +555,7 @@ static struct device_attribute dev_attr_group_use_token_limit = static ssize_t 
group_engines_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); int i, rc = 0; struct idxd_device *idxd = group->idxd; @@ -592,8 +583,7 @@ static struct device_attribute dev_attr_group_engines = static ssize_t group_work_queues_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); int i, rc = 0; struct idxd_device *idxd = group->idxd; @@ -622,8 +612,7 @@ static ssize_t group_traffic_class_a_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%d\n", group->tc_a); } @@ -632,8 +621,7 @@ static ssize_t group_traffic_class_a_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; long val; int rc; @@ -663,8 +651,7 @@ static ssize_t group_traffic_class_b_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%d\n", group->tc_b); } @@ -673,8 +660,7 @@ static ssize_t group_traffic_class_b_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_group *group = - container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; long val; int rc; @@ -722,7 +708,7 @@ static const struct attribute_group *idxd_group_attribute_groups[] = { static void idxd_conf_group_release(struct device *dev) { - struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); + struct idxd_group *group = confdev_to_group(dev); kfree(group); } @@ -737,7 +723,7 @@ struct device_type idxd_group_device_type = { static ssize_t wq_clients_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%d\n", wq->client_count); } @@ -748,7 +734,7 @@ static struct device_attribute dev_attr_wq_clients = static ssize_t wq_state_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); switch (wq->state) { case IDXD_WQ_DISABLED: @@ -766,7 +752,7 @@ static struct device_attribute dev_attr_wq_state = static ssize_t wq_group_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); if (wq->group) return sysfs_emit(buf, "%u\n", wq->group->id); @@ -778,7 +764,7 @@ static ssize_t wq_group_id_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; long id; int rc; @@ -821,7 +807,7 @@ static struct device_attribute 
dev_attr_wq_group_id = static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? "dedicated" : "shared"); } @@ -830,7 +816,7 @@ static ssize_t wq_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -857,7 +843,7 @@ static struct device_attribute dev_attr_wq_mode = static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%u\n", wq->size); } @@ -880,7 +866,7 @@ static ssize_t wq_size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); unsigned long size; struct idxd_device *idxd = wq->idxd; int rc; @@ -908,7 +894,7 @@ static struct device_attribute dev_attr_wq_size = static ssize_t wq_priority_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%u\n", wq->priority); } @@ -917,7 +903,7 @@ static ssize_t wq_priority_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); unsigned long prio; struct idxd_device *idxd = wq->idxd; int rc; @@ -945,7 +931,7 @@ static struct device_attribute dev_attr_wq_priority = static ssize_t wq_block_on_fault_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)); } @@ -954,7 +940,7 @@ static ssize_t wq_block_on_fault_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; bool bof; int rc; @@ -984,7 +970,7 @@ static struct device_attribute dev_attr_wq_block_on_fault = static ssize_t wq_threshold_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%u\n", wq->threshold); } @@ -993,7 +979,7 @@ static ssize_t wq_threshold_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; unsigned int val; int rc; @@ -1025,7 +1011,7 @@ static struct device_attribute dev_attr_wq_threshold = static ssize_t wq_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); switch (wq->type) { case IDXD_WQT_KERNEL: @@ 
-1044,7 +1030,7 @@ static ssize_t wq_type_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); enum idxd_wq_type old_type; if (wq->state != IDXD_WQ_DISABLED) @@ -1073,7 +1059,7 @@ static struct device_attribute dev_attr_wq_type = static ssize_t wq_name_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%s\n", wq->name); } @@ -1082,7 +1068,7 @@ static ssize_t wq_name_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); if (wq->state != IDXD_WQ_DISABLED) return -EPERM; @@ -1109,7 +1095,7 @@ static struct device_attribute dev_attr_wq_name = static ssize_t wq_cdev_minor_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); int minor = -1; mutex_lock(&wq->wq_lock); @@ -1143,7 +1129,7 @@ static int __get_sysfs_u64(const char *buf, u64 *val) static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes); } @@ -1151,7 +1137,7 @@ static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attri static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; u64 xfer_size; int rc; @@ -1180,7 +1166,7 @@ static struct device_attribute dev_attr_wq_max_transfer_size = static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%u\n", wq->max_batch_size); } @@ -1188,7 +1174,7 @@ static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribut static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; u64 batch_size; int rc; @@ -1216,7 +1202,7 @@ static struct device_attribute dev_attr_wq_max_batch_size = static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); return sysfs_emit(buf, "%u\n", wq->ats_dis); } @@ -1224,7 +1210,7 @@ static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute * static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); struct idxd_device *idxd = wq->idxd; bool ats_dis; int rc; @@ -1295,7 +1281,7 @@ static const struct attribute_group 
*idxd_wq_attribute_groups[] = { static void idxd_conf_wq_release(struct device *dev) { - struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + struct idxd_wq *wq = confdev_to_wq(dev); kfree(wq->wqcfg); kfree(wq); @@ -1311,8 +1297,7 @@ struct device_type idxd_wq_device_type = { static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%#x\n", idxd->hw.version); } @@ -1322,8 +1307,7 @@ static ssize_t max_work_queues_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->max_wq_size); } @@ -1332,8 +1316,7 @@ static DEVICE_ATTR_RO(max_work_queues_size); static ssize_t max_groups_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->max_groups); } @@ -1342,8 +1325,7 @@ static DEVICE_ATTR_RO(max_groups); static ssize_t max_work_queues_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->max_wqs); } @@ -1352,8 +1334,7 @@ static DEVICE_ATTR_RO(max_work_queues); static ssize_t max_engines_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->max_engines); } @@ -1362,8 +1343,7 @@ static DEVICE_ATTR_RO(max_engines); static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev)); } @@ -1372,8 +1352,7 @@ static DEVICE_ATTR_RO(numa_node); static ssize_t max_batch_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->max_batch_size); } @@ -1383,8 +1362,7 @@ static ssize_t max_transfer_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes); } @@ -1393,8 +1371,7 @@ static DEVICE_ATTR_RO(max_transfer_size); static ssize_t op_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); int i, rc = 0; for (i = 0; i < 4; i++) @@ -1409,8 +1386,7 @@ static DEVICE_ATTR_RO(op_cap); static ssize_t gen_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%#llx\n", 
idxd->hw.gen_cap.bits); } @@ -1419,8 +1395,7 @@ static DEVICE_ATTR_RO(gen_cap); static ssize_t configurable_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)); } @@ -1429,8 +1404,7 @@ static DEVICE_ATTR_RO(configurable); static ssize_t clients_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); unsigned long flags; int count = 0, i; @@ -1449,8 +1423,7 @@ static DEVICE_ATTR_RO(clients); static ssize_t pasid_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", device_pasid_enabled(idxd)); } @@ -1459,8 +1432,7 @@ static DEVICE_ATTR_RO(pasid_enabled); static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); switch (idxd->state) { case IDXD_DEV_DISABLED: @@ -1479,8 +1451,7 @@ static DEVICE_ATTR_RO(state); static ssize_t errors_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); int i, out = 0; unsigned long flags; @@ -1497,8 +1468,7 @@ static DEVICE_ATTR_RO(errors); static ssize_t max_tokens_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->max_tokens); } @@ -1507,8 +1477,7 @@ static DEVICE_ATTR_RO(max_tokens); static ssize_t token_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->token_limit); } @@ -1517,8 +1486,7 @@ static ssize_t token_limit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); unsigned long val; int rc; @@ -1546,8 +1514,7 @@ static DEVICE_ATTR_RW(token_limit); static ssize_t cdev_major_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = - container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->major); } @@ -1556,7 +1523,7 @@ static DEVICE_ATTR_RO(cdev_major); static ssize_t cmd_status_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); + struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%#x\n", idxd->cmd_status); } @@ -1596,7 +1563,7 @@ static const struct attribute_group *idxd_attribute_groups[] = { static void idxd_conf_device_release(struct device *dev) { - struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); + 
struct idxd_device *idxd = confdev_to_idxd(dev); kfree(idxd->groups); kfree(idxd->wqs); @@ -1621,12 +1588,12 @@ struct device_type iax_device_type = { static int idxd_register_engine_devices(struct idxd_device *idxd) { + struct idxd_engine *engine; int i, j, rc; for (i = 0; i < idxd->max_engines; i++) { - struct idxd_engine *engine = idxd->engines[i]; - - rc = device_add(&engine->conf_dev); + engine = idxd->engines[i]; + rc = device_add(engine_confdev(engine)); if (rc < 0) goto cleanup; } @@ -1635,22 +1602,26 @@ static int idxd_register_engine_devices(struct idxd_device *idxd) cleanup: j = i - 1; - for (; i < idxd->max_engines; i++) - put_device(&idxd->engines[i]->conf_dev); + for (; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + put_device(engine_confdev(engine)); + } - while (j--) - device_unregister(&idxd->engines[j]->conf_dev); + while (j--) { + engine = idxd->engines[j]; + device_unregister(engine_confdev(engine)); + } return rc; } static int idxd_register_group_devices(struct idxd_device *idxd) { + struct idxd_group *group; int i, j, rc; for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = idxd->groups[i]; - - rc = device_add(&group->conf_dev); + group = idxd->groups[i]; + rc = device_add(group_confdev(group)); if (rc < 0) goto cleanup; } @@ -1659,22 +1630,26 @@ static int idxd_register_group_devices(struct idxd_device *idxd) cleanup: j = i - 1; - for (; i < idxd->max_groups; i++) - put_device(&idxd->groups[i]->conf_dev); + for (; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + } - while (j--) - device_unregister(&idxd->groups[j]->conf_dev); + while (j--) { + group = idxd->groups[j]; + device_unregister(group_confdev(group)); + } return rc; } static int idxd_register_wq_devices(struct idxd_device *idxd) { + struct idxd_wq *wq; int i, rc, j; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = idxd->wqs[i]; - - rc = device_add(&wq->conf_dev); + wq = idxd->wqs[i]; + rc = device_add(wq_confdev(wq)); if (rc < 0) goto cleanup; } @@ -1683,11 +1658,15 @@ static int idxd_register_wq_devices(struct idxd_device *idxd) cleanup: j = i - 1; - for (; i < idxd->max_wqs; i++) - put_device(&idxd->wqs[i]->conf_dev); + for (; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + put_device(wq_confdev(wq)); + } - while (j--) - device_unregister(&idxd->wqs[j]->conf_dev); + while (j--) { + wq = idxd->wqs[j]; + device_unregister(wq_confdev(wq)); + } return rc; } @@ -1696,7 +1675,7 @@ int idxd_register_devices(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; int rc, i; - rc = device_add(&idxd->conf_dev); + rc = device_add(idxd_confdev(idxd)); if (rc < 0) return rc; @@ -1722,12 +1701,12 @@ int idxd_register_devices(struct idxd_device *idxd) err_group: for (i = 0; i < idxd->max_engines; i++) - device_unregister(&idxd->engines[i]->conf_dev); + device_unregister(engine_confdev(idxd->engines[i])); err_engine: for (i = 0; i < idxd->max_wqs; i++) - device_unregister(&idxd->wqs[i]->conf_dev); + device_unregister(wq_confdev(idxd->wqs[i])); err_wq: - device_del(&idxd->conf_dev); + device_del(idxd_confdev(idxd)); return rc; } @@ -1738,19 +1717,19 @@ void idxd_unregister_devices(struct idxd_device *idxd) for (i = 0; i < idxd->max_wqs; i++) { struct idxd_wq *wq = idxd->wqs[i]; - device_unregister(&wq->conf_dev); + device_unregister(wq_confdev(wq)); } for (i = 0; i < idxd->max_engines; i++) { struct idxd_engine *engine = idxd->engines[i]; - device_unregister(&engine->conf_dev); + device_unregister(engine_confdev(engine)); } 
for (i = 0; i < idxd->max_groups; i++) { struct idxd_group *group = idxd->groups[i]; - device_unregister(&group->conf_dev); + device_unregister(group_confdev(group)); } } -- Gitee From 8ce16c9f50aea39660278a799654e59fbe6e9f0a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:43:26 -0700 Subject: [PATCH 053/173] dmaengine: idxd: remove IDXD_DEV_CONF_READY mainline inclusion from mainline-v5.15 commit f52058ae11523304a337de249c4c07ba5076f288 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit f52058ae1152 dmaengine: idxd: remove IDXD_DEV_CONF_READY. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The IDXD_DEV_CONF_READY state flag is no longer needed. The current implementation uses this flag to stop the device from doing configuration until the pci driver probe has completed. With the driver architecture going towards multiple sub-drivers attached to the dsa_bus, this is no longer feasible. The sub-drivers will be allowed to probe and return with failure when they are not ready to complete the probe rather than using a state flag to gate the probing. There is no expectation that the devices auto-attach to a driver. Userspace configuration is expected to set up the device before enabling. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637460633.744545.8902095097471365420.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 4 ++-- drivers/dma/idxd/idxd.h | 1 - drivers/dma/idxd/init.c | 2 -- drivers/dma/idxd/sysfs.c | 14 -------------- 4 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index c8cf1de72176..4a2af9799239 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -576,7 +576,7 @@ int idxd_device_disable(struct idxd_device *idxd) spin_lock_irqsave(&idxd->dev_lock, flags); idxd_device_clear_state(idxd); - idxd->state = IDXD_DEV_CONF_READY; + idxd->state = IDXD_DEV_DISABLED; spin_unlock_irqrestore(&idxd->dev_lock, flags); return 0; } @@ -588,7 +588,7 @@ void idxd_device_reset(struct idxd_device *idxd) idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL); spin_lock_irqsave(&idxd->dev_lock, flags); idxd_device_clear_state(idxd); - idxd->state = IDXD_DEV_CONF_READY; + idxd->state = IDXD_DEV_DISABLED; spin_unlock_irqrestore(&idxd->dev_lock, flags); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index ae60fcc7b625..9fc1a88f336d 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -210,7 +210,6 @@ struct idxd_hw { enum idxd_device_state { IDXD_DEV_HALTED = -1, IDXD_DEV_DISABLED = 0, - IDXD_DEV_CONF_READY, IDXD_DEV_ENABLED, }; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f500076882d2..c22225b14c5d 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -682,8 +682,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_dev_register; } - idxd->state = IDXD_DEV_CONF_READY; - dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n", idxd->hw.version); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index dcb8b35418fb..019079e5adb9 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -22,17 +22,9 @@ static int idxd_config_bus_match(struct device *dev, int matched = 0; if (is_idxd_dev(dev)) { - struct idxd_device *idxd = confdev_to_idxd(dev);
- - if (idxd->state != IDXD_DEV_CONF_READY) - return 0; matched = 1; } else if (is_idxd_wq_dev(dev)) { struct idxd_wq *wq = confdev_to_wq(dev); - struct idxd_device *idxd = wq->idxd; - - if (idxd->state < IDXD_DEV_CONF_READY) - return 0; if (wq->state != IDXD_WQ_DISABLED) { dev_dbg(dev, "%s not disabled\n", dev_name(dev)); @@ -179,11 +171,6 @@ static int idxd_config_bus_probe(struct device *dev) if (is_idxd_dev(dev)) { struct idxd_device *idxd = confdev_to_idxd(dev); - if (idxd->state != IDXD_DEV_CONF_READY) { - dev_warn(dev, "Device not ready for config\n"); - return -EBUSY; - } - if (!try_module_get(THIS_MODULE)) return -ENXIO; @@ -1436,7 +1423,6 @@ static ssize_t state_show(struct device *dev, switch (idxd->state) { case IDXD_DEV_DISABLED: - case IDXD_DEV_CONF_READY: return sysfs_emit(buf, "disabled\n"); case IDXD_DEV_ENABLED: return sysfs_emit(buf, "enabled\n"); -- Gitee From 5555a15f936d5bae24745e97fffec7421079f7c3 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:43:31 -0700 Subject: [PATCH 054/173] dmaengine: idxd: move wq_enable() to device.c mainline inclusion from mainline-v5.15 commit 1f2bb40337f0df1d9af80793e9fdacff7706e654 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 1f2bb40337f0 dmaengine: idxd: move wq_enable() to device.c. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Move the wq_enable() function to device.c in preparation of setting up the idxd internal sub-driver framework. No logic changes. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637461176.744545.3806109011554118998.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 124 ++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/sysfs.c | 124 +------------------------------------- 3 files changed, 126 insertions(+), 123 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 4a2af9799239..b1c509bcfa31 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1129,3 +1129,127 @@ int idxd_device_load_config(struct idxd_device *idxd) return 0; } + +static int __drv_enable_wq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + unsigned long flags; + int rc = -ENXIO; + + lockdep_assert_held(&wq->wq_lock); + + if (idxd->state != IDXD_DEV_ENABLED) + goto err; + + if (wq->state != IDXD_WQ_DISABLED) { + dev_dbg(dev, "wq %d already enabled.\n", wq->id); + rc = -EBUSY; + goto err; + } + + if (!wq->group) { + dev_dbg(dev, "wq %d not attached to group.\n", wq->id); + goto err; + } + + if (strlen(wq->name) == 0) { + dev_dbg(dev, "wq %d name not set.\n", wq->id); + goto err; + } + + /* Shared WQ checks */ + if (wq_shared(wq)) { + if (!device_swq_supported(idxd)) { + dev_dbg(dev, "PASID not enabled and shared wq.\n"); + goto err; + } + /* + * Shared wq with the threshold set to 0 means the user + * did not set the threshold or transitioned from a + * dedicated wq but did not set threshold. A value + * of 0 would effectively disable the shared wq. The + * driver does not allow a value of 0 to be set for + * threshold via sysfs. 
+ */ + if (wq->threshold == 0) { + dev_dbg(dev, "Shared wq and threshold 0.\n"); + goto err; + } + } + + rc = idxd_wq_alloc_resources(wq); + if (rc < 0) { + dev_dbg(dev, "wq resource alloc failed\n"); + goto err; + } + + spin_lock_irqsave(&idxd->dev_lock, flags); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); + spin_unlock_irqrestore(&idxd->dev_lock, flags); + if (rc < 0) { + dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc); + goto err; + } + + rc = idxd_wq_enable(wq); + if (rc < 0) { + dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc); + goto err; + } + + rc = idxd_wq_map_portal(wq); + if (rc < 0) { + dev_dbg(dev, "wq %d portal mapping failed: %d\n", wq->id, rc); + goto err_map_portal; + } + + wq->client_count = 0; + + if (wq->type == IDXD_WQT_KERNEL) { + rc = idxd_wq_init_percpu_ref(wq); + if (rc < 0) { + dev_dbg(dev, "wq %d percpu_ref setup failed\n", wq->id); + goto err_cpu_ref; + } + } + + if (is_idxd_wq_dmaengine(wq)) { + rc = idxd_register_dma_channel(wq); + if (rc < 0) { + dev_dbg(dev, "wq %d DMA channel register failed\n", wq->id); + goto err_client; + } + } else if (is_idxd_wq_cdev(wq)) { + rc = idxd_wq_add_cdev(wq); + if (rc < 0) { + dev_dbg(dev, "wq %d cdev creation failed\n", wq->id); + goto err_client; + } + } + + dev_info(dev, "wq %s enabled\n", dev_name(wq_confdev(wq))); + return 0; + +err_client: + idxd_wq_quiesce(wq); +err_cpu_ref: + idxd_wq_unmap_portal(wq); +err_map_portal: + rc = idxd_wq_disable(wq, false); + if (rc < 0) + dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq))); +err: + return rc; +} + +int drv_enable_wq(struct idxd_wq *wq) +{ + int rc; + + mutex_lock(&wq->wq_lock); + rc = __drv_enable_wq(wq); + mutex_unlock(&wq->wq_lock); + return rc; +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 9fc1a88f336d..551a61fa1aff 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -495,6 +495,7 @@ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ +int drv_enable_wq(struct idxd_wq *wq); int idxd_device_init_reset(struct idxd_device *idxd); int idxd_device_enable(struct idxd_device *idxd); int idxd_device_disable(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 019079e5adb9..7af5f1df3c6a 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -39,128 +39,6 @@ static int idxd_config_bus_match(struct device *dev, return matched; } -static int enable_wq(struct idxd_wq *wq) -{ - struct idxd_device *idxd = wq->idxd; - struct device *dev = &idxd->pdev->dev; - unsigned long flags; - int rc; - - mutex_lock(&wq->wq_lock); - - if (idxd->state != IDXD_DEV_ENABLED) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "Enabling while device not enabled.\n"); - return -EPERM; - } - - if (wq->state != IDXD_WQ_DISABLED) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ %d already enabled.\n", wq->id); - return -EBUSY; - } - - if (!wq->group) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ not attached to group.\n"); - return -EINVAL; - } - - if (strlen(wq->name) == 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ name not set.\n"); - return -EINVAL; - } - - /* Shared WQ checks */ - if (wq_shared(wq)) { - if (!device_swq_supported(idxd)) { - dev_warn(dev, "PASID not enabled and shared WQ.\n"); - mutex_unlock(&wq->wq_lock); - return -ENXIO; - } - /* - * Shared wq with the threshold set to 0 means the user - * 
did not set the threshold or transitioned from a - * dedicated wq but did not set threshold. A value - * of 0 would effectively disable the shared wq. The - * driver does not allow a value of 0 to be set for - * threshold via sysfs. - */ - if (wq->threshold == 0) { - dev_warn(dev, "Shared WQ and threshold 0.\n"); - mutex_unlock(&wq->wq_lock); - return -EINVAL; - } - } - - rc = idxd_wq_alloc_resources(wq); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ resource alloc failed\n"); - return rc; - } - - spin_lock_irqsave(&idxd->dev_lock, flags); - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) - rc = idxd_device_config(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "Writing WQ %d config failed: %d\n", wq->id, rc); - return rc; - } - - rc = idxd_wq_enable(wq); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ %d enabling failed: %d\n", wq->id, rc); - return rc; - } - - rc = idxd_wq_map_portal(wq); - if (rc < 0) { - dev_warn(dev, "wq portal mapping failed: %d\n", rc); - rc = idxd_wq_disable(wq, false); - if (rc < 0) - dev_warn(dev, "IDXD wq disable failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - - wq->client_count = 0; - - if (wq->type == IDXD_WQT_KERNEL) { - rc = idxd_wq_init_percpu_ref(wq); - if (rc < 0) { - dev_dbg(dev, "percpu_ref setup failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - } - - if (is_idxd_wq_dmaengine(wq)) { - rc = idxd_register_dma_channel(wq); - if (rc < 0) { - dev_dbg(dev, "DMA channel register failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - } else if (is_idxd_wq_cdev(wq)) { - rc = idxd_wq_add_cdev(wq); - if (rc < 0) { - dev_dbg(dev, "Cdev creation failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - } - - mutex_unlock(&wq->wq_lock); - dev_info(dev, "wq %s enabled\n", dev_name(wq_confdev(wq))); - - return 0; -} - static int idxd_config_bus_probe(struct device *dev) { int rc = 0; @@ -205,7 +83,7 @@ static int idxd_config_bus_probe(struct device *dev) } else if (is_idxd_wq_dev(dev)) { struct idxd_wq *wq = confdev_to_wq(dev); - return enable_wq(wq); + return drv_enable_wq(wq); } return -ENODEV; -- Gitee From c80798af464a5a5f0ce4685b07f4b9d38023233e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:43:37 -0700 Subject: [PATCH 055/173] dmaengine: idxd: move wq_disable() to device.c mainline inclusion from mainline-v5.15 commit 69e4f8be596d897679e44e86a323629537c02975 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 69e4f8be596d dmaengine: idxd: move wq_disable() to device.c. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Move the wq_disable() function to device.c in preparation of setting up the idxd internal sub-driver framework. No logic changes. 
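The function pair being moved keeps the usual kernel locked/unlocked split: the double-underscore helper documents its locking contract with lockdep_assert_held() instead of taking the lock, and the public wrapper owns the mutex. A condensed sketch of that pattern, using only names that appear in the hunks below (the real __drv_disable_wq() carries more teardown steps):

	static void __drv_disable_wq(struct idxd_wq *wq)
	{
		lockdep_assert_held(&wq->wq_lock);	/* contract: caller holds the lock */
		idxd_wq_unmap_portal(wq);
		idxd_wq_drain(wq);
		idxd_wq_reset(wq);
	}

	void drv_disable_wq(struct idxd_wq *wq)
	{
		mutex_lock(&wq->wq_lock);
		__drv_disable_wq(wq);
		mutex_unlock(&wq->wq_lock);
	}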
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637461775.744545.9644048686618957886.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/sysfs.c | 38 +------------------------------------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index b1c509bcfa31..8d8e249931a9 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1253,3 +1253,40 @@ int drv_enable_wq(struct idxd_wq *wq) mutex_unlock(&wq->wq_lock); return rc; } + +static void __drv_disable_wq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + + lockdep_assert_held(&wq->wq_lock); + + if (wq->type == IDXD_WQT_KERNEL) + idxd_wq_quiesce(wq); + + if (is_idxd_wq_dmaengine(wq)) + idxd_unregister_dma_channel(wq); + else if (is_idxd_wq_cdev(wq)) + idxd_wq_del_cdev(wq); + + if (idxd_wq_refcount(wq)) + dev_warn(dev, "Clients has claim on wq %d: %d\n", + wq->id, idxd_wq_refcount(wq)); + + idxd_wq_unmap_portal(wq); + + idxd_wq_drain(wq); + idxd_wq_reset(wq); + + idxd_wq_free_resources(wq); + wq->client_count = 0; + + dev_info(dev, "wq %s disabled\n", dev_name(wq_confdev(wq))); +} + +void drv_disable_wq(struct idxd_wq *wq) +{ + mutex_lock(&wq->wq_lock); + __drv_disable_wq(wq); + mutex_unlock(&wq->wq_lock); +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 551a61fa1aff..e96ddbfc4569 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -496,6 +496,7 @@ void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ int drv_enable_wq(struct idxd_wq *wq); +void drv_disable_wq(struct idxd_wq *wq); int idxd_device_init_reset(struct idxd_device *idxd); int idxd_device_enable(struct idxd_device *idxd); int idxd_device_disable(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 7af5f1df3c6a..bd156ec18672 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -89,42 +89,6 @@ static int idxd_config_bus_probe(struct device *dev) return -ENODEV; } -static void disable_wq(struct idxd_wq *wq) -{ - struct idxd_device *idxd = wq->idxd; - struct device *dev = &idxd->pdev->dev; - - mutex_lock(&wq->wq_lock); - dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(wq_confdev(wq))); - if (wq->state == IDXD_WQ_DISABLED) { - mutex_unlock(&wq->wq_lock); - return; - } - - if (wq->type == IDXD_WQT_KERNEL) - idxd_wq_quiesce(wq); - - if (is_idxd_wq_dmaengine(wq)) - idxd_unregister_dma_channel(wq); - else if (is_idxd_wq_cdev(wq)) - idxd_wq_del_cdev(wq); - - if (idxd_wq_refcount(wq)) - dev_warn(dev, "Clients has claim on wq %d: %d\n", - wq->id, idxd_wq_refcount(wq)); - - idxd_wq_unmap_portal(wq); - - idxd_wq_drain(wq); - idxd_wq_reset(wq); - - idxd_wq_free_resources(wq); - wq->client_count = 0; - mutex_unlock(&wq->wq_lock); - - dev_info(dev, "wq %s disabled\n", dev_name(wq_confdev(wq))); -} - static int idxd_config_bus_remove(struct device *dev) { dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev)); @@ -133,7 +97,7 @@ static int idxd_config_bus_remove(struct device *dev) if (is_idxd_wq_dev(dev)) { struct idxd_wq *wq = confdev_to_wq(dev); - disable_wq(wq); + drv_disable_wq(wq); } else if (is_idxd_dev(dev)) { struct idxd_device *idxd = confdev_to_idxd(dev); int i; -- Gitee From 
bb06ddd0a1ab4b083791391e88fece1bcf679ca6 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:43:43 -0700 Subject: [PATCH 056/173] dmaengine: idxd: remove bus shutdown mainline inclusion from mainline-v5.15 commit 3a5cc01647f07431b342e9703cda0542457ec467 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 3a5cc01647f0 dmaengine: idxd: remove bus shutdown. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Remove the ->shutdown() function for the dsa bus as it does not do anything and is not necessary. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637462319.744545.10383189484257042066.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/sysfs.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index bd156ec18672..6a0a26a2e6ba 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -126,17 +126,11 @@ static int idxd_config_bus_remove(struct device *dev) return 0; } -static void idxd_config_bus_shutdown(struct device *dev) -{ - dev_dbg(dev, "%s called\n", __func__); -} - struct bus_type dsa_bus_type = { .name = "dsa", .match = idxd_config_bus_match, .probe = idxd_config_bus_probe, .remove = idxd_config_bus_remove, - .shutdown = idxd_config_bus_shutdown, }; static struct idxd_device_driver dsa_drv = { -- Gitee From 1f7e58df26a2f9af982ffbba22c7429b0ecbc530 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:43:49 -0700 Subject: [PATCH 057/173] dmaengine: idxd: remove iax_bus_type prototype mainline inclusion from mainline-v5.15 commit 1c264299431e9a105f3974ad49b6bccc3f03540f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 1c264299431e dmaengine: idxd: remove iax_bus_type prototype. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Remove the unused iax_bus_type prototype declaration. It should have been removed when iax_bus_type was removed. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637462909.744545.7106049898386277608.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index e96ddbfc4569..4c3d3eb94450 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -370,7 +370,6 @@ static inline void idxd_dev_set_type(struct idxd_dev *idev, int type) } extern struct bus_type dsa_bus_type; -extern struct bus_type iax_bus_type; extern bool support_enqcmd; extern struct ida idxd_ida; -- Gitee From 4cfff5b30096f8341bd8157dee424eba07a8f302 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:43:55 -0700 Subject: [PATCH 058/173] dmaengine: idxd: fix bus_probe() and bus_remove() for dsa_bus mainline inclusion from mainline-v5.15 commit fcc2281b142bf14e3534d6b1150991194f8d1d44 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit fcc2281b142b dmaengine: idxd: fix bus_probe() and bus_remove() for dsa_bus. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The current implementation has put all the code that should be in a driver probe/remove in the bus probe/remove function.
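With the fix, the bus callbacks collapse into thin shims that recover the idxd-specific driver from the generic struct device_driver and delegate to its ->probe()/->remove(). A condensed sketch of the probe shim as it appears in the hunks below:

	static int idxd_config_bus_probe(struct device *dev)
	{
		/* recover the idxd driver wrapper from the generic driver */
		struct idxd_device_driver *idxd_drv =
			container_of(dev->driver, struct idxd_device_driver, drv);

		return idxd_drv->probe(confdev_to_idxd_dev(dev));
	}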
Add ->probe() and ->remove() support for the dsa_drv and move all that code out of bus probe/remove. The change does not split out the distinction between device sub-driver and wq sub-driver. It only cleans up the bus calls. The split-out will be addressed in follow-on patches. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637463586.744545.5806250155539938643.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 24 +++++---- drivers/dma/idxd/sysfs.c | 108 ++++++++++++++++++++------------------- 2 files changed, 69 insertions(+), 63 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 4c3d3eb94450..493958ecc208 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -51,6 +51,8 @@ enum idxd_type { struct idxd_device_driver { const char *name; + int (*probe)(struct idxd_dev *idxd_dev); + void (*remove)(struct idxd_dev *idxd_dev); struct device_driver drv; }; @@ -323,19 +325,21 @@ enum idxd_completion_status { #define cdev_dev(cdev) &cdev->idxd_dev.conf_dev #define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev) +#define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev) +#define idxd_dev_to_wq(idxd_dev) container_of(idxd_dev, struct idxd_wq, idxd_dev) static inline struct idxd_device *confdev_to_idxd(struct device *dev) { struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); - return container_of(idxd_dev, struct idxd_device, idxd_dev); + return idxd_dev_to_idxd(idxd_dev); } static inline struct idxd_wq *confdev_to_wq(struct device *dev) { struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); - return container_of(idxd_dev, struct idxd_wq, idxd_dev); + return idxd_dev_to_wq(idxd_dev); } static inline struct idxd_engine *confdev_to_engine(struct device *dev) @@ -379,24 +383,24 @@ extern struct device_type idxd_wq_device_type; extern struct device_type idxd_engine_device_type; extern struct device_type idxd_group_device_type; -static inline bool is_dsa_dev(struct device *dev) +static inline bool is_dsa_dev(struct idxd_dev *idxd_dev) { - return dev->type == &dsa_device_type; + return idxd_dev->type == IDXD_DEV_DSA; } -static inline bool is_iax_dev(struct device *dev) +static inline bool is_iax_dev(struct idxd_dev *idxd_dev) { - return dev->type == &iax_device_type; + return idxd_dev->type == IDXD_DEV_IAX; } -static inline bool is_idxd_dev(struct device *dev) +static inline bool is_idxd_dev(struct idxd_dev *idxd_dev) { - return is_dsa_dev(dev) || is_iax_dev(dev); + return is_dsa_dev(idxd_dev) || is_iax_dev(idxd_dev); } -static inline bool is_idxd_wq_dev(struct device *dev) +static inline bool is_idxd_wq_dev(struct idxd_dev *idxd_dev) { - return dev->type == &idxd_wq_device_type; + return idxd_dev->type == IDXD_DEV_WQ; } static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 6a0a26a2e6ba..c62abb5c6d58 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -19,69 +19,80 @@ static char *idxd_wq_type_names[] = { static int idxd_config_bus_match(struct device *dev, struct device_driver *drv) { - int matched = 0; + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); - if (is_idxd_dev(dev)) { - matched = 1; - } else if (is_idxd_wq_dev(dev)) { - struct idxd_wq *wq = confdev_to_wq(dev); + return (is_idxd_dev(idxd_dev) || is_idxd_wq_dev(idxd_dev)); +} - if (wq->state != IDXD_WQ_DISABLED) { - dev_dbg(dev, "%s not
disabled\n", dev_name(dev)); - return 0; - } - matched = 1; - } +static int idxd_config_bus_probe(struct device *dev) +{ + struct idxd_device_driver *idxd_drv = + container_of(dev->driver, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); - if (matched) - dev_dbg(dev, "%s matched\n", dev_name(dev)); + return idxd_drv->probe(idxd_dev); +} - return matched; +static int idxd_config_bus_remove(struct device *dev) +{ + struct idxd_device_driver *idxd_drv = + container_of(dev->driver, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + idxd_drv->remove(idxd_dev); + return 0; } -static int idxd_config_bus_probe(struct device *dev) +struct bus_type dsa_bus_type = { + .name = "dsa", + .match = idxd_config_bus_match, + .probe = idxd_config_bus_probe, + .remove = idxd_config_bus_remove, +}; + +static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev) { - int rc = 0; + struct device *dev = &idxd_dev->conf_dev; unsigned long flags; + int rc; - dev_dbg(dev, "%s called\n", __func__); - - if (is_idxd_dev(dev)) { - struct idxd_device *idxd = confdev_to_idxd(dev); + if (is_idxd_dev(idxd_dev)) { + struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); - if (!try_module_get(THIS_MODULE)) + if (idxd->state != IDXD_DEV_DISABLED) return -ENXIO; - /* Perform IDXD configuration and enabling */ + /* Device configuration */ spin_lock_irqsave(&idxd->dev_lock, flags); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) rc = idxd_device_config(idxd); spin_unlock_irqrestore(&idxd->dev_lock, flags); if (rc < 0) { - module_put(THIS_MODULE); - dev_warn(dev, "Device config failed: %d\n", rc); + dev_dbg(dev, "Device config failed: %d\n", rc); return rc; } - /* start device */ + /* Start device */ rc = idxd_device_enable(idxd); if (rc < 0) { - module_put(THIS_MODULE); dev_warn(dev, "Device enable failed: %d\n", rc); return rc; } - dev_info(dev, "Device %s enabled\n", dev_name(dev)); - + /* Setup DMA device without channels */ rc = idxd_register_dma_device(idxd); if (rc < 0) { - module_put(THIS_MODULE); dev_dbg(dev, "Failed to register dmaengine device\n"); + idxd_device_disable(idxd); return rc; } + + dev_info(dev, "Device %s enabled\n", dev_name(dev)); return 0; - } else if (is_idxd_wq_dev(dev)) { - struct idxd_wq *wq = confdev_to_wq(dev); + } + + if (is_idxd_wq_dev(idxd_dev)) { + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); return drv_enable_wq(wq); } @@ -89,21 +100,14 @@ static int idxd_config_bus_probe(struct device *dev) return -ENODEV; } -static int idxd_config_bus_remove(struct device *dev) +static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev) { - dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev)); - - /* disable workqueue here */ - if (is_idxd_wq_dev(dev)) { - struct idxd_wq *wq = confdev_to_wq(dev); + struct device *dev = &idxd_dev->conf_dev; - drv_disable_wq(wq); - } else if (is_idxd_dev(dev)) { - struct idxd_device *idxd = confdev_to_idxd(dev); + if (is_idxd_dev(idxd_dev)) { + struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); int i; - dev_dbg(dev, "%s removing dev %s\n", __func__, - dev_name(idxd_confdev(idxd))); for (i = 0; i < idxd->max_wqs; i++) { struct idxd_wq *wq = idxd->wqs[i]; @@ -118,23 +122,21 @@ static int idxd_config_bus_remove(struct device *dev) idxd_device_disable(idxd); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) idxd_device_reset(idxd); - module_put(THIS_MODULE); - - dev_info(dev, "Device %s disabled\n", dev_name(dev)); + return; } - return 0; -} + if (is_idxd_wq_dev(idxd_dev)) { + struct 
idxd_wq *wq = idxd_dev_to_wq(idxd_dev); -struct bus_type dsa_bus_type = { - .name = "dsa", - .match = idxd_config_bus_match, - .probe = idxd_config_bus_probe, - .remove = idxd_config_bus_remove, -}; + drv_disable_wq(wq); + return; + } +} static struct idxd_device_driver dsa_drv = { .name = "dsa", + .probe = idxd_dsa_drv_probe, + .remove = idxd_dsa_drv_remove, }; /* IDXD generic driver setup */ -- Gitee From 6492cbaa7fe41b2d3586a17f1dcaed7e670646f4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:44:01 -0700 Subject: [PATCH 059/173] dmaengine: idxd: move probe() bits for idxd 'struct device' to device.c mainline inclusion from mainline-v5.15 commit bd42805b5da33b9c75f3ce0ae9d6ff0ec3f2cd6b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit bd42805b5da3 dmaengine: idxd: move probe() bits for idxd 'struct device' to device.c. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Move the code related to a ->probe() function for the idxd 'struct device' to device.c to prep for the idxd device sub-driver in device.c. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637464189.744545.17423830646786162194.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 37 ++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/sysfs.c | 40 ++------------------------------------- 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 8d8e249931a9..b9aa209efee4 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1290,3 +1290,40 @@ void drv_disable_wq(struct idxd_wq *wq) __drv_disable_wq(wq); mutex_unlock(&wq->wq_lock); } + +int idxd_device_drv_probe(struct idxd_dev *idxd_dev) +{ + struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); + unsigned long flags; + int rc = 0; + + /* + * Device should be in disabled state for the idxd_drv to load. If it's in + * enabled state, then the device was altered outside of driver's control. + * If the state is in halted state, then we don't want to proceed. 
+ */ + if (idxd->state != IDXD_DEV_DISABLED) + return -ENXIO; + + /* Device configuration */ + spin_lock_irqsave(&idxd->dev_lock, flags); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); + spin_unlock_irqrestore(&idxd->dev_lock, flags); + if (rc < 0) + return -ENXIO; + + /* Start device */ + rc = idxd_device_enable(idxd); + if (rc < 0) + return rc; + + /* Setup DMA device without channels */ + rc = idxd_register_dma_device(idxd); + if (rc < 0) { + idxd_device_disable(idxd); + return rc; + } + + return 0; +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 493958ecc208..dbbd36feb462 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -498,6 +498,7 @@ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ +int idxd_device_drv_probe(struct idxd_dev *idxd_dev); int drv_enable_wq(struct idxd_wq *wq); void drv_disable_wq(struct idxd_wq *wq); int idxd_device_init_reset(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index c62abb5c6d58..fd299e9bc726 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -52,44 +52,8 @@ struct bus_type dsa_bus_type = { static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev) { - struct device *dev = &idxd_dev->conf_dev; - unsigned long flags; - int rc; - - if (is_idxd_dev(idxd_dev)) { - struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); - - if (idxd->state != IDXD_DEV_DISABLED) - return -ENXIO; - - /* Device configuration */ - spin_lock_irqsave(&idxd->dev_lock, flags); - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) - rc = idxd_device_config(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); - if (rc < 0) { - dev_dbg(dev, "Device config failed: %d\n", rc); - return rc; - } - - /* Start device */ - rc = idxd_device_enable(idxd); - if (rc < 0) { - dev_warn(dev, "Device enable failed: %d\n", rc); - return rc; - } - - /* Setup DMA device without channels */ - rc = idxd_register_dma_device(idxd); - if (rc < 0) { - dev_dbg(dev, "Failed to register dmaengine device\n"); - idxd_device_disable(idxd); - return rc; - } - - dev_info(dev, "Device %s enabled\n", dev_name(dev)); - return 0; - } + if (is_idxd_dev(idxd_dev)) + return idxd_device_drv_probe(idxd_dev); if (is_idxd_wq_dev(idxd_dev)) { struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); -- Gitee From 1ad1b3e090b113ee6a0de1dcdb601162ed06830b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:44:07 -0700 Subject: [PATCH 060/173] dmaengine: idxd: idxd: move remove() bits for idxd 'struct device' to device.c mainline inclusion from mainline-v5.15 commit 745e92a6d816277fcbd231bda5ad2d882b22fe52 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 745e92a6d816 dmaengine: idxd: idxd: move remove() bits for idxd 'struct device' to device.c. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Move the code related to a ->remove() function for the idxd 'struct device' to device.c to prep for the idxd device sub-driver in device.c. 
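Worth noting in the moved code is the teardown order: wq sub-drivers bound to child devices must be released before the parent device is disabled. A compressed sketch of idxd_device_drv_remove() (the full version in the diff below also warns about each still-active wq):

	void idxd_device_drv_remove(struct idxd_dev *idxd_dev)
	{
		struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
		int i;

		/* detach any sub-driver still bound to a child wq */
		for (i = 0; i < idxd->max_wqs; i++) {
			struct idxd_wq *wq = idxd->wqs[i];

			if (wq->state != IDXD_WQ_DISABLED)
				device_release_driver(wq_confdev(wq));
		}

		/* then unwind the device itself */
		idxd_unregister_dma_device(idxd);
		idxd_device_disable(idxd);
		if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
			idxd_device_reset(idxd);
	}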
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637464768.744545.15797285510999151668.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 22 ++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/sysfs.c | 20 +------------------- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index b9aa209efee4..d5a0b6fff3b9 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1327,3 +1327,25 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) return 0; } + +void idxd_device_drv_remove(struct idxd_dev *idxd_dev) +{ + struct device *dev = &idxd_dev->conf_dev; + struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + struct device *wq_dev = wq_confdev(wq); + + if (wq->state == IDXD_WQ_DISABLED) + continue; + dev_warn(dev, "Active wq %d on disable %s.\n", i, dev_name(wq_dev)); + device_release_driver(wq_dev); + } + + idxd_unregister_dma_device(idxd); + idxd_device_disable(idxd); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + idxd_device_reset(idxd); +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index dbbd36feb462..1c8abba13470 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -499,6 +499,7 @@ void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ int idxd_device_drv_probe(struct idxd_dev *idxd_dev); +void idxd_device_drv_remove(struct idxd_dev *idxd_dev); int drv_enable_wq(struct idxd_wq *wq); void drv_disable_wq(struct idxd_wq *wq); int idxd_device_init_reset(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index fd299e9bc726..a5fedf787f3e 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -66,26 +66,8 @@ static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev) static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev) { - struct device *dev = &idxd_dev->conf_dev; - if (is_idxd_dev(idxd_dev)) { - struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); - int i; - - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = idxd->wqs[i]; - - if (wq->state == IDXD_WQ_DISABLED) - continue; - dev_warn(dev, "Active wq %d on disable %s.\n", i, - dev_name(wq_confdev(wq))); - device_release_driver(wq_confdev(wq)); - } - - idxd_unregister_dma_device(idxd); - idxd_device_disable(idxd); - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) - idxd_device_reset(idxd); + idxd_device_drv_remove(idxd_dev); return; } -- Gitee From d81fd572a0703805d6161bf708c12eaa7abb62db Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:44:13 -0700 Subject: [PATCH 061/173] dmanegine: idxd: open code the dsa_drv registration mainline inclusion from mainline-v5.15 commit c05257b5600bb35a580ecdb25695efff26326d59 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit c05257b5600b dmanegine: idxd: open code the dsa_drv registration. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Don't need a wrapper to register the driver. Just do it directly. 
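At the call sites the change is a single level of unwrapping, as the init.c hunks below show; schematically:

	/* before: a wrapper hid which driver was being registered */
	err = idxd_register_driver();

	/* after: the driver object is named directly at the call site */
	err = idxd_driver_register(&dsa_drv);
	if (err < 0)
		goto err_dsa_driver_register;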
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637465319.744545.16325178432532362906.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 10 +++++----- drivers/dma/idxd/sysfs.c | 13 +------------ 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 1c8abba13470..7fc26b7727c0 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -56,6 +56,8 @@ struct idxd_device_driver { struct device_driver drv; }; +extern struct idxd_device_driver dsa_drv; + struct idxd_irq_entry { struct idxd_device *idxd; int id; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c22225b14c5d..5b628e6c04bf 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -840,9 +840,9 @@ static int __init idxd_init_module(void) if (err < 0) return err; - err = idxd_register_driver(); + err = idxd_driver_register(&dsa_drv); if (err < 0) - goto err_idxd_driver_register; + goto err_dsa_driver_register; err = idxd_cdev_register(); if (err) @@ -857,8 +857,8 @@ err_pci_register: idxd_cdev_remove(); err_cdev_register: - idxd_unregister_driver(); -err_idxd_driver_register: + idxd_driver_unregister(&dsa_drv); +err_dsa_driver_register: idxd_unregister_bus_type(); return err; } @@ -866,7 +866,7 @@ module_init(idxd_init_module); static void __exit idxd_exit_module(void) { - idxd_unregister_driver(); + idxd_driver_unregister(&dsa_drv); pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); idxd_unregister_bus_type(); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index a5fedf787f3e..3692a9c06e33 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -79,23 +79,12 @@ static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev) } } -static struct idxd_device_driver dsa_drv = { +struct idxd_device_driver dsa_drv = { .name = "dsa", .probe = idxd_dsa_drv_probe, .remove = idxd_dsa_drv_remove, }; -/* IDXD generic driver setup */ -int idxd_register_driver(void) -{ - return idxd_driver_register(&dsa_drv); -} - -void idxd_unregister_driver(void) -{ - idxd_driver_unregister(&dsa_drv); -} - /* IDXD engine attributes */ static ssize_t engine_group_id_show(struct device *dev, struct device_attribute *attr, char *buf) -- Gitee From 266807b9def8af7daf1687d950da4d5d05bfe454 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:44:18 -0700 Subject: [PATCH 062/173] dmaengine: idxd: add type to driver in order to allow device matching mainline inclusion from mainline-v5.15 commit 5fee6567ec387088ec965ee60c63051bbe102cac category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 5fee6567ec38 dmaengine: idxd: add type to driver in order to allow device matching. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add an array of supported device types to the idxd_device_driver definition in order to enable simple matching of device type to a given driver. The deprecated / omnibus dsa_drv driver specifies IDXD_DEV_NONE as its only role is to service legacy userspace (old accel-config) directed bind requests and route them to the proper driver. It need not attach to a device when the bus is autoprobed. The accel-config tooling is being updated to drop its dependency on this deprecated bind scheme.
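Under this scheme a sub-driver publishes the device types it can bind to as an IDXD_DEV_NONE-terminated array, and the bus match callback simply walks that array. A sketch of what a hypothetical wq-only driver declaration looks like (the example_* names are illustrative stubs, not part of the patch; the struct layout matches the idxd.h hunk below):

	static enum idxd_dev_type dev_types[] = {
		IDXD_DEV_WQ,	/* this driver binds only to wq devices */
		IDXD_DEV_NONE,	/* sentinel terminating the list */
	};

	struct idxd_device_driver example_drv = {
		.name	= "example",
		.type	= dev_types,
		.probe	= example_probe,	/* illustrative callbacks */
		.remove	= example_remove,
	};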
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637465882.744545.17456174666211577867.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 5 +++++ drivers/dma/idxd/sysfs.c | 16 +++++++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 7fc26b7727c0..4bb5a65ec237 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -51,6 +51,7 @@ enum idxd_type { struct idxd_device_driver { const char *name; + enum idxd_dev_type *type; int (*probe)(struct idxd_dev *idxd_dev); void (*remove)(struct idxd_dev *idxd_dev); struct device_driver drv; }; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 5b628e6c04bf..544ff7137292 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -879,6 +879,11 @@ int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *o { struct device_driver *drv = &idxd_drv->drv; + if (!idxd_drv->type) { + pr_debug("driver type not set (%ps)\n", __builtin_return_address(0)); + return -EINVAL; + } + drv->name = idxd_drv->name; drv->bus = &dsa_bus_type; drv->owner = owner; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 3692a9c06e33..6dfcac629bd7 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -19,9 +19,18 @@ static char *idxd_wq_type_names[] = { static int idxd_config_bus_match(struct device *dev, struct device_driver *drv) { + struct idxd_device_driver *idxd_drv = + container_of(drv, struct idxd_device_driver, drv); struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + int i = 0; + + while (idxd_drv->type[i] != IDXD_DEV_NONE) { + if (idxd_dev->type == idxd_drv->type[i]) + return 1; + i++; + } - return (is_idxd_dev(idxd_dev) || is_idxd_wq_dev(idxd_dev)); + return 0; } static int idxd_config_bus_probe(struct device *dev) @@ -79,10 +88,15 @@ static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev) } } +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_NONE, +}; + struct idxd_device_driver dsa_drv = { .name = "dsa", .probe = idxd_dsa_drv_probe, .remove = idxd_dsa_drv_remove, + .type = dev_types, }; /* IDXD engine attributes */ -- Gitee From 3cce6aab4900667b6b5c211d683a6a54ed80f26e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:44:24 -0700 Subject: [PATCH 063/173] dmaengine: idxd: create idxd_device sub-driver mainline inclusion from mainline-v5.15 commit 034b3290ba257f1a3c8730f3fba72e11645e7b50 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 034b3290ba25 dmaengine: idxd: create idxd_device sub-driver. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The original architecture of /sys/bus/dsa invented a scheme whereby a single entry in the list of bus drivers, /sys/bus/drivers/dsa, handled all device types and internally routed them to different drivers. Those internal drivers were invisible to userspace. Now, as /sys/bus/dsa wants to grow support for alternate drivers for a given device, for example vfio-mdev instead of kernel-internal-dmaengine, a proper bus device-driver model is needed. The first step in that process is separating the existing omnibus/implicit "dsa" driver into proper individual drivers registered on /sys/bus/dsa. Establish the idxd_drv driver that controls the enabling and disabling of the accelerator device.
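The resulting driver, shown in full in the device.c hunk below, is a first-class entry on the dsa bus whose scope is captured entirely by its type list and callbacks (comments added here for orientation):

	static enum idxd_dev_type dev_types[] = {
		IDXD_DEV_DSA,	/* a whole DSA device */
		IDXD_DEV_IAX,	/* a whole IAX device */
		IDXD_DEV_NONE,
	};

	struct idxd_device_driver idxd_drv = {
		.type	= dev_types,
		.probe	= idxd_device_drv_probe,	/* config, enable, DMA device setup */
		.remove	= idxd_device_drv_remove,	/* detach wqs, disable, reset */
		.name	= "idxd",
	};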
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637466439.744545.15210886092627144577.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 13 +++++++++++++ drivers/dma/idxd/idxd.h | 3 +++ drivers/dma/idxd/init.c | 7 +++++++ 3 files changed, 23 insertions(+) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index d5a0b6fff3b9..12ae3f1639f1 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1349,3 +1349,16 @@ void idxd_device_drv_remove(struct idxd_dev *idxd_dev) if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) idxd_device_reset(idxd); } + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_DSA, + IDXD_DEV_IAX, + IDXD_DEV_NONE, +}; + +struct idxd_device_driver idxd_drv = { + .type = dev_types, + .probe = idxd_device_drv_probe, + .remove = idxd_device_drv_remove, + .name = "idxd", +}; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 4bb5a65ec237..a356d227f755 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -58,6 +58,7 @@ struct idxd_device_driver { }; extern struct idxd_device_driver dsa_drv; +extern struct idxd_device_driver idxd_drv; struct idxd_irq_entry { struct idxd_device *idxd; @@ -501,6 +502,8 @@ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ +int idxd_register_idxd_drv(void); +void idxd_unregister_idxd_drv(void); int idxd_device_drv_probe(struct idxd_dev *idxd_dev); void idxd_device_drv_remove(struct idxd_dev *idxd_dev); int drv_enable_wq(struct idxd_wq *wq); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 544ff7137292..c19b03c17ab9 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -840,6 +840,10 @@ static int __init idxd_init_module(void) if (err < 0) return err; + err = idxd_driver_register(&idxd_drv); + if (err < 0) + goto err_idxd_driver_register; + err = idxd_driver_register(&dsa_drv); if (err < 0) goto err_dsa_driver_register; @@ -859,6 +863,8 @@ static int __init idxd_init_module(void) err_cdev_register: idxd_driver_unregister(&dsa_drv); err_dsa_driver_register: + idxd_driver_unregister(&idxd_drv); +err_idxd_driver_register: idxd_unregister_bus_type(); return err; } @@ -866,6 +872,7 @@ module_init(idxd_init_module); static void __exit idxd_exit_module(void) { + idxd_driver_unregister(&idxd_drv); idxd_driver_unregister(&dsa_drv); pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); -- Gitee From b8b1f3311d1f49c5f18fca621d7cbf75bcdc6b97 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:44:30 -0700 Subject: [PATCH 064/173] dmaengine: idxd: create dmaengine driver for wq 'device' mainline inclusion from mainline-v5.15 commit 0cda4f6986a3824cac500f66326ff267bf37110f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 0cda4f6986a3 dmaengine: idxd: create dmaengine driver for wq 'device'. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The original architecture of /sys/bus/dsa invented a scheme whereby a single entry in the list of bus drivers, /sys/bus/drivers/dsa, handled all device types and internally routed them to different drivers. Those internal drivers were invisible to userspace. 
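The internal routing referred to here was a per-type dispatch buried inside the single driver; in the old code (removed piecewise across this series) it looked like:

	/* old omnibus driver: route by wq type inside one probe path */
	if (is_idxd_wq_dmaengine(wq))
		rc = idxd_register_dma_channel(wq);
	else if (is_idxd_wq_cdev(wq))
		rc = idxd_wq_add_cdev(wq);

After the split, each of those branches becomes the probe of its own userspace-visible bus driver.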
Now, as /sys/bus/dsa wants to grow support for alternate drivers for a given device, for example vfio-mdev instead of kernel-internal-dmaengine, a proper bus device-driver model is needed. The first step in that process is separating the existing omnibus/implicit "dsa" driver into proper individual drivers registered on /sys/bus/dsa. Establish the idxd_dmaengine_drv driver that controls the enabling and disabling of the wq and also registers and unregisters the dma channel. idxd_wq_alloc_resources() and idxd_wq_free_resources() also get moved to the dmaengine driver. The resources (dma descriptor allocation and setup) are only used by the dmaengine driver and should only happen when it loads. The char dev driver (cdev) related bits are left in the __drv_enable_wq() and __drv_disable_wq() calls to be moved when we split out the char dev driver just like how the dmaengine driver is split out. WQ autoload support is not expected currently. With the amount of configuration needed for the device, the wq is always expected to be enabled by a tool (or via sysfs) rather than auto-enabled at driver load. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637467033.744545.12330636655625405394.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 40 +++----------------- drivers/dma/idxd/dma.c | 77 +++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 3 ++ drivers/dma/idxd/init.c | 7 ++++ 4 files changed, 92 insertions(+), 35 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 12ae3f1639f1..4dcc9431ae3d 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1130,7 +1130,7 @@ int idxd_device_load_config(struct idxd_device *idxd) return 0; } -static int __drv_enable_wq(struct idxd_wq *wq) +int __drv_enable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; @@ -1178,12 +1178,7 @@ static int __drv_enable_wq(struct idxd_wq *wq) } } - rc = idxd_wq_alloc_resources(wq); - if (rc < 0) { - dev_dbg(dev, "wq resource alloc failed\n"); - goto err; - } - + rc = 0; spin_lock_irqsave(&idxd->dev_lock, flags); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) rc = idxd_device_config(idxd); @@ -1207,21 +1202,7 @@ static int __drv_enable_wq(struct idxd_wq *wq) wq->client_count = 0; - if (wq->type == IDXD_WQT_KERNEL) { - rc = idxd_wq_init_percpu_ref(wq); - if (rc < 0) { - dev_dbg(dev, "wq %d percpu_ref setup failed\n", wq->id); - goto err_cpu_ref; - } - } - - if (is_idxd_wq_dmaengine(wq)) { - rc = idxd_register_dma_channel(wq); - if (rc < 0) { - dev_dbg(dev, "wq %d DMA channel register failed\n", wq->id); - goto err_client; - } - } else if (is_idxd_wq_cdev(wq)) { + if (is_idxd_wq_cdev(wq)) { rc = idxd_wq_add_cdev(wq); if (rc < 0) { dev_dbg(dev, "wq %d cdev creation failed\n", wq->id); @@ -1229,12 +1210,9 @@ static int __drv_enable_wq(struct idxd_wq *wq) } } - dev_info(dev, "wq %s enabled\n", dev_name(wq_confdev(wq))); return 0; err_client: - idxd_wq_quiesce(wq); -err_cpu_ref: idxd_wq_unmap_portal(wq); err_map_portal: rc = idxd_wq_disable(wq, false); @@ -1254,19 +1232,14 @@ int drv_enable_wq(struct idxd_wq *wq) return rc; } -static void __drv_disable_wq(struct idxd_wq *wq) +void __drv_disable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; lockdep_assert_held(&wq->wq_lock); - if (wq->type == IDXD_WQT_KERNEL) - idxd_wq_quiesce(wq); - - if
(is_idxd_wq_dmaengine(wq)) - idxd_unregister_dma_channel(wq); - else if (is_idxd_wq_cdev(wq)) + if (is_idxd_wq_cdev(wq)) idxd_wq_del_cdev(wq); if (idxd_wq_refcount(wq)) @@ -1278,10 +1251,7 @@ static void __drv_disable_wq(struct idxd_wq *wq) idxd_wq_drain(wq); idxd_wq_reset(wq); - idxd_wq_free_resources(wq); wq->client_count = 0; - - dev_info(dev, "wq %s disabled\n", dev_name(wq_confdev(wq))); } void drv_disable_wq(struct idxd_wq *wq) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 89239569e310..7408df389249 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -285,3 +285,80 @@ void idxd_unregister_dma_channel(struct idxd_wq *wq) wq->idxd_chan = NULL; put_device(wq_confdev(wq)); } + +static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) +{ + struct device *dev = &idxd_dev->conf_dev; + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + struct idxd_device *idxd = wq->idxd; + int rc; + + if (idxd->state != IDXD_DEV_ENABLED) + return -ENXIO; + + mutex_lock(&wq->wq_lock); + wq->type = IDXD_WQT_KERNEL; + rc = __drv_enable_wq(wq); + if (rc < 0) { + dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc); + rc = -ENXIO; + goto err; + } + + rc = idxd_wq_alloc_resources(wq); + if (rc < 0) { + dev_dbg(dev, "WQ resource alloc failed\n"); + goto err_res_alloc; + } + + rc = idxd_wq_init_percpu_ref(wq); + if (rc < 0) { + dev_dbg(dev, "percpu_ref setup failed\n"); + goto err_ref; + } + + rc = idxd_register_dma_channel(wq); + if (rc < 0) { + dev_dbg(dev, "Failed to register dma channel\n"); + goto err_dma; + } + + mutex_unlock(&wq->wq_lock); + return 0; + +err_dma: + idxd_wq_quiesce(wq); +err_ref: + idxd_wq_free_resources(wq); +err_res_alloc: + __drv_disable_wq(wq); +err: + wq->type = IDXD_WQT_NONE; + mutex_unlock(&wq->wq_lock); + return rc; +} + +static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + + mutex_lock(&wq->wq_lock); + idxd_wq_quiesce(wq); + idxd_unregister_dma_channel(wq); + __drv_disable_wq(wq); + idxd_wq_free_resources(wq); + wq->type = IDXD_WQT_NONE; + mutex_unlock(&wq->wq_lock); +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_WQ, + IDXD_DEV_NONE, +}; + +struct idxd_device_driver idxd_dmaengine_drv = { + .probe = idxd_dmaengine_drv_probe, + .remove = idxd_dmaengine_drv_remove, + .name = "dmaengine", + .type = dev_types, +}; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index a356d227f755..a840c328bec9 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -59,6 +59,7 @@ struct idxd_device_driver { extern struct idxd_device_driver dsa_drv; extern struct idxd_device_driver idxd_drv; +extern struct idxd_device_driver idxd_dmaengine_drv; struct idxd_irq_entry { struct idxd_device *idxd; @@ -507,7 +508,9 @@ void idxd_unregister_idxd_drv(void); int idxd_device_drv_probe(struct idxd_dev *idxd_dev); void idxd_device_drv_remove(struct idxd_dev *idxd_dev); int drv_enable_wq(struct idxd_wq *wq); +int __drv_enable_wq(struct idxd_wq *wq); void drv_disable_wq(struct idxd_wq *wq); +void __drv_disable_wq(struct idxd_wq *wq); int idxd_device_init_reset(struct idxd_device *idxd); int idxd_device_enable(struct idxd_device *idxd); int idxd_device_disable(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c19b03c17ab9..6f38128ce400 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -844,6 +844,10 @@ static int __init idxd_init_module(void) if (err < 0) goto err_idxd_driver_register; + err = 
idxd_driver_register(&idxd_dmaengine_drv); + if (err < 0) + goto err_idxd_dmaengine_driver_register; + err = idxd_driver_register(&dsa_drv); if (err < 0) goto err_dsa_driver_register; @@ -863,6 +867,8 @@ err_cdev_register: idxd_driver_unregister(&dsa_drv); err_dsa_driver_register: + idxd_driver_unregister(&idxd_dmaengine_drv); +err_idxd_dmaengine_driver_register: idxd_driver_unregister(&idxd_drv); err_idxd_driver_register: idxd_unregister_bus_type(); @@ -872,6 +878,7 @@ module_init(idxd_init_module); static void __exit idxd_exit_module(void) { + idxd_driver_unregister(&idxd_dmaengine_drv); idxd_driver_unregister(&idxd_drv); idxd_driver_unregister(&dsa_drv); pci_unregister_driver(&idxd_pci_driver); -- Gitee From 177d2a426a27d5172b7c2f6c15f3e3025b923a23 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:44:35 -0700 Subject: [PATCH 065/173] dmaengine: idxd: create user driver for wq 'device' mainline inclusion from mainline-v5.15 commit 448c3de8ac8353fc4447738ae3c56c4eb6c2131d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 448c3de8ac83 dmaengine: idxd: create user driver for wq 'device'. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The original architecture of /sys/bus/dsa invented a scheme whereby a single entry in the list of bus drivers, /sys/bus/drivers/dsa, handled all device types and internally routed them to different drivers. Those internal drivers were invisible to userspace. Now, as /sys/bus/dsa wants to grow support for alternate drivers for a given device, for example vfio-mdev instead of kernel-internal-dmaengine, a proper bus device-driver model is needed. The first step in that process is separating the existing omnibus/implicit "dsa" driver into proper individual drivers registered on /sys/bus/dsa. Establish the idxd_user_drv driver that controls the enabling and disabling of the wq and also registers and unregisters a char device to allow user space to mmap the descriptor submission portal. The cdev-related bits are moved to the cdev driver probe/remove and out of the drv_enable/disable_wq() calls. These bits are exclusive to the cdev operation and not part of the generic enable/disable of the wq device.
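That split is visible in the new probe: the generic wq bring-up and the cdev-specific step are now distinct layers. Condensed from idxd_user_drv_probe() in the diff below (error unwinding omitted for brevity):

	mutex_lock(&wq->wq_lock);
	wq->type = IDXD_WQT_USER;		/* claim the wq for userspace */
	rc = __drv_enable_wq(wq);		/* generic: config, enable, portal map */
	if (rc == 0)
		rc = idxd_wq_add_cdev(wq);	/* cdev-specific: char device for mmap */
	mutex_unlock(&wq->wq_lock);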
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637467578.744545.10203997610072341376.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 53 +++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/device.c | 14 ----------- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 7 ++++++ 4 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 3a8c1f0fc14b..c31ef293f86c 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -304,6 +304,59 @@ void idxd_wq_del_cdev(struct idxd_wq *wq) put_device(cdev_dev(idxd_cdev)); } +static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + struct idxd_device *idxd = wq->idxd; + int rc; + + if (idxd->state != IDXD_DEV_ENABLED) + return -ENXIO; + + mutex_lock(&wq->wq_lock); + wq->type = IDXD_WQT_USER; + rc = __drv_enable_wq(wq); + if (rc < 0) + goto err; + + rc = idxd_wq_add_cdev(wq); + if (rc < 0) + goto err_cdev; + + mutex_unlock(&wq->wq_lock); + return 0; + +err_cdev: + __drv_disable_wq(wq); +err: + wq->type = IDXD_WQT_NONE; + mutex_unlock(&wq->wq_lock); + return rc; +} + +static void idxd_user_drv_remove(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + + mutex_lock(&wq->wq_lock); + idxd_wq_del_cdev(wq); + __drv_disable_wq(wq); + wq->type = IDXD_WQT_NONE; + mutex_unlock(&wq->wq_lock); +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_WQ, + IDXD_DEV_NONE, +}; + +struct idxd_device_driver idxd_user_drv = { + .probe = idxd_user_drv_probe, + .remove = idxd_user_drv_remove, + .name = "user", + .type = dev_types, +}; + int idxd_cdev_register(void) { int rc, i; diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 4dcc9431ae3d..9bbc28d9a9eb 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1201,19 +1201,8 @@ int __drv_enable_wq(struct idxd_wq *wq) } wq->client_count = 0; - - if (is_idxd_wq_cdev(wq)) { - rc = idxd_wq_add_cdev(wq); - if (rc < 0) { - dev_dbg(dev, "wq %d cdev creation failed\n", wq->id); - goto err_client; - } - } - return 0; -err_client: - idxd_wq_unmap_portal(wq); err_map_portal: rc = idxd_wq_disable(wq, false); if (rc < 0) @@ -1239,9 +1228,6 @@ void __drv_disable_wq(struct idxd_wq *wq) lockdep_assert_held(&wq->wq_lock); - if (is_idxd_wq_cdev(wq)) - idxd_wq_del_cdev(wq); - if (idxd_wq_refcount(wq)) dev_warn(dev, "Clients has claim on wq %d: %d\n", wq->id, idxd_wq_refcount(wq)); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index a840c328bec9..bacec9b93a7e 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -60,6 +60,7 @@ struct idxd_device_driver { extern struct idxd_device_driver dsa_drv; extern struct idxd_device_driver idxd_drv; extern struct idxd_device_driver idxd_dmaengine_drv; +extern struct idxd_device_driver idxd_user_drv; struct idxd_irq_entry { struct idxd_device *idxd; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 6f38128ce400..33a80f700ff8 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -848,6 +848,10 @@ static int __init idxd_init_module(void) if (err < 0) goto err_idxd_dmaengine_driver_register; + err = idxd_driver_register(&idxd_user_drv); + if (err < 0) + goto err_idxd_user_driver_register; + err = idxd_driver_register(&dsa_drv); if (err < 0) goto err_dsa_driver_register; @@ -867,6 +871,8 @@ static int __init idxd_init_module(void) 
err_cdev_register: idxd_driver_unregister(&dsa_drv); err_dsa_driver_register: + idxd_driver_unregister(&idxd_user_drv); +err_idxd_user_driver_register: idxd_driver_unregister(&idxd_dmaengine_drv); err_idxd_dmaengine_driver_register: idxd_driver_unregister(&idxd_drv); @@ -878,6 +884,7 @@ module_init(idxd_init_module); static void __exit idxd_exit_module(void) { + idxd_driver_unregister(&idxd_user_drv); idxd_driver_unregister(&idxd_dmaengine_drv); idxd_driver_unregister(&idxd_drv); idxd_driver_unregister(&dsa_drv); -- Gitee From b96d5db71d60d46ae5f010c886ed1126707e1686 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:44:41 -0700 Subject: [PATCH 066/173] dmaengine: dsa: move dsa_bus_type out of idxd driver to standalone mainline inclusion from mainline-v5.15 commit d9e5481fca74f870cf2fc2f90a0e77e85c0b5b86 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit d9e5481fca74 dmaengine: dsa: move dsa_bus_type out of idxd driver to standalone. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- In preparation for dsa_drv compat support to be built-in, move the bus code to its own compilation unit. A follow-on patch adds the compat implementation. Recall that the compat implementation allows for the deprecated / omnibus dsa_drv binding scheme rather than the idiomatic organization of a full fledged bus driver per driver type. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637468142.744545.2811632736881720857.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/Kconfig | 4 ++ drivers/dma/Makefile | 2 +- drivers/dma/idxd/Makefile | 5 +++ drivers/dma/idxd/bus.c | 92 +++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/init.c | 30 +------------ drivers/dma/idxd/sysfs.c | 43 ------------------ 6 files changed, 103 insertions(+), 73 deletions(-) create mode 100644 drivers/dma/idxd/bus.c diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 8df68dd4dd06..d65a3e3bd719 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -283,6 +283,10 @@ config INTEL_IDMA64 Enable DMA support for Intel Low Power Subsystem such as found on Intel Skylake PCH. 
+config INTEL_IDXD_BUS + tristate + default INTEL_IDXD + config INTEL_IDXD tristate "Intel Data Accelerators support" depends on PCI && X86_64 && !UML diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 948a8da05f8b..8928816a4f30 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_IMX_DMA) += imx-dma.o obj-$(CONFIG_IMX_SDMA) += imx-sdma.o obj-$(CONFIG_INTEL_IDMA64) += idma64.o obj-$(CONFIG_INTEL_IOATDMA) += ioat/ -obj-$(CONFIG_INTEL_IDXD) += idxd/ +obj-y += idxd/ obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_K3_DMA) += k3dma.o obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index 6d11558756f8..8c29ed4d48c3 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -1,4 +1,9 @@ +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD + obj-$(CONFIG_INTEL_IDXD) += idxd.o idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o + +obj-$(CONFIG_INTEL_IDXD_BUS) += idxd_bus.o +idxd_bus-y := bus.o diff --git a/drivers/dma/idxd/bus.c b/drivers/dma/idxd/bus.c new file mode 100644 index 000000000000..02837f0fb3e4 --- /dev/null +++ b/drivers/dma/idxd/bus.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include "idxd.h" + + +int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *owner, + const char *mod_name) +{ + struct device_driver *drv = &idxd_drv->drv; + + if (!idxd_drv->type) { + pr_debug("driver type not set (%ps)\n", __builtin_return_address(0)); + return -EINVAL; + } + + drv->name = idxd_drv->name; + drv->bus = &dsa_bus_type; + drv->owner = owner; + drv->mod_name = mod_name; + + return driver_register(drv); +} +EXPORT_SYMBOL_GPL(__idxd_driver_register); + +void idxd_driver_unregister(struct idxd_device_driver *idxd_drv) +{ + driver_unregister(&idxd_drv->drv); +} +EXPORT_SYMBOL_GPL(idxd_driver_unregister); + +static int idxd_config_bus_match(struct device *dev, + struct device_driver *drv) +{ + struct idxd_device_driver *idxd_drv = + container_of(drv, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + int i = 0; + + while (idxd_drv->type[i] != IDXD_DEV_NONE) { + if (idxd_dev->type == idxd_drv->type[i]) + return 1; + i++; + } + + return 0; +} + +static int idxd_config_bus_probe(struct device *dev) +{ + struct idxd_device_driver *idxd_drv = + container_of(dev->driver, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return idxd_drv->probe(idxd_dev); +} + +static int idxd_config_bus_remove(struct device *dev) +{ + struct idxd_device_driver *idxd_drv = + container_of(dev->driver, struct idxd_device_driver, drv); + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + idxd_drv->remove(idxd_dev); + return 0; +} + +struct bus_type dsa_bus_type = { + .name = "dsa", + .match = idxd_config_bus_match, + .probe = idxd_config_bus_probe, + .remove = idxd_config_bus_remove, +}; +EXPORT_SYMBOL_GPL(dsa_bus_type); + +static int __init dsa_bus_init(void) +{ + return bus_register(&dsa_bus_type); +} +module_init(dsa_bus_init); + +static void __exit dsa_bus_exit(void) +{ + bus_unregister(&dsa_bus_type); +} +module_exit(dsa_bus_exit); + +MODULE_DESCRIPTION("IDXD driver dsa_bus_type driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 33a80f700ff8..9b797fcdfd7b 
100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -26,6 +26,7 @@ MODULE_VERSION(IDXD_DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Intel Corporation"); +MODULE_IMPORT_NS(IDXD); static bool sva = true; module_param(sva, bool, 0644); @@ -836,10 +837,6 @@ static int __init idxd_init_module(void) perfmon_init(); - err = idxd_register_bus_type(); - if (err < 0) - return err; - err = idxd_driver_register(&idxd_drv); if (err < 0) goto err_idxd_driver_register; @@ -877,7 +874,6 @@ static int __init idxd_init_module(void) err_idxd_dmaengine_driver_register: idxd_driver_unregister(&idxd_drv); err_idxd_driver_register: - idxd_unregister_bus_type(); return err; } module_init(idxd_init_module); @@ -890,30 +886,6 @@ static void __exit idxd_exit_module(void) idxd_driver_unregister(&dsa_drv); pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); - idxd_unregister_bus_type(); perfmon_exit(); } module_exit(idxd_exit_module); - -int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *owner, - const char *mod_name) -{ - struct device_driver *drv = &idxd_drv->drv; - - if (!idxd_drv->type) { - pr_debug("driver type not set (%ps)\n", __builtin_return_address(0)); - return -EINVAL; - } - - drv->name = idxd_drv->name; - drv->bus = &dsa_bus_type; - drv->owner = owner; - drv->mod_name = mod_name; - - return driver_register(drv); -} - -void idxd_driver_unregister(struct idxd_device_driver *idxd_drv) -{ - driver_unregister(&idxd_drv->drv); -} diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 6dfcac629bd7..ef5cf1db4e7d 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -16,49 +16,6 @@ static char *idxd_wq_type_names[] = { [IDXD_WQT_USER] = "user", }; -static int idxd_config_bus_match(struct device *dev, - struct device_driver *drv) -{ - struct idxd_device_driver *idxd_drv = - container_of(drv, struct idxd_device_driver, drv); - struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); - int i = 0; - - while (idxd_drv->type[i] != IDXD_DEV_NONE) { - if (idxd_dev->type == idxd_drv->type[i]) - return 1; - i++; - } - - return 0; -} - -static int idxd_config_bus_probe(struct device *dev) -{ - struct idxd_device_driver *idxd_drv = - container_of(dev->driver, struct idxd_device_driver, drv); - struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); - - return idxd_drv->probe(idxd_dev); -} - -static int idxd_config_bus_remove(struct device *dev) -{ - struct idxd_device_driver *idxd_drv = - container_of(dev->driver, struct idxd_device_driver, drv); - struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); - - idxd_drv->remove(idxd_dev); - return 0; -} - -struct bus_type dsa_bus_type = { - .name = "dsa", - .match = idxd_config_bus_match, - .probe = idxd_config_bus_probe, - .remove = idxd_config_bus_remove, -}; - static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev) { if (is_idxd_dev(idxd_dev)) -- Gitee From 15f2700a49539262d9ba81b9332e68789d72d56b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 15 Jul 2021 11:44:47 -0700 Subject: [PATCH 067/173] dmaengine: idxd: move dsa_drv support to compatible mode mainline inclusion from mainline-v5.15 commit 6e7f3ee97bbe2c7d7a53b7dbd7a08a579e03c8c9 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 6e7f3ee97bbe dmaengine: idxd: move dsa_drv support to compatible mode. Incremental backporting patches for DSA/IAA on Intel Xeon platform.
-------------------------------- The original architecture of /sys/bus/dsa invented a scheme whereby a single entry in the list of bus drivers, /sys/bus/drivers/dsa, handled all device types and internally routed them to different drivers. Those internal drivers were invisible to userspace. With the idxd driver transitioned to a proper bus device-driver model, the legacy behavior needs to be preserved due to it being exposed to user space via sysfs. Create a compat driver to provide the legacy behavior for /sys/bus/dsa/drivers/dsa. This should satisfy user tool accel-config v3.2 or earlier where this behavior is expected. If the distro has a newer accel-config then the legacy mode does not need to be enabled. When the compat driver binds the device (i.e. dsa0) to the dsa driver, it will be bound to the new idxd_drv. The wq device (i.e. wq0.0) will be bound to either the dmaengine_drv or the user_drv. The dsa_drv becomes a routing mechanism for the new drivers. It will not support additional external drivers that are implemented later. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162637468705.744545.4399080971745974435.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/Kconfig | 17 ++++++ drivers/dma/idxd/Makefile | 3 + drivers/dma/idxd/cdev.c | 1 + drivers/dma/idxd/compat.c | 114 ++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/device.c | 1 + drivers/dma/idxd/dma.c | 1 + drivers/dma/idxd/idxd.h | 10 +++- drivers/dma/idxd/init.c | 7 --- drivers/dma/idxd/sysfs.c | 40 ------------- 9 files changed, 146 insertions(+), 48 deletions(-) create mode 100644 drivers/dma/idxd/compat.c diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index d65a3e3bd719..c735413c649a 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -301,6 +301,23 @@ config INTEL_IDXD If unsure, say N. +config INTEL_IDXD_COMPAT + bool "Legacy behavior for idxd driver" + depends on PCI && X86_64 + select INTEL_IDXD_BUS + help + Compatible driver to support old /sys/bus/dsa/drivers/dsa behavior. + The old behavior performed driver bind/unbind for device and wq + devices all under the dsa driver. The compat driver will emulate + the legacy behavior in order to allow existing support apps (i.e. + accel-config) to continue function. It is expected that accel-config + v3.2 and earlier will need the compat mode. A distro with later + accel-config version can disable this compat config. + + Say Y if you have old applications that require such behavior. + + If unsure, say N. + # Config symbol that collects all the dependencies that's necessary to # support shared virtual memory for the devices supported by idxd.
config INTEL_IDXD_SVM diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index 8c29ed4d48c3..a1e9f2b3a37c 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -7,3 +7,6 @@ idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o obj-$(CONFIG_INTEL_IDXD_BUS) += idxd_bus.o idxd_bus-y := bus.o + +obj-$(CONFIG_INTEL_IDXD_COMPAT) += idxd_compat.o +idxd_compat-y := compat.o diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index c31ef293f86c..5bac45af6256 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -356,6 +356,7 @@ struct idxd_device_driver idxd_user_drv = { .name = "user", .type = dev_types, }; +EXPORT_SYMBOL_GPL(idxd_user_drv); int idxd_cdev_register(void) { diff --git a/drivers/dma/idxd/compat.c b/drivers/dma/idxd/compat.c new file mode 100644 index 000000000000..d67746ee0c1a --- /dev/null +++ b/drivers/dma/idxd/compat.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include "idxd.h" + +extern int device_driver_attach(struct device_driver *drv, struct device *dev); +extern void device_driver_detach(struct device *dev); + +#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ + struct driver_attribute driver_attr_##_name = \ + __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) + +static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count) +{ + struct bus_type *bus = drv->bus; + struct device *dev; + int rc = -ENODEV; + + dev = bus_find_device_by_name(bus, NULL, buf); + if (dev && dev->driver) { + device_driver_detach(dev); + rc = count; + } + + return rc; +} +static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store); + +static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count) +{ + struct bus_type *bus = drv->bus; + struct device *dev; + struct device_driver *alt_drv; + int rc = -ENODEV; + struct idxd_dev *idxd_dev; + + dev = bus_find_device_by_name(bus, NULL, buf); + if (!dev || dev->driver || drv != &dsa_drv.drv) + return -ENODEV; + + idxd_dev = confdev_to_idxd_dev(dev); + if (is_idxd_dev(idxd_dev)) { + alt_drv = driver_find("idxd", bus); + if (!alt_drv) + return -ENODEV; + } else if (is_idxd_wq_dev(idxd_dev)) { + struct idxd_wq *wq = confdev_to_wq(dev); + + if (is_idxd_wq_kernel(wq)) { + alt_drv = driver_find("dmaengine", bus); + if (!alt_drv) + return -ENODEV; + } else if (is_idxd_wq_user(wq)) { + alt_drv = driver_find("user", bus); + if (!alt_drv) + return -ENODEV; + } else { + return -ENODEV; + } + } + + rc = device_driver_attach(alt_drv, dev); + if (rc < 0) + return rc; + + return count; +} +static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store); + +static struct attribute *dsa_drv_compat_attrs[] = { + &driver_attr_bind.attr, + &driver_attr_unbind.attr, + NULL, +}; + +static const struct attribute_group dsa_drv_compat_attr_group = { + .attrs = dsa_drv_compat_attrs, +}; + +static const struct attribute_group *dsa_drv_compat_groups[] = { + &dsa_drv_compat_attr_group, + NULL, +}; + +static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev) +{ + return -ENODEV; +} + +static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev) +{ +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_NONE, +}; + +struct idxd_device_driver dsa_drv = { + .name = "dsa", + .probe = idxd_dsa_drv_probe, + .remove = idxd_dsa_drv_remove, + .type = dev_types, + .drv = { + .suppress_bind_attrs = true, + .groups = dsa_drv_compat_groups, + }, +}; + 
+module_idxd_driver(dsa_drv); +MODULE_IMPORT_NS(IDXD); diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 9bbc28d9a9eb..99350ac9a292 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1318,3 +1318,4 @@ struct idxd_device_driver idxd_drv = { .remove = idxd_device_drv_remove, .name = "idxd", }; +EXPORT_SYMBOL_GPL(idxd_drv); diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 7408df389249..bb082b2dda24 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -362,3 +362,4 @@ struct idxd_device_driver idxd_dmaengine_drv = { .name = "dmaengine", .type = dev_types, }; +EXPORT_SYMBOL_GPL(idxd_dmaengine_drv); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index bacec9b93a7e..d0874d8877d9 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -416,11 +416,16 @@ static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq) return false; } -static inline bool is_idxd_wq_cdev(struct idxd_wq *wq) +static inline bool is_idxd_wq_user(struct idxd_wq *wq) { return wq->type == IDXD_WQT_USER; } +static inline bool is_idxd_wq_kernel(struct idxd_wq *wq) +{ + return wq->type == IDXD_WQT_KERNEL; +} + static inline bool wq_dedicated(struct idxd_wq *wq) { return test_bit(WQ_FLAG_DEDICATED, &wq->flags); @@ -484,6 +489,9 @@ int __must_check __idxd_driver_register(struct idxd_device_driver *idxd_drv, void idxd_driver_unregister(struct idxd_device_driver *idxd_drv); +#define module_idxd_driver(__idxd_driver) \ + module_driver(__idxd_driver, idxd_driver_register, idxd_driver_unregister) + int idxd_register_bus_type(void); void idxd_unregister_bus_type(void); int idxd_register_devices(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 9b797fcdfd7b..8db56f98059f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -849,10 +849,6 @@ static int __init idxd_init_module(void) if (err < 0) goto err_idxd_user_driver_register; - err = idxd_driver_register(&dsa_drv); - if (err < 0) - goto err_dsa_driver_register; - err = idxd_cdev_register(); if (err) goto err_cdev_register; @@ -866,8 +862,6 @@ static int __init idxd_init_module(void) err_pci_register: idxd_cdev_remove(); err_cdev_register: - idxd_driver_unregister(&dsa_drv); -err_dsa_driver_register: idxd_driver_unregister(&idxd_user_drv); err_idxd_user_driver_register: idxd_driver_unregister(&idxd_dmaengine_drv); @@ -883,7 +877,6 @@ static void __exit idxd_exit_module(void) idxd_driver_unregister(&idxd_user_drv); idxd_driver_unregister(&idxd_dmaengine_drv); idxd_driver_unregister(&idxd_drv); - idxd_driver_unregister(&dsa_drv); pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); perfmon_exit(); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index ef5cf1db4e7d..a6245ef75f3d 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -16,46 +16,6 @@ static char *idxd_wq_type_names[] = { [IDXD_WQT_USER] = "user", }; -static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev) -{ - if (is_idxd_dev(idxd_dev)) - return idxd_device_drv_probe(idxd_dev); - - if (is_idxd_wq_dev(idxd_dev)) { - struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); - - return drv_enable_wq(wq); - } - - return -ENODEV; -} - -static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev) -{ - if (is_idxd_dev(idxd_dev)) { - idxd_device_drv_remove(idxd_dev); - return; - } - - if (is_idxd_wq_dev(idxd_dev)) { - struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); - - drv_disable_wq(wq); - return; - } -} - -static enum idxd_dev_type 
dev_types[] = { - IDXD_DEV_NONE, -}; - -struct idxd_device_driver dsa_drv = { - .name = "dsa", - .probe = idxd_dsa_drv_probe, - .remove = idxd_dsa_drv_remove, - .type = dev_types, -}; - /* IDXD engine attributes */ static ssize_t engine_group_id_show(struct device *dev, struct device_attribute *attr, char *buf) -- Gitee From 449f10566fafafe9f1e987c51d38e28f8e14985b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 16:25:00 -0700 Subject: [PATCH 068/173] dmaengine: idxd: remove fault processing code mainline inclusion from mainline-v5.15 commit 0e96454ca26cc5c594ec792f7e5168cce726f7cf category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 0e96454ca26c dmaengine: idxd: remove fault processing code. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Kernel memory is pinned and will not cause faults. Since the driver does not support interrupts for user descriptors, no fault errors are expected to come through the misc interrupt. Remove dead code. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162630502789.631986.10591230961790023856.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 95 ++---------------------------------------- 1 file changed, 4 insertions(+), 91 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index be65d55e1fc4..e018459b534f 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -23,10 +23,8 @@ struct idxd_fault { }; static int irq_process_work_list(struct idxd_irq_entry *irq_entry, - enum irq_work_type wtype, int *processed, u64 data); static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, - enum irq_work_type wtype, int *processed, u64 data); static void idxd_device_reinit(struct work_struct *work) @@ -62,46 +60,6 @@ static void idxd_device_reinit(struct work_struct *work) idxd_device_clear_state(idxd); } -static void idxd_device_fault_work(struct work_struct *work) -{ - struct idxd_fault *fault = container_of(work, struct idxd_fault, work); - struct idxd_irq_entry *ie; - int i; - int processed; - int irqcnt = fault->idxd->num_wq_irqs + 1; - - for (i = 1; i < irqcnt; i++) { - ie = &fault->idxd->irq_entries[i]; - irq_process_work_list(ie, IRQ_WORK_PROCESS_FAULT, - &processed, fault->addr); - if (processed) - break; - - irq_process_pending_llist(ie, IRQ_WORK_PROCESS_FAULT, - &processed, fault->addr); - if (processed) - break; - } - - kfree(fault); -} - -static int idxd_device_schedule_fault_process(struct idxd_device *idxd, - u64 fault_addr) -{ - struct idxd_fault *fault; - - fault = kmalloc(sizeof(*fault), GFP_ATOMIC); - if (!fault) - return -ENOMEM; - - fault->addr = fault_addr; - fault->idxd = idxd; - INIT_WORK(&fault->work, idxd_device_fault_work); - queue_work(idxd->wq, &fault->work); - return 0; -} - static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { struct device *dev = &idxd->pdev->dev; @@ -168,15 +126,6 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) if (!err) return 0; - /* - * This case should rarely happen and typically is due to software - * programming error by the driver.
- */ - if (idxd->sw_err.valid && - idxd->sw_err.desc_valid && - idxd->sw_err.fault_addr) - idxd_device_schedule_fault_process(idxd, idxd->sw_err.fault_addr); - gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); if (gensts.state == IDXD_DEVICE_STATE_HALT) { idxd->state = IDXD_DEV_HALTED; @@ -228,43 +177,19 @@ irqreturn_t idxd_misc_thread(int vec, void *data) return IRQ_HANDLED; } -static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr) -{ - /* - * Completion address can be bad as well. Check fault address match for descriptor - * and completion address. - */ - if ((u64)desc->hw == fault_addr || (u64)desc->completion == fault_addr) { - struct idxd_device *idxd = desc->wq->idxd; - struct device *dev = &idxd->pdev->dev; - - dev_warn(dev, "desc with fault address: %#llx\n", fault_addr); - return true; - } - - return false; -} - static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, - enum irq_work_type wtype, int *processed, u64 data) { struct idxd_desc *desc, *t; struct llist_node *head; int queued = 0; unsigned long flags; - enum idxd_complete_type reason; *processed = 0; head = llist_del_all(&irq_entry->pending_llist); if (!head) goto out; - if (wtype == IRQ_WORK_NORMAL) - reason = IDXD_COMPLETE_NORMAL; - else - reason = IDXD_COMPLETE_DEV_FAIL; - llist_for_each_entry_safe(desc, t, head, llnode) { u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; @@ -275,9 +200,7 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, continue; } - if (unlikely(status != DSA_COMP_SUCCESS)) - match_fault(desc, data); - complete_desc(desc, reason); + complete_desc(desc, IDXD_COMPLETE_NORMAL); (*processed)++; } else { spin_lock_irqsave(&irq_entry->list_lock, flags); @@ -293,20 +216,14 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, } static int irq_process_work_list(struct idxd_irq_entry *irq_entry, - enum irq_work_type wtype, int *processed, u64 data) { int queued = 0; unsigned long flags; LIST_HEAD(flist); struct idxd_desc *desc, *n; - enum idxd_complete_type reason; *processed = 0; - if (wtype == IRQ_WORK_NORMAL) - reason = IDXD_COMPLETE_NORMAL; - else - reason = IDXD_COMPLETE_DEV_FAIL; /* * This lock protects list corruption from access of list outside of the irq handler @@ -338,9 +255,7 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, continue; } - if (unlikely(status != DSA_COMP_SUCCESS)) - match_fault(desc, data); - complete_desc(desc, reason); + complete_desc(desc, IDXD_COMPLETE_NORMAL); } return queued; @@ -370,14 +285,12 @@ static int idxd_desc_process(struct idxd_irq_entry *irq_entry) * 5. Repeat until no more descriptors. */ do { - rc = irq_process_work_list(irq_entry, IRQ_WORK_NORMAL, - &processed, 0); + rc = irq_process_work_list(irq_entry, &processed, 0); total += processed; if (rc != 0) continue; - rc = irq_process_pending_llist(irq_entry, IRQ_WORK_NORMAL, - &processed, 0); + rc = irq_process_pending_llist(irq_entry, &processed, 0); total += processed; } while (rc != 0); -- Gitee From ca11cd5f7f28ddb2e8f24232cc2235bf62bb5535 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Jul 2021 13:42:10 -0700 Subject: [PATCH 069/173] dmaengine: idxd: Set defaults for GRPCFG traffic class mainline inclusion from mainline-v5.15 commit ade8a86b512cf8db0d0e975a971ce356953cfcb3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit ade8a86b512c dmaengine: idxd: Set defaults for GRPCFG traffic class. 
Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Set GRPCFG traffic class to value of 1 for best performance on current generation of accelerators. Also add override option to allow experimentation. Sysfs knobs are disabled for DSA/IAX gen1 devices. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162681373005.1968485.3761065664382799202.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- Documentation/admin-guide/kernel-parameters.txt | 5 +++++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 13 +++++++++++-- drivers/dma/idxd/registers.h | 3 +++ drivers/dma/idxd/sysfs.c | 6 ++++++ 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ebc997e6903a..3860cb462f0b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1753,6 +1753,11 @@ support for the idxd driver. By default it is set to true (1). + idxd.tc_override= [HW] + Format: + Allow override of default traffic class configuration + for the device. By default it is set to false (0). + ieee754= [MIPS] Select IEEE Std 754 conformance mode Format: { strict | legacy | 2008 | relaxed } Default: strict diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index d0874d8877d9..4e4dc0110e77 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -16,6 +16,7 @@ #define IDXD_DRIVER_VERSION "1.00" extern struct kmem_cache *idxd_desc_pool; +extern bool tc_override; struct idxd_wq; struct idxd_dev; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 8db56f98059f..eb09bc591c31 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -32,6 +32,10 @@ static bool sva = true; module_param(sva, bool, 0644); MODULE_PARM_DESC(sva, "Toggle SVA support on/off"); +bool tc_override; +module_param(tc_override, bool, 0644); +MODULE_PARM_DESC(tc_override, "Override traffic class defaults"); + #define DRV_NAME "idxd" bool support_enqcmd; @@ -336,8 +340,13 @@ static int idxd_setup_groups(struct idxd_device *idxd) } idxd->groups[i] = group; - group->tc_a = -1; - group->tc_b = -1; + if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) { + group->tc_a = 1; + group->tc_b = 1; + } else { + group->tc_a = -1; + group->tc_b = -1; + } } return 0; diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 7343a8f48819..ffc7550a77ee 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -7,6 +7,9 @@ #define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 #define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe +#define DEVICE_VERSION_1 0x100 +#define DEVICE_VERSION_2 0x200 + #define IDXD_MMIO_BAR 0 #define IDXD_WQ_BAR 2 #define IDXD_PORTAL_SIZE PAGE_SIZE diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index a6245ef75f3d..f80341446187 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -327,6 +327,9 @@ static ssize_t group_traffic_class_a_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; + if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) + return -EPERM; + if (val < 0 || val > 7) return -EINVAL; @@ -366,6 +369,9 @@ static ssize_t group_traffic_class_b_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; + if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) + return -EPERM; + if (val < 0 || val > 7) return -EINVAL; -- 
Gitee From a608a9da9169f3c2b01ee03756d93c9e0a3a32df Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 21 Jul 2021 11:35:03 -0700 Subject: [PATCH 070/173] dmaengine: idxd: fix uninit var for alt_drv mainline inclusion from mainline-v5.15 commit 568b2126466f926a10be0b53b40c2d6ae056d8d6 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 568b2126466f dmaengine: idxd: fix uninit var for alt_drv. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- 0-day detected an uninitialized alt_drv variable in the bind_store() function. The branch can be taken when the device is neither an idxd device nor a wq 'struct device'. Init alt_drv to NULL. Fixes: 6e7f3ee97bbe ("dmaengine: idxd: move dsa_drv support to compatible mode") Reported-by: kernel test robot Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162689250332.2114335.636367120454420852.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/compat.c b/drivers/dma/idxd/compat.c index d67746ee0c1a..d7616c240dcd 100644 --- a/drivers/dma/idxd/compat.c +++ b/drivers/dma/idxd/compat.c @@ -34,7 +34,7 @@ static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t cou { struct bus_type *bus = drv->bus; struct device *dev; - struct device_driver *alt_drv; + struct device_driver *alt_drv = NULL; int rc = -ENODEV; struct idxd_dev *idxd_dev; -- Gitee From dd9dc683273dbc9a9ea6c48284a864d0037a5eb7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Jul 2021 13:42:04 -0700 Subject: [PATCH 071/173] dmaengine: idxd: rotate portal address for better performance mainline inclusion from mainline-v5.15 commit a9c171527a3403cae6c1907744b1bc9ca301f912 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit a9c171527a34 dmaengine: idxd: rotate portal address for better performance. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The device submission portal is on a 4k page and any of the 64-bit aligned addresses on the page can be used for descriptor submission. By rotating the offset through the 4k range and preventing successive writes to the same MMIO address, a performance improvement is observed through testing.
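As a worked illustration of the rotation (not from the patch): assuming 64-byte descriptors (the size implied by sizeof(struct dsa_raw_desc)) and the 4k portal page, successive submissions step through the page and wrap after 64 writes. A hypothetical standalone demo of the masking arithmetic:

	#include <stdio.h>

	int main(void)
	{
		unsigned int ofs = 0;
		const unsigned int desc_sz = 64;           /* assumed sizeof(struct dsa_raw_desc) */
		const unsigned int portal_mask = 4096 - 1; /* mirrors IDXD_PORTAL_MASK */
		int i;

		for (i = 0; i < 66; i++) {
			printf("submission %2d -> portal offset %4u\n", i, ofs);
			ofs = (ofs + desc_sz) & portal_mask; /* 0, 64, ..., 4032, then back to 0 */
		}
		return 0;
	}

The mask keeps the update branch-free, which is why the benign racy increment described in the idxd_wq_portal_addr() comment below is acceptable.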
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162681372446.1968485.10634280461681015569.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 1 + drivers/dma/idxd/idxd.h | 20 ++++++++++++++++++++ drivers/dma/idxd/submit.c | 2 +- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 99350ac9a292..41f67a195eb6 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -320,6 +320,7 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq) devm_iounmap(dev, wq->portal); wq->portal = NULL; + wq->portal_offset = 0; } void idxd_wqs_unmap_portal(struct idxd_device *idxd) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 4e4dc0110e77..94983bced189 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -11,6 +11,7 @@ #include #include #include +#include #include "registers.h" #define IDXD_DRIVER_VERSION "1.00" @@ -162,6 +163,7 @@ struct idxd_dma_chan { struct idxd_wq { void __iomem *portal; + u32 portal_offset; struct percpu_ref wq_active; struct completion wq_dead; struct idxd_dev idxd_dev; @@ -468,6 +470,24 @@ static inline int idxd_get_wq_portal_full_offset(int wq_id, return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot); } +#define IDXD_PORTAL_MASK (PAGE_SIZE - 1) + +/* + * Even though this function can be accessed by multiple threads, it is safe to use. + * At worst the address gets used more than once before it gets incremented. We don't + * hit a threshold until iops becomes many million times a second. So the occasional + * reuse of the same address is tolerable compare to using an atomic variable. This is + * safe on a system that has atomic load/store for 32bit integers. Given that this is an + * Intel iEP device, that should not be a problem. + */ +static inline void __iomem *idxd_wq_portal_addr(struct idxd_wq *wq) +{ + int ofs = wq->portal_offset; + + wq->portal_offset = (ofs + sizeof(struct dsa_raw_desc)) & IDXD_PORTAL_MASK; + return wq->portal + ofs; +} + static inline void idxd_wq_get(struct idxd_wq *wq) { wq->client_count++; diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 65b0130ab2db..92ae9a157cc9 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -146,7 +146,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) if (!percpu_ref_tryget_live(&wq->wq_active)) return -ENXIO; - portal = wq->portal; + portal = idxd_wq_portal_addr(wq); /* * The wmb() flushes writes to coherent DMA data before -- Gitee From 3f2a52e762d7c40904f2fa5c73114eb8af3a66c2 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Jul 2021 13:42:15 -0700 Subject: [PATCH 072/173] dmanegine: idxd: add software command status mainline inclusion from mainline-v5.15 commit 125d10373ad991888c9e94d2da49bcc5ccba2127 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 125d10373ad9 dmanegine: idxd: add software command status. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Enabling device and wq returns standard errno and that does not provide enough details to indicate what exactly failed. The hardware command status is only 8bits. Expand the command status to 32bits and use the upper 16 bits to define software errors to provide more details on the exact failure. 
Bit 31 will be used to indicate the error is software set as the driver is using some of the spec defined hardware error as well. Cc: Ramesh Thomas Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162681373579.1968485.5891788397526827892.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- .../ABI/stable/sysfs-driver-dma-idxd | 2 ++ drivers/dma/idxd/cdev.c | 5 +++- drivers/dma/idxd/device.c | 22 +++++++++++++++--- drivers/dma/idxd/dma.c | 4 ++++ drivers/dma/idxd/idxd.h | 2 +- drivers/dma/idxd/sysfs.c | 11 ++++++++- include/uapi/linux/idxd.h | 23 +++++++++++++++++++ 7 files changed, 63 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index ebd521314803..c67c63bacd3c 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -128,6 +128,8 @@ Date: Aug 28, 2020 KernelVersion: 5.10.0 Contact: dmaengine@vger.kernel.org Description: The last executed device administrative command's status/error. + Also last configuration error overloaded. + Writing to it will clear the status. What: /sys/bus/dsa/devices/wq./block_on_fault Date: Oct 27, 2020 diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 5bac45af6256..a16e76921a9a 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -320,9 +320,12 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) goto err; rc = idxd_wq_add_cdev(wq); - if (rc < 0) + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_CDEV_ERR; goto err_cdev; + } + idxd->cmd_status = 0; mutex_unlock(&wq->wq_lock); return 0; diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 41f67a195eb6..86fa4b4590f9 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -840,6 +840,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->wq_size = wq->size; if (wq->size == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE; dev_warn(dev, "Incorrect work queue size: 0\n"); return -EINVAL; } @@ -975,6 +976,7 @@ static int idxd_wqs_setup(struct idxd_device *idxd) continue; if (wq_shared(wq) && !device_swq_supported(idxd)) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT; dev_warn(dev, "No shared wq support but configured.\n"); return -EINVAL; } @@ -983,8 +985,10 @@ static int idxd_wqs_setup(struct idxd_device *idxd) configured++; } - if (configured == 0) + if (configured == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NONE_CONFIGURED; return -EINVAL; + } return 0; } @@ -1140,21 +1144,26 @@ int __drv_enable_wq(struct idxd_wq *wq) lockdep_assert_held(&wq->wq_lock); - if (idxd->state != IDXD_DEV_ENABLED) + if (idxd->state != IDXD_DEV_ENABLED) { + idxd->cmd_status = IDXD_SCMD_DEV_NOT_ENABLED; goto err; + } if (wq->state != IDXD_WQ_DISABLED) { dev_dbg(dev, "wq %d already enabled.\n", wq->id); + idxd->cmd_status = IDXD_SCMD_WQ_ENABLED; rc = -EBUSY; goto err; } if (!wq->group) { dev_dbg(dev, "wq %d not attached to group.\n", wq->id); + idxd->cmd_status = IDXD_SCMD_WQ_NO_GRP; goto err; } if (strlen(wq->name) == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_NAME; dev_dbg(dev, "wq %d name not set.\n", wq->id); goto err; } @@ -1162,6 +1171,7 @@ int __drv_enable_wq(struct idxd_wq *wq) /* Shared WQ checks */ if (wq_shared(wq)) { if (!device_swq_supported(idxd)) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SVM; dev_dbg(dev, "PASID not enabled and shared wq.\n"); goto err; } @@ -1174,6 +1184,7 @@ int __drv_enable_wq(struct idxd_wq *wq) * threshold via sysfs. 
*/ if (wq->threshold == 0) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_THRESH; dev_dbg(dev, "Shared wq and threshold 0.\n"); goto err; } @@ -1197,6 +1208,7 @@ int __drv_enable_wq(struct idxd_wq *wq) rc = idxd_wq_map_portal(wq); if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_PORTAL_ERR; dev_dbg(dev, "wq %d portal mapping failed: %d\n", wq->id, rc); goto err_map_portal; } @@ -1259,8 +1271,10 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) * enabled state, then the device was altered outside of driver's control. * If the state is in halted state, then we don't want to proceed. */ - if (idxd->state != IDXD_DEV_DISABLED) + if (idxd->state != IDXD_DEV_DISABLED) { + idxd->cmd_status = IDXD_SCMD_DEV_ENABLED; return -ENXIO; + } /* Device configuration */ spin_lock_irqsave(&idxd->dev_lock, flags); @@ -1279,9 +1293,11 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) rc = idxd_register_dma_device(idxd); if (rc < 0) { idxd_device_disable(idxd); + idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR; return rc; } + idxd->cmd_status = 0; return 0; } diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index bb082b2dda24..997b781b796e 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -307,22 +307,26 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) rc = idxd_wq_alloc_resources(wq); if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR; dev_dbg(dev, "WQ resource alloc failed\n"); goto err_res_alloc; } rc = idxd_wq_init_percpu_ref(wq); if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_PERCPU_ERR; dev_dbg(dev, "percpu_ref setup failed\n"); goto err_ref; } rc = idxd_register_dma_channel(wq); if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_DMA_CHAN_ERR; dev_dbg(dev, "Failed to register dma channel\n"); goto err_dma; } + idxd->cmd_status = 0; mutex_unlock(&wq->wq_lock); return 0; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 94983bced189..bfcb03329f77 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -252,7 +252,7 @@ struct idxd_device { unsigned long flags; int id; int major; - u8 cmd_status; + u32 cmd_status; struct pci_dev *pdev; void __iomem *reg_base; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index f80341446187..bbebfb21980d 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1223,7 +1223,16 @@ static ssize_t cmd_status_show(struct device *dev, return sysfs_emit(buf, "%#x\n", idxd->cmd_status); } -static DEVICE_ATTR_RO(cmd_status); + +static ssize_t cmd_status_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + idxd->cmd_status = 0; + return count; +} +static DEVICE_ATTR_RW(cmd_status); static struct attribute *idxd_device_attributes[] = { &dev_attr_version.attr, diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index edc346a77c91..ca24c25252fb 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -9,6 +9,29 @@ #include #endif +/* Driver command error status */ +enum idxd_scmd_stat { + IDXD_SCMD_DEV_ENABLED = 0x80000010, + IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020, + IDXD_SCMD_WQ_ENABLED = 0x80000021, + IDXD_SCMD_DEV_DMA_ERR = 0x80020000, + IDXD_SCMD_WQ_NO_GRP = 0x80030000, + IDXD_SCMD_WQ_NO_NAME = 0x80040000, + IDXD_SCMD_WQ_NO_SVM = 0x80050000, + IDXD_SCMD_WQ_NO_THRESH = 0x80060000, + IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000, + IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000, + IDXD_SCMD_PERCPU_ERR = 0x80090000, + IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000, + 
IDXD_SCMD_CDEV_ERR = 0x800b0000, + IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000, + IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, + IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, +}; + +#define IDXD_SCMD_SOFTERR_MASK 0x80000000 +#define IDXD_SCMD_SOFTERR_SHIFT 16 + /* Descriptor flags */ #define IDXD_OP_FLAG_FENCE 0x0001 #define IDXD_OP_FLAG_BOF 0x0002 -- Gitee From 2cf7311882ffbafe5705dd1f2feecf6d92c39f3b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 22 Jul 2021 13:10:51 -0700 Subject: [PATCH 073/173] dmaengine: idxd: fix abort status check mainline inclusion from mainline-v5.15 commit b60bb6e2bfc192091b8f792781b83b5e0f9324f6 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit b60bb6e2bfc1 dmaengine: idxd: fix abort status check. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Coverity static analysis of linux-next found issue. The check (status == IDXD_COMP_DESC_ABORT) is always false since status was previously masked with 0x7f and IDXD_COMP_DESC_ABORT is 0xff. Fixes: 6b4b87f2c31a ("dmaengine: idxd: fix submission race window") Reported-by: Colin Ian King Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162698465160.3560828.18173186265683415384.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index e018459b534f..65dc7bbb0a13 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -194,7 +194,11 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; if (status) { - if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + /* + * Check against the original status as ABORT is software defined + * and 0xff, which DSA_COMP_STATUS_MASK can mask out. + */ + if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { complete_desc(desc, IDXD_COMPLETE_ABORT); (*processed)++; continue; @@ -250,7 +254,11 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, list_for_each_entry(desc, &flist, list) { u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; - if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + /* + * Check against the original status as ABORT is software defined + * and 0xff, which DSA_COMP_STATUS_MASK can mask out. + */ + if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { complete_desc(desc, IDXD_COMPLETE_ABORT); continue; } -- Gitee From 8cfd77c47a8045ba563a0a666051855a69ae2c05 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 29 Jul 2021 14:04:01 +0200 Subject: [PATCH 074/173] dmaengine: idxd: Fix a possible NULL pointer dereference mainline inclusion from mainline-v5.15 commit e9c5b0b53ccca81dd0a35c62309e243a57c7959d category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit e9c5b0b53ccc dmaengine: idxd: Fix a possible NULL pointer dereference. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- 'device_driver_attach()' dereferences its first argument (i.e. 'alt_drv') so it must not be NULL. Simplify the error handling logic about NULL 'alt_drv' in order to be more robust and future-proof. 
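The shape of the fix is the common "assign in branches, check once" idiom; a minimal sketch under assumed conditions (cond_a and cond_b are placeholders, driver_find() is the real driver-core lookup used by the patch):

	struct device_driver *alt_drv = NULL;

	if (cond_a)
		alt_drv = driver_find("idxd", bus);
	else if (cond_b)
		alt_drv = driver_find("dmaengine", bus);

	/* one check covers every branch, including ones added later */
	if (!alt_drv)
		return -ENODEV;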
Fixes: 568b2126466f ("dmaengine: idxd: fix uninit var for alt_drv") Fixes: 6e7f3ee97bbe ("dmaengine: idxd: move dsa_drv support to compatible mode") Signed-off-by: Christophe JAILLET Acked-by: Dave Jiang Link: https://lore.kernel.org/r/77f0dc4f3966591d1f0cffb614a94085f8895a85.1627560174.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/compat.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/dma/idxd/compat.c b/drivers/dma/idxd/compat.c index d7616c240dcd..3df21615f888 100644 --- a/drivers/dma/idxd/compat.c +++ b/drivers/dma/idxd/compat.c @@ -45,23 +45,16 @@ static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t cou idxd_dev = confdev_to_idxd_dev(dev); if (is_idxd_dev(idxd_dev)) { alt_drv = driver_find("idxd", bus); - if (!alt_drv) - return -ENODEV; } else if (is_idxd_wq_dev(idxd_dev)) { struct idxd_wq *wq = confdev_to_wq(dev); - if (is_idxd_wq_kernel(wq)) { + if (is_idxd_wq_kernel(wq)) alt_drv = driver_find("dmaengine", bus); - if (!alt_drv) - return -ENODEV; - } else if (is_idxd_wq_user(wq)) { + else if (is_idxd_wq_user(wq)) alt_drv = driver_find("user", bus); - if (!alt_drv) - return -ENODEV; - } else { - return -ENODEV; - } } + if (!alt_drv) + return -ENODEV; rc = device_driver_attach(alt_drv, dev); if (rc < 0) -- Gitee From aed3c5b3a5d8a87347f3ad8d6e46ee8ad8ee1db7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 2 Aug 2021 10:58:20 -0700 Subject: [PATCH 075/173] dmaengine: idxd: Remove unused status variable in irq_process_work_list() mainline inclusion from mainline-v5.15 commit 53cbf462f6b5c9de364efdf443ffb74ed082463a category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 53cbf462f6b5 dmaengine: idxd: Remove unused status variable in irq_process_work_list(). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- status is no longer used within this block: drivers/dma/idxd/irq.c:255:6: warning: unused variable 'status' [-Wunused-variable] u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; ^ 1 warning generated. Fixes: b60bb6e2bfc1 ("dmaengine: idxd: fix abort status check") Signed-off-by: Nathan Chancellor Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20210802175820.3153920-1-nathan@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 65dc7bbb0a13..91e46ca3a0ad 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -252,8 +252,6 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, spin_unlock_irqrestore(&irq_entry->list_lock, flags); list_for_each_entry(desc, &flist, list) { - u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; - /* * Check against the original status as ABORT is software defined * and 0xff, which DSA_COMP_STATUS_MASK can mask out. -- Gitee From 346154d0c1e7c3065f07e5f9db0467f6df01a64a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 3 Aug 2021 15:32:06 -0700 Subject: [PATCH 076/173] dmaengine: idxd: add capability check for 'block on fault' attribute mainline inclusion from mainline-v5.15 commit 81c2f79c2104c5b48f01da674bc2f7d4bc600db4 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 81c2f79c2104 dmaengine: idxd: add capability check for 'block on fault' attribute. 
Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The device general capability has a bit that indicates whether 'block on fault' is supported. Add a check to the wq sysfs knob to check if the cap exists before allowing the user to toggle. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162802992615.3084999.12539468940404102898.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index bbebfb21980d..e17a495ad3c9 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -642,6 +642,9 @@ static ssize_t wq_block_on_fault_store(struct device *dev, bool bof; int rc; + if (!idxd->hw.gen_cap.block_on_fault) + return -EOPNOTSUPP; + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; -- Gitee From f984a606f499fbe3ef27514a7064dcbc9fbd83d6 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 3 Aug 2021 15:37:15 -0700 Subject: [PATCH 077/173] dmaengine: idxd: clear block on fault flag when clear wq mainline inclusion from mainline-v5.15 commit bd2f4ae5e019efcfadd6b491204fd60adf14f4a3 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit bd2f4ae5e019 dmaengine: idxd: clear block on fault flag when clear wq. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The block on fault flag is not cleared when we disable or reset the wq. This causes it to remain set if the user does not clear it on the next configuration load. Add clearing of the flag in the idxd_wq_disable_cleanup() routine. Fixes: da32b28c95a7 ("dmaengine: idxd: cleanup workqueue config after disabling") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162803023553.3086015.8158952172068868803.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 86fa4b4590f9..21f0d732b76e 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -402,6 +402,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->priority = 0; wq->ats_dis = 0; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); + clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); } -- Gitee From e837864ef6c26d698e7883ee1ad5a035ff052833 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 3 Aug 2021 15:29:30 -0700 Subject: [PATCH 078/173] dmaengine: idxd: make I/O interrupt handler one shot mainline inclusion from mainline-v5.15 commit d803c8b9f3f2b8e5c047f2d0a27a9ea3ef91510f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit d803c8b9f3f2 dmaengine: idxd: make I/O interrupt handler one shot. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The interrupt thread handler currently loops forever to process outstanding completions. This causes either an "irq X: nobody cared" kernel splat or causes the NMI watchdog to kick in due to running too long in the function. The irq thread handler is expected to run again after exiting if interrupts fire while the thread handler is running. So the handler code can process all the completed I/O in a single pass and exit without losing the follow-on completed I/O.
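For background (not part of the patch): the guarantee relied on here comes from the threaded interrupt core, which re-runs the thread handler if the interrupt fires again while it is executing. A sketch of the resulting one-shot structure, mirroring the final handler in the diff below:

	static irqreturn_t example_wq_thread(int irq, void *data)
	{
		struct idxd_irq_entry *irq_entry = data;

		/* drain completions once; a new interrupt re-invokes this
		 * handler, so no internal retry loop is needed */
		irq_process_work_list(irq_entry);
		irq_process_pending_llist(irq_entry);

		return IRQ_HANDLED;
	}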
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162802977005.3084234.11836261157026497585.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 59 ++++++------------------------------------ 1 file changed, 8 insertions(+), 51 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 91e46ca3a0ad..11addb394793 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -22,11 +22,6 @@ struct idxd_fault { struct idxd_device *idxd; }; -static int irq_process_work_list(struct idxd_irq_entry *irq_entry, - int *processed, u64 data); -static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, - int *processed, u64 data); - static void idxd_device_reinit(struct work_struct *work) { struct idxd_device *idxd = container_of(work, struct idxd_device, work); @@ -177,18 +172,15 @@ irqreturn_t idxd_misc_thread(int vec, void *data) return IRQ_HANDLED; } -static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, - int *processed, u64 data) +static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) { struct idxd_desc *desc, *t; struct llist_node *head; - int queued = 0; unsigned long flags; - *processed = 0; head = llist_del_all(&irq_entry->pending_llist); if (!head) - goto out; + return; llist_for_each_entry_safe(desc, t, head, llnode) { u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; @@ -200,35 +192,25 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, */ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { complete_desc(desc, IDXD_COMPLETE_ABORT); - (*processed)++; continue; } complete_desc(desc, IDXD_COMPLETE_NORMAL); - (*processed)++; } else { spin_lock_irqsave(&irq_entry->list_lock, flags); list_add_tail(&desc->list, &irq_entry->work_list); spin_unlock_irqrestore(&irq_entry->list_lock, flags); - queued++; } } - - out: - return queued; } -static int irq_process_work_list(struct idxd_irq_entry *irq_entry, - int *processed, u64 data) +static void irq_process_work_list(struct idxd_irq_entry *irq_entry) { - int queued = 0; unsigned long flags; LIST_HEAD(flist); struct idxd_desc *desc, *n; - *processed = 0; - /* * This lock protects list corruption from access of list outside of the irq handler * thread. @@ -236,16 +218,13 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, spin_lock_irqsave(&irq_entry->list_lock, flags); if (list_empty(&irq_entry->work_list)) { spin_unlock_irqrestore(&irq_entry->list_lock, flags); - return 0; + return; } list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) { if (desc->completion->status) { list_del(&desc->list); - (*processed)++; list_add_tail(&desc->list, &flist); - } else { - queued++; } } @@ -263,13 +242,11 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, complete_desc(desc, IDXD_COMPLETE_NORMAL); } - - return queued; } -static int idxd_desc_process(struct idxd_irq_entry *irq_entry) +irqreturn_t idxd_wq_thread(int irq, void *data) { - int rc, processed, total = 0; + struct idxd_irq_entry *irq_entry = data; /* * There are two lists we are processing. The pending_llist is where @@ -288,29 +265,9 @@ static int idxd_desc_process(struct idxd_irq_entry *irq_entry) * and process the completed entries. * 4. If the entry is still waiting on hardware, list_add_tail() to * the work_list. - * 5. Repeat until no more descriptors. 
*/ - do { - rc = irq_process_work_list(irq_entry, &processed, 0); - total += processed; - if (rc != 0) - continue; - - rc = irq_process_pending_llist(irq_entry, &processed, 0); - total += processed; - } while (rc != 0); - - return total; -} - -irqreturn_t idxd_wq_thread(int irq, void *data) -{ - struct idxd_irq_entry *irq_entry = data; - int processed; - - processed = idxd_desc_process(irq_entry); - if (processed == 0) - return IRQ_NONE; + irq_process_work_list(irq_entry); + irq_process_pending_llist(irq_entry); return IRQ_HANDLED; } -- Gitee From 591a98bf5916f1098cf09f4882b8e83fb34ae63a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 6 Aug 2021 08:36:43 -0700 Subject: [PATCH 079/173] dmaengine: idxd: remove interrupt flag for completion list spinlock mainline inclusion from mainline-v5.15 commit 9fce3b3a0ab4cad407a27b5e36603c23f1b5b278 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 9fce3b3a0ab4 dmaengine: idxd: remove interrupt flag for completion list spinlock. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The list lock is never acquired in interrupt context. Therefore there is no need to disable interrupts. Remove interrupt flags for lock operations. Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162826417450.3454650.3733188117742416238.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 12 +++++------- drivers/dma/idxd/submit.c | 5 ++--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 11addb394793..d221c2e37460 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -176,7 +176,6 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) { struct idxd_desc *desc, *t; struct llist_node *head; - unsigned long flags; head = llist_del_all(&irq_entry->pending_llist); if (!head) @@ -197,17 +196,16 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) complete_desc(desc, IDXD_COMPLETE_NORMAL); } else { - spin_lock_irqsave(&irq_entry->list_lock, flags); + spin_lock(&irq_entry->list_lock); list_add_tail(&desc->list, &irq_entry->work_list); - spin_unlock_irqrestore(&irq_entry->list_lock, flags); + spin_unlock(&irq_entry->list_lock); } } } static void irq_process_work_list(struct idxd_irq_entry *irq_entry) { - unsigned long flags; LIST_HEAD(flist); struct idxd_desc *desc, *n; @@ -215,9 +213,9 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry) * This lock protects list corruption from access of list outside of the irq handler * thread. 
*/ - spin_lock_irqsave(&irq_entry->list_lock, flags); + spin_lock(&irq_entry->list_lock); if (list_empty(&irq_entry->work_list)) { - spin_unlock_irqrestore(&irq_entry->list_lock, flags); + spin_unlock(&irq_entry->list_lock); return; } @@ -228,7 +226,7 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry) } } - spin_unlock_irqrestore(&irq_entry->list_lock, flags); + spin_unlock(&irq_entry->list_lock); list_for_each_entry(desc, &flist, list) { /* diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 92ae9a157cc9..4b514c63af15 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -106,14 +106,13 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, { struct idxd_desc *d, *t, *found = NULL; struct llist_node *head; - unsigned long flags; desc->completion->status = IDXD_COMP_DESC_ABORT; /* * Grab the list lock so it will block the irq thread handler. This allows the * abort code to locate the descriptor need to be aborted. */ - spin_lock_irqsave(&ie->list_lock, flags); + spin_lock(&ie->list_lock); head = llist_del_all(&ie->pending_llist); if (head) { llist_for_each_entry_safe(d, t, head, llnode) { @@ -127,7 +126,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, if (!found) found = list_abort_desc(wq, ie, desc); - spin_unlock_irqrestore(&ie->list_lock, flags); + spin_unlock(&ie->list_lock); if (found) complete_desc(found, IDXD_COMPLETE_ABORT); -- Gitee From dfa8468840992f27e12b6c61e146f3b6146305e6 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 6 Aug 2021 10:37:40 -0700 Subject: [PATCH 080/173] dmaengine: idxd: make submit failure path consistent on desc freeing mainline inclusion from mainline-v5.15 commit 0b030f54f094fcd42f4a607a675c1851129a58c8 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 0b030f54f094 dmaengine: idxd: make submit failure path consistent on desc freeing. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The submission path for dmaengine API does not do descriptor freeing on failure. Also, with the abort mechanism, the freeing of descriptor happens when the abort callback is completed. Therefore free descriptor on all error paths for submission call to make things consistent. Also remove the double free that would happen on abort in idxd_dma_tx_submit() call. 
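For illustration only (not part of the patch): a minimal sketch of the ownership rule this change establishes, shown as a hypothetical caller modeled on idxd_dma_tx_submit(); note that a later patch in this series ("rework descriptor free path on failure") moves the freeing back to the caller.

  /*
   * After this change idxd_submit_desc() consumes the descriptor on
   * every failure: it either frees it directly or hands it to the
   * abort path, whose completion callback frees it. The caller must
   * therefore not free the descriptor again on error.
   */
  static dma_cookie_t demo_tx_submit(struct dma_async_tx_descriptor *tx)
  {
          struct idxd_wq *wq = to_idxd_wq(tx->chan);
          struct idxd_desc *desc = container_of(tx, struct idxd_desc, txd);
          dma_cookie_t cookie = dma_cookie_assign(tx);
          int rc = idxd_submit_desc(wq, desc);

          if (rc < 0)
                  return rc;      /* desc already freed by the submit path */

          return cookie;
  }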
Fixes: 6b4b87f2c31a ("dmaengine: idxd: fix submission race window") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162827146072.3459011.10255348500504659810.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/dma.c | 4 +--- drivers/dma/idxd/submit.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 997b781b796e..832b908c8583 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -170,10 +170,8 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); rc = idxd_submit_desc(wq, desc); - if (rc < 0) { - idxd_free_desc(wq, desc); + if (rc < 0) return rc; - } return cookie; } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 4b514c63af15..de76fb4abac2 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -139,11 +139,15 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) void __iomem *portal; int rc; - if (idxd->state != IDXD_DEV_ENABLED) + if (idxd->state != IDXD_DEV_ENABLED) { + idxd_free_desc(wq, desc); return -EIO; + } - if (!percpu_ref_tryget_live(&wq->wq_active)) + if (!percpu_ref_tryget_live(&wq->wq_active)) { + idxd_free_desc(wq, desc); return -ENXIO; + } portal = idxd_wq_portal_addr(wq); @@ -175,8 +179,11 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) rc = enqcmds(portal, desc->hw); if (rc < 0) { percpu_ref_put(&wq->wq_active); + /* abort operation frees the descriptor */ if (ie) llist_abort_desc(wq, ie, desc); + else + idxd_free_desc(wq, desc); return rc; } } -- Gitee From 1f00f2b12e4cc6e68f5cb8591667a6e9b59d8422 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 6 Aug 2021 10:38:37 -0700 Subject: [PATCH 081/173] dmaengine: idxd: set descriptor allocation size to threshold for swq mainline inclusion from mainline-v5.15 commit 9806eb5c79579e200f88660e043706eb490547e6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 9806eb5c7957 dmaengine: idxd: set descriptor allocation size to threshold for swq. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Since submission is sent to limited portal, the actual wq size for shared wq is set by the threshold rather than the wq size. When the wq type is shared, set the allocated descriptors to the threshold. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162827151733.3459223.3829837172226042408.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 21f0d732b76e..e093cf225a5c 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -141,8 +141,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) if (wq->type != IDXD_WQT_KERNEL) return 0; - wq->num_descs = wq->size; - num_descs = wq->size; + num_descs = wq_dedicated(wq) ? 
wq->size : wq->threshold; + wq->num_descs = num_descs; rc = alloc_hw_descs(wq, num_descs); if (rc < 0) -- Gitee From 8a273a61d121c8d0a75df3e887a3b226599d2238 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 19 Aug 2021 09:34:06 -0700 Subject: [PATCH 082/173] dmaengine: idxd: fix setting up priv mode for dwq mainline inclusion from mainline-v5.15 commit d8071323c5632bdf0a8ef9b9e5662fac43649f9d category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit d8071323c563 dmaengine: idxd: fix setting up priv mode for dwq. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- DSA spec says WQ priv bit is 0 if the Privileged Mode Enable field of the PCI Express PASID capability is 0 and pasid is enabled. Make sure that the WQCFG priv field is set correctly according to usage type. Reject config if setting up kernel WQ type and no support. Also add the correct priv setup for a descriptor. Fixes: 484f910e93b4 ("dmaengine: idxd: fix wq config registers offset programming") Cc: Ramesh Thomas Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162939084657.903168.14160019185148244596.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/Kconfig | 1 + drivers/dma/idxd/device.c | 29 ++++++++++++++++++++++++++++- drivers/dma/idxd/dma.c | 6 +++++- include/uapi/linux/idxd.h | 1 + 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index c735413c649a..9cd7a7d86ca5 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -291,6 +291,7 @@ config INTEL_IDXD tristate "Intel Data Accelerators support" depends on PCI && X86_64 && !UML depends on PCI_MSI + depends on PCI_PASID depends on SBITMAP select DMA_ENGINE help diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index e093cf225a5c..241df74fc047 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -818,6 +818,15 @@ static int idxd_groups_config_write(struct idxd_device *idxd) return 0; } +static bool idxd_device_pasid_priv_enabled(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + + if (pdev->pasid_enabled && (pdev->pasid_features & PCI_PASID_CAP_PRIV)) + return true; + return false; +} + static int idxd_wq_config_write(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; @@ -850,7 +859,6 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->wq_thresh = wq->threshold; /* byte 8-11 */ - wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); if (wq_dedicated(wq)) wq->wqcfg->mode = 1; @@ -860,6 +868,25 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->pasid = idxd->pasid; } + /* + * Here the priv bit is set depending on the WQ type. priv = 1 if the + * WQ type is kernel to indicate privileged access. This setting only + * matters for dedicated WQ. According to the DSA spec: + * If the WQ is in dedicated mode, WQ PASID Enable is 1, and the + * Privileged Mode Enable field of the PCI Express PASID capability + * is 0, this field must be 0. + * + * In the case of a dedicated kernel WQ that is not able to support + * the PASID cap, then the configuration will be rejected. 
+ */ + wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); + if (wq_dedicated(wq) && wq->wqcfg->pasid_en && + !idxd_device_pasid_priv_enabled(idxd) && + wq->type == IDXD_WQT_KERNEL) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_PRIV; + return -EOPNOTSUPP; + } + wq->wqcfg->priority = wq->priority; if (idxd->hw.gen_cap.block_on_fault && diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 832b908c8583..ce4df9f297d1 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -69,7 +69,11 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq, hw->src_addr = addr_f1; hw->dst_addr = addr_f2; hw->xfer_size = len; - hw->priv = !!(wq->type == IDXD_WQT_KERNEL); + /* + * For dedicated WQ, this field is ignored and HW will use the WQCFG.priv + * field instead. This field should be set to 1 for kernel descriptors. + */ + hw->priv = 1; hw->completion_addr = compl; } diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index ca24c25252fb..c750eac09fc9 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -27,6 +27,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000, IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, + IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 -- Gitee From ce409a13181e77cf46d31ab8310079de84f761f5 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 24 Aug 2021 14:24:39 -0700 Subject: [PATCH 083/173] dmaengine: idxd: remove interrupt disable for cmd_lock mainline inclusion from mainline-v5.15 commit f9f4082dbc56c40093bcb5c1f62c04a916eca9a2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit f9f4082dbc56 dmaengine: idxd: remove interrupt disable for cmd_lock. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The cmd_lock spinlock is not being used in hard interrupt context. There is no need to disable irq when acquiring the lock. Convert all cmd_lock acquisition to plain spin_lock() calls. 
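For background, a generic sketch (demo code, not idxd's) of the locking rule this patch relies on: a spinlock that is never taken from hard interrupt context does not need the irqsave variant.

  #include <linux/spinlock.h>

  static DEFINE_SPINLOCK(demo_lock);

  /*
   * demo_lock is only taken from process context (including threaded
   * irq handlers), never from hardirq context, so plain spin_lock()
   * suffices: no interrupt can preempt the holder on this CPU and
   * deadlock trying to acquire the same lock.
   */
  static void demo_update(int *shared, int val)
  {
          spin_lock(&demo_lock);
          *shared = val;
          spin_unlock(&demo_lock);
  }

  /* Only if a hardirq path also took the lock would callers need: */
  static void demo_update_hardirq_safe(int *shared, int val)
  {
          unsigned long flags;

          spin_lock_irqsave(&demo_lock, flags);
          *shared = val;
          spin_unlock_irqrestore(&demo_lock, flags);
  }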
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162984027930.1939209.15758413737332339204.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 241df74fc047..4f6516d7555f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -462,7 +462,6 @@ int idxd_device_init_reset(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; union idxd_command_reg cmd; - unsigned long flags; if (idxd_device_is_halted(idxd)) { dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); @@ -472,13 +471,13 @@ int idxd_device_init_reset(struct idxd_device *idxd) memset(&cmd, 0, sizeof(cmd)); cmd.cmd = IDXD_CMD_RESET_DEVICE; dev_dbg(dev, "%s: sending reset for init.\n", __func__); - spin_lock_irqsave(&idxd->cmd_lock, flags); + spin_lock(&idxd->cmd_lock); iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE) cpu_relax(); - spin_unlock_irqrestore(&idxd->cmd_lock, flags); + spin_unlock(&idxd->cmd_lock); return 0; } @@ -487,7 +486,6 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, { union idxd_command_reg cmd; DECLARE_COMPLETION_ONSTACK(done); - unsigned long flags; u32 stat; if (idxd_device_is_halted(idxd)) { @@ -502,7 +500,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, cmd.operand = operand; cmd.int_req = 1; - spin_lock_irqsave(&idxd->cmd_lock, flags); + spin_lock(&idxd->cmd_lock); wait_event_lock_irq(idxd->cmd_waitq, !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags), idxd->cmd_lock); @@ -519,10 +517,10 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, * After command submitted, release lock and go to sleep until * the command completes via interrupt. 
*/ - spin_unlock_irqrestore(&idxd->cmd_lock, flags); + spin_unlock(&idxd->cmd_lock); wait_for_completion(&done); stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); - spin_lock_irqsave(&idxd->cmd_lock, flags); + spin_lock(&idxd->cmd_lock); if (status) *status = stat; idxd->cmd_status = stat & GENMASK(7, 0); @@ -530,7 +528,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, __clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags); /* Wake up other pending commands */ wake_up(&idxd->cmd_waitq); - spin_unlock_irqrestore(&idxd->cmd_lock, flags); + spin_unlock(&idxd->cmd_lock); } int idxd_device_enable(struct idxd_device *idxd) @@ -641,7 +639,6 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, struct device *dev = &idxd->pdev->dev; u32 operand, status; union idxd_command_reg cmd; - unsigned long flags; if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))) return -EOPNOTSUPP; @@ -659,13 +656,13 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle, dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand); - spin_lock_irqsave(&idxd->cmd_lock, flags); + spin_lock(&idxd->cmd_lock); iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET); while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE) cpu_relax(); status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); - spin_unlock_irqrestore(&idxd->cmd_lock, flags); + spin_unlock(&idxd->cmd_lock); if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) { dev_dbg(dev, "release int handle failed: %#x\n", status); -- Gitee From bf1ba642d69ec48143bba69de56108a264502957 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 24 Aug 2021 14:24:27 -0700 Subject: [PATCH 084/173] dmaengine: idxd: remove interrupt disable for dev_lock mainline inclusion from mainline-v5.15 commit cf84a4b968f38383534bcd0484385c9254828b2c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit cf84a4b968f3 dmaengine: idxd: remove interrupt disable for dev_lock. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The spinlock is not being used in hard interrupt context. There is no need to disable irq when acquiring the lock. The interrupt thread handler also is not in bottom half context, therefore we can also remove disabling of the bh. Convert all dev_lock acquisition to plain spin_lock() calls. 
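A hedged sketch (hypothetical demo_* names) of why the interrupt thread handler may use plain spin_lock(): with request_threaded_irq() the thread function runs in process context, not in hardirq or softirq context.

  #include <linux/interrupt.h>

  static irqreturn_t demo_hard_handler(int irq, void *data)
  {
          /* hardirq context: do the minimum and defer to the thread */
          return IRQ_WAKE_THREAD;
  }

  static irqreturn_t demo_thread_handler(int irq, void *data)
  {
          /*
           * Runs in a dedicated kernel thread, so it may sleep and may
           * take locks with plain spin_lock() as long as no hardirq or
           * softirq path contends for the same lock.
           */
          return IRQ_HANDLED;
  }

  static int demo_request(unsigned int irq, void *data)
  {
          return request_threaded_irq(irq, demo_hard_handler,
                                      demo_thread_handler, 0, "demo", data);
  }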
Reviewed-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162984026772.1939166.11504067782824765879.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 5 ++--- drivers/dma/idxd/device.c | 31 ++++++++++++------------------- drivers/dma/idxd/irq.c | 8 ++++---- drivers/dma/idxd/sysfs.c | 10 ++++------ 4 files changed, 22 insertions(+), 32 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index a16e76921a9a..033df43db0ce 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -218,14 +218,13 @@ static __poll_t idxd_cdev_poll(struct file *filp, struct idxd_user_context *ctx = filp->private_data; struct idxd_wq *wq = ctx->wq; struct idxd_device *idxd = wq->idxd; - unsigned long flags; __poll_t out = 0; poll_wait(filp, &wq->err_queue, wait); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); if (idxd->sw_err.valid) out = EPOLLIN | EPOLLRDNORM; - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); return out; } diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 4f6516d7555f..83a5ff2ecf2a 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -341,19 +341,18 @@ int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) int rc; union wqcfg wqcfg; unsigned int offset; - unsigned long flags; rc = idxd_wq_disable(wq, false); if (rc < 0) return rc; offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); wqcfg.pasid_en = 1; wqcfg.pasid = pasid; iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); rc = idxd_wq_enable(wq); if (rc < 0) @@ -368,19 +367,18 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq) int rc; union wqcfg wqcfg; unsigned int offset; - unsigned long flags; rc = idxd_wq_disable(wq, false); if (rc < 0) return rc; offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); wqcfg.pasid_en = 0; wqcfg.pasid = 0; iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); rc = idxd_wq_enable(wq); if (rc < 0) @@ -558,7 +556,6 @@ int idxd_device_disable(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; u32 status; - unsigned long flags; if (!idxd_is_enabled(idxd)) { dev_dbg(dev, "Device is not enabled\n"); @@ -574,22 +571,20 @@ int idxd_device_disable(struct idxd_device *idxd) return -ENXIO; } - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); idxd->state = IDXD_DEV_DISABLED; - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); return 0; } void idxd_device_reset(struct idxd_device *idxd) { - unsigned long flags; - idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL); - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); idxd->state = IDXD_DEV_DISABLED; - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); } void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid) @@ -1164,7 +1159,6 @@ int __drv_enable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = 
wq->idxd; struct device *dev = &idxd->pdev->dev; - unsigned long flags; int rc = -ENXIO; lockdep_assert_held(&wq->wq_lock); @@ -1216,10 +1210,10 @@ int __drv_enable_wq(struct idxd_wq *wq) } rc = 0; - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) rc = idxd_device_config(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); if (rc < 0) { dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc); goto err; @@ -1288,7 +1282,6 @@ void drv_disable_wq(struct idxd_wq *wq) int idxd_device_drv_probe(struct idxd_dev *idxd_dev) { struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); - unsigned long flags; int rc = 0; /* @@ -1302,10 +1295,10 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) } /* Device configuration */ - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) rc = idxd_device_config(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); if (rc < 0) return -ENXIO; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index d221c2e37460..ca88fa7a328e 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -64,7 +64,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) bool err = false; if (cause & IDXD_INTC_ERR) { - spin_lock_bh(&idxd->dev_lock); + spin_lock(&idxd->dev_lock); for (i = 0; i < 4; i++) idxd->sw_err.bits[i] = ioread64(idxd->reg_base + IDXD_SWERR_OFFSET + i * sizeof(u64)); @@ -89,7 +89,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) } } - spin_unlock_bh(&idxd->dev_lock); + spin_unlock(&idxd->dev_lock); val |= IDXD_INTC_ERR; for (i = 0; i < 4; i++) @@ -133,7 +133,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) INIT_WORK(&idxd->work, idxd_device_reinit); queue_work(idxd->wq, &idxd->work); } else { - spin_lock_bh(&idxd->dev_lock); + spin_lock(&idxd->dev_lock); idxd_wqs_quiesce(idxd); idxd_wqs_unmap_portal(idxd); idxd_device_clear_state(idxd); @@ -141,7 +141,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) "idxd halted, need %s.\n", gensts.reset_type == IDXD_DEVICE_RESET_FLR ? 
"FLR" : "system reset"); - spin_unlock_bh(&idxd->dev_lock); + spin_unlock(&idxd->dev_lock); return -ENXIO; } } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index e17a495ad3c9..84392258b4c1 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1105,16 +1105,15 @@ static ssize_t clients_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_device *idxd = confdev_to_idxd(dev); - unsigned long flags; int count = 0, i; - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); for (i = 0; i < idxd->max_wqs; i++) { struct idxd_wq *wq = idxd->wqs[i]; count += wq->client_count; } - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); return sysfs_emit(buf, "%d\n", count); } @@ -1152,12 +1151,11 @@ static ssize_t errors_show(struct device *dev, { struct idxd_device *idxd = confdev_to_idxd(dev); int i, out = 0; - unsigned long flags; - spin_lock_irqsave(&idxd->dev_lock, flags); + spin_lock(&idxd->dev_lock); for (i = 0; i < 4; i++) out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]); - spin_unlock_irqrestore(&idxd->dev_lock, flags); + spin_unlock(&idxd->dev_lock); out--; out += sysfs_emit_at(buf, out, "\n"); return out; -- Gitee From 7af052a4be88c5956c1f29945ede557facc182bd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 29 Sep 2021 12:15:38 -0700 Subject: [PATCH 085/173] dmaengine: idxd: move out percpu_ref_exit() to ensure it's outside submission mainline inclusion from mainline-v5.16 commit 85f604af9c83a4656b1d07bec73298c3ba7d7c1e category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 85f604af9c83 dmaengine: idxd: move out percpu_ref_exit() to ensure it's outside submission. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- percpu_ref_tryget_live() is safe to call as long as ref is between init and exit according to the function comment. Move percpu_ref_exit() so it is called after the dma channel is no longer valid to ensure this holds true. 
Fixes: 93a40a6d7428 ("dmaengine: idxd: add percpu_ref to descriptor submission path") Suggested-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163294293832.914350.10326422026738506152.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 1 - drivers/dma/idxd/dma.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 83a5ff2ecf2a..cbbfa17d8d11 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -427,7 +427,6 @@ void idxd_wq_quiesce(struct idxd_wq *wq) { percpu_ref_kill(&wq->wq_active); wait_for_completion(&wq->wq_dead); - percpu_ref_exit(&wq->wq_active); } /* Device control bits */ diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index ce4df9f297d1..ef855a268656 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -334,6 +334,7 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) err_dma: idxd_wq_quiesce(wq); + percpu_ref_exit(&wq->wq_active); err_ref: idxd_wq_free_resources(wq); err_res_alloc: @@ -352,6 +353,7 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) idxd_wq_quiesce(wq); idxd_unregister_dma_channel(wq); __drv_disable_wq(wq); + percpu_ref_exit(&wq->wq_active); idxd_wq_free_resources(wq); wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); -- Gitee From 8ac18dbad884b4441aa902934d52bfbdeb3f7efa Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 12 Oct 2021 11:01:59 -0700 Subject: [PATCH 086/173] dmaengine: idxd: check GENCAP config support for gencfg register mainline inclusion from mainline-v5.16 commit 79c4c3db7d86b9bec94562275efc82e58f3d0132 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 79c4c3db7d86 dmaengine: idxd: check GENCAP config support for gencfg register. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- DSA spec 1.2 has moved the GENCFG register under the GENCAP configuration support with respect to writability. Add check in driver before writing to GENCFG register. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163406171896.1303830.11217958011385656998.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index cbbfa17d8d11..27612329f510 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -791,7 +791,7 @@ static int idxd_groups_config_write(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; /* Setup bandwidth token limit */ - if (idxd->token_limit) { + if (idxd->hw.gen_cap.config_en && idxd->token_limit) { reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); reg.token_limit = idxd->token_limit; iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); -- Gitee From 73753d80accdf3fb498083ab231582440a437a09 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 12 Oct 2021 11:01:19 -0700 Subject: [PATCH 087/173] dmaengine: idxd: remove gen cap field per spec 1.2 update mainline inclusion from mainline-v5.16 commit c5b64b6826e020041ef4f68a281dee2f33c827d6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit c5b64b6826e0 dmaengine: idxd: remove gen cap field per spec 1.2 update. 
Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Remove max_descs_per_engine field. The recently released DSA spec 1.2 [1] has removed this field and made it reserved. [1]: https://software.intel.com/content/dam/develop/external/us/en/documents-tps/341204-intel-data-streaming-accelerator-spec.pdf Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163406167978.1303649.1798682437841822837.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/registers.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index ffc7550a77ee..eeb11e6eb25b 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -36,8 +36,7 @@ union gen_cap_reg { u64 max_batch_shift:4; u64 max_ims_mult:6; u64 config_en:1; - u64 max_descs_per_engine:8; - u64 rsvd3:24; + u64 rsvd3:32; }; u64 bits; } __packed; -- Gitee From 308d7b1516863bdafb1fb1e36e515975d6d769b4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 20 Oct 2021 09:27:25 -0700 Subject: [PATCH 088/173] dmaengine: idxd: remove kernel wq type set when load configuration mainline inclusion from mainline-v5.16 commit 15af840831f69baa9efb0d50007459d2015397a5 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 15af840831f6 dmaengine: idxd: remove kernel wq type set when load configuration. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Remove setting of wq type on guest kernel during configuration load on RO device config. The user will set the kernel wq type and this setting based on config is not necessary. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163474724511.2607444.1876715711451990426.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 27612329f510..ef3281fc3f8b 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1050,8 +1050,6 @@ static int idxd_wq_load_config(struct idxd_wq *wq) wq->size = wq->wqcfg->wq_size; wq->threshold = wq->wqcfg->wq_thresh; - if (wq->wqcfg->priv) - wq->type = IDXD_WQT_KERNEL; /* The driver does not support shared WQ mode in read-only config yet */ if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en) -- Gitee From 642efc92ba8c295ebec7cc140d79dc0dd3f2772c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 21 Sep 2021 13:15:58 -0700 Subject: [PATCH 089/173] dmanegine: idxd: fix resource free ordering on driver removal mainline inclusion from mainline-v5.16 commit 98da0106aac0d3c5d4a3c95d238f1ff88957bbfc category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 98da0106aac0 dmanegine: idxd: fix resource free ordering on driver removal. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Fault triggers on ioread32() when pci driver unbind is invoked. The placement of idxd sub-driver removal causes the probing of the device mmio region after the mmio mapping has been torn down. The driver needs the sub-drivers to be unbound but not release the idxd context until all shutdown activities have been done. Move the sub-driver unregistering up before the remove() calls shutdown().
But take a device ref on the idxd->conf_dev so that the memory does not get freed in ->release(). When all cleanup activities has been done, release the ref to allow the idxd memory to be freed. [57159.542766] RIP: 0010:ioread32+0x27/0x60 [57159.547097] Code: 00 66 90 48 81 ff ff ff 03 00 77 1e 48 81 ff 00 00 01 00 76 05 0f b7 d7 ed c3 8b 15 03 50 41 01 b8 ff ff ff ff 85 d2 75 04 c3 <8b> 07 c3 55 83 ea 01 48 89 fe 48 c7 c7 00 70 5f 82 48 89 e5 48 83 [57159.566647] RSP: 0018:ffffc900011abb60 EFLAGS: 00010292 [57159.572295] RAX: ffffc900011e0000 RBX: ffff888107d39800 RCX: 0000000000000000 [57159.579842] RDX: 0000000000000000 RSI: ffffffff82b1e448 RDI: ffffc900011e0090 [57159.587421] RBP: ffffc900011abb88 R08: 0000000000000000 R09: 0000000000000001 [57159.594972] R10: 0000000000000001 R11: 0000000000000000 R12: ffff8881019840d0 [57159.602533] R13: ffff8881097e9000 R14: ffffffffa08542a0 R15: 00000000000003a8 [57159.610093] FS: 00007f991e0a8740(0000) GS:ffff888459900000(0000) knlGS:00000000000 00000 [57159.618614] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [57159.624814] CR2: ffffc900011e0090 CR3: 000000010862a002 CR4: 00000000003706e0 [57159.632397] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [57159.639973] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [57159.647601] Call Trace: [57159.650502] ? idxd_device_disable+0x41/0x110 [idxd] [57159.655948] idxd_device_drv_remove+0x2b/0x80 [idxd] [57159.661374] idxd_config_bus_remove+0x16/0x20 [57159.666191] __device_release_driver+0x163/0x240 [57159.671320] device_release_driver+0x2b/0x40 [57159.676052] bus_remove_device+0xf5/0x160 [57159.680524] device_del+0x19c/0x400 [57159.684440] device_unregister+0x18/0x60 [57159.688792] idxd_remove+0x140/0x1c0 [idxd] [57159.693406] pci_device_remove+0x3e/0xb0 [57159.697758] __device_release_driver+0x163/0x240 [57159.702788] device_driver_detach+0x43/0xb0 [57159.707424] unbind_store+0x11e/0x130 [57159.711537] drv_attr_store+0x24/0x30 [57159.715646] sysfs_kf_write+0x4b/0x60 [57159.719710] kernfs_fop_write_iter+0x153/0x1e0 [57159.724563] new_sync_write+0x120/0x1b0 [57159.728812] vfs_write+0x23e/0x350 [57159.732624] ksys_write+0x70/0xf0 [57159.736335] __x64_sys_write+0x1a/0x20 [57159.740492] do_syscall_64+0x3b/0x90 [57159.744465] entry_SYSCALL_64_after_hwframe+0x44/0xae [57159.749908] RIP: 0033:0x7f991e19c387 [57159.753898] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 [57159.773564] RSP: 002b:00007ffc2ce2d6a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [57159.781550] RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007f991e19c387 [57159.789133] RDX: 000000000000000c RSI: 000055ee2630e140 RDI: 0000000000000001 [57159.796695] RBP: 000055ee2630e140 R08: 0000000000000000 R09: 00007f991e2324e0 [57159.804246] R10: 00007f991e2323e0 R11: 0000000000000246 R12: 000000000000000c [57159.811800] R13: 00007f991e26f520 R14: 000000000000000c R15: 00007f991e26f700 [57159.819373] Modules linked in: idxd bridge stp llc bnep sunrpc nls_iso8859_1 intel_ rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_code c_realtek iTCO_wdt 8250_dw snd_hda_codec_generic kvm_intel ledtrig_audio iTCO_vendor_s upport snd_hda_intel snd_intel_dspcfg ppdev kvm snd_hda_codec intel_wmi_thunderbolt sn d_hwdep irqbypass iwlwifi btusb snd_hda_core rapl btrtl intel_cstate snd_seq btbcm snd _seq_device btintel snd_pcm cfg80211 
bluetooth pcspkr psmouse input_leds snd_timer int el_lpss_pci mei_me intel_lpss snd ecdh_generic ecc mei ucsi_acpi i2c_i801 idma64 i2c_s mbus virt_dma soundcore typec_ucsi typec wmi parport_pc parport video mac_hid acpi_pad sch_fq_codel drm ip_tables x_tables crct10dif_pclmul crc32_pclmul ghash_clmulni_intel usbkbd hid_generic usbmouse aesni_intel usbhid crypto_simd cryptd e1000e hid serio_ra w ahci libahci pinctrl_sunrisepoint fuse msr autofs4 [last unloaded: idxd] [57159.904082] CR2: ffffc900011e0090 [57159.907877] ---[ end trace b4e32f49ce9176a4 ]--- Fixes: 49c4959f04b5 ("dmaengine: idxd: fix sequence for pci driver remove() and shutdown()") Reported-by: Ziye Yang Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163225535868.4152687.9318737776682088722.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index eb09bc591c31..7bf03f371ce1 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -797,11 +797,19 @@ static void idxd_remove(struct pci_dev *pdev) int msixcnt = pci_msix_vec_count(pdev); int i; - dev_dbg(&pdev->dev, "%s called\n", __func__); + idxd_unregister_devices(idxd); + /* + * When ->release() is called for the idxd->conf_dev, it frees all the memory related + * to the idxd context. The driver still needs those bits in order to do the rest of + * the cleanup. However, we do need to unbound the idxd sub-driver. So take a ref + * on the device here to hold off the freeing while allowing the idxd sub-driver + * to unbind. + */ + get_device(idxd_confdev(idxd)); + device_unregister(idxd_confdev(idxd)); idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); - idxd_unregister_devices(idxd); for (i = 0; i < msixcnt; i++) { irq_entry = &idxd->irq_entries[i]; @@ -815,7 +823,7 @@ static void idxd_remove(struct pci_dev *pdev) pci_disable_device(pdev); destroy_workqueue(idxd->wq); perfmon_pmu_remove(idxd); - device_unregister(idxd_confdev(idxd)); + put_device(idxd_confdev(idxd)); } static struct pci_driver idxd_pci_driver = { -- Gitee From 6ab94c50203789d1159e58e4bab9c40171893a8e Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Wed, 8 Sep 2021 17:28:26 +0800 Subject: [PATCH 090/173] dmaengine: idxd: Use list_move_tail instead of list_del/list_add_tail mainline inclusion from mainline-v5.16 commit ee5c6f0ca219b65f5085043d481d9b6f045693d5 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit ee5c6f0ca219 dmaengine: idxd: Use list_move_tail instead of list_del/list_add_tail. Incremental backporting patches for DSA/IAA on Intel Xeon platform. 
-------------------------------- Using list_move_tail() instead of list_del() + list_add_tail() Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20210908092826.67765-1-cuibixuan@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index ca88fa7a328e..79fcfc4883e4 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -221,8 +221,7 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry) list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) { if (desc->completion->status) { - list_del(&desc->list); - list_add_tail(&desc->list, &flist); + list_move_tail(&desc->list, &flist); } } -- Gitee From 06b2ee9bc6c2730c545c8914ae31045151e2242f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 8 Sep 2021 16:04:03 -0700 Subject: [PATCH 091/173] dmaengine: idxd: add halt interrupt support mainline inclusion from mainline-v5.16 commit 88d97ea82cbe352851a8654ee952d3a694c8c2c6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 88d97ea82cbe dmaengine: idxd: add halt interrupt support. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add halt interrupt support. Given that the misc interrupt handler already checks halt state, the driver just needs to run the halt handling code when receiving the halt interrupt. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163114224352.846654.14334468363464318828.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 5 +++++ drivers/dma/idxd/registers.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 79fcfc4883e4..17f2f8a31b63 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -63,6 +63,9 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) int i; bool err = false; + if (cause & IDXD_INTC_HALT_STATE) + goto halt; + if (cause & IDXD_INTC_ERR) { spin_lock(&idxd->dev_lock); for (i = 0; i < 4; i++) @@ -121,6 +124,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) if (!err) return 0; +halt: gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); if (gensts.state == IDXD_DEVICE_STATE_HALT) { idxd->state = IDXD_DEV_HALTED; @@ -134,6 +138,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) queue_work(idxd->wq, &idxd->work); } else { spin_lock(&idxd->dev_lock); + idxd->state = IDXD_DEV_HALTED; idxd_wqs_quiesce(idxd); idxd_wqs_unmap_portal(idxd); idxd_device_clear_state(idxd); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index eeb11e6eb25b..262c8220adbd 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -157,6 +157,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_CMD 0x02 #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 +#define IDXD_INTC_HALT_STATE 0x10 #define IDXD_CMD_OFFSET 0xa0 union idxd_command_reg { -- Gitee From 4c59a6ce1951130cef75ff31584304fa772dd4ac Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 1 Sep 2021 17:18:05 -0700 Subject: [PATCH 092/173] dmaengine: idxd: reconfig device after device reset command mainline inclusion from mainline-v5.16 commit e530a9f3db4188d1f4e3704b0948ef69c04d5ca6 category: bugfix bugzilla:
https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit e530a9f3db41 dmaengine: idxd: reconfig device after device reset command. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Device reset clears the MSIXPERM table and the device registers. Re-program the MSIXPERM table and re-enable the error interrupts post reset. Fixes: 745e92a6d816 ("dmaengine: idxd: idxd: move remove() bits for idxd 'struct device' to device.c") Reported-by: Sanjay Kumar Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163054188513.2853562.12077053294595278181.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index ef3281fc3f8b..b1407465d5c4 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -583,6 +583,8 @@ void idxd_device_reset(struct idxd_device *idxd) spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); idxd->state = IDXD_DEV_DISABLED; + idxd_unmask_error_interrupts(idxd); + idxd_msix_perm_setup(idxd); spin_unlock(&idxd->dev_lock); } -- Gitee From 4aeb0ac3c75cf861606114af889105bc37ca1c1f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 25 Oct 2021 07:59:49 -0700 Subject: [PATCH 093/173] dmaengine: idxd: cleanup completion record allocation mainline inclusion from mainline-v5.16 commit 2efe58cfaad4ad94eab888d287c174de5209a0c2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 2efe58cfaad4 dmaengine: idxd: cleanup completion record allocation. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- According to core-api/dma-api-howto.rst, the address from dma_alloc_coherent is guaranteed to align to the smallest PAGE_SIZE order. That supersedes the 64B/32B alignment requirement of the completion record. Remove alignment adjustment code.
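A short illustration (sketch, hypothetical demo_* helper) of the idiom being removed versus what the DMA API already guarantees:

  #include <linux/dma-mapping.h>

  /*
   * The old pattern overallocated and rounded up by hand:
   *
   *   raw  = dma_alloc_coherent(dev, size + align, &raw_dma, GFP_KERNEL);
   *   dma  = ALIGN(raw_dma, align);
   *   virt = PTR_ALIGN(raw, align);
   *
   * Because dma_alloc_coherent() already returns memory with at least
   * page alignment, a 32/64-byte completion record requirement is
   * always satisfied and the allocation can be used directly:
   */
  static void *demo_alloc_compls(struct device *dev, size_t size,
                                 dma_addr_t *dma)
  {
          return dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
  }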
Tested-by: Jacob Pan Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163517396063.3484297.7494385225280705372.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 22 +++++----------------- drivers/dma/idxd/idxd.h | 2 -- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index b1407465d5c4..fab412349f7f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -135,8 +135,6 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; int rc, num_descs, i; - int align; - u64 tmp; if (wq->type != IDXD_WQT_KERNEL) return 0; @@ -148,21 +146,13 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) if (rc < 0) return rc; - align = idxd->data->align; - wq->compls_size = num_descs * idxd->data->compl_size + align; - wq->compls_raw = dma_alloc_coherent(dev, wq->compls_size, - &wq->compls_addr_raw, GFP_KERNEL); - if (!wq->compls_raw) { + wq->compls_size = num_descs * idxd->data->compl_size; + wq->compls = dma_alloc_coherent(dev, wq->compls_size, &wq->compls_addr, GFP_KERNEL); + if (!wq->compls) { rc = -ENOMEM; goto fail_alloc_compls; } - /* Adjust alignment */ - wq->compls_addr = (wq->compls_addr_raw + (align - 1)) & ~(align - 1); - tmp = (u64)wq->compls_raw; - tmp = (tmp + (align - 1)) & ~(align - 1); - wq->compls = (struct dsa_completion_record *)tmp; - rc = alloc_descs(wq, num_descs); if (rc < 0) goto fail_alloc_descs; @@ -191,8 +181,7 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq) fail_sbitmap_init: free_descs(wq); fail_alloc_descs: - dma_free_coherent(dev, wq->compls_size, wq->compls_raw, - wq->compls_addr_raw); + dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); fail_alloc_compls: free_hw_descs(wq); return rc; @@ -207,8 +196,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq) free_hw_descs(wq); free_descs(wq); - dma_free_coherent(dev, wq->compls_size, wq->compls_raw, - wq->compls_addr_raw); + dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); sbitmap_queue_free(&wq->sbq); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index bfcb03329f77..0cf8d3145870 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -187,9 +187,7 @@ struct idxd_wq { struct dsa_completion_record *compls; struct iax_completion_record *iax_compls; }; - void *compls_raw; dma_addr_t compls_addr; - dma_addr_t compls_addr_raw; int compls_size; struct idxd_desc **descs; struct sbitmap_queue sbq; -- Gitee From 5132fee78dc2cb5f73d1b3b23bca9b23995b42f8 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 25 Oct 2021 08:01:04 -0700 Subject: [PATCH 094/173] dmaengine: idxd: fix resource leak on dmaengine driver disable mainline inclusion from mainline-v5.16 commit a3e340c1574b6679f5b333221284d0959095da52 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit a3e340c1574b dmaengine: idxd: fix resource leak on dmaengine driver disable. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The wq resources need to be released before the kernel type is reset by __drv_disable_wq(). With dma channels unregistered and wq quiesced, all the wq resources for dmaengine can be freed. There is no need to wait until wq is disabled. With the wq->type being reset to "unknown", the driver is skipping the freeing of the resources.
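A sketch of the corrected teardown order, mirroring the diff below; the point is that idxd_wq_free_resources() checks wq->type and becomes a no-op once __drv_disable_wq() has reset the type:

  static void demo_dmaengine_drv_remove(struct idxd_wq *wq)
  {
          mutex_lock(&wq->wq_lock);
          idxd_wq_quiesce(wq);
          idxd_unregister_dma_channel(wq);
          idxd_wq_free_resources(wq);     /* while wq->type is still kernel */
          __drv_disable_wq(wq);           /* resets wq->type */
          percpu_ref_exit(&wq->wq_active);
          mutex_unlock(&wq->wq_lock);
  }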
Fixes: 0cda4f6986a3 ("dmaengine: idxd: create dmaengine driver for wq 'device'") Reported-by: Jacob Pan Tested-by: Jacob Pan Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163517405099.3484556.12521975053711345244.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/dma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index ef855a268656..29af898f3c24 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -352,10 +352,9 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) mutex_lock(&wq->wq_lock); idxd_wq_quiesce(wq); idxd_unregister_dma_channel(wq); + idxd_wq_free_resources(wq); __drv_disable_wq(wq); percpu_ref_exit(&wq->wq_active); - idxd_wq_free_resources(wq); - wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); } -- Gitee From 0eb8ca11847e238e289cf27813e401393be404cb Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 17 Nov 2021 10:03:51 -0700 Subject: [PATCH 095/173] dmaengine: idxd: fix calling wq quiesce inside spinlock mainline inclusion from mainline-v5.16 commit fa51b16d05583c7aebbc06330afb50276243d198 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit fa51b16d0558 dmaengine: idxd: fix calling wq quiesce inside spinlock. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Dan reports that smatch has found idxd_wq_quiesce() is being called inside the idxd->dev_lock. idxd_wq_quiesce() calls wait_for_completion() and therefore it can sleep. Move the call outside of the spinlock as it does not need device lock. Fixes: 5b0c68c473a1 ("dmaengine: idxd: support reporting of halt interrupt") Reported-by: Dan Carpenter Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163716858508.1721911.15051495873516709923.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 17f2f8a31b63..cf2c8bc4f147 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -137,10 +137,10 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) INIT_WORK(&idxd->work, idxd_device_reinit); queue_work(idxd->wq, &idxd->work); } else { - spin_lock(&idxd->dev_lock); idxd->state = IDXD_DEV_HALTED; idxd_wqs_quiesce(idxd); idxd_wqs_unmap_portal(idxd); + spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); dev_err(&idxd->pdev->dev, "idxd halted, need %s.\n", -- Gitee From b757600dc22e5f65fa41a75970cc9f25d4e0e5d8 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 8 Dec 2021 10:01:27 -0700 Subject: [PATCH 096/173] dmaengine: idxd: fix missed completion on abort path mainline inclusion from mainline-v5.16 commit 8affd8a4b5ce356c8900cfb037674f3a4a11fbdb category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 8affd8a4b5ce dmaengine: idxd: fix missed completion on abort path. Incremental backporting patches for DSA/IAA on Intel Xeon platform. 
-------------------------------- Ming reported that with the abort path of the descriptor submission, there can be a window where a completed descriptor is missed by the irq completion thread:

  CPU A                                 CPU B
  Submit (successful)
                                        Submit (fail)
  irq_process_work_list() // empty
                                        llist_abort_desc()
                                        // remove all descs from pending list
  irq_process_pending_llist() // empty
  exit idxd_wq_thread() with no processing

Add opportunistic descriptor completion in the abort path in order to remove the missed completion. Fixes: 6b4b87f2c31a ("dmaengine: idxd: fix submission race window") Reported-by: Ming Li Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163898288714.443911.16084982766671976640.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/submit.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index de76fb4abac2..83452fbbb168 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -106,6 +106,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, { struct idxd_desc *d, *t, *found = NULL; struct llist_node *head; + LIST_HEAD(flist); desc->completion->status = IDXD_COMP_DESC_ABORT; /* @@ -120,7 +121,11 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, found = desc; continue; } - list_add_tail(&desc->list, &ie->work_list); + + if (d->completion->status) + list_add_tail(&d->list, &flist); + else + list_add_tail(&d->list, &ie->work_list); } } @@ -130,6 +135,17 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, if (found) complete_desc(found, IDXD_COMPLETE_ABORT); + + /* + * complete_desc() will return desc to allocator and the desc can be + * acquired by a different process and the desc->list can be modified. + * Delete desc from list so the list trasversing does not get corrupted + * by the other process. + */ + list_for_each_entry_safe(d, t, &flist, list) { + list_del_init(&d->list); + complete_desc(d, IDXD_COMPLETE_NORMAL); + } } int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) -- Gitee From 66d44e2b879c9e76c412d1b3100355d181a7f98f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:02 -0700 Subject: [PATCH 097/173] dmaengine: idxd: rework descriptor free path on failure mainline inclusion from mainline-v5.17 commit 5d78abb6fbc974d601dd365b9ce39f320fb5ba79 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 5d78abb6fbc9 dmaengine: idxd: rework descriptor free path on failure. Incremental backporting patches for DSA/IAA on Intel Xeon platform. Deviation from upstream: Merge commit 5cb664fbeba0 Merge branch 'fixes' into next. -------------------------------- Refactor the completion function to allow skipping of descriptor freeing on the submission failure path. This completely removes descriptor freeing from the submit failure path and leaves the responsibility to the caller.
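Reduced to a sketch (signature taken from the diff below, body elided), the shape of the refactor:

  /*
   * Completion now takes an explicit free_desc flag, so an abort path
   * can run the dmaengine callback without also returning the
   * descriptor to the allocator, leaving that decision to the caller.
   */
  void demo_complete_txd(struct idxd_desc *desc,
                         enum idxd_complete_type comp_type,
                         bool free_desc)
  {
          /* ... invoke the tx callback and report the dma result ... */

          if (free_desc)
                  idxd_free_desc(desc->wq, desc);
  }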
Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528416222.3925689.12859769271667814762.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/dma.c | 10 ++++++++-- drivers/dma/idxd/idxd.h | 8 +------- drivers/dma/idxd/init.c | 9 +++------ drivers/dma/idxd/irq.c | 8 ++++---- drivers/dma/idxd/submit.c | 22 ++++++++-------------- 5 files changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 29af898f3c24..427ba9f78aad 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -21,7 +21,8 @@ static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c) } void idxd_dma_complete_txd(struct idxd_desc *desc, - enum idxd_complete_type comp_type) + enum idxd_complete_type comp_type, + bool free_desc) { struct dma_async_tx_descriptor *tx; struct dmaengine_result res; @@ -44,6 +45,9 @@ void idxd_dma_complete_txd(struct idxd_desc *desc, tx->callback = NULL; tx->callback_result = NULL; } + + if (free_desc) + idxd_free_desc(desc->wq, desc); } static void op_flag_setup(unsigned long flags, u32 *desc_flags) @@ -174,8 +178,10 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); rc = idxd_submit_desc(wq, desc); - if (rc < 0) + if (rc < 0) { + idxd_free_desc(wq, desc); return rc; + } return cookie; } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 0cf8d3145870..3d600f8ee90b 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -579,7 +579,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq); void idxd_unregister_dma_channel(struct idxd_wq *wq); void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res); void idxd_dma_complete_txd(struct idxd_desc *desc, - enum idxd_complete_type comp_type); + enum idxd_complete_type comp_type, bool free_desc); /* cdev */ int idxd_cdev_register(void); @@ -603,10 +603,4 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif -static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) -{ - idxd_dma_complete_txd(desc, reason); - idxd_free_desc(desc->wq, desc); -} - #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7bf03f371ce1..4373b48cdc91 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -717,10 +717,8 @@ static void idxd_flush_pending_llist(struct idxd_irq_entry *ie) if (!head) return; - llist_for_each_entry_safe(desc, itr, head, llnode) { - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT); - idxd_free_desc(desc->wq, desc); - } + llist_for_each_entry_safe(desc, itr, head, llnode) + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); } static void idxd_flush_work_list(struct idxd_irq_entry *ie) @@ -729,8 +727,7 @@ static void idxd_flush_work_list(struct idxd_irq_entry *ie) list_for_each_entry_safe(desc, iter, &ie->work_list, list) { list_del(&desc->list); - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT); - idxd_free_desc(desc->wq, desc); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); } } diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index cf2c8bc4f147..47b6fc894a23 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -195,11 +195,11 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) * and 0xff, which DSA_COMP_STATUS_MASK can mask out. 
*/ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { - complete_desc(desc, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); continue; } - complete_desc(desc, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); } else { spin_lock(&irq_entry->list_lock); list_add_tail(&desc->list, @@ -238,11 +238,11 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry) * and 0xff, which DSA_COMP_STATUS_MASK can mask out. */ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { - complete_desc(desc, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); continue; } - complete_desc(desc, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); } } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 83452fbbb168..e5c3689851ea 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -134,17 +134,17 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, spin_unlock(&ie->list_lock); if (found) - complete_desc(found, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false); /* - * complete_desc() will return desc to allocator and the desc can be - * acquired by a different process and the desc->list can be modified. - * Delete desc from list so the list trasversing does not get corrupted - * by the other process. + * completing the descriptor will return desc to allocator and + * the desc can be acquired by a different process and the + * desc->list can be modified. Delete desc from list so the + * list trasversing does not get corrupted by the other process. */ list_for_each_entry_safe(d, t, &flist, list) { list_del_init(&d->list); - complete_desc(d, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true); } } @@ -155,15 +155,11 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) void __iomem *portal; int rc; - if (idxd->state != IDXD_DEV_ENABLED) { - idxd_free_desc(wq, desc); + if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - } - if (!percpu_ref_tryget_live(&wq->wq_active)) { - idxd_free_desc(wq, desc); + if (!percpu_ref_tryget_live(&wq->wq_active)) return -ENXIO; - } portal = idxd_wq_portal_addr(wq); @@ -198,8 +194,6 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) /* abort operation frees the descriptor */ if (ie) llist_abort_desc(wq, ie, desc); - else - idxd_free_desc(wq, desc); return rc; } } -- Gitee From 3817287d2d50455a233fd5d1fb81319192cc02d4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:10 -0700 Subject: [PATCH 098/173] dmaengine: idxd: int handle management refactoring mainline inclusion from mainline-v5.17 commit 8b67426e05584e956775f4b134596b56bc0d35e0 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 8b67426e0558 dmaengine: idxd: int handle management refactoring. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Attach int_handle to irq_entry. This removes the separate management of int handles and reduces the confusion of iterating through int handles that is off by 1 count.
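Reduced to a sketch (field names from the diff below; the helper is hypothetical), the data-structure change looks like this:

  #define INVALID_INT_HANDLE      -1

  /* Each interrupt entry now carries its own handle and wq binding. */
  struct demo_irq_entry {
          int int_handle;         /* INVALID_INT_HANDLE when unused */
          struct idxd_wq *wq;     /* NULL for misc interrupt vector 0 */
  };

  static void demo_release_int_handles(struct demo_irq_entry *entries,
                                       int cnt)
  {
          int i;

          for (i = 0; i < cnt; i++) {
                  if (entries[i].int_handle == INVALID_INT_HANDLE)
                          continue;
                  /* release via device command here, then mark free */
                  entries[i].int_handle = INVALID_INT_HANDLE;
          }
  }

This does away with the parallel int_handles[] array whose indices were offset by one from the MSI-X vector numbers.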
Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528417065.3925689.11505755433684476288.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 8 ++++ drivers/dma/idxd/idxd.h | 10 ++++- drivers/dma/idxd/init.c | 86 ++++++++++++++++++++------------------- drivers/dma/idxd/submit.c | 6 +-- drivers/dma/idxd/sysfs.c | 1 - 5 files changed, 64 insertions(+), 47 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index fab412349f7f..f381319615fd 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1206,6 +1206,13 @@ int __drv_enable_wq(struct idxd_wq *wq) goto err; } + /* + * Device has 1 misc interrupt and N interrupts for descriptor completion. To + * assign WQ to interrupt, we will take the N+1 interrupt since vector 0 is + * for the misc interrupt. + */ + wq->ie = &idxd->irq_entries[wq->id + 1]; + rc = idxd_wq_enable(wq); if (rc < 0) { dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc); @@ -1256,6 +1263,7 @@ void __drv_disable_wq(struct idxd_wq *wq) idxd_wq_drain(wq); idxd_wq_reset(wq); + wq->ie = NULL; wq->client_count = 0; } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 3d600f8ee90b..355159d4ee68 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "registers.h" @@ -64,6 +65,7 @@ extern struct idxd_device_driver idxd_drv; extern struct idxd_device_driver idxd_dmaengine_drv; extern struct idxd_device_driver idxd_user_drv; +#define INVALID_INT_HANDLE -1 struct idxd_irq_entry { struct idxd_device *idxd; int id; @@ -75,6 +77,9 @@ struct idxd_irq_entry { * and irq thread processing error descriptor. */ spinlock_t list_lock; + int int_handle; + struct idxd_wq *wq; + ioasid_t pasid; }; struct idxd_group { @@ -171,6 +176,7 @@ struct idxd_wq { struct wait_queue_head err_queue; struct idxd_device *idxd; int id; + struct idxd_irq_entry *ie; enum idxd_wq_type type; struct idxd_group *group; int client_count; @@ -266,6 +272,8 @@ struct idxd_device { unsigned int pasid; int num_groups; + int irq_cnt; + bool request_int_handles; u32 msix_perm_offset; u32 wqcfg_offset; @@ -292,8 +300,6 @@ struct idxd_device { struct workqueue_struct *wq; struct work_struct work; - int *int_handles; - struct idxd_pmu *idxd_pmu; }; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 4373b48cdc91..2dbff722e207 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -81,6 +81,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) dev_err(dev, "Not MSI-X interrupt capable.\n"); return -ENOSPC; } + idxd->irq_cnt = msixcnt; rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX); if (rc != msixcnt) { @@ -103,7 +104,18 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) for (i = 0; i < msixcnt; i++) { idxd->irq_entries[i].id = i; idxd->irq_entries[i].idxd = idxd; + /* + * Association of WQ should be assigned starting with irq_entry 1. 
+ * irq_entry 0 is for misc interrupts and has no wq association + */ + if (i > 0) + idxd->irq_entries[i].wq = idxd->wqs[i - 1]; idxd->irq_entries[i].vector = pci_irq_vector(pdev, i); + idxd->irq_entries[i].int_handle = INVALID_INT_HANDLE; + if (device_pasid_enabled(idxd) && i > 0) + idxd->irq_entries[i].pasid = idxd->pasid; + else + idxd->irq_entries[i].pasid = INVALID_IOASID; spin_lock_init(&idxd->irq_entries[i].list_lock); } @@ -135,22 +147,14 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector); - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { - /* - * The MSIX vector enumeration starts at 1 with vector 0 being the - * misc interrupt that handles non I/O completion events. The - * interrupt handles are for IMS enumeration on guest. The misc - * interrupt vector does not require a handle and therefore we start - * the int_handles at index 0. Since 'i' starts at 1, the first - * int_handles index will be 0. - */ - rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1], + if (idxd->request_int_handles) { + rc = idxd_device_request_int_handle(idxd, i, &irq_entry->int_handle, IDXD_IRQ_MSIX); if (rc < 0) { free_irq(irq_entry->vector, irq_entry); goto err_wq_irqs; } - dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]); + dev_dbg(dev, "int handle requested: %u\n", irq_entry->int_handle); } } @@ -161,9 +165,15 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) while (--i >= 0) { irq_entry = &idxd->irq_entries[i]; free_irq(irq_entry->vector, irq_entry); - if (i != 0) - idxd_device_release_int_handle(idxd, - idxd->int_handles[i], IDXD_IRQ_MSIX); + if (irq_entry->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, irq_entry->int_handle, + IDXD_IRQ_MSIX); + irq_entry->int_handle = INVALID_INT_HANDLE; + irq_entry->pasid = INVALID_IOASID; + } + irq_entry->vector = -1; + irq_entry->wq = NULL; + irq_entry->idxd = NULL; } err_misc_irq: /* Disable error interrupt generation */ @@ -179,21 +189,19 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct idxd_irq_entry *irq_entry; - int i, msixcnt; - - msixcnt = pci_msix_vec_count(pdev); - if (msixcnt <= 0) - return; - - irq_entry = &idxd->irq_entries[0]; - free_irq(irq_entry->vector, irq_entry); - - for (i = 1; i < msixcnt; i++) { + int i; + for (i = 0; i < idxd->irq_cnt; i++) { irq_entry = &idxd->irq_entries[i]; - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) - idxd_device_release_int_handle(idxd, idxd->int_handles[i], + if (irq_entry->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, irq_entry->int_handle, IDXD_IRQ_MSIX); + irq_entry->int_handle = INVALID_INT_HANDLE; + irq_entry->pasid = INVALID_IOASID; + } + irq_entry->vector = -1; + irq_entry->wq = NULL; + irq_entry->idxd = NULL; free_irq(irq_entry->vector, irq_entry); } @@ -379,13 +387,6 @@ static int idxd_setup_internals(struct idxd_device *idxd) init_waitqueue_head(&idxd->cmd_waitq); - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { - idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL, - dev_to_node(dev)); - if (!idxd->int_handles) - return -ENOMEM; - } - rc = idxd_setup_wqs(idxd); if (rc < 0) goto err_wqs; @@ -416,7 +417,6 @@ static int idxd_setup_internals(struct idxd_device *idxd) for (i = 0; i < idxd->max_wqs; i++) put_device(wq_confdev(idxd->wqs[i])); err_wqs: - kfree(idxd->int_handles); return rc; } @@ -451,6 
+451,10 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap); } + /* reading command capabilities */ + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) + idxd->request_int_handles = true; + idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift; dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes); idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift; @@ -748,15 +752,15 @@ static void idxd_release_int_handles(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; int i, rc; - for (i = 0; i < idxd->num_wq_irqs; i++) { - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) { - rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i], - IDXD_IRQ_MSIX); + for (i = 1; i < idxd->irq_cnt; i++) { + struct idxd_irq_entry *ie = &idxd->irq_entries[i]; + + if (ie->int_handle != INVALID_INT_HANDLE) { + rc = idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); if (rc < 0) - dev_warn(dev, "irq handle %d release failed\n", - idxd->int_handles[i]); + dev_warn(dev, "irq handle %d release failed\n", ie->int_handle); else - dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]); + dev_dbg(dev, "int handle released: %u\n", ie->int_handle); } } } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index e5c3689851ea..960fbee684b2 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -25,10 +25,10 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match * vector 1:1 to the WQ id, we need to add 1 */ - if (!idxd->int_handles) + if (wq->ie->int_handle == INVALID_INT_HANDLE) desc->hw->int_handle = wq->id + 1; else - desc->hw->int_handle = idxd->int_handles[wq->id]; + desc->hw->int_handle = wq->ie->int_handle; return desc; } @@ -175,7 +175,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * that we designated the descriptor to. */ if (desc->hw->flags & IDXD_OP_FLAG_RCI) { - ie = &idxd->irq_entries[wq->id + 1]; + ie = wq->ie; llist_add(&desc->llnode, &ie->pending_llist); } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 84392258b4c1..ee7a25b33514 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1275,7 +1275,6 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->wqs); kfree(idxd->engines); kfree(idxd->irq_entries); - kfree(idxd->int_handles); ida_free(&idxd_ida, idxd->id); kfree(idxd); } -- Gitee From 8ada2d55461907e25884ffbcb454a6c3776de2fe Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:17 -0700 Subject: [PATCH 099/173] dmaengine: idxd: move interrupt handle assignment mainline inclusion from mainline-v5.17 commit eb0cf33a91b46cd50b590d032471f7f977d5a92a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit eb0cf33a91b4 dmaengine: idxd: move interrupt handle assignment. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- In preparation of supporting interrupt handle revoke event, move the interrupt handle assignment to right before the descriptor to be submitted. This allows the interrupt handle revoke logic to assign the latest interrupt handle on submission. 
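A hedged sketch of the late-binding idea; names are example-only, and the
real descriptor and flag layouts are in the hunks below:

#define EXAMPLE_OP_FLAG_RCI	0x1	/* request completion interrupt */

struct example_hw_desc {
	unsigned int flags;
	unsigned int int_handle;
};

/*
 * Bind the handle as late as possible, right before hitting the device
 * portal, so a concurrent handle revoke can install a fresh value first.
 */
static void example_prep_submit(struct example_hw_desc *hw, int cur_handle)
{
	if (hw->flags & EXAMPLE_OP_FLAG_RCI)
		hw->int_handle = cur_handle;
}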
Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528417767.3925689.7730411152122952808.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/submit.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 960fbee684b2..c3dc450134d8 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -21,15 +21,6 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) if (device_pasid_enabled(idxd)) desc->hw->pasid = idxd->pasid; - /* - * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match - * vector 1:1 to the WQ id, we need to add 1 - */ - if (wq->ie->int_handle == INVALID_INT_HANDLE) - desc->hw->int_handle = wq->id + 1; - else - desc->hw->int_handle = wq->ie->int_handle; - return desc; } @@ -176,6 +167,11 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) */ if (desc->hw->flags & IDXD_OP_FLAG_RCI) { ie = wq->ie; + if (ie->int_handle == INVALID_INT_HANDLE) + desc->hw->int_handle = ie->id; + else + desc->hw->int_handle = ie->int_handle; + llist_add(&desc->llnode, &ie->pending_llist); } -- Gitee From e115c6a95d506d1cb594bdcec3c7da3d3b84293a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:23 -0700 Subject: [PATCH 100/173] dmaengine: idxd: add helper for per interrupt handle drain mainline inclusion from mainline-v5.17 commit 46c6df1c958e55558212cfa94cad201eae48d684 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 46c6df1c958e dmaengine: idxd: add helper for per interrupt handle drain. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The helper is called at the completion of the interrupt handle refresh event. It issues drain descriptors to each of the wq with associated interrupt handle. The drain descriptor will have interrupt request set but without completion record. This will ensure all descriptors with incorrect interrupt completion handle get drained and a completion interrupt is triggered for the guest driver to process them. Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528418315.3925689.7944718440052849626.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 47b6fc894a23..f980486db590 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -55,6 +55,45 @@ static void idxd_device_reinit(struct work_struct *work) idxd_device_clear_state(idxd); } +/* + * The function sends a drain descriptor for the interrupt handle. The drain ensures + * all descriptors with this interrupt handle is flushed and the interrupt + * will allow the cleanup of the outstanding descriptors. 
+ */ +static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) +{ + struct idxd_wq *wq = ie->wq; + struct idxd_device *idxd = ie->idxd; + struct device *dev = &idxd->pdev->dev; + struct dsa_hw_desc desc = {}; + void __iomem *portal; + int rc; + + /* Issue a simple drain operation with interrupt but no completion record */ + desc.flags = IDXD_OP_FLAG_RCI; + desc.opcode = DSA_OPCODE_DRAIN; + desc.priv = 1; + + if (ie->pasid != INVALID_IOASID) + desc.pasid = ie->pasid; + desc.int_handle = ie->int_handle; + portal = idxd_wq_portal_addr(wq); + + /* + * The wmb() makes sure that the descriptor is all there before we + * issue. + */ + wmb(); + if (wq_dedicated(wq)) { + iosubmit_cmds512(portal, &desc, 1); + } else { + rc = enqcmds(portal, &desc); + /* This should not fail unless hardware failed. */ + if (rc < 0) + dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); + } +} + static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { struct device *dev = &idxd->pdev->dev; -- Gitee From 0a0979cea78f93458c1133ca9343118492c47e4b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:29 -0700 Subject: [PATCH 101/173] dmaengine: idxd: create locked version of idxd_quiesce() call mainline inclusion from mainline-v5.17 commit bd5970a0d01f8e45af9b2e2cf1d245b84ea757ba category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit bd5970a0d01f dmaengine: idxd: create locked version of idxd_quiesce() call. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add a locked version of idxd_quiesce() call so that the quiesce can be called with a lock in situations where the lock is not held by the caller. In the driver probe/remove path, the lock is already held, so the raw version can be called w/o locking. 
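This follows the usual kernel double-underscore convention; roughly, with
example-only names:

#include <linux/mutex.h>

struct example_wq {
	struct mutex wq_lock;
	/* ... */
};

/* Raw version: for probe/remove paths that already hold wq_lock. */
static void __example_wq_quiesce(struct example_wq *wq)
{
	lockdep_assert_held(&wq->wq_lock);
	/* ...kill the percpu ref and wait for outstanding submitters... */
}

/* Locked version: for callers that do not hold the lock themselves. */
static void example_wq_quiesce(struct example_wq *wq)
{
	mutex_lock(&wq->wq_lock);
	__example_wq_quiesce(wq);
	mutex_unlock(&wq->wq_lock);
}

The lockdep assertion both documents and enforces the caller contract of
the raw variant.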
Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528418980.3925689.5841907054957931211.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 10 +++++++++- drivers/dma/idxd/dma.c | 4 ++-- drivers/dma/idxd/idxd.h | 1 + 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index f381319615fd..943e9967627b 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -411,12 +411,20 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq) return 0; } -void idxd_wq_quiesce(struct idxd_wq *wq) +void __idxd_wq_quiesce(struct idxd_wq *wq) { + lockdep_assert_held(&wq->wq_lock); percpu_ref_kill(&wq->wq_active); wait_for_completion(&wq->wq_dead); } +void idxd_wq_quiesce(struct idxd_wq *wq) +{ + mutex_lock(&wq->wq_lock); + __idxd_wq_quiesce(wq); + mutex_unlock(&wq->wq_lock); +} + /* Device control bits */ static inline bool idxd_is_enabled(struct idxd_device *idxd) { diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 427ba9f78aad..dec47ff9b32d 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -339,7 +339,7 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) return 0; err_dma: - idxd_wq_quiesce(wq); + __idxd_wq_quiesce(wq); percpu_ref_exit(&wq->wq_active); err_ref: idxd_wq_free_resources(wq); @@ -356,7 +356,7 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); mutex_lock(&wq->wq_lock); - idxd_wq_quiesce(wq); + __idxd_wq_quiesce(wq); idxd_unregister_dma_channel(wq); idxd_wq_free_resources(wq); __drv_disable_wq(wq); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 355159d4ee68..970701738c8a 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -570,6 +570,7 @@ int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid); int idxd_wq_disable_pasid(struct idxd_wq *wq); +void __idxd_wq_quiesce(struct idxd_wq *wq); void idxd_wq_quiesce(struct idxd_wq *wq); int idxd_wq_init_percpu_ref(struct idxd_wq *wq); -- Gitee From 22b75feb6e0e619911d3aba7fda06e2b9bfcade1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:36 -0700 Subject: [PATCH 102/173] dmaengine: idxd: handle invalid interrupt handle descriptors mainline inclusion from mainline-v5.17 commit f6d442f7088cbf5e2ac4561aca6888380239d5b9 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit f6d442f7088c dmaengine: idxd: handle invalid interrupt handle descriptors. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Handle a descriptor that has been marked with invalid interrupt handle error in status. Create a work item that will resubmit the descriptor. This typically happens when the driver has handled the revoke interrupt handle event and has a new interrupt handle. 
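The shape of the mechanism, as an example-only sketch; the real work item
carries an idxd_desc and runs on the device's own workqueue:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct example_resubmit {
	struct work_struct work;
	void *desc;		/* the descriptor to replay, opaque here */
};

static void example_resubmit_fn(struct work_struct *work)
{
	struct example_resubmit *rw =
		container_of(work, struct example_resubmit, work);

	/* ...clear the stale completion status and resubmit rw->desc... */
	kfree(rw);
}

/*
 * Returns false on allocation failure so the caller can instead fail the
 * descriptor with an abort status.
 */
static bool example_queue_resubmit(void *desc)
{
	struct example_resubmit *rw = kzalloc(sizeof(*rw), GFP_KERNEL);

	if (!rw)
		return false;
	rw->desc = desc;
	INIT_WORK(&rw->work, example_resubmit_fn);
	return queue_work(system_long_wq, &rw->work);
}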
Reviewed-by: Kevin Tian Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528419601.3925689.4166517602890523193.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/dma.c | 14 ++++++++---- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/irq.c | 50 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index dec47ff9b32d..2421c1435e8e 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -24,18 +24,24 @@ void idxd_dma_complete_txd(struct idxd_desc *desc, enum idxd_complete_type comp_type, bool free_desc) { + struct idxd_device *idxd = desc->wq->idxd; struct dma_async_tx_descriptor *tx; struct dmaengine_result res; int complete = 1; - if (desc->completion->status == DSA_COMP_SUCCESS) + if (desc->completion->status == DSA_COMP_SUCCESS) { res.result = DMA_TRANS_NOERROR; - else if (desc->completion->status) + } else if (desc->completion->status) { + if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT && + desc->completion->status == DSA_COMP_INT_HANDLE_INVAL && + idxd_queue_int_handle_resubmit(desc)) + return; res.result = DMA_TRANS_WRITE_FAILED; - else if (comp_type == IDXD_COMPLETE_ABORT) + } else if (comp_type == IDXD_COMPLETE_ABORT) { res.result = DMA_TRANS_ABORTED; - else + } else { complete = 0; + } tx = &desc->txd; if (complete && tx->cookie) { diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 970701738c8a..82c4915f58a2 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -524,6 +524,7 @@ void idxd_unregister_devices(struct idxd_device *idxd); int idxd_register_driver(void); void idxd_unregister_driver(void); void idxd_wqs_quiesce(struct idxd_device *idxd); +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc); /* device interrupt control */ void idxd_msix_perm_setup(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index f980486db590..8193f6f30719 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -22,6 +22,11 @@ struct idxd_fault { struct idxd_device *idxd; }; +struct idxd_resubmit { + struct work_struct work; + struct idxd_desc *desc; +}; + static void idxd_device_reinit(struct work_struct *work) { struct idxd_device *idxd = container_of(work, struct idxd_device, work); @@ -216,6 +221,51 @@ irqreturn_t idxd_misc_thread(int vec, void *data) return IRQ_HANDLED; } +static void idxd_int_handle_resubmit_work(struct work_struct *work) +{ + struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work); + struct idxd_desc *desc = irw->desc; + struct idxd_wq *wq = desc->wq; + int rc; + + desc->completion->status = 0; + rc = idxd_submit_desc(wq, desc); + if (rc < 0) { + dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n", + desc->id, wq->id); + /* + * If the error is not -EAGAIN, it means the submission failed due to wq + * has been killed instead of ENQCMDS failure. Here the driver needs to + * notify the submitter of the failure by reporting abort status. + * + * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the + * abort. 
+ */ + if (rc != -EAGAIN) { + desc->completion->status = IDXD_COMP_DESC_ABORT; + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false); + } + idxd_free_desc(wq, desc); + } + kfree(irw); +} + +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc) +{ + struct idxd_wq *wq = desc->wq; + struct idxd_device *idxd = wq->idxd; + struct idxd_resubmit *irw; + + irw = kzalloc(sizeof(*irw), GFP_KERNEL); + if (!irw) + return false; + + irw->desc = desc; + INIT_WORK(&irw->work, idxd_int_handle_resubmit_work); + queue_work(idxd->wq, &irw->work); + return true; +} + static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) { struct idxd_desc *desc, *t; -- Gitee From 4eddce6a9ab9af980a49493796588bc923195b27 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:36:41 -0700 Subject: [PATCH 103/173] dmaengine: idxd: handle interrupt handle revoked event mainline inclusion from mainline-v5.17 commit 56fc39f5a36794c4f27f5fee047b641eac3f5b89 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 56fc39f5a367 dmaengine: idxd: handle interrupt handle revoked event. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- "Interrupt handle revoked" is an event that happens when the driver is running on a guest kernel and the VM is migrated to a new machine. The device will trigger an interrupt that signals to the guest driver that the interrupt handles need to be replaced. The misc irq thread function calls a helper function to handle the event. The function uses the WQ percpu_ref to quiesce the kernel submissions. It then replaces the interrupt handles by requesting interrupt handle command for each I/O MSIX vector. Once the handle is updated, the driver will unblock the submission path to allow new submissions. The submitter will attempt to acquire a percpu_ref before submission. When the request fails, it will wait on the wq_resurrect 'completion'. The driver does anticipate the possibility of descriptors being submitted before the WQ percpu_ref is killed. If a descriptor has already been submitted, it will return with incorrect interrupt handle status. The descriptor will be re-submitted with the new interrupt handle on the completion path. For descriptors with incorrect interrupt handles, completion interrupt won't be triggered. At the completion of the interrupt handle refresh, the handling function will call idxd_int_handle_refresh_drain() to issue drain descriptors to each of the wq with associated interrupt handle. The drain descriptor will have interrupt request set but without completion record. This will ensure all descriptors with incorrect interrupt completion handle get drained and a completion interrupt is triggered for the guest driver to process them. 
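The per-wq pause/replace/resume sequence is roughly the following,
sketched with example-only types that mirror the fields used below; it
assumes the percpu ref was created with PERCPU_REF_ALLOW_REINIT, which is
exactly what this patch adds:

#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/percpu-refcount.h>

struct example_wq {
	struct mutex wq_lock;
	struct percpu_ref wq_active;
	struct completion wq_dead;
	struct completion wq_resurrect;
	int int_handle;
};

static void example_replace_handle(struct example_wq *wq, int new_handle)
{
	mutex_lock(&wq->wq_lock);
	reinit_completion(&wq->wq_resurrect);

	percpu_ref_kill(&wq->wq_active);	/* park new submitters */
	wait_for_completion(&wq->wq_dead);	/* drain in-flight submitters */

	wq->int_handle = new_handle;		/* nobody is submitting now */

	percpu_ref_reinit(&wq->wq_active);
	complete_all(&wq->wq_resurrect);	/* wake parked submitters */
	mutex_unlock(&wq->wq_lock);
}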
Reviewed-by: Kevin Tian Co-Developed-by: Sanjay Kumar Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528420189.3925689.18212568593220415551.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 6 +- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 1 + drivers/dma/idxd/irq.c | 137 +++++++++++++++++++++++++++++++++++ drivers/dma/idxd/registers.h | 1 + drivers/dma/idxd/submit.c | 10 ++- 6 files changed, 152 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 943e9967627b..1dc5245107df 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -404,17 +404,21 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq) int rc; memset(&wq->wq_active, 0, sizeof(wq->wq_active)); - rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL); + rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); if (rc < 0) return rc; reinit_completion(&wq->wq_dead); + reinit_completion(&wq->wq_resurrect); return 0; } void __idxd_wq_quiesce(struct idxd_wq *wq) { lockdep_assert_held(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); percpu_ref_kill(&wq->wq_active); + complete_all(&wq->wq_resurrect); wait_for_completion(&wq->wq_dead); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 82c4915f58a2..51e79201636c 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -171,6 +171,7 @@ struct idxd_wq { u32 portal_offset; struct percpu_ref wq_active; struct completion wq_dead; + struct completion wq_resurrect; struct idxd_dev idxd_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 2dbff722e207..912839bf0be3 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -245,6 +245,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd) mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); init_completion(&wq->wq_dead); + init_completion(&wq->wq_resurrect); wq->max_xfer_bytes = idxd->max_xfer_bytes; wq->max_batch_size = idxd->max_batch_size; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 8193f6f30719..8a9eaeffc6f5 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "../dmaengine.h" #include "idxd.h" @@ -27,6 +28,11 @@ struct idxd_resubmit { struct idxd_desc *desc; }; +struct idxd_int_handle_revoke { + struct work_struct work; + struct idxd_device *idxd; +}; + static void idxd_device_reinit(struct work_struct *work) { struct idxd_device *idxd = container_of(work, struct idxd_device, work); @@ -99,6 +105,120 @@ static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) } } +static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie) +{ + LIST_HEAD(flist); + struct idxd_desc *d, *t; + struct llist_node *head; + + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) + list_add_tail(&d->list, &ie->work_list); + } + + list_for_each_entry_safe(d, t, &ie->work_list, list) { + if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL) + list_move_tail(&d->list, &flist); + } + spin_unlock(&ie->list_lock); + + list_for_each_entry_safe(d, t, &flist, list) { + list_del(&d->list); + idxd_dma_complete_txd(d, 
IDXD_COMPLETE_ABORT, true); + } +} + +static void idxd_int_handle_revoke(struct work_struct *work) +{ + struct idxd_int_handle_revoke *revoke = + container_of(work, struct idxd_int_handle_revoke, work); + struct idxd_device *idxd = revoke->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + int i, new_handle, rc; + + if (!idxd->request_int_handles) { + kfree(revoke); + dev_warn(dev, "Unexpected int handle refresh interrupt.\n"); + return; + } + + /* + * The loop attempts to acquire new interrupt handle for all interrupt + * vectors that supports a handle. If a new interrupt handle is acquired and the + * wq is kernel type, the driver will kill the percpu_ref to pause all + * ongoing descriptor submissions. The interrupt handle is then changed. + * After change, the percpu_ref is revived and all the pending submissions + * are woken to try again. A drain is sent to for the interrupt handle + * at the end to make sure all invalid int handle descriptors are processed. + */ + for (i = 1; i < idxd->irq_cnt; i++) { + struct idxd_irq_entry *ie = &idxd->irq_entries[i]; + struct idxd_wq *wq = ie->wq; + + rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); + if (rc < 0) { + dev_warn(dev, "get int handle %d failed: %d\n", i, rc); + /* + * Failed to acquire new interrupt handle. Kill the WQ + * and release all the pending submitters. The submitters will + * get error return code and handle appropriately. + */ + ie->int_handle = INVALID_INT_HANDLE; + idxd_wq_quiesce(wq); + idxd_abort_invalid_int_handle_descs(ie); + continue; + } + + /* No change in interrupt handle, nothing needs to be done */ + if (ie->int_handle == new_handle) + continue; + + if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) { + /* + * All the MSIX interrupts are allocated at once during probe. + * Therefore we need to update all interrupts even if the WQ + * isn't supporting interrupt operations. + */ + ie->int_handle = new_handle; + continue; + } + + mutex_lock(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); + + /* Kill percpu_ref to pause additional descriptor submissions */ + percpu_ref_kill(&wq->wq_active); + + /* Wait for all submitters quiesce before we change interrupt handle */ + wait_for_completion(&wq->wq_dead); + + ie->int_handle = new_handle; + + /* Revive percpu ref and wake up all the waiting submitters */ + percpu_ref_reinit(&wq->wq_active); + complete_all(&wq->wq_resurrect); + mutex_unlock(&wq->wq_lock); + + /* + * The delay here is to wait for all possible MOVDIR64B that + * are issued before percpu_ref_kill() has happened to have + * reached the PCIe domain before the drain is issued. The driver + * needs to ensure that the drain descriptor issued does not pass + * all the other issued descriptors that contain the invalid + * interrupt handle in order to ensure that the drain descriptor + * interrupt will allow the cleanup of all the descriptors with + * invalid interrupt handle. 
+ */ + if (wq_dedicated(wq)) + udelay(100); + idxd_int_handle_revoke_drain(ie); + } + kfree(revoke); +} + static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { struct device *dev = &idxd->pdev->dev; @@ -145,6 +265,23 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) err = true; } + if (cause & IDXD_INTC_INT_HANDLE_REVOKED) { + struct idxd_int_handle_revoke *revoke; + + val |= IDXD_INTC_INT_HANDLE_REVOKED; + + revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC); + if (revoke) { + revoke->idxd = idxd; + INIT_WORK(&revoke->work, idxd_int_handle_revoke); + queue_work(idxd->wq, &revoke->work); + + } else { + dev_err(dev, "Failed to allocate work for int handle revoke\n"); + idxd_wqs_quiesce(idxd); + } + } + if (cause & IDXD_INTC_CMD) { val |= IDXD_INTC_CMD; complete(idxd->cmd_done); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 262c8220adbd..8e396698c22b 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -158,6 +158,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 #define IDXD_INTC_HALT_STATE 0x10 +#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000 #define IDXD_CMD_OFFSET 0xa0 union idxd_command_reg { diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index c3dc450134d8..2060a9c2de9c 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -143,14 +143,18 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; struct idxd_irq_entry *ie = NULL; + u32 desc_flags = desc->hw->flags; void __iomem *portal; int rc; if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - if (!percpu_ref_tryget_live(&wq->wq_active)) - return -ENXIO; + if (!percpu_ref_tryget_live(&wq->wq_active)) { + wait_for_completion(&wq->wq_resurrect); + if (!percpu_ref_tryget_live(&wq->wq_active)) + return -ENXIO; + } portal = idxd_wq_portal_addr(wq); @@ -165,7 +169,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) { + if (desc_flags & IDXD_OP_FLAG_RCI) { ie = wq->ie; if (ie->int_handle == INVALID_INT_HANDLE) desc->hw->int_handle = ie->id; -- Gitee From f560354bdb6b8ea387db6e4b75231c909ba8cba5 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 26 Oct 2021 14:45:34 -0700 Subject: [PATCH 104/173] dmaengine: idxd: set defaults for wq configs mainline inclusion from mainline-v5.17 commit 92452a72ebdf1225aa37690d3648f2af6d0b4fca category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 92452a72ebdf dmaengine: idxd: set defaults for wq configs. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add default values for wq size, max_xfer_size and max_batch_size. These values should provide a general guidance for the wq configuration when the user does not specify any specific values. 
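The pattern, sketched with example-only names:

#include <linux/sizes.h>

#define EXAMPLE_WQ_DEFAULT_DEPTH	16
#define EXAMPLE_WQ_DEFAULT_MAX_XFER	SZ_2M
#define EXAMPLE_WQ_DEFAULT_MAX_BATCH	32

struct example_wq_cfg {
	unsigned int size;
	unsigned long long max_xfer_bytes;
	unsigned int max_batch_size;
};

/* Fill in defaults only for knobs the admin left at zero/unset. */
static void example_wq_apply_defaults(struct example_wq_cfg *cfg)
{
	if (!cfg->size)
		cfg->size = EXAMPLE_WQ_DEFAULT_DEPTH;
	if (!cfg->max_xfer_bytes)
		cfg->max_xfer_bytes = EXAMPLE_WQ_DEFAULT_MAX_XFER;
	if (!cfg->max_batch_size)
		cfg->max_batch_size = EXAMPLE_WQ_DEFAULT_MAX_BATCH;
}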
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163528473483.3926048.7950067926287180976.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 13 +++++-------- drivers/dma/idxd/idxd.h | 4 ++++ drivers/dma/idxd/init.c | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 1dc5245107df..36e213a8108d 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -390,6 +390,8 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) clear_bit(WQ_FLAG_DEDICATED, &wq->flags); clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; } static void idxd_wq_ref_release(struct percpu_ref *ref) @@ -839,15 +841,12 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset); } + if (wq->size == 0 && wq->type != IDXD_WQT_NONE) + wq->size = WQ_DEFAULT_QUEUE_DEPTH; + /* byte 0-3 */ wq->wqcfg->wq_size = wq->size; - if (wq->size == 0) { - idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE; - dev_warn(dev, "Incorrect work queue size: 0\n"); - return -EINVAL; - } - /* bytes 4-7 */ wq->wqcfg->wq_thresh = wq->threshold; @@ -993,8 +992,6 @@ static int idxd_wqs_setup(struct idxd_device *idxd) if (!wq->group) continue; - if (!wq->size) - continue; if (wq_shared(wq) && !device_swq_supported(idxd)) { idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 51e79201636c..89e98d69115b 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -150,6 +150,10 @@ struct idxd_cdev { #define WQ_NAME_SIZE 1024 #define WQ_TYPE_SIZE 10 +#define WQ_DEFAULT_QUEUE_DEPTH 16 +#define WQ_DEFAULT_MAX_XFER SZ_2M +#define WQ_DEFAULT_MAX_BATCH 32 + enum idxd_op_type { IDXD_OP_BLOCK = 0, IDXD_OP_NONBLOCK = 1, diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 912839bf0be3..94ecd4bf0f0e 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -246,8 +246,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) init_waitqueue_head(&wq->err_queue); init_completion(&wq->wq_dead); init_completion(&wq->wq_resurrect); - wq->max_xfer_bytes = idxd->max_xfer_bytes; - wq->max_batch_size = idxd->max_batch_size; + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { put_device(conf_dev); -- Gitee From 34337bba35a916ff18b1fd9ffe79cdfda810ff27 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 29 Nov 2021 10:19:38 -0700 Subject: [PATCH 105/173] dmaengine: idxd: add knob for enqcmds retries mainline inclusion from mainline-v5.17 commit 7930d85535751bc8b05c6731c6b79d874671f13c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7930d8553575 dmaengine: idxd: add knob for enqcmds retries. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add a sysfs knob to allow tuning of retries for the kernel ENQCMDS descriptor submission. While on host, it is not as likely that ENQCMDS return busy during normal operations due to the driver controlling the number of descriptors allocated for submission. 
However, when the driver is operating as a guest driver, the chance of
retry goes up significantly due to sharing a wq with multiple VMs. A
default value is provided with the system admin being able to tune the
value on a per WQ basis.

Suggested-by: Sanjay Kumar
Signed-off-by: Dave Jiang
Link: https://lore.kernel.org/r/163820629464.2702134.7577370098568297574.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul
Signed-off-by: Xiaochen Shen
---
 .../ABI/stable/sysfs-driver-dma-idxd |  7 ++++
 drivers/dma/idxd/device.c            |  1 +
 drivers/dma/idxd/idxd.h              |  5 +++
 drivers/dma/idxd/init.c              |  1 +
 drivers/dma/idxd/irq.c               |  2 +-
 drivers/dma/idxd/submit.c            | 31 ++++++++++++----
 drivers/dma/idxd/sysfs.c             | 36 +++++++++++++++++++
 7 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index c67c63bacd3c..6bbaeace2aa6 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -220,6 +220,13 @@ Contact: dmaengine@vger.kernel.org
 Description: Show the current number of entries in this WQ if WQ Occupancy
		Support bit WQ capabilities is 1.

+What: /sys/bus/dsa/devices/wq./enqcmds_retries
+Date: Oct 29, 2021
+KernelVersion: 5.17.0
+Contact: dmaengine@vger.kernel.org
+Description: Indicate the number of retries for an enqcmds submission on a shared wq.
+		A max value to set attribute is capped at 64.
+
 What: /sys/bus/dsa/devices/engine./group_id
 Date: Oct 25, 2019
 KernelVersion: 5.6.0
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 36e213a8108d..5a50ee6f6881 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -387,6 +387,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
	wq->threshold = 0;
	wq->priority = 0;
	wq->ats_dis = 0;
+	wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
	clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
	clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
	memset(wq->name, 0, WQ_NAME_SIZE);
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 89e98d69115b..6b9bfdc557fe 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -52,6 +52,9 @@ enum idxd_type {
 #define IDXD_NAME_SIZE 128
 #define IDXD_PMU_EVENT_MAX 64

+#define IDXD_ENQCMDS_RETRIES 32
+#define IDXD_ENQCMDS_MAX_RETRIES 64
+
 struct idxd_device_driver {
	const char *name;
	enum idxd_dev_type *type;
@@ -173,6 +176,7 @@ struct idxd_dma_chan {
 struct idxd_wq {
	void __iomem *portal;
	u32 portal_offset;
+	unsigned int enqcmds_retries;
	struct percpu_ref wq_active;
	struct completion wq_dead;
	struct completion wq_resurrect;
@@ -584,6 +588,7 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
 int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
 struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype);
 void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc);
+int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc);

 /* dmaengine */
 int idxd_register_dma_device(struct idxd_device *idxd);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 94ecd4bf0f0e..8b3afce9ea67 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -248,6 +248,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
		init_completion(&wq->wq_resurrect);
		wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
		wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
+		wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
		wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
		if (!wq->wqcfg)

{ put_device(conf_dev); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 8a9eaeffc6f5..925171e9738c 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -98,7 +98,7 @@ static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) if (wq_dedicated(wq)) { iosubmit_cmds512(portal, &desc, 1); } else { - rc = enqcmds(portal, &desc); + rc = idxd_enqcmds(wq, portal, &desc); /* This should not fail unless hardware failed. */ if (rc < 0) dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 2060a9c2de9c..11ac06be1f0a 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -139,6 +139,29 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, } } +/* + * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver + * has better control of number of descriptors being submitted to a shared wq by limiting + * the number of driver allocated descriptors to the wq size. However, when the swq is + * exported to a guest kernel, it may be shared with multiple guest kernels. This means + * the likelihood of getting busy returned on the swq when submitting goes significantly up. + * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving + * up. The sysfs knob can be tuned by the system administrator. + */ +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) +{ + int rc, retries = 0; + + do { + rc = enqcmds(portal, desc); + if (rc == 0) + break; + cpu_relax(); + } while (retries++ < wq->enqcmds_retries); + + return rc; +} + int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; @@ -182,13 +205,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { - /* - * It's not likely that we would receive queue full rejection - * since the descriptor allocation gates at wq size. If we - * receive a -EAGAIN, that means something went wrong such as the - * device is not accepting descriptor at all. 
- */ - rc = enqcmds(portal, desc->hw); + rc = idxd_enqcmds(wq, portal, desc->hw); if (rc < 0) { percpu_ref_put(&wq->wq_active); /* abort operation frees the descriptor */ diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index ee7a25b33514..938005ca9171 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -951,6 +951,41 @@ static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *at static struct device_attribute dev_attr_wq_occupancy = __ATTR(occupancy, 0444, wq_occupancy_show, NULL); +static ssize_t wq_enqcmds_retries_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", wq->enqcmds_retries); +} + +static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + int rc; + unsigned int retries; + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + rc = kstrtouint(buf, 10, &retries); + if (rc < 0) + return rc; + + if (retries > IDXD_ENQCMDS_MAX_RETRIES) + retries = IDXD_ENQCMDS_MAX_RETRIES; + + wq->enqcmds_retries = retries; + return count; +} + +static struct device_attribute dev_attr_wq_enqcmds_retries = + __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -967,6 +1002,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_max_batch_size.attr, &dev_attr_wq_ats_disable.attr, &dev_attr_wq_occupancy.attr, + &dev_attr_wq_enqcmds_retries.attr, NULL, }; -- Gitee From 85729faf14888e8f21589291fd2a8a6756a80dea Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Dec 2021 11:51:23 -0700 Subject: [PATCH 106/173] dmaengine: idxd: embed irq_entry in idxd_wq struct mainline inclusion from mainline-v5.17 commit ec0d64231615e50539d83516b974e7947d45fbce category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit ec0d64231615 dmaengine: idxd: embed irq_entry in idxd_wq struct. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- With irq_entry already being associated with the wq in a 1:1 relationship, embed the irq_entry in the idxd_wq struct and remove back pointers for idxe_wq and idxd_device. In the process of this work, clean up the interrupt handle assignment so that there's no decision to be made during submit call on where interrupt handle value comes from. Set the interrupt handle during irq request initialization time. irq_entry 0 is designated as special and is tied to the device itself. 
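With the entry embedded, the back references reduce to container_of(); as
an example-only sketch, pared down to the fields used here:

#include <linux/kernel.h>

struct example_irq_entry {
	int vector;
	int int_handle;
};

struct example_wq {
	struct example_irq_entry ie;	/* embedded, 1:1 with the wq */
	/* ... */
};

/* The back pointer becomes plain pointer arithmetic. */
static inline struct example_wq *example_ie_to_wq(struct example_irq_entry *ie)
{
	return container_of(ie, struct example_wq, ie);
}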
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163942148362.2412839.12055447853311267866.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 18 +++--- drivers/dma/idxd/idxd.h | 22 +++++-- drivers/dma/idxd/init.c | 119 +++++++++++++++----------------------- drivers/dma/idxd/irq.c | 10 ++-- drivers/dma/idxd/submit.c | 8 +-- drivers/dma/idxd/sysfs.c | 1 - 6 files changed, 79 insertions(+), 99 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5a50ee6f6881..8233a29f859d 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -21,8 +21,11 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq); /* Interrupt control bits */ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); + struct idxd_irq_entry *ie; + struct irq_data *data; + ie = idxd_get_ie(idxd, vec_id); + data = irq_get_irq_data(ie->vector); pci_msi_mask_irq(data); } @@ -38,8 +41,11 @@ void idxd_mask_msix_vectors(struct idxd_device *idxd) void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id) { - struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); + struct idxd_irq_entry *ie; + struct irq_data *data; + ie = idxd_get_ie(idxd, vec_id); + data = irq_get_irq_data(ie->vector); pci_msi_unmask_irq(data); } @@ -1216,13 +1222,6 @@ int __drv_enable_wq(struct idxd_wq *wq) goto err; } - /* - * Device has 1 misc interrupt and N interrupts for descriptor completion. To - * assign WQ to interrupt, we will take the N+1 interrupt since vector 0 is - * for the misc interrupt. - */ - wq->ie = &idxd->irq_entries[wq->id + 1]; - rc = idxd_wq_enable(wq); if (rc < 0) { dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc); @@ -1273,7 +1272,6 @@ void __drv_disable_wq(struct idxd_wq *wq) idxd_wq_drain(wq); idxd_wq_reset(wq); - wq->ie = NULL; wq->client_count = 0; } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 6b9bfdc557fe..d77be03dd8b0 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -70,7 +70,6 @@ extern struct idxd_device_driver idxd_user_drv; #define INVALID_INT_HANDLE -1 struct idxd_irq_entry { - struct idxd_device *idxd; int id; int vector; struct llist_head pending_llist; @@ -81,7 +80,6 @@ struct idxd_irq_entry { */ spinlock_t list_lock; int int_handle; - struct idxd_wq *wq; ioasid_t pasid; }; @@ -185,7 +183,7 @@ struct idxd_wq { struct wait_queue_head err_queue; struct idxd_device *idxd; int id; - struct idxd_irq_entry *ie; + struct idxd_irq_entry ie; enum idxd_wq_type type; struct idxd_group *group; int client_count; @@ -266,6 +264,7 @@ struct idxd_device { int id; int major; u32 cmd_status; + struct idxd_irq_entry ie; /* misc irq, msix 0 */ struct pci_dev *pdev; void __iomem *reg_base; @@ -302,8 +301,6 @@ struct idxd_device { union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; - int num_wq_irqs; - struct idxd_irq_entry *irq_entries; struct idxd_dma_dev *idxd_dma; struct workqueue_struct *wq; @@ -395,6 +392,21 @@ static inline void idxd_dev_set_type(struct idxd_dev *idev, int type) idev->type = type; } +static inline struct idxd_irq_entry *idxd_get_ie(struct idxd_device *idxd, int idx) +{ + return (idx == 0) ? 
&idxd->ie : &idxd->wqs[idx - 1]->ie; +} + +static inline struct idxd_wq *ie_to_wq(struct idxd_irq_entry *ie) +{ + return container_of(ie, struct idxd_wq, ie); +} + +static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie) +{ + return container_of(ie, struct idxd_device, ie); +} + extern struct bus_type dsa_bus_type; extern bool support_enqcmd; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 8b3afce9ea67..29c732a94027 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -72,7 +72,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct device *dev = &pdev->dev; - struct idxd_irq_entry *irq_entry; + struct idxd_irq_entry *ie; int i, msixcnt; int rc = 0; @@ -90,72 +90,54 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt); - /* - * We implement 1 completion list per MSI-X entry except for - * entry 0, which is for errors and others. - */ - idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry), - GFP_KERNEL, dev_to_node(dev)); - if (!idxd->irq_entries) { - rc = -ENOMEM; - goto err_irq_entries; - } - - for (i = 0; i < msixcnt; i++) { - idxd->irq_entries[i].id = i; - idxd->irq_entries[i].idxd = idxd; - /* - * Association of WQ should be assigned starting with irq_entry 1. - * irq_entry 0 is for misc interrupts and has no wq association - */ - if (i > 0) - idxd->irq_entries[i].wq = idxd->wqs[i - 1]; - idxd->irq_entries[i].vector = pci_irq_vector(pdev, i); - idxd->irq_entries[i].int_handle = INVALID_INT_HANDLE; - if (device_pasid_enabled(idxd) && i > 0) - idxd->irq_entries[i].pasid = idxd->pasid; - else - idxd->irq_entries[i].pasid = INVALID_IOASID; - spin_lock_init(&idxd->irq_entries[i].list_lock); - } - idxd_msix_perm_setup(idxd); - irq_entry = &idxd->irq_entries[0]; - rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread, - 0, "idxd-misc", irq_entry); + ie = idxd_get_ie(idxd, 0); + ie->vector = pci_irq_vector(pdev, 0); + rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie); if (rc < 0) { dev_err(dev, "Failed to allocate misc interrupt.\n"); goto err_misc_irq; } - dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector); + dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", ie->vector); - /* first MSI-X entry is not for wq interrupts */ - idxd->num_wq_irqs = msixcnt - 1; + for (i = 0; i < idxd->max_wqs; i++) { + int msix_idx = i + 1; + + ie = idxd_get_ie(idxd, msix_idx); - for (i = 1; i < msixcnt; i++) { - irq_entry = &idxd->irq_entries[i]; + /* MSIX vector 0 special, wq irq entry starts at 1 */ + ie->id = msix_idx; + ie->vector = pci_irq_vector(pdev, msix_idx); + ie->int_handle = INVALID_INT_HANDLE; + if (device_pasid_enabled(idxd) && i > 0) + ie->pasid = idxd->pasid; + else + ie->pasid = INVALID_IOASID; + spin_lock_init(&ie->list_lock); + init_llist_head(&ie->pending_llist); + INIT_LIST_HEAD(&ie->work_list); - init_llist_head(&idxd->irq_entries[i].pending_llist); - INIT_LIST_HEAD(&idxd->irq_entries[i].work_list); - rc = request_threaded_irq(irq_entry->vector, NULL, - idxd_wq_thread, 0, "idxd-portal", irq_entry); + rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); if (rc < 0) { - dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector); + dev_err(dev, "Failed to allocate irq %d.\n", ie->vector); goto err_wq_irqs; } - dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector); 
+ dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, ie->vector); if (idxd->request_int_handles) { - rc = idxd_device_request_int_handle(idxd, i, &irq_entry->int_handle, + rc = idxd_device_request_int_handle(idxd, i, &ie->int_handle, IDXD_IRQ_MSIX); if (rc < 0) { - free_irq(irq_entry->vector, irq_entry); + free_irq(ie->vector, ie); goto err_wq_irqs; } - dev_dbg(dev, "int handle requested: %u\n", irq_entry->int_handle); + dev_dbg(dev, "int handle requested: %u\n", ie->int_handle); + } else { + ie->int_handle = msix_idx; } + } idxd_unmask_error_interrupts(idxd); @@ -163,23 +145,19 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) err_wq_irqs: while (--i >= 0) { - irq_entry = &idxd->irq_entries[i]; - free_irq(irq_entry->vector, irq_entry); - if (irq_entry->int_handle != INVALID_INT_HANDLE) { - idxd_device_release_int_handle(idxd, irq_entry->int_handle, - IDXD_IRQ_MSIX); - irq_entry->int_handle = INVALID_INT_HANDLE; - irq_entry->pasid = INVALID_IOASID; + ie = &idxd->wqs[i]->ie; + free_irq(ie->vector, ie); + if (ie->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = INVALID_IOASID; } - irq_entry->vector = -1; - irq_entry->wq = NULL; - irq_entry->idxd = NULL; + ie->vector = -1; } err_misc_irq: /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); idxd_msix_perm_clear(idxd); - err_irq_entries: pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); return rc; @@ -188,21 +166,18 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) static void idxd_cleanup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; - struct idxd_irq_entry *irq_entry; + struct idxd_irq_entry *ie; int i; for (i = 0; i < idxd->irq_cnt; i++) { - irq_entry = &idxd->irq_entries[i]; - if (irq_entry->int_handle != INVALID_INT_HANDLE) { - idxd_device_release_int_handle(idxd, irq_entry->int_handle, - IDXD_IRQ_MSIX); - irq_entry->int_handle = INVALID_INT_HANDLE; - irq_entry->pasid = INVALID_IOASID; + ie = idxd_get_ie(idxd, i); + if (ie->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = INVALID_IOASID; } - irq_entry->vector = -1; - irq_entry->wq = NULL; - irq_entry->idxd = NULL; - free_irq(irq_entry->vector, irq_entry); + free_irq(ie->vector, ie); + ie->vector = -1; } idxd_mask_error_interrupts(idxd); @@ -755,7 +730,7 @@ static void idxd_release_int_handles(struct idxd_device *idxd) int i, rc; for (i = 1; i < idxd->irq_cnt; i++) { - struct idxd_irq_entry *ie = &idxd->irq_entries[i]; + struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); if (ie->int_handle != INVALID_INT_HANDLE) { rc = idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); @@ -783,7 +758,7 @@ static void idxd_shutdown(struct pci_dev *pdev) idxd_mask_error_interrupts(idxd); for (i = 0; i < msixcnt; i++) { - irq_entry = &idxd->irq_entries[i]; + irq_entry = idxd_get_ie(idxd, i); synchronize_irq(irq_entry->vector); if (i == 0) continue; @@ -815,7 +790,7 @@ static void idxd_remove(struct pci_dev *pdev) idxd_disable_system_pasid(idxd); for (i = 0; i < msixcnt; i++) { - irq_entry = &idxd->irq_entries[i]; + irq_entry = idxd_get_ie(idxd, i); free_irq(irq_entry->vector, irq_entry); } idxd_msix_perm_clear(idxd); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 925171e9738c..a1316f341dd6 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ 
-73,8 +73,8 @@ static void idxd_device_reinit(struct work_struct *work) */ static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) { - struct idxd_wq *wq = ie->wq; - struct idxd_device *idxd = ie->idxd; + struct idxd_wq *wq = ie_to_wq(ie); + struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; struct dsa_hw_desc desc = {}; void __iomem *portal; @@ -155,8 +155,8 @@ static void idxd_int_handle_revoke(struct work_struct *work) * at the end to make sure all invalid int handle descriptors are processed. */ for (i = 1; i < idxd->irq_cnt; i++) { - struct idxd_irq_entry *ie = &idxd->irq_entries[i]; - struct idxd_wq *wq = ie->wq; + struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); + struct idxd_wq *wq = ie_to_wq(ie); rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); if (rc < 0) { @@ -338,7 +338,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) irqreturn_t idxd_misc_thread(int vec, void *data) { struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = irq_entry->idxd; + struct idxd_device *idxd = ie_to_idxd(irq_entry); int rc; u32 cause; diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 11ac06be1f0a..e289fd48711a 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -193,12 +193,8 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * that we designated the descriptor to. */ if (desc_flags & IDXD_OP_FLAG_RCI) { - ie = wq->ie; - if (ie->int_handle == INVALID_INT_HANDLE) - desc->hw->int_handle = ie->id; - else - desc->hw->int_handle = ie->int_handle; - + ie = &wq->ie; + desc->hw->int_handle = ie->int_handle; llist_add(&desc->llnode, &ie->pending_llist); } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 938005ca9171..ff9d4548b945 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1310,7 +1310,6 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->groups); kfree(idxd->wqs); kfree(idxd->engines); - kfree(idxd->irq_entries); ida_free(&idxd_ida, idxd->id); kfree(idxd); } -- Gitee From 2898fc69215ce6f91aea19a4c30ed5be9a5354ac Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Dec 2021 11:51:29 -0700 Subject: [PATCH 107/173] dmaengine: idxd: fix descriptor flushing locking mainline inclusion from mainline-v5.17 commit 23a50c8035655c5a1d9b52c878b3ebf7b6b83eea category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 23a50c803565 dmaengine: idxd: fix descriptor flushing locking. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The descriptor flushing for shutdown is not holding the irq_entry list lock. If there's ongoing interrupt completion handling, this can corrupt the list. Add locking to protect list walking. Also refactor the code so it's more compact. 
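The refactored shape, as an example-only sketch; one way to splice both
queues onto a private list under the lock and complete them outside it:

#include <linux/list.h>
#include <linux/llist.h>
#include <linux/spinlock.h>

struct example_desc {
	struct llist_node llnode;
	struct list_head list;
};

struct example_irq_entry {
	spinlock_t list_lock;
	struct llist_head pending_llist;
	struct list_head work_list;
};

static void example_flush_pending(struct example_irq_entry *ie,
				  struct list_head *flist)
{
	struct example_desc *d, *t;
	struct llist_node *head;

	spin_lock(&ie->list_lock);
	head = llist_del_all(&ie->pending_llist);
	llist_for_each_entry_safe(d, t, head, llnode)
		list_add_tail(&d->list, &ie->work_list);
	list_splice_init(&ie->work_list, flist);
	spin_unlock(&ie->list_lock);
	/* callers now walk flist without ie->list_lock held */
}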
Fixes: 8f47d1a5e545 ("dmaengine: idxd: connect idxd to dmaengine subsystem") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163942148935.2412839.18282664745572777280.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 29c732a94027..03c735727f68 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -689,26 +689,28 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; } -static void idxd_flush_pending_llist(struct idxd_irq_entry *ie) +static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) { struct idxd_desc *desc, *itr; struct llist_node *head; + LIST_HEAD(flist); + enum idxd_complete_type ctype; + spin_lock(&ie->list_lock); head = llist_del_all(&ie->pending_llist); - if (!head) - return; - - llist_for_each_entry_safe(desc, itr, head, llnode) - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); -} + if (head) { + llist_for_each_entry_safe(desc, itr, head, llnode) + list_add_tail(&desc->list, &ie->work_list); + } -static void idxd_flush_work_list(struct idxd_irq_entry *ie) -{ - struct idxd_desc *desc, *iter; + list_for_each_entry_safe(desc, itr, &ie->work_list, list) + list_move_tail(&desc->list, &flist); + spin_unlock(&ie->list_lock); - list_for_each_entry_safe(desc, iter, &ie->work_list, list) { + list_for_each_entry_safe(desc, itr, &flist, list) { list_del(&desc->list); - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); + ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; + idxd_dma_complete_txd(desc, ctype, true); } } @@ -762,8 +764,7 @@ static void idxd_shutdown(struct pci_dev *pdev) synchronize_irq(irq_entry->vector); if (i == 0) continue; - idxd_flush_pending_llist(irq_entry); - idxd_flush_work_list(irq_entry); + idxd_flush_pending_descs(irq_entry); } flush_workqueue(idxd->wq); } -- Gitee From c5eb2361f1c5c80fc924a3e3204323cdc535ad72 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Dec 2021 11:51:34 -0700 Subject: [PATCH 108/173] dmaengine: idxd: change MSIX allocation based on per wq activation mainline inclusion from mainline-v5.17 commit 403a2e236538c6b479ea5bfc8b75a75540cfba6b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 403a2e236538 dmaengine: idxd: change MSIX allocation based on per wq activation. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Change the driver so that the WQ interrupt is requested only when the wq is being enabled. This new scheme sets things up so that request_threaded_irq() is only called when a kernel wq type is being enabled. This also prepares for future interrupt requests where a different interrupt handler, such as a wq occupancy interrupt, can be set up instead of the wq completion interrupt. Not calling request_irq() until the WQ actually needs an irq also avoids wasting CPU irq vectors, which are a limited resource on x86 systems. idxd_flush_pending_descs() is moved to device.c since descriptor flushing is now part of wq disable rather than shutdown().
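The resulting probe flow follows the kernel's usual goto-unwind shape: request the wq interrupt first, enable the wq second, and on failure undo the steps in reverse so the irq is released last. The sketch below models only that ordering with stand-in functions; it is not the actual driver code, and the simulated failure in enable_wq_step() exists just to exercise the unwind path.

#include <stdio.h>

static int request_irq_step(void) { puts("request wq irq"); return 0; }
static int enable_wq_step(void)   { puts("enable wq"); return -1; /* simulated failure */ }
static void free_irq_step(void)   { puts("free wq irq"); }

static int wq_probe(void)
{
	int rc;

	rc = request_irq_step();
	if (rc < 0)
		goto err_irq;

	rc = enable_wq_step();
	if (rc < 0)
		goto err_enable;

	return 0;

err_enable:
	free_irq_step();	/* undo in reverse order of setup */
err_irq:
	return rc;
}

int main(void)
{
	return wq_probe() ? 1 : 0;
}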
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163942149487.2412839.6691222855803875848.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 161 +++++++++++++++++++++++--------------- drivers/dma/idxd/dma.c | 12 +++ drivers/dma/idxd/idxd.h | 7 +- drivers/dma/idxd/init.c | 133 ++++--------------------------- drivers/dma/idxd/irq.c | 3 + include/uapi/linux/idxd.h | 1 + 6 files changed, 132 insertions(+), 185 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 8233a29f859d..280b41417f41 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -19,36 +19,6 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd); static void idxd_wq_disable_cleanup(struct idxd_wq *wq); /* Interrupt control bits */ -void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) -{ - struct idxd_irq_entry *ie; - struct irq_data *data; - - ie = idxd_get_ie(idxd, vec_id); - data = irq_get_irq_data(ie->vector); - pci_msi_mask_irq(data); -} - -void idxd_mask_msix_vectors(struct idxd_device *idxd) -{ - struct pci_dev *pdev = idxd->pdev; - int msixcnt = pci_msix_vec_count(pdev); - int i; - - for (i = 0; i < msixcnt; i++) - idxd_mask_msix_vector(idxd, i); -} - -void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id) -{ - struct idxd_irq_entry *ie; - struct irq_data *data; - - ie = idxd_get_ie(idxd, vec_id); - data = irq_get_irq_data(ie->vector); - pci_msi_unmask_irq(data); -} - void idxd_unmask_error_interrupts(struct idxd_device *idxd) { union genctrl_reg genctrl; @@ -593,7 +563,6 @@ void idxd_device_reset(struct idxd_device *idxd) idxd_device_clear_state(idxd); idxd->state = IDXD_DEV_DISABLED; idxd_unmask_error_interrupts(idxd); - idxd_msix_perm_setup(idxd); spin_unlock(&idxd->dev_lock); } @@ -732,36 +701,6 @@ void idxd_device_clear_state(struct idxd_device *idxd) idxd_device_wqs_clear_state(idxd); } -void idxd_msix_perm_setup(struct idxd_device *idxd) -{ - union msix_perm mperm; - int i, msixcnt; - - msixcnt = pci_msix_vec_count(idxd->pdev); - if (msixcnt < 0) - return; - - mperm.bits = 0; - mperm.pasid = idxd->pasid; - mperm.pasid_en = device_pasid_enabled(idxd); - for (i = 1; i < msixcnt; i++) - iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); -} - -void idxd_msix_perm_clear(struct idxd_device *idxd) -{ - union msix_perm mperm; - int i, msixcnt; - - msixcnt = pci_msix_vec_count(idxd->pdev); - if (msixcnt < 0) - return; - - mperm.bits = 0; - for (i = 1; i < msixcnt; i++) - iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); -} - static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; @@ -1158,6 +1097,106 @@ int idxd_device_load_config(struct idxd_device *idxd) return 0; } +static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) +{ + struct idxd_desc *desc, *itr; + struct llist_node *head; + LIST_HEAD(flist); + enum idxd_complete_type ctype; + + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(desc, itr, head, llnode) + list_add_tail(&desc->list, &ie->work_list); + } + + list_for_each_entry_safe(desc, itr, &ie->work_list, list) + list_move_tail(&desc->list, &flist); + spin_unlock(&ie->list_lock); + + list_for_each_entry_safe(desc, itr, &flist, list) { + list_del(&desc->list); + ctype = desc->completion->status ? 
IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; + idxd_dma_complete_txd(desc, ctype, true); + } +} + +static void idxd_device_set_perm_entry(struct idxd_device *idxd, + struct idxd_irq_entry *ie) +{ + union msix_perm mperm; + + if (ie->pasid == INVALID_IOASID) + return; + + mperm.bits = 0; + mperm.pasid = ie->pasid; + mperm.pasid_en = 1; + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8); +} + +static void idxd_device_clear_perm_entry(struct idxd_device *idxd, + struct idxd_irq_entry *ie) +{ + iowrite32(0, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8); +} + +void idxd_wq_free_irq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = &wq->ie; + + synchronize_irq(ie->vector); + free_irq(ie->vector, ie); + idxd_flush_pending_descs(ie); + if (idxd->request_int_handles) + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + idxd_device_clear_perm_entry(idxd, ie); + ie->vector = -1; + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = INVALID_IOASID; +} + +int idxd_wq_request_irq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct idxd_irq_entry *ie; + int rc; + + ie = &wq->ie; + ie->vector = pci_irq_vector(pdev, ie->id); + ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID; + idxd_device_set_perm_entry(idxd, ie); + + rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); + if (rc < 0) { + dev_err(dev, "Failed to request irq %d.\n", ie->vector); + goto err_irq; + } + + if (idxd->request_int_handles) { + rc = idxd_device_request_int_handle(idxd, ie->id, &ie->int_handle, + IDXD_IRQ_MSIX); + if (rc < 0) + goto err_int_handle; + } else { + ie->int_handle = ie->id; + } + + return 0; + +err_int_handle: + ie->int_handle = INVALID_INT_HANDLE; + free_irq(ie->vector, ie); +err_irq: + idxd_device_clear_perm_entry(idxd, ie); + ie->pasid = INVALID_IOASID; + return rc; +} + int __drv_enable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 2421c1435e8e..f9ed55011775 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -312,6 +312,14 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) mutex_lock(&wq->wq_lock); wq->type = IDXD_WQT_KERNEL; + + rc = idxd_wq_request_irq(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR; + dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc); + goto err_irq; + } + rc = __drv_enable_wq(wq); if (rc < 0) { dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc); @@ -352,6 +360,8 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) err_res_alloc: __drv_disable_wq(wq); err: + idxd_wq_free_irq(wq); +err_irq: wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); return rc; @@ -367,6 +377,8 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) idxd_wq_free_resources(wq); __drv_disable_wq(wq); percpu_ref_exit(&wq->wq_active); + idxd_wq_free_irq(wq); + wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index d77be03dd8b0..6353e762286d 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -548,15 +548,10 @@ void idxd_wqs_quiesce(struct idxd_device *idxd); bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc); /* device interrupt control */ -void idxd_msix_perm_setup(struct idxd_device *idxd); -void idxd_msix_perm_clear(struct 
idxd_device *idxd); irqreturn_t idxd_misc_thread(int vec, void *data); irqreturn_t idxd_wq_thread(int irq, void *data); void idxd_mask_error_interrupts(struct idxd_device *idxd); void idxd_unmask_error_interrupts(struct idxd_device *idxd); -void idxd_mask_msix_vectors(struct idxd_device *idxd); -void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); -void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ int idxd_register_idxd_drv(void); @@ -595,6 +590,8 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq); void __idxd_wq_quiesce(struct idxd_wq *wq); void idxd_wq_quiesce(struct idxd_wq *wq); int idxd_wq_init_percpu_ref(struct idxd_wq *wq); +void idxd_wq_free_irq(struct idxd_wq *wq); +int idxd_wq_request_irq(struct idxd_wq *wq); /* submission */ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 03c735727f68..3505efb7ae71 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -90,7 +90,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt); - idxd_msix_perm_setup(idxd); ie = idxd_get_ie(idxd, 0); ie->vector = pci_irq_vector(pdev, 0); @@ -99,65 +98,26 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) dev_err(dev, "Failed to allocate misc interrupt.\n"); goto err_misc_irq; } - - dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", ie->vector); + dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector); for (i = 0; i < idxd->max_wqs; i++) { int msix_idx = i + 1; ie = idxd_get_ie(idxd, msix_idx); - - /* MSIX vector 0 special, wq irq entry starts at 1 */ ie->id = msix_idx; - ie->vector = pci_irq_vector(pdev, msix_idx); ie->int_handle = INVALID_INT_HANDLE; - if (device_pasid_enabled(idxd) && i > 0) - ie->pasid = idxd->pasid; - else - ie->pasid = INVALID_IOASID; + ie->pasid = INVALID_IOASID; + spin_lock_init(&ie->list_lock); init_llist_head(&ie->pending_llist); INIT_LIST_HEAD(&ie->work_list); - - rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); - if (rc < 0) { - dev_err(dev, "Failed to allocate irq %d.\n", ie->vector); - goto err_wq_irqs; - } - - dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, ie->vector); - if (idxd->request_int_handles) { - rc = idxd_device_request_int_handle(idxd, i, &ie->int_handle, - IDXD_IRQ_MSIX); - if (rc < 0) { - free_irq(ie->vector, ie); - goto err_wq_irqs; - } - dev_dbg(dev, "int handle requested: %u\n", ie->int_handle); - } else { - ie->int_handle = msix_idx; - } - } idxd_unmask_error_interrupts(idxd); return 0; - err_wq_irqs: - while (--i >= 0) { - ie = &idxd->wqs[i]->ie; - free_irq(ie->vector, ie); - if (ie->int_handle != INVALID_INT_HANDLE) { - idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); - ie->int_handle = INVALID_INT_HANDLE; - ie->pasid = INVALID_IOASID; - } - ie->vector = -1; - } err_misc_irq: - /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); - idxd_msix_perm_clear(idxd); pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); return rc; @@ -167,20 +127,15 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct idxd_irq_entry *ie; - int i; + int msixcnt; - for (i = 0; i < idxd->irq_cnt; i++) { - ie = idxd_get_ie(idxd, i); - if (ie->int_handle != INVALID_INT_HANDLE) { - idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); - ie->int_handle = 
INVALID_INT_HANDLE; - ie->pasid = INVALID_IOASID; - } - free_irq(ie->vector, ie); - ie->vector = -1; - } + msixcnt = pci_msix_vec_count(pdev); + if (msixcnt <= 0) + return; + ie = idxd_get_ie(idxd, 0); idxd_mask_error_interrupts(idxd); + free_irq(ie->vector, ie); pci_free_irq_vectors(pdev); } @@ -592,8 +547,6 @@ static int idxd_probe(struct idxd_device *idxd) if (rc) goto err_config; - dev_dbg(dev, "IDXD interrupt setup complete.\n"); - idxd->major = idxd_cdev_get_major(idxd); rc = perfmon_pmu_init(idxd); @@ -689,31 +642,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; } -static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) -{ - struct idxd_desc *desc, *itr; - struct llist_node *head; - LIST_HEAD(flist); - enum idxd_complete_type ctype; - - spin_lock(&ie->list_lock); - head = llist_del_all(&ie->pending_llist); - if (head) { - llist_for_each_entry_safe(desc, itr, head, llnode) - list_add_tail(&desc->list, &ie->work_list); - } - - list_for_each_entry_safe(desc, itr, &ie->work_list, list) - list_move_tail(&desc->list, &flist); - spin_unlock(&ie->list_lock); - - list_for_each_entry_safe(desc, itr, &flist, list) { - list_del(&desc->list); - ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; - idxd_dma_complete_txd(desc, ctype, true); - } -} - void idxd_wqs_quiesce(struct idxd_device *idxd) { struct idxd_wq *wq; @@ -726,46 +654,19 @@ void idxd_wqs_quiesce(struct idxd_device *idxd) } } -static void idxd_release_int_handles(struct idxd_device *idxd) -{ - struct device *dev = &idxd->pdev->dev; - int i, rc; - - for (i = 1; i < idxd->irq_cnt; i++) { - struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); - - if (ie->int_handle != INVALID_INT_HANDLE) { - rc = idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); - if (rc < 0) - dev_warn(dev, "irq handle %d release failed\n", ie->int_handle); - else - dev_dbg(dev, "int handle released: %u\n", ie->int_handle); - } - } -} - static void idxd_shutdown(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); - int rc, i; struct idxd_irq_entry *irq_entry; - int msixcnt = pci_msix_vec_count(pdev); + int rc; rc = idxd_device_disable(idxd); if (rc) dev_err(&pdev->dev, "Disabling device failed\n"); - dev_dbg(&pdev->dev, "%s called\n", __func__); - idxd_mask_msix_vectors(idxd); + irq_entry = &idxd->ie; + synchronize_irq(irq_entry->vector); idxd_mask_error_interrupts(idxd); - - for (i = 0; i < msixcnt; i++) { - irq_entry = idxd_get_ie(idxd, i); - synchronize_irq(irq_entry->vector); - if (i == 0) - continue; - idxd_flush_pending_descs(irq_entry); - } flush_workqueue(idxd->wq); } @@ -773,8 +674,6 @@ static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); struct idxd_irq_entry *irq_entry; - int msixcnt = pci_msix_vec_count(pdev); - int i; idxd_unregister_devices(idxd); /* @@ -790,12 +689,8 @@ static void idxd_remove(struct pci_dev *pdev) if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); - for (i = 0; i < msixcnt; i++) { - irq_entry = idxd_get_ie(idxd, i); - free_irq(irq_entry->vector, irq_entry); - } - idxd_msix_perm_clear(idxd); - idxd_release_int_handles(idxd); + irq_entry = idxd_get_ie(idxd, 0); + free_irq(irq_entry->vector, irq_entry); pci_free_irq_vectors(pdev); pci_iounmap(pdev, idxd->reg_base); iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index a1316f341dd6..743ead5ebc57 100644 --- a/drivers/dma/idxd/irq.c +++ 
b/drivers/dma/idxd/irq.c @@ -158,6 +158,9 @@ static void idxd_int_handle_revoke(struct work_struct *work) struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); struct idxd_wq *wq = ie_to_wq(ie); + if (ie->int_handle == INVALID_INT_HANDLE) + continue; + rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); if (rc < 0) { dev_warn(dev, "get int handle %d failed: %d\n", i, rc); diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index c750eac09fc9..a8f0ff75c430 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -28,6 +28,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, + IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 -- Gitee From 43df00c00b88b023ab581e1489ab4aa91441e870 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Dec 2021 13:15:17 -0700 Subject: [PATCH 109/173] dmaengine: idxd: fix wq settings post wq disable mainline inclusion from mainline-v5.17 commit 0f225705cf6536826318180831e18a74595efc8d category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 0f225705cf65 dmaengine: idxd: fix wq settings post wq disable. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- By the spec, wq size and group association is not changeable unless device is disabled. Exclude clearing the shadow copy on wq disable/reset. This allows wq type to be changed after disable to be re-enabled. Move the size and group association to its own cleanup and only call it during device disable. Fixes: 0dcfe41e9a4c ("dmanegine: idxd: cleanup all device related bits after disabling device") Reported-by: Lucas Van Tested-by: Lucas Van Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163951291732.2987775.13576571320501115257.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 280b41417f41..3fce7629daa7 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -358,8 +358,6 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) lockdep_assert_held(&wq->wq_lock); memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; - wq->size = 0; - wq->group = NULL; wq->threshold = 0; wq->priority = 0; wq->ats_dis = 0; @@ -371,6 +369,15 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; } +static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq) +{ + lockdep_assert_held(&wq->wq_lock); + + idxd_wq_disable_cleanup(wq); + wq->size = 0; + wq->group = NULL; +} + static void idxd_wq_ref_release(struct percpu_ref *ref) { struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active); @@ -689,6 +696,7 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) if (wq->state == IDXD_WQ_ENABLED) { idxd_wq_disable_cleanup(wq); + idxd_wq_device_reset_cleanup(wq); wq->state = IDXD_WQ_DISABLED; } } -- Gitee From 7365b04adc24aa45ca930a6b61b8ffa8442afadb Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Dec 2021 13:23:09 -0700 Subject: [PATCH 110/173] dmaengine: idxd: change bandwidth token to read buffers mainline inclusion from mainline-v5.17 commit 7ed6f1b85fb613e5e44ef3e14d73f2dc96860935 category: feature bugzilla: 
https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7ed6f1b85fb6 dmaengine: idxd: change bandwidth token to read buffers. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- DSA spec v1.2 has changed the term of "bandwidth tokens" to "read buffers" in order to make the concept clearer. Deprecate bandwidth token naming in the driver and convert to read buffers in order to match with the spec and reduce confusion when reading the spec. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163951338932.2988321.6162640806935567317.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 25 +++++++++++---------- drivers/dma/idxd/idxd.h | 12 +++++------ drivers/dma/idxd/init.c | 6 +++--- drivers/dma/idxd/registers.h | 14 ++++++------ drivers/dma/idxd/sysfs.c | 42 ++++++++++++++++++------------------ 5 files changed, 49 insertions(+), 50 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 3fce7629daa7..573ad8b86804 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -678,9 +678,9 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) memset(&group->grpcfg, 0, sizeof(group->grpcfg)); group->num_engines = 0; group->num_wqs = 0; - group->use_token_limit = false; - group->tokens_allowed = 0; - group->tokens_reserved = 0; + group->use_rdbuf_limit = false; + group->rdbufs_allowed = 0; + group->rdbufs_reserved = 0; group->tc_a = -1; group->tc_b = -1; } @@ -748,10 +748,10 @@ static int idxd_groups_config_write(struct idxd_device *idxd) int i; struct device *dev = &idxd->pdev->dev; - /* Setup bandwidth token limit */ - if (idxd->hw.gen_cap.config_en && idxd->token_limit) { + /* Setup bandwidth rdbuf limit */ + if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) { reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); - reg.token_limit = idxd->token_limit; + reg.rdbuf_limit = idxd->rdbuf_limit; iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); } @@ -889,13 +889,12 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) group->tc_b = group->grpcfg.flags.tc_b = 1; else group->grpcfg.flags.tc_b = group->tc_b; - group->grpcfg.flags.use_token_limit = group->use_token_limit; - group->grpcfg.flags.tokens_reserved = group->tokens_reserved; - if (group->tokens_allowed) - group->grpcfg.flags.tokens_allowed = - group->tokens_allowed; + group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit; + group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved; + if (group->rdbufs_allowed) + group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed; else - group->grpcfg.flags.tokens_allowed = idxd->max_tokens; + group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs; } } @@ -1086,7 +1085,7 @@ int idxd_device_load_config(struct idxd_device *idxd) int i, rc; reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); - idxd->token_limit = reg.token_limit; + idxd->rdbuf_limit = reg.rdbuf_limit; for (i = 0; i < idxd->max_groups; i++) { struct idxd_group *group = idxd->groups[i]; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 6353e762286d..da72eb15f610 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -90,9 +90,9 @@ struct idxd_group { int id; int num_engines; int num_wqs; - bool use_token_limit; - u8 tokens_allowed; - u8 tokens_reserved; + bool use_rdbuf_limit; + u8 rdbufs_allowed; + u8 rdbufs_reserved; int tc_a; int tc_b; }; @@ -292,11 +292,11 @@ struct 
idxd_device { u32 max_batch_size; int max_groups; int max_engines; - int max_tokens; + int max_rdbufs; int max_wqs; int max_wq_size; - int token_limit; - int nr_tokens; /* non-reserved tokens */ + int rdbuf_limit; + int nr_rdbufs; /* non-reserved read buffers */ unsigned int wqcfg_size; union sw_err_reg sw_err; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 3505efb7ae71..08a5f4310188 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -400,9 +400,9 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits); idxd->max_groups = idxd->hw.group_cap.num_groups; dev_dbg(dev, "max groups: %u\n", idxd->max_groups); - idxd->max_tokens = idxd->hw.group_cap.total_tokens; - dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens); - idxd->nr_tokens = idxd->max_tokens; + idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs; + dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs); + idxd->nr_rdbufs = idxd->max_rdbufs; /* read engine capabilities */ idxd->hw.engine_cap.bits = diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 8e396698c22b..aa642aecdc0b 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -64,9 +64,9 @@ union wq_cap_reg { union group_cap_reg { struct { u64 num_groups:8; - u64 total_tokens:8; - u64 token_en:1; - u64 token_limit:1; + u64 total_rdbufs:8; /* formerly total_tokens */ + u64 rdbuf_ctrl:1; /* formerly token_en */ + u64 rdbuf_limit:1; /* formerly token_limit */ u64 rsvd:46; }; u64 bits; @@ -110,7 +110,7 @@ union offsets_reg { #define IDXD_GENCFG_OFFSET 0x80 union gencfg_reg { struct { - u32 token_limit:8; + u32 rdbuf_limit:8; u32 rsvd:4; u32 user_int_en:1; u32 rsvd2:19; @@ -288,10 +288,10 @@ union group_flags { u32 tc_a:3; u32 tc_b:3; u32 rsvd:1; - u32 use_token_limit:1; - u32 tokens_reserved:8; + u32 use_rdbuf_limit:1; + u32 rdbufs_reserved:8; u32 rsvd2:4; - u32 tokens_allowed:8; + u32 rdbufs_allowed:8; u32 rsvd3:4; }; u32 bits; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index ff9d4548b945..be8c0def2afd 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -99,17 +99,17 @@ struct device_type idxd_engine_device_type = { /* Group attributes */ -static void idxd_set_free_tokens(struct idxd_device *idxd) +static void idxd_set_free_rdbufs(struct idxd_device *idxd) { - int i, tokens; + int i, rdbufs; - for (i = 0, tokens = 0; i < idxd->max_groups; i++) { + for (i = 0, rdbufs = 0; i < idxd->max_groups; i++) { struct idxd_group *g = idxd->groups[i]; - tokens += g->tokens_reserved; + rdbufs += g->rdbufs_reserved; } - idxd->nr_tokens = idxd->max_tokens - tokens; + idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs; } static ssize_t group_tokens_reserved_show(struct device *dev, @@ -118,7 +118,7 @@ static ssize_t group_tokens_reserved_show(struct device *dev, { struct idxd_group *group = confdev_to_group(dev); - return sysfs_emit(buf, "%u\n", group->tokens_reserved); + return sysfs_emit(buf, "%u\n", group->rdbufs_reserved); } static ssize_t group_tokens_reserved_store(struct device *dev, @@ -143,14 +143,14 @@ static ssize_t group_tokens_reserved_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; - if (val > idxd->max_tokens) + if (val > idxd->max_rdbufs) return -EINVAL; - if (val > idxd->nr_tokens + group->tokens_reserved) + if (val > idxd->nr_rdbufs + group->rdbufs_reserved) return -EINVAL; - group->tokens_reserved = val; - idxd_set_free_tokens(idxd); + group->rdbufs_reserved = val; + 
idxd_set_free_rdbufs(idxd); return count; } @@ -164,7 +164,7 @@ static ssize_t group_tokens_allowed_show(struct device *dev, { struct idxd_group *group = confdev_to_group(dev); - return sysfs_emit(buf, "%u\n", group->tokens_allowed); + return sysfs_emit(buf, "%u\n", group->rdbufs_allowed); } static ssize_t group_tokens_allowed_store(struct device *dev, @@ -190,10 +190,10 @@ static ssize_t group_tokens_allowed_store(struct device *dev, return -EPERM; if (val < 4 * group->num_engines || - val > group->tokens_reserved + idxd->nr_tokens) + val > group->rdbufs_reserved + idxd->nr_rdbufs) return -EINVAL; - group->tokens_allowed = val; + group->rdbufs_allowed = val; return count; } @@ -207,7 +207,7 @@ static ssize_t group_use_token_limit_show(struct device *dev, { struct idxd_group *group = confdev_to_group(dev); - return sysfs_emit(buf, "%u\n", group->use_token_limit); + return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit); } static ssize_t group_use_token_limit_store(struct device *dev, @@ -232,10 +232,10 @@ static ssize_t group_use_token_limit_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; - if (idxd->token_limit == 0) + if (idxd->rdbuf_limit == 0) return -EPERM; - group->use_token_limit = !!val; + group->use_rdbuf_limit = !!val; return count; } @@ -1203,7 +1203,7 @@ static ssize_t max_tokens_show(struct device *dev, { struct idxd_device *idxd = confdev_to_idxd(dev); - return sysfs_emit(buf, "%u\n", idxd->max_tokens); + return sysfs_emit(buf, "%u\n", idxd->max_rdbufs); } static DEVICE_ATTR_RO(max_tokens); @@ -1212,7 +1212,7 @@ static ssize_t token_limit_show(struct device *dev, { struct idxd_device *idxd = confdev_to_idxd(dev); - return sysfs_emit(buf, "%u\n", idxd->token_limit); + return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit); } static ssize_t token_limit_store(struct device *dev, @@ -1233,13 +1233,13 @@ static ssize_t token_limit_store(struct device *dev, if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; - if (!idxd->hw.group_cap.token_limit) + if (!idxd->hw.group_cap.rdbuf_limit) return -EPERM; - if (val > idxd->hw.group_cap.total_tokens) + if (val > idxd->hw.group_cap.total_rdbufs) return -EINVAL; - idxd->token_limit = val; + idxd->rdbuf_limit = val; return count; } static DEVICE_ATTR_RW(token_limit); -- Gitee From ee6122cdf34fe45fe4a9f30341e1fd0dfc2d6d9e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Dec 2021 13:23:14 -0700 Subject: [PATCH 111/173] dmaengine: idxd: deprecate token sysfs attributes for read buffers mainline inclusion from mainline-v5.17 commit fde212e44f45e491f8e3875084b587c0c2189078 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit fde212e44f45 dmaengine: idxd: deprecate token sysfs attributes for read buffers. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The following sysfs attributes will be obsolete due to the name change of tokens to read buffers: max_tokens token_limit group/tokens_allowed group/tokens_reserved group/use_token_limit Create new entries and have old entry print warning of deprecation. 
New attributes to replace the token ones: max_read_buffers read_buffer_limit group/read_buffers_allowed group/read_buffers_reserved group/use_read_buffer_limit Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163951339488.2988321.2424012059911316373.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- .../ABI/stable/sysfs-driver-dma-idxd | 47 ++++-- drivers/dma/idxd/sysfs.c | 145 ++++++++++++++---- 2 files changed, 153 insertions(+), 39 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 6bbaeace2aa6..e1293bfa4187 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -41,14 +41,14 @@ KernelVersion: 5.6.0 Contact: dmaengine@vger.kernel.org Description: The maximum number of groups can be created under this device. -What: /sys/bus/dsa/devices/dsa/max_tokens -Date: Oct 25, 2019 -KernelVersion: 5.6.0 +What: /sys/bus/dsa/devices/dsa/max_read_buffers +Date: Dec 10, 2021 +KernelVersion: 5.17.0 Contact: dmaengine@vger.kernel.org -Description: The total number of bandwidth tokens supported by this device. - The bandwidth tokens represent resources within the DSA +Description: The total number of read buffers supported by this device. + The read buffers represent resources within the DSA implementation, and these resources are allocated by engines to - support operations. + support operations. See DSA spec v1.2 9.2.4 Total Read Buffers. What: /sys/bus/dsa/devices/dsa/max_transfer_size Date: Oct 25, 2019 @@ -115,13 +115,13 @@ KernelVersion: 5.6.0 Contact: dmaengine@vger.kernel.org Description: To indicate if this device is configurable or not. -What: /sys/bus/dsa/devices/dsa/token_limit -Date: Oct 25, 2019 -KernelVersion: 5.6.0 +What: /sys/bus/dsa/devices/dsa/read_buffer_limit +Date: Dec 10, 2021 +KernelVersion: 5.17.0 Contact: dmaengine@vger.kernel.org -Description: The maximum number of bandwidth tokens that may be in use at +Description: The maximum number of read buffers that may be in use at one time by operations that access low bandwidth memory in the - device. + device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer Limit. What: /sys/bus/dsa/devices/dsa/cmd_status Date: Aug 28, 2020 @@ -224,7 +224,7 @@ What: /sys/bus/dsa/devices/wq./enqcmds_retries Date Oct 29, 2021 KernelVersion: 5.17.0 Contact: dmaengine@vger.kernel.org -Description: Indicate the number of retires for an enqcmds submission on a shared wq. +Description: Indicate the number of retires for an enqcmds submission on a sharedwq. A max value to set attribute is capped at 64. What: /sys/bus/dsa/devices/engine./group_id @@ -232,3 +232,26 @@ Date: Oct 25, 2019 KernelVersion: 5.6.0 Contact: dmaengine@vger.kernel.org Description: The group that this engine belongs to. + +What: /sys/bus/dsa/devices/group./use_read_buffer_limit +Date: Dec 10, 2021 +KernelVersion: 5.17.0 +Contact: dmaengine@vger.kernel.org +Description: Enable the use of global read buffer limit for the group. See DSA + spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit. + +What: /sys/bus/dsa/devices/group./read_buffers_allowed +Date: Dec 10, 2021 +KernelVersion: 5.17.0 +Contact: dmaengine@vger.kernel.org +Description: Indicates max number of read buffers that may be in use at one time + by all engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read + Buffers Allowed. 
+ +What: /sys/bus/dsa/devices/group./read_buffers_reserved +Date: Dec 10, 2021 +KernelVersion: 5.17.0 +Contact: dmaengine@vger.kernel.org +Description: Indicates the number of Read Buffers reserved for the use of + engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers + Reserved. diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index be8c0def2afd..dfd549685c46 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -112,18 +112,26 @@ static void idxd_set_free_rdbufs(struct idxd_device *idxd) idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs; } -static ssize_t group_tokens_reserved_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t group_read_buffers_reserved_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%u\n", group->rdbufs_reserved); } -static ssize_t group_tokens_reserved_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t group_tokens_reserved_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n"); + return group_read_buffers_reserved_show(dev, attr, buf); +} + +static ssize_t group_read_buffers_reserved_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; @@ -154,22 +162,42 @@ static ssize_t group_tokens_reserved_store(struct device *dev, return count; } +static ssize_t group_tokens_reserved_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n"); + return group_read_buffers_reserved_store(dev, attr, buf, count); +} + static struct device_attribute dev_attr_group_tokens_reserved = __ATTR(tokens_reserved, 0644, group_tokens_reserved_show, group_tokens_reserved_store); -static ssize_t group_tokens_allowed_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static struct device_attribute dev_attr_group_read_buffers_reserved = + __ATTR(read_buffers_reserved, 0644, group_read_buffers_reserved_show, + group_read_buffers_reserved_store); + +static ssize_t group_read_buffers_allowed_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%u\n", group->rdbufs_allowed); } -static ssize_t group_tokens_allowed_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t group_tokens_allowed_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n"); + return group_read_buffers_allowed_show(dev, attr, buf); +} + +static ssize_t group_read_buffers_allowed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; @@ -197,22 +225,42 @@ static ssize_t group_tokens_allowed_store(struct device *dev, return count; } +static ssize_t group_tokens_allowed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n"); + return group_read_buffers_allowed_store(dev, attr, buf, 
count); +} + static struct device_attribute dev_attr_group_tokens_allowed = __ATTR(tokens_allowed, 0644, group_tokens_allowed_show, group_tokens_allowed_store); -static ssize_t group_use_token_limit_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static struct device_attribute dev_attr_group_read_buffers_allowed = + __ATTR(read_buffers_allowed, 0644, group_read_buffers_allowed_show, + group_read_buffers_allowed_store); + +static ssize_t group_use_read_buffer_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct idxd_group *group = confdev_to_group(dev); return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit); } -static ssize_t group_use_token_limit_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t group_use_token_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n"); + return group_use_read_buffer_limit_show(dev, attr, buf); +} + +static ssize_t group_use_read_buffer_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; @@ -239,10 +287,22 @@ static ssize_t group_use_token_limit_store(struct device *dev, return count; } +static ssize_t group_use_token_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n"); + return group_use_read_buffer_limit_store(dev, attr, buf, count); +} + static struct device_attribute dev_attr_group_use_token_limit = __ATTR(use_token_limit, 0644, group_use_token_limit_show, group_use_token_limit_store); +static struct device_attribute dev_attr_group_use_read_buffer_limit = + __ATTR(use_read_buffer_limit, 0644, group_use_read_buffer_limit_show, + group_use_read_buffer_limit_store); + static ssize_t group_engines_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -387,8 +447,11 @@ static struct attribute *idxd_group_attributes[] = { &dev_attr_group_work_queues.attr, &dev_attr_group_engines.attr, &dev_attr_group_use_token_limit.attr, + &dev_attr_group_use_read_buffer_limit.attr, &dev_attr_group_tokens_allowed.attr, + &dev_attr_group_read_buffers_allowed.attr, &dev_attr_group_tokens_reserved.attr, + &dev_attr_group_read_buffers_reserved.attr, &dev_attr_group_traffic_class_a.attr, &dev_attr_group_traffic_class_b.attr, NULL, @@ -1198,26 +1261,42 @@ static ssize_t errors_show(struct device *dev, } static DEVICE_ATTR_RO(errors); -static ssize_t max_tokens_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t max_read_buffers_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct idxd_device *idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->max_rdbufs); } -static DEVICE_ATTR_RO(max_tokens); -static ssize_t token_limit_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t max_tokens_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see max_read_buffers.\n"); + return max_read_buffers_show(dev, attr, buf); +} + +static DEVICE_ATTR_RO(max_tokens); /* deprecated */ +static DEVICE_ATTR_RO(max_read_buffers); + +static ssize_t read_buffer_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct idxd_device 
*idxd = confdev_to_idxd(dev); return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit); } -static ssize_t token_limit_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t token_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffer_limit.\n"); + return read_buffer_limit_show(dev, attr, buf); +} + +static ssize_t read_buffer_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_device *idxd = confdev_to_idxd(dev); unsigned long val; @@ -1242,7 +1321,17 @@ static ssize_t token_limit_store(struct device *dev, idxd->rdbuf_limit = val; return count; } -static DEVICE_ATTR_RW(token_limit); + +static ssize_t token_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffer_limit\n"); + return read_buffer_limit_store(dev, attr, buf, count); +} + +static DEVICE_ATTR_RW(token_limit); /* deprecated */ +static DEVICE_ATTR_RW(read_buffer_limit); static ssize_t cdev_major_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1288,7 +1377,9 @@ static struct attribute *idxd_device_attributes[] = { &dev_attr_state.attr, &dev_attr_errors.attr, &dev_attr_max_tokens.attr, + &dev_attr_max_read_buffers.attr, &dev_attr_token_limit.attr, + &dev_attr_read_buffer_limit.attr, &dev_attr_cdev_major.attr, &dev_attr_cmd_status.attr, NULL, -- Gitee From 7f03777b2760bf11c7248d6ca403852dc90802ce Mon Sep 17 00:00:00 2001 From: Kyung Min Park Date: Sun, 14 Mar 2021 13:15:34 -0700 Subject: [PATCH 112/173] iommu/vt-d: Disable SVM when ATS/PRI/PASID are not enabled in the device mainline inclusion from mainline-v5.13 commit dec991e4722d763130c8ccd92523f2a173f8a7cd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit dec991e4722d iommu/vt-d: Disable SVM when ATS/PRI/PASID are not enabled in the device. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Currently, the Intel VT-d supports Shared Virtual Memory (SVM) only when IO page fault is supported. Otherwise, shared memory pages can not be swapped out and need to be pinned. The device needs the Address Translation Service (ATS), Page Request Interface (PRI) and Process Address Space Identifier (PASID) capabilities to be enabled to support IO page fault. Disable SVM when ATS, PRI and PASID are not enabled in the device. 
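The added gate is small but the shape generalizes: refuse to enable the SVA feature unless every capability that IO page faults depend on is already enabled. A toy model of that check follows; the struct and main() are stand-ins, while the real code tests the fields of device_domain_info and returns -EINVAL, as the hunk below shows.

#include <errno.h>
#include <stdio.h>

struct dev_info {
	unsigned int pasid_enabled:1;
	unsigned int pri_enabled:1;
	unsigned int ats_enabled:1;
};

static int sva_enable_allowed(const struct dev_info *info)
{
	/* IO page faults need all of ATS, PRI and PASID. */
	if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
		return -EINVAL;
	return 0;
}

int main(void)
{
	struct dev_info no_pri = { .pasid_enabled = 1, .ats_enabled = 1 };

	printf("without PRI: %d\n", sva_enable_allowed(&no_pri));
	return 0;
}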
Signed-off-by: Kyung Min Park Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210314201534.918-1-kyung.min.park@intel.com Signed-off-by: Joerg Roedel Signed-off-by: Xiaochen Shen --- drivers/iommu/intel/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9e14cf9d875f..f0dc42363ddc 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -6218,6 +6218,9 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (!info) return -EINVAL; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + return -EINVAL; + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) return 0; } -- Gitee From 32ade338a69c664301f804f67bd1706bd29a59f1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Mar 2021 09:05:57 +0800 Subject: [PATCH 113/173] iommu/vt-d: Remove svm_dev_ops mainline inclusion from mainline-v5.13 commit 2e1a44c1c4acf209c0dd7bc04421d101b9e80d11 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 2e1a44c1c4ac iommu/vt-d: Remove svm_dev_ops. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The svm_dev_ops has never been referenced in the tree, and there's no plan to have anything to use it. Remove it to make the code neat. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210323010600.678627-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Xiaochen Shen --- drivers/iommu/intel/svm.c | 15 +-------------- include/linux/intel-iommu.h | 3 --- include/linux/intel-svm.h | 7 ------- 3 files changed, 1 insertion(+), 24 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index aaf9f9589bdc..18fa4bdab92c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -462,7 +462,6 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) /* Caller must hold pasid_mutex, mm reference */ static int intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct svm_dev_ops *ops, struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); @@ -512,10 +511,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Find the matching device in svm list */ for_each_svm_dev(sdev, svm, dev) { - if (sdev->ops != ops) { - ret = -EBUSY; - goto out; - } sdev->users++; goto success; } @@ -550,7 +545,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Finish the setup now we know we're keeping it */ sdev->users = 1; - sdev->ops = ops; init_rcu_head(&sdev->rcu); if (!svm) { @@ -1006,13 +1000,6 @@ static irqreturn_t prq_event_thread(int irq, void *d) mmap_read_unlock(svm->mm); mmput(svm->mm); bad_req: - WARN_ON(!sdev); - if (sdev && sdev->ops && sdev->ops->fault_cb) { - int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | - (req->exe_req << 1) | (req->pm_req); - sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, - req->priv_data, rwxp, result); - } /* We get here in the error case where the PASID lookup failed, and these can be NULL. Do not use them below this point! 
*/ sdev = NULL; @@ -1087,7 +1074,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) if (drvdata) flags = *(unsigned int *)drvdata; mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev); + ret = intel_svm_bind_mm(dev, flags, mm, &sdev); if (ret) sva = ERR_PTR(ret); else if (sdev) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 805e748f7eae..0e157779c977 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -761,14 +761,11 @@ u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); -struct svm_dev_ops; - struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; struct intel_iommu *iommu; - struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; int users; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 39d368a810b8..6c9d10c0fb1e 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -8,13 +8,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -struct device; - -struct svm_dev_ops { - void (*fault_cb)(struct device *dev, u32 pasid, u64 address, - void *private, int rwxp, int response); -}; - /* Values for rxwp in fault_cb callback */ #define SVM_REQ_READ (1<<3) #define SVM_REQ_WRITE (1<<2) -- Gitee From 5cec0ffaea3b5d9b9a92b51be9f32b65d72ba39e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Mar 2021 09:05:58 +0800 Subject: [PATCH 114/173] iommu/vt-d: Remove SVM_FLAG_PRIVATE_PASID mainline inclusion from mainline-v5.13 commit 06905ea8319731036695cf1a4c53c12b0f9373cb category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 06905ea83197 iommu/vt-d: Remove SVM_FLAG_PRIVATE_PASID. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The SVM_FLAG_PRIVATE_PASID has never been referenced in the tree, and there's no plan to have anything to use it. So cleanup it. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210323010600.678627-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Xiaochen Shen --- drivers/iommu/intel/svm.c | 40 ++++++++++++++++++--------------------- include/linux/intel-svm.h | 16 +++------------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 18fa4bdab92c..069b539db81a 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -465,9 +465,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct intel_svm *svm = NULL, *t; struct device_domain_info *info; struct intel_svm_dev *sdev; - struct intel_svm *svm = NULL; unsigned long iflags; int pasid_max; int ret; @@ -493,30 +493,26 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, } } - if (!(flags & SVM_FLAG_PRIVATE_PASID)) { - struct intel_svm *t; - - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID)) - continue; - - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. 
Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; - } + list_for_each_entry(t, &global_svm_list, list) { + if (t->mm != mm) + continue; - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - sdev->users++; - goto success; - } + svm = t; + if (svm->pasid >= pasid_max) { + dev_warn(dev, + "Limited PASID width. Cannot use existing PASID %d\n", + svm->pasid); + ret = -ENOSPC; + goto out; + } - break; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; } + + break; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 6c9d10c0fb1e..10fa80eef13a 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -14,16 +14,6 @@ #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) -/* - * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" - * PASID for the current process. Even if a PASID already exists, a new one - * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID - * will not be given to subsequent callers. This facility allows a driver to - * disambiguate between multiple device contexts which access the same MM, - * if there is no other way to do so. It should be used sparingly, if at all. - */ -#define SVM_FLAG_PRIVATE_PASID (1<<0) - /* * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only * for access to kernel addresses. No IOTLB flushes are automatically done @@ -35,18 +25,18 @@ * It is unlikely that we will ever hook into flush_tlb_kernel_range() to * do such IOTLB flushes automatically. */ -#define SVM_FLAG_SUPERVISOR_MODE (1<<1) +#define SVM_FLAG_SUPERVISOR_MODE BIT(0) /* * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest * processes. Compared to the host bind, the primary differences are: * 1. mm life cycle management * 2. fault reporting */ -#define SVM_FLAG_GUEST_MODE (1<<2) +#define SVM_FLAG_GUEST_MODE BIT(1) /* * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, * which requires guest and host PASID translation at both directions. */ -#define SVM_FLAG_GUEST_PASID (1<<3) +#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ -- Gitee From 239deb9fbce6a2602953974b92a7e71dd48b38b1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:58 +0800 Subject: [PATCH 115/173] iommu/vt-d: Add pasid private data helpers mainline inclusion from mainline-v5.13 commit 100b8a14a37074796a467d6a9147317538ee70cf category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 100b8a14a370 iommu/vt-d: Add pasid private data helpers. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- We are about to use iommu_sva_alloc/free_pasid() helpers in iommu core. That means the pasid life cycle will be managed by iommu core. Use a local xarray to save the per-pasid private data instead of attaching it to the real pasid.
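One detail that is easy to miss in the diff: pasid_private_add() calls xa_alloc() with XA_LIMIT(pasid, pasid), an allocation range containing exactly one index, which turns the XArray into a "store at this ID, fail with -EBUSY if occupied" map. A self-contained model of that contract, with a plain array standing in for the XArray:

#include <errno.h>
#include <stdio.h>

#define MAX_PASID 16
static void *pasid_map[MAX_PASID];	/* stand-in for the XArray */

static int pasid_private_add(unsigned int pasid, void *priv)
{
	if (pasid >= MAX_PASID)
		return -ERANGE;
	if (pasid_map[pasid])		/* XA_LIMIT(pasid, pasid) behaviour */
		return -EBUSY;
	pasid_map[pasid] = priv;
	return 0;
}

static void *pasid_private_find(unsigned int pasid)
{
	return pasid < MAX_PASID ? pasid_map[pasid] : NULL;
}

static void pasid_private_remove(unsigned int pasid)
{
	if (pasid < MAX_PASID)
		pasid_map[pasid] = NULL;
}

int main(void)
{
	int priv;

	printf("add:       %d\n", pasid_private_add(5, &priv));	/* 0 */
	printf("add again: %d\n", pasid_private_add(5, &priv));	/* -EBUSY */
	printf("find:      %p\n", pasid_private_find(5));
	pasid_private_remove(5);
	return 0;
}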
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Xiaochen Shen --- drivers/iommu/intel/svm.c | 62 ++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 069b539db81a..4e036e71b8c2 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,23 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid); #define PRQ_ORDER 0 +static DEFINE_XARRAY_ALLOC(pasid_private_array); +static int pasid_private_add(ioasid_t pasid, void *priv) +{ + return xa_alloc(&pasid_private_array, &pasid, priv, + XA_LIMIT(pasid, pasid), GFP_ATOMIC); +} + +static void pasid_private_remove(ioasid_t pasid) +{ + xa_erase(&pasid_private_array, pasid); +} + +static void *pasid_private_find(ioasid_t pasid) +{ + return xa_load(&pasid_private_array, pasid); +} + int intel_svm_enable_prq(struct intel_iommu *iommu) { struct page *pages; @@ -224,7 +242,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, if (pasid == INVALID_IOASID || pasid >= PASID_MAX) return -EINVAL; - svm = ioasid_find(NULL, pasid, NULL); + svm = pasid_private_find(pasid); if (IS_ERR(svm)) return PTR_ERR(svm); @@ -334,7 +352,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, svm->gpasid = data->gpasid; svm->flags |= SVM_FLAG_GUEST_PASID; } - ioasid_set_data(data->hpasid, svm); + pasid_private_add(data->hpasid, svm); INIT_LIST_HEAD_RCU(&svm->devs); mmput(svm->mm); } @@ -388,7 +406,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, list_add_rcu(&sdev->list, &svm->devs); out: if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { - ioasid_set_data(data->hpasid, NULL); + pasid_private_remove(data->hpasid); kfree(svm); } @@ -431,7 +449,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) * the unbind, IOMMU driver will get notified * and perform cleanup. 
*/ - ioasid_set_data(pasid, NULL); + pasid_private_remove(pasid); kfree(svm); } } @@ -547,8 +565,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, svm = kzalloc(sizeof(*svm), GFP_KERNEL); if (!svm) { ret = -ENOMEM; - kfree(sdev); - goto out; + goto sdev_err; } if (pasid_max > intel_pasid_max_id) @@ -556,13 +573,16 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Do not use PASID 0, reserved for RID to PASID */ svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, svm); + pasid_max - 1, NULL); if (svm->pasid == INVALID_IOASID) { - kfree(svm); - kfree(sdev); ret = -ENOSPC; - goto out; + goto svm_err; } + + ret = pasid_private_add(svm->pasid, svm); + if (ret) + goto pasid_err; + svm->notifier.ops = &intel_mmuops; svm->mm = mm; svm->flags = flags; @@ -571,12 +591,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, ret = -ENOMEM; if (mm) { ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - ioasid_put(svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } + if (ret) + goto priv_err; } spin_lock_irqsave(&iommu->lock, iflags); @@ -590,8 +606,13 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, if (ret) { if (mm) mmu_notifier_unregister(&svm->notifier, mm); +priv_err: + pasid_private_remove(svm->pasid); +pasid_err: ioasid_put(svm->pasid); +svm_err: kfree(svm); +sdev_err: kfree(sdev); goto out; } @@ -614,10 +635,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - kfree(sdev); - goto out; - } + if (ret) + goto sdev_err; } list_add_rcu(&sdev->list, &svm->devs); success: @@ -670,6 +689,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) load_pasid(svm->mm, PASID_DISABLED); } list_del(&svm->list); + pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. @@ -925,7 +945,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) } if (!svm || svm->pasid != req->pasid) { rcu_read_lock(); - svm = ioasid_find(NULL, req->pasid, NULL); + svm = pasid_private_find(req->pasid); /* It *can't* go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. * So we only need RCU to protect the internal idr code. */ -- Gitee From efb579eaee502f7ab5e0c633e7f0ac92928b5720 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:59 +0800 Subject: [PATCH 116/173] iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers mainline inclusion from mainline-v5.13 commit 40483774141625b9685b177fb6e1f36de48d33f8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 404837741416 iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Align the pasid alloc/free code with the generic helpers defined in the iommu core. This also refactored the SVA binding code to improve the readability. 
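The behavioral consequence is easiest to see at the API boundary: iommu_sva_alloc_pasid() records the PASID on the mm, so a second bind of the same mm reuses the existing PASID instead of allocating a new one. A toy model of that memoization follows; the mm struct and the bump allocator are stand-ins, not kernel code.

#include <stdio.h>

struct mm {
	int pasid;		/* 0 plays the role of "no PASID yet" */
};

static int next_pasid = 1;	/* toy allocator; the real code honors min/max */

static int sva_alloc_pasid(struct mm *mm)
{
	if (!mm->pasid)
		mm->pasid = next_pasid++;
	return mm->pasid;
}

int main(void)
{
	struct mm mm = { 0 };

	printf("first bind:  pasid %d\n", sva_alloc_pasid(&mm));
	printf("second bind: pasid %d\n", sva_alloc_pasid(&mm));	/* same */
	return 0;
}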
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Xiaochen Shen --- drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 3 + drivers/iommu/intel/svm.c | 278 +++++++++++++++--------------------- include/linux/intel-iommu.h | 1 - 4 files changed, 120 insertions(+), 163 deletions(-) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 5337ee1584b0..4a6919f95094 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -40,6 +40,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID + select IOMMU_SVA_LIB help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f0dc42363ddc..965c8f641a86 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -6218,6 +6218,9 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (!info) return -EINVAL; + if (intel_iommu_enable_pasid(info->iommu, dev)) + return -ENODEV; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) return -EINVAL; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 4e036e71b8c2..7675de59e60b 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -23,9 +23,11 @@ #include #include "pasid.h" +#include "../iommu-sva-lib.h" static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, u32 pasid); +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) #define PRQ_ORDER 0 @@ -222,7 +224,6 @@ static const struct mmu_notifier_ops intel_mmuops = { }; static DEFINE_MUTEX(pasid_mutex); -static LIST_HEAD(global_svm_list); #define for_each_svm_dev(sdev, svm, d) \ list_for_each_entry((sdev), &(svm)->devs, list) \ @@ -477,79 +478,80 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) mutex_unlock(&mm->context.lock); } -/* Caller must hold pasid_mutex, mm reference */ -static int -intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct mm_struct *mm, struct intel_svm_dev **sd) +static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, + unsigned int flags) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm *svm = NULL, *t; - struct device_domain_info *info; - struct intel_svm_dev *sdev; - unsigned long iflags; - int pasid_max; - int ret; + ioasid_t max_pasid = dev_is_pci(dev) ? 
+ pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; - if (!iommu || dmar_disabled) - return -EINVAL; + return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); +} - if (!intel_svm_capable(iommu)) - return -ENOTSUPP; +static void intel_svm_free_pasid(struct mm_struct *mm) +{ + iommu_sva_free_pasid(mm); +} - if (dev_is_pci(dev)) { - pasid_max = pci_max_pasids(to_pci_dev(dev)); - if (pasid_max < 0) - return -EINVAL; - } else - pasid_max = 1 << 20; +static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, + struct device *dev, + struct mm_struct *mm, + unsigned int flags) +{ + struct device_domain_info *info = get_domain_info(dev); + unsigned long iflags, sflags; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; - /* Bind supervisor PASID shuld have mm = NULL */ - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap) || mm) { - pr_err("Supervisor PASID with user provided mm.\n"); - return -EINVAL; - } - } + svm = pasid_private_find(mm->pasid); + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) + return ERR_PTR(-ENOMEM); - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm) - continue; + svm->pasid = mm->pasid; + svm->mm = mm; + svm->flags = flags; + INIT_LIST_HEAD_RCU(&svm->devs); - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; + if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); + } } - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - sdev->users++; - goto success; + ret = pasid_private_add(svm->pasid, svm); + if (ret) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + kfree(svm); + return ERR_PTR(ret); } + } - break; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) { ret = -ENOMEM; - goto out; + goto free_svm; } + sdev->dev = dev; sdev->iommu = iommu; - - ret = intel_iommu_enable_pasid(iommu, dev); - if (ret) { - kfree(sdev); - goto out; - } - - info = get_domain_info(dev); sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->users = 1; + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->dev_iotlb = 1; sdev->qdep = info->ats_qdep; @@ -557,96 +559,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, sdev->qdep = 0; } - /* Finish the setup now we know we're keeping it */ - sdev->users = 1; - init_rcu_head(&sdev->rcu); - - if (!svm) { - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) { - ret = -ENOMEM; - goto sdev_err; - } - - if (pasid_max > intel_pasid_max_id) - pasid_max = intel_pasid_max_id; - - /* Do not use PASID 0, reserved for RID to PASID */ - svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, NULL); - if (svm->pasid == INVALID_IOASID) { - ret = -ENOSPC; - goto svm_err; - } - - ret = pasid_private_add(svm->pasid, svm); - if (ret) - goto pasid_err; + /* Setup the pasid table: */ + sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? + PASID_FLAG_SUPERVISOR_MODE : 0; + sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? 
PASID_FLAG_FL5LP : 0; + spin_lock_irqsave(&iommu->lock, iflags); + ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, + FLPT_DEFAULT_DID, sflags); + spin_unlock_irqrestore(&iommu->lock, iflags); - svm->notifier.ops = &intel_mmuops; - svm->mm = mm; - svm->flags = flags; - INIT_LIST_HEAD_RCU(&svm->devs); - INIT_LIST_HEAD(&svm->list); - ret = -ENOMEM; - if (mm) { - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) - goto priv_err; - } + if (ret) + goto free_sdev; - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - if (mm) - mmu_notifier_unregister(&svm->notifier, mm); -priv_err: - pasid_private_remove(svm->pasid); -pasid_err: - ioasid_put(svm->pasid); -svm_err: - kfree(svm); -sdev_err: - kfree(sdev); - goto out; - } + /* The newly allocated pasid is loaded to the mm. */ + if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs)) + load_pasid(mm, svm->pasid); - list_add_tail(&svm->list, &global_svm_list); - if (mm) { - /* The newly allocated pasid is loaded to the mm. */ - load_pasid(mm, svm->pasid); - } - } else { - /* - * Binding a new device with existing PASID, need to setup - * the PASID entry. - */ - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) - goto sdev_err; - } list_add_rcu(&sdev->list, &svm->devs); success: - sdev->pasid = svm->pasid; - sdev->sva.dev = dev; - if (sd) - *sd = sdev; - ret = 0; -out: - return ret; + return &sdev->sva; + +free_sdev: + kfree(sdev); +free_svm: + if (list_empty(&svm->devs)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(mm->pasid); + kfree(svm); + } + + return ERR_PTR(ret); } /* Caller must hold pasid_mutex */ @@ -655,6 +598,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; + struct mm_struct *mm; int ret = -EINVAL; iommu = device_to_iommu(dev, NULL, NULL); @@ -664,6 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); if (ret) goto out; + mm = svm->mm; if (sdev) { sdev->users--; @@ -682,13 +627,12 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - ioasid_put(svm->pasid); - if (svm->mm) { - mmu_notifier_unregister(&svm->notifier, svm->mm); + intel_svm_free_pasid(mm); + if (svm->notifier.ops) { + mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ - load_pasid(svm->mm, PASID_DISABLED); + load_pasid(mm, PASID_DISABLED); } - list_del(&svm->list); pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. 
@@ -1073,31 +1017,42 @@ static irqreturn_t prq_event_thread(int irq, void *d) return IRQ_RETVAL(handled); } -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) -struct iommu_sva * -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) { - struct iommu_sva *sva = ERR_PTR(-EINVAL); - struct intel_svm_dev *sdev = NULL; + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); unsigned int flags = 0; + struct iommu_sva *sva; int ret; - /* - * TODO: Consolidate with generic iommu-sva bind after it is merged. - * It will require shared SVM data structures, i.e. combine io_mm - * and intel_svm etc. - */ if (drvdata) flags = *(unsigned int *)drvdata; + + if (flags & SVM_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + dev_err(dev, "%s: Supervisor PASID not supported\n", + iommu->name); + return ERR_PTR(-EOPNOTSUPP); + } + + if (mm) { + dev_err(dev, "%s: Supervisor PASID with user provided mm\n", + iommu->name); + return ERR_PTR(-EINVAL); + } + + mm = &init_mm; + } + mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, mm, &sdev); - if (ret) - sva = ERR_PTR(ret); - else if (sdev) - sva = &sdev->sva; - else - WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + ret = intel_svm_alloc_pasid(dev, mm, flags); + if (ret) { + mutex_unlock(&pasid_mutex); + return ERR_PTR(ret); + } + sva = intel_svm_bind_mm(iommu, dev, mm, flags); + if (IS_ERR_OR_NULL(sva)) + intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; @@ -1105,10 +1060,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) void intel_svm_unbind(struct iommu_sva *sva) { - struct intel_svm_dev *sdev; + struct intel_svm_dev *sdev = to_intel_svm_dev(sva); mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); intel_svm_unbind_mm(sdev->dev, sdev->pasid); mutex_unlock(&pasid_mutex); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0e157779c977..3f54def870b8 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -782,7 +782,6 @@ struct intel_svm { u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; - struct list_head list; }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} -- Gitee From d77e92aba791b459f3b38b63d80d0ecfd7c849ec Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 Aug 2021 20:43:20 +0800 Subject: [PATCH 117/173] iommu/vt-d: Fix PASID reference leak mainline inclusion from mainline-v5.14 commit 62ef907a045e1a81830941c48004d7af71c9d75a category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 62ef907a045e iommu/vt-d: Fix PASID reference leak. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- A PASID reference is increased whenever a device is bound to an mm (and its PASID) successfully (i.e. the device's sdev user count is increased). But the reference is not dropped every time the device is unbound successfully from the mm (i.e. the device's sdev user count is decreased). The reference is dropped only once by calling intel_svm_free_pasid() when there isn't any device bound to the mm. intel_svm_free_pasid() drops the reference and only frees the PASID on zero reference. 
Fix the issue by calling intel_svm_free_pasid() on each successful unbind of the device, so that the PASID reference is dropped and the PASID is freed once no references remain. Fixes: 4048377414162 ("iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers") Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20210813181345.1870742-1-fenghua.yu@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210817124321.1517985-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Xiaochen Shen --- drivers/iommu/intel/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7675de59e60b..8825574dcef2 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -627,7 +627,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - intel_svm_free_pasid(mm); if (svm->notifier.ops) { mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ @@ -642,6 +641,8 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } + /* Drop a PASID reference and free it if no reference. */ + intel_svm_free_pasid(mm); } out: return ret; -- Gitee From 6f8fd90de9163b8034ba47de1d88bfbbf91d101f Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Sat, 28 Aug 2021 15:06:21 +0800 Subject: [PATCH 118/173] iommu/vt-d: Fix PASID leak in intel_svm_unbind_mm() mainline inclusion from mainline-v5.15 commit a21518cb23a3c7d49bafcb59862fd389fd829d4b category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit a21518cb23a3 iommu/vt-d: Fix PASID leak in intel_svm_unbind_mm(). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- mm->pasid is used by intel_svm_free_pasid() after load_pasid() runs while unbinding the mm. Clearing it in load_pasid() means the PASID can no longer be freed by intel_svm_free_pasid(). Additionally, mm->pasid was already updated before load_pasid() during PASID allocation, so there is no need to update it again in load_pasid() while binding the mm. Don't update mm->pasid in load_pasid() at all, which avoids the issue on both the bind and unbind paths. Fixes: 4048377414162 ("iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers") Reported-and-tested-by: Dave Jiang Co-developed-by: Jacob Pan Signed-off-by: Jacob Pan Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20210826215918.4073446-1-fenghua.yu@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210828070622.2437559-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Xiaochen Shen --- drivers/iommu/intel/svm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 8825574dcef2..9edb468622ba 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -469,9 +469,6 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) { mutex_lock(&mm->context.lock); - /* Synchronize with READ_ONCE in update_pasid(). */ - smp_store_release(&mm->pasid, pasid); - /* Update PASID MSR on all CPUs running the mm's tasks.
*/ on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true); -- Gitee From 5eb609b5d13b99a7968246dfcf57581f2126b913 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 14 Oct 2021 13:38:37 +0800 Subject: [PATCH 119/173] iommu/vt-d: Clean up unused PASID updating functions mainline inclusion from mainline-v5.16 commit 00ecd5401349a5e23d2ad768a23b110030165fac category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 00ecd5401349 iommu/vt-d: Clean up unused PASID updating functions. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- update_pasid() and its call chain are currently unused in the tree because Thomas disabled the ENQCMD feature. The feature will be re-enabled shortly using a different approach and update_pasid() and its call chain will not be used in the new approach. Remove the useless functions. Signed-off-by: Fenghua Yu Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/20210920192349.2602141-1-fenghua.yu@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20211014053839.727419-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Xiaochen Shen --- arch/x86/include/asm/fpu/api.h | 2 -- drivers/iommu/intel/svm.c | 24 +----------------------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index d6e8933faafe..66df5565917c 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -81,8 +81,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); */ #define PASID_DISABLED 0 -static inline void update_pasid(void) { } - /* Trap handling */ extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern void fpu_sync_fpstate(struct fpu *fpu); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 9edb468622ba..702916e63dbc 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -460,21 +460,6 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) return ret; } -static void _load_pasid(void *unused) -{ - update_pasid(); -} - -static void load_pasid(struct mm_struct *mm, u32 pasid) -{ - mutex_lock(&mm->context.lock); - - /* Update PASID MSR on all CPUs running the mm's tasks. */ - on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true); - - mutex_unlock(&mm->context.lock); -} - static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, unsigned int flags) { @@ -568,10 +553,6 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, if (ret) goto free_sdev; - /* The newly allocated pasid is loaded to the mm. */ - if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs)) - load_pasid(mm, svm->pasid); - list_add_rcu(&sdev->list, &svm->devs); success: return &sdev->sva; @@ -624,11 +605,8 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - if (svm->notifier.ops) { + if (svm->notifier.ops) mmu_notifier_unregister(&svm->notifier, mm); - /* Clear mm's pasid. */ - load_pasid(mm, PASID_DISABLED); - } pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. 
-- Gitee From f1adf934ba2acb267084a4ccdad1ea18e0156d41 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:44 -0800 Subject: [PATCH 120/173] iommu/sva: Rename CONFIG_IOMMU_SVA_LIB to CONFIG_IOMMU_SVA mainline inclusion from mainline-v5.18 commit 7ba564722d98e3e7bc3922ad4f2885ca0336674e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7ba564722d98 iommu/sva: Rename CONFIG_IOMMU_SVA_LIB to CONFIG_IOMMU_SVA. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- This CONFIG option originally only referred to the Shared Virtual Address (SVA) library. But it is now also used for non-library portions of code. Drop the "_LIB" suffix so that there is just one configuration option for all code relating to SVA. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-2-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- drivers/iommu/Kconfig | 6 +++--- drivers/iommu/Makefile | 3 +-- drivers/iommu/intel/Kconfig | 2 +- drivers/iommu/iommu-sva-lib.h | 6 +++--- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 044df15aa0c9..d632d456ed3c 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -103,8 +103,8 @@ config IOMMU_DMA select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH -# Shared Virtual Addressing library -config IOMMU_SVA_LIB +# Shared Virtual Addressing +config IOMMU_SVA bool select IOASID @@ -318,7 +318,7 @@ config ARM_SMMU_V3 config ARM_SMMU_V3_SVA bool "Shared Virtual Addressing support for the ARM SMMUv3" depends on ARM_SMMU_V3 - select IOMMU_SVA_LIB + select IOMMU_SVA select MMU_NOTIFIER help Support for sharing process address spaces with devices using the diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 1e7519f56afb..ae38d815537e 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,5 +27,4 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o -obj-$(CONFIG_IOMMU_SVA_LIB) += io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 4a6919f95094..cecdad7f2aba 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -40,7 +40,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID - select IOMMU_SVA_LIB + select IOMMU_SVA help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 031155010ca8..95dc3ebc1928 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -17,7 +17,7 @@ struct device; struct iommu_fault; struct iopf_queue; -#ifdef CONFIG_IOMMU_SVA_LIB +#ifdef CONFIG_IOMMU_SVA int iommu_queue_iopf(struct iommu_fault *fault, void *cookie); int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); @@ -28,7 +28,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); -#else /* CONFIG_IOMMU_SVA_LIB */ +#else /* CONFIG_IOMMU_SVA */ static 
inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) { return -ENODEV; @@ -64,5 +64,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) { return -ENODEV; } -#endif /* CONFIG_IOMMU_SVA_LIB */ +#endif /* CONFIG_IOMMU_SVA */ #endif /* _IOMMU_SVA_LIB_H */ -- Gitee From c6b5b64b92b7ea8ac3a70817677f242b1bb76cdc Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:45 -0800 Subject: [PATCH 121/173] mm: Change CONFIG option for mm->pasid field mainline inclusion from mainline-v5.18 commit 7a853c2d5951419fdf3c1c9d2b6f5a38f6a6857d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7a853c2d5951 mm: Change CONFIG option for mm->pasid field. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- This currently depends on CONFIG_IOMMU_SUPPORT. But it is only needed when CONFIG_IOMMU_SVA option is enabled. Change the CONFIG guards around definition and initialization of mm->pasid field. Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-3-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- include/linux/mm_types.h | 2 +- kernel/fork.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ba69c5ecd4ce..99ae1f9a9019 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -608,7 +608,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA u32 pasid; #endif diff --git a/kernel/fork.c b/kernel/fork.c index 5e928cdf0526..7d865656d942 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1016,7 +1016,7 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) static void mm_init_pasid(struct mm_struct *mm) { -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA mm->pasid = INIT_PASID; #endif } -- Gitee From fcd391d8b774db55876ba328fea4b2608a280b83 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:46 -0800 Subject: [PATCH 122/173] iommu/ioasid: Introduce a helper to check for valid PASIDs mainline inclusion from mainline-v5.18 commit 7a5fbc9bcba5325a45297a4ba00091f39a63a1ed category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7a5fbc9bcba5 iommu/ioasid: Introduce a helper to check for valid PASIDs. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Define a pasid_valid() helper to check if a given PASID is valid. [ bp: Massage commit message. 
] Suggested-by: Ashok Raj Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-4-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- include/linux/ioasid.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index e9dacd4b9f6b..2237f64dbaae 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -41,6 +41,10 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); +static inline bool pasid_valid(ioasid_t ioasid) +{ + return ioasid != INVALID_IOASID; +} #else /* !CONFIG_IOASID */ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, @@ -78,5 +82,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) return -ENOTSUPP; } +static inline bool pasid_valid(ioasid_t ioasid) +{ + return false; +} + #endif /* CONFIG_IOASID */ #endif /* __LINUX_IOASID_H */ -- Gitee From 0ba8bb12446da8aeee657d2563d1d85d0aa7e6e3 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:47 -0800 Subject: [PATCH 123/173] kernel/fork: Initialize mm's PASID mainline inclusion from mainline-v5.18 commit a6cbd44093ef305b02ad5f80ed54abf0148a696c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit a6cbd44093ef kernel/fork: Initialize mm's PASID. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- A new mm doesn't have a PASID yet when it's created. Initialize the mm's PASID on fork() or for init_mm to INVALID_IOASID (-1). INIT_PASID (0) is reserved for the kernel legacy DMA PASID. It cannot be allocated to a user process. Initializing the process's PASID to 0 could wrongly suggest that the process is using the reserved kernel legacy DMA PASID. Initializing the PASID to INVALID_IOASID (-1) explicitly indicates that the process doesn't have a valid PASID yet. Even though the only user of mm_pasid_init() is in fork.c, define it in <linux/sched/mm.h> as the first of three mm/pasid life cycle functions (init/set/drop) to keep them all together.
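The distinction matters to consumers. A brief illustrative sketch (the function name is hypothetical; pasid_valid() is the helper introduced two patches earlier):

        /*
         * Illustrative sketch only: with mm->pasid starting life as
         * INVALID_IOASID (-1), "no PASID allocated yet" is clearly
         * distinguishable from the reserved legacy DMA PASID 0.
         */
        static bool example_mm_has_pasid(struct mm_struct *mm)
        {
                return pasid_valid(mm->pasid);  /* false for a fresh mm */
        }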
Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-5-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- include/linux/sched/mm.h | 10 ++++++++++ kernel/fork.c | 10 ++-------- mm/init-mm.c | 4 ++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index e3e5e149b00e..82f77c1506c1 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * Routines for handling mm_structs @@ -372,4 +373,13 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) } #endif +#ifdef CONFIG_IOMMU_SVA +static inline void mm_pasid_init(struct mm_struct *mm) +{ + mm->pasid = INVALID_IOASID; +} +#else +static inline void mm_pasid_init(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 7d865656d942..a2a8071707af 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -96,6 +96,7 @@ #include #include #include +#include #include #include @@ -1014,13 +1015,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static void mm_init_pasid(struct mm_struct *mm) -{ -#ifdef CONFIG_IOMMU_SVA - mm->pasid = INIT_PASID; -#endif -} - static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@ -1051,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); - mm_init_pasid(mm); + mm_pasid_init(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); diff --git a/mm/init-mm.c b/mm/init-mm.c index 153162669f80..2dfed38be177 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -10,6 +10,7 @@ #include #include +#include #include #ifndef INIT_MM_CONTEXT @@ -38,5 +39,8 @@ struct mm_struct init_mm = { .mmlist = LIST_HEAD_INIT(init_mm.mmlist), .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, +#ifdef CONFIG_IOMMU_SVA + .pasid = INVALID_IOASID, +#endif INIT_MM_CONTEXT(init_mm) }; -- Gitee From e34d4d40c80958792d9b01e7b02b080450a1604c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:48 -0800 Subject: [PATCH 124/173] iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit mainline inclusion from mainline-v5.18 commit 701fac40384f07197b106136012804c3cae0b3de category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 701fac40384f iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit. Incremental backporting patches for DSA/IAA on Intel Xeon platform. Deviation from upstream: Replace ioasid_put() with ioasid_free() in drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c for the change in out of tree commit: 7ca848426e5d iommu/arm-smmu-v3: Support auxiliary domains. -------------------------------- PASIDs are process-wide. It was attempted to use refcounted PASIDs to free them when the last thread drops the refcount. This turned out to be complex and error prone. Given the fact that the PASID space is 20 bits, which allows up to 1M processes to have a PASID associated concurrently, PASID resource exhaustion is not a realistic concern. 
Therefore, it was decided to simplify the approach and stick with lazy on demand PASID allocation, but drop the eager free approach and make an allocated PASID's lifetime bound to the lifetime of the process. Get rid of the refcounting mechanisms and replace/rename the interfaces to reflect this new approach. [ bp: Massage commit message. ] Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Lu Baolu Reviewed-by: Jacob Pan Reviewed-by: Thomas Gleixner Acked-by: Joerg Roedel Link: https://lore.kernel.org/r/20220207230254.3342514-6-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 5 +-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 +- drivers/iommu/intel/iommu.c | 4 +- drivers/iommu/intel/svm.c | 9 ----- drivers/iommu/ioasid.c | 38 ++---------------- drivers/iommu/iommu-sva-lib.c | 39 ++++++------------- drivers/iommu/iommu-sva-lib.h | 1 - include/linux/ioasid.h | 12 +----- include/linux/sched/mm.h | 16 ++++++++ kernel/fork.c | 1 + 10 files changed, 40 insertions(+), 89 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index d4e1238347a6..a8e8fd9d7c8b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -351,15 +351,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); if (IS_ERR(bond->smmu_mn)) { ret = PTR_ERR(bond->smmu_mn); - goto err_free_pasid; + goto err_free_bond; } list_add(&bond->list, &master->bonds); trace_smmu_bind_alloc(dev, mm->pasid); return &bond->sva; -err_free_pasid: - iommu_sva_free_pasid(mm); err_free_bond: kfree(bond); return ERR_PTR(ret); @@ -403,7 +401,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle) trace_smmu_unbind_free(handle->dev, bond->mm->pasid); list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); - iommu_sva_free_pasid(bond->mm); kfree(bond); } else { trace_smmu_unbind_put(handle->dev, bond->mm->pasid); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 53602868a90e..9e702788e90f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2506,7 +2506,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) arm_smmu_free_asid(&s1_cfg->cd); mutex_unlock(&arm_smmu_asid_lock); if (smmu_domain->ssid) - ioasid_put(smmu_domain->ssid); + ioasid_free(smmu_domain->ssid); } if (s2_cfg->set) { if (s2_cfg->vmid) @@ -3980,7 +3980,7 @@ static int arm_smmu_aux_attach_dev(struct iommu_domain *domain, struct device *d smmu_domain->smmu = NULL; smmu_domain->ssid = 0; smmu_domain->parent = NULL; - ioasid_put(ssid); + ioasid_free(ssid); goto out_unlock; } } else if (smmu_domain->parent != parent_smmu_domain) { diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 965c8f641a86..e1772596c50f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5471,7 +5471,7 @@ static int aux_domain_add_dev(struct dmar_domain *domain, link_failed: spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); return ret; } @@ -5501,7 +5501,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); if 
(list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); } static int prepare_domain_attach_device(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 702916e63dbc..0b7f9e39ce3b 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -469,11 +469,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); } -static void intel_svm_free_pasid(struct mm_struct *mm) -{ - iommu_sva_free_pasid(mm); -} - static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, struct mm_struct *mm, @@ -616,8 +611,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } - /* Drop a PASID reference and free it if no reference. */ - intel_svm_free_pasid(mm); } out: return ret; @@ -1027,8 +1020,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void } sva = intel_svm_bind_mm(iommu, dev, mm, flags); - if (IS_ERR_OR_NULL(sva)) - intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c index 50ee27bbd04e..a786c034907c 100644 --- a/drivers/iommu/ioasid.c +++ b/drivers/iommu/ioasid.c @@ -2,7 +2,7 @@ /* * I/O Address Space ID allocator. There is one global IOASID space, split into * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and - * free IOASIDs with ioasid_alloc and ioasid_put. + * free IOASIDs with ioasid_alloc() and ioasid_free(). */ #include #include @@ -15,7 +15,6 @@ struct ioasid_data { struct ioasid_set *set; void *private; struct rcu_head rcu; - refcount_t refs; }; /* @@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, data->set = set; data->private = private; - refcount_set(&data->refs, 1); /* * Custom allocator needs allocator data to perform platform specific @@ -348,34 +346,11 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, EXPORT_SYMBOL_GPL(ioasid_alloc); /** - * ioasid_get - obtain a reference to the IOASID - */ -void ioasid_get(ioasid_t ioasid) -{ - struct ioasid_data *ioasid_data; - - spin_lock(&ioasid_allocator_lock); - ioasid_data = xa_load(&active_allocator->xa, ioasid); - if (ioasid_data) - refcount_inc(&ioasid_data->refs); - else - WARN_ON(1); - spin_unlock(&ioasid_allocator_lock); -} -EXPORT_SYMBOL_GPL(ioasid_get); - -/** - * ioasid_put - Release a reference to an ioasid + * ioasid_free - Free an ioasid * @ioasid: the ID to remove - * - * Put a reference to the IOASID, free it when the number of references drops to - * zero. - * - * Return: %true if the IOASID was freed, %false otherwise. 
*/ -bool ioasid_put(ioasid_t ioasid) +void ioasid_free(ioasid_t ioasid) { - bool free = false; struct ioasid_data *ioasid_data; spin_lock(&ioasid_allocator_lock); @@ -385,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid) goto exit_unlock; } - free = refcount_dec_and_test(&ioasid_data->refs); - if (!free) - goto exit_unlock; - active_allocator->ops->free(ioasid, active_allocator->ops->pdata); /* Custom allocator needs additional steps to free the xa element */ if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) { @@ -398,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid) exit_unlock: spin_unlock(&ioasid_allocator_lock); - return free; } -EXPORT_SYMBOL_GPL(ioasid_put); +EXPORT_SYMBOL_GPL(ioasid_free); /** * ioasid_find - Find IOASID data diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c index bd41405d34e9..106506143896 100644 --- a/drivers/iommu/iommu-sva-lib.c +++ b/drivers/iommu/iommu-sva-lib.c @@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid); * * Try to allocate a PASID for this mm, or take a reference to the existing one * provided it fits within the [@min, @max] range. On success the PASID is - * available in mm->pasid, and must be released with iommu_sva_free_pasid(). - * @min must be greater than 0, because 0 indicates an unused mm->pasid. + * available in mm->pasid and will be available for the lifetime of the mm. * * Returns 0 on success and < 0 on error. */ @@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) return -EINVAL; mutex_lock(&iommu_sva_lock); - if (mm->pasid) { - if (mm->pasid >= min && mm->pasid <= max) - ioasid_get(mm->pasid); - else + /* Is a PASID already associated with this mm? */ + if (pasid_valid(mm->pasid)) { + if (mm->pasid < min || mm->pasid >= max) ret = -EOVERFLOW; - } else { - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); - if (pasid == INVALID_IOASID) - ret = -ENOMEM; - else - mm->pasid = pasid; + goto out; } + + pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); + if (!pasid_valid(pasid)) + ret = -ENOMEM; + else + mm_pasid_set(mm, pasid); +out: mutex_unlock(&iommu_sva_lock); return ret; } EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); -/** - * iommu_sva_free_pasid - Release the mm's PASID - * @mm: the mm - * - * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid() - */ -void iommu_sva_free_pasid(struct mm_struct *mm) -{ - mutex_lock(&iommu_sva_lock); - if (ioasid_put(mm->pasid)) - mm->pasid = 0; - mutex_unlock(&iommu_sva_lock); -} -EXPORT_SYMBOL_GPL(iommu_sva_free_pasid); - /* ioasid_find getter() requires a void * argument */ static bool __mmget_not_zero(void *mm) { diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 95dc3ebc1928..8909ea1094e3 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -9,7 +9,6 @@ #include int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); -void iommu_sva_free_pasid(struct mm_struct *mm); struct mm_struct *iommu_sva_find(ioasid_t pasid); /* I/O Page fault */ diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index 2237f64dbaae..af1c9d62e642 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,8 +34,7 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_get(ioasid_t ioasid); -bool ioasid_put(ioasid_t ioasid); +void ioasid_free(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool 
(*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); @@ -53,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_get(ioasid_t ioasid) -{ -} - -static inline bool ioasid_put(ioasid_t ioasid) -{ - return false; -} +static inline void ioasid_free(ioasid_t ioasid) { } static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 82f77c1506c1..f58a2730a130 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -378,8 +378,24 @@ static inline void mm_pasid_init(struct mm_struct *mm) { mm->pasid = INVALID_IOASID; } + +/* Associate a PASID with an mm_struct: */ +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) +{ + mm->pasid = pasid; +} + +static inline void mm_pasid_drop(struct mm_struct *mm) +{ + if (pasid_valid(mm->pasid)) { + ioasid_free(mm->pasid); + mm->pasid = INVALID_IOASID; + } +} #else static inline void mm_pasid_init(struct mm_struct *mm) {} +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} +static inline void mm_pasid_drop(struct mm_struct *mm) {} #endif #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/fork.c b/kernel/fork.c index a2a8071707af..1d702e8f4b72 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1133,6 +1133,7 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); + mm_pasid_drop(mm); mmdrop(mm); } -- Gitee From c15e3e1b26a586a5b9978d9a38c39c2d57dab74e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:49 -0800 Subject: [PATCH 125/173] x86/fpu: Clear PASID when copying fpstate mainline inclusion from mainline-v5.18 commit dc7507ddce593cbd9c93330024a5658db6f8ef73 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit dc7507ddce59 x86/fpu: Clear PASID when copying fpstate. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The kernel must allocate a Process Address Space ID (PASID) on behalf of each process which will use ENQCMD and program it into the new MSR to communicate the process identity to platform hardware. ENQCMD uses the PASID stored in this MSR to tag requests from this process. The PASID state must be cleared on fork() since fork creates a new address space. For clone(), it would be functionally OK to copy the PASID. However, clearing it is _also_ functionally OK since any PASID use will trigger the #GP handler to populate the MSR. Copying the PASID state has two main downsides: * It requires differentiating fork() and clone() in the code, both in the FPU code and keeping tsk->pasid_activated consistent. * It guarantees that the PASID is out of its init state, which incurs small but non-zero cost on every XSAVE/XRSTOR. The main downside of clearing the PASID at fpstate copy is the future, one-time #GP for the thread. Use the simplest approach: clear the PASID state both on clone() and fork(). Rely on the #GP handler for MSR population in children. Also, just clear the PASID bit from xfeatures if XSAVE is supported. This will have no effect on systems that do not have PASID support. It is virtually zero overhead because 'dst_fpu' was just written and the whole thing is cache hot. 
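Condensed, the chosen policy looks like the sketch below. This is an illustrative restatement rather than a quote of the hunk that follows; example_clear_child_pasid() is a hypothetical name, while the field accesses match the patch.

        /*
         * Illustrative sketch: on both fork() and clone(), drop the child's
         * PASID xstate back to its init value. The child's first ENQCMD then
         * takes a one-time #GP, and the handler (added later in this series)
         * repopulates the IA32_PASID MSR from mm->pasid.
         */
        static void example_clear_child_pasid(struct fpu *dst_fpu)
        {
                if (use_xsave())
                        dst_fpu->fpstate->regs.xsave.header.xfeatures &=
                                ~XFEATURE_MASK_PASID;
        }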
Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-7-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- arch/x86/kernel/fpu/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1db281e8ef21..d00e52fcc9d0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -502,6 +502,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags) fpu_inherit_perms(dst_fpu); fpregs_unlock(); + /* + * Children never inherit PASID state. + * Force it to have its init value: + */ + if (use_xsave()) + dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID; + trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); -- Gitee From ad0345abd4e9bd45b446bccc24893b5ea3ce898c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 7 Feb 2022 15:02:50 -0800 Subject: [PATCH 126/173] sched: Define and initialize a flag to identify valid PASID in the task mainline inclusion from mainline-v5.18 commit a3d29e8291b622780eb6e4e3eeaf2b24ec78fd43 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit a3d29e8291b6 sched: Define and initialize a flag to identify valid PASID in the task. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add a new single bit field to the task structure to track whether this task has initialized the IA32_PASID MSR to the mm's PASID. Initialize the field to zero when creating a new task with fork/clone. Signed-off-by: Peter Zijlstra Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-8-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- include/linux/sched.h | 3 +++ kernel/fork.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index 888e5cc2fb6a..50b7414946d3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -872,6 +872,9 @@ struct task_struct { /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif +#ifdef CONFIG_IOMMU_SVA + unsigned pasid_activated:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ diff --git a/kernel/fork.c b/kernel/fork.c index 1d702e8f4b72..72d2834cc4fd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -963,6 +963,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->use_memdelay = 0; #endif +#ifdef CONFIG_IOMMU_SVA + tsk->pasid_activated = 0; +#endif + #ifdef CONFIG_MEMCG tsk->active_memcg = NULL; #endif -- Gitee From 2a25299b22693d0f0cc13f22c901d85ebfd77d41 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:51 -0800 Subject: [PATCH 127/173] x86/traps: Demand-populate PASID MSR via #GP mainline inclusion from mainline-v5.18 commit fa6af69f38d3f409bedc55d0112eec36ed526d4b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit fa6af69f38d3 x86/traps: Demand-populate PASID MSR via #GP. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- All tasks start with PASID state disabled. This means that the first time they execute an ENQCMD instruction they will take a #GP fault. 
Modify the #GP fault handler to check if the "mm" for the task has already been allocated a PASID. If so, try to fix the #GP fault by loading the IA32_PASID MSR. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-9-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- arch/x86/kernel/traps.c | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a47529436a6d..035fd6b89ecc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -562,6 +563,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs) return true; } +/* + * The unprivileged ENQCMD instruction generates #GPs if the + * IA32_PASID MSR has not been populated. If possible, populate + * the MSR from a PASID previously allocated to the mm. + */ +static bool try_fixup_enqcmd_gp(void) +{ +#ifdef CONFIG_IOMMU_SVA + u32 pasid; + + /* + * MSR_IA32_PASID is managed using XSAVE. Directly + * writing to the MSR is only possible when fpregs + * are valid and the fpstate is not. This is + * guaranteed when handling a userspace exception + * in *before* interrupts are re-enabled. + */ + lockdep_assert_irqs_disabled(); + + /* + * Hardware without ENQCMD will not generate + * #GPs that can be fixed up here. + */ + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) + return false; + + pasid = current->mm->pasid; + + /* + * If the mm has not been allocated a + * PASID, the #GP can not be fixed up. + */ + if (!pasid_valid(pasid)) + return false; + + /* + * Did this thread already have its PASID activated? + * If so, the #GP must be from something else. + */ + if (current->pasid_activated) + return false; + + wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); + current->pasid_activated = 1; + + return true; +#else + return false; +#endif +} + DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; @@ -570,6 +622,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) unsigned long gp_addr; int ret; + if (user_mode(regs) && try_fixup_enqcmd_gp()) + return; + cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { -- Gitee From b7dda6d50f74b137bd090bde16bc24e0e41d0f79 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:52 -0800 Subject: [PATCH 128/173] x86/cpufeatures: Re-enable ENQCMD mainline inclusion from mainline-v5.18 commit 7c1ef59145f1c8bf9a2cc7a6ebf2fd56bbb440de category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7c1ef59145f1 x86/cpufeatures: Re-enable ENQCMD. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The ENQCMD feature can only be used if CONFIG_INTEL_IOMMU_SVM is set. Add X86_FEATURE_ENQCMD to the disabled features mask as appropriate so that cpu_feature_enabled() can be used to check the feature. [ bp: Massage commit message. 
] Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-10-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- arch/x86/include/asm/disabled-features.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 83a4a21c6a24..fb51349e45a7 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -75,8 +75,11 @@ # define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) #endif -/* Force disable because it's broken beyond repair */ -#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#ifdef CONFIG_INTEL_IOMMU_SVM +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 -- Gitee From f1471f46d9261363a46298d950409ef588c901bd Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:53 -0800 Subject: [PATCH 129/173] tools/objtool: Check for use of the ENQCMD instruction in the kernel mainline inclusion from mainline-v5.18 commit 6e3133d901e89a4ba83ce7ebd8c27bbeaa9ed1f2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 6e3133d901e8 tools/objtool: Check for use of the ENQCMD instruction in the kernel. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The ENQCMD instruction implicitly accesses the PASID_MSR to fill in the pasid field of the descriptor being submitted to an accelerator. But there is no precise (and stable across kernel changes) point at which the PASID_MSR is updated from the value for one task to the next. Kernel code that uses accelerators must always use the ENQCMDS instruction which does not access the PASID_MSR. Check for use of the ENQCMD instruction in the kernel and warn on its usage. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Josh Poimboeuf Signed-off-by: Xiaochen Shen --- tools/objtool/arch/x86/decode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index d8f47704fd85..828d902f482b 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -92,7 +92,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, { struct insn insn; int x86_64, sign; - unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, + unsigned char op1, op2, op3, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0; struct stack_op *op = NULL; @@ -118,6 +118,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op1 = insn.opcode.bytes[0]; op2 = insn.opcode.bytes[1]; + op3 = insn.opcode.bytes[2]; if (insn.rex_prefix.nbytes) { rex = insn.rex_prefix.bytes[0]; @@ -424,6 +425,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* nopl/nopw */ *type = INSN_NOP; + } else if (op2 == 0x38 && op3 == 0xf8) { + if (insn.prefixes.nbytes == 1 && + insn.prefixes.bytes[0] == 0xf2) { + /* ENQCMD cannot be used in the kernel. 
*/ + WARN("ENQCMD instruction at %s:%lx", sec->name, + offset); + } + } else if (op2 == 0xa0 || op2 == 0xa8) { /* push fs/gs */ -- Gitee From a3c17a36dc9b3f0c9fc18cf2f39416923cd5de5c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:54 -0800 Subject: [PATCH 130/173] Documentation/x86: Update documentation for SVA (Shared Virtual Addressing) mainline inclusion from mainline-v5.18 commit 83aa52ffed5d35a08e24452d0471e1684075cdf8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 83aa52ffed5d Documentation/x86: Update documentation for SVA (Shared Virtual Addressing). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Adjust the documentation to the new way how a PASID is being allocated, freed and fixed up. Based on a patch by Ashok Raj [ bp: Massage commit message, fix htmldocs build warning ] Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-12-fenghua.yu@intel.com Signed-off-by: Xiaochen Shen --- Documentation/x86/sva.rst | 53 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst index 076efd51ef1f..2e9b8b0f9a0f 100644 --- a/Documentation/x86/sva.rst +++ b/Documentation/x86/sva.rst @@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application thread can interact with a device. Threads that belong to the same process share the same page tables, thus the same MSR value. -PASID is cleared when a process is created. The PASID allocation and MSR -programming may occur long after a process and its threads have been created. -One thread must call iommu_sva_bind_device() to allocate the PASID for the -process. If a thread uses ENQCMD without the MSR first being populated, a #GP -will be raised. The kernel will update the PASID MSR with the PASID for all -threads in the process. A single process PASID can be used simultaneously -with multiple devices since they all share the same address space. - -One thread can call iommu_sva_unbind_device() to free the allocated PASID. -The kernel will clear the PASID MSR for all threads belonging to the process. - -New threads inherit the MSR value from the parent. +PASID Life Cycle Management +=========================== + +PASID is initialized as INVALID_IOASID (-1) when a process is created. + +Only processes that access SVA-capable devices need to have a PASID +allocated. This allocation happens when a process opens/binds an SVA-capable +device but finds no PASID for this process. Subsequent binds of the same, or +other devices will share the same PASID. + +Although the PASID is allocated to the process by opening a device, +it is not active in any of the threads of that process. It's loaded to the +IA32_PASID MSR lazily when a thread tries to submit a work descriptor +to a device using the ENQCMD. + +That first access will trigger a #GP fault because the IA32_PASID MSR +has not been initialized with the PASID value assigned to the process +when the device was opened. The Linux #GP handler notes that a PASID has +been allocated for the process, and so initializes the IA32_PASID MSR +and returns so that the ENQCMD instruction is re-executed. + +On fork(2) or exec(2) the PASID is removed from the process as it no +longer has the same address space that it had when the device was opened. 
+ +On clone(2) the new task shares the same address space, so will be +able to use the PASID allocated to the process. The IA32_PASID is not +preemptively initialized as the PASID value might not be allocated yet or +the kernel does not know whether this thread is going to access the device +and the cleared IA32_PASID MSR reduces context switch overhead by xstate +init optimization. Since #GP faults have to be handled on any threads that +were created before the PASID was assigned to the mm of the process, newly +created threads might as well be treated in a consistent way. + +Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in +all threads in unbind, free the PASID lazily only on mm exit. + +If a process does a close(2) of the device file descriptor and munmap(2) +of the device MMIO portal, then the driver will unbind the device. The +PASID is still marked VALID in the PASID_MSR for any threads in the +process that accessed the device. But this is harmless as without the +MMIO portal they cannot submit new work to the device. Relationships ============= -- Gitee From 5a44ce759a5b51764c59882c4a944d6b7bad3259 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 24 Jan 2022 09:20:33 -0700 Subject: [PATCH 131/173] dmaengine: idxd: restore traffic class defaults after wq reset mainline inclusion from mainline-v5.18 commit ea7c8f598c323f6ebaf9ddae01fb2a981fe8c56a category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit ea7c8f598c32 dmaengine: idxd: restore traffic class defaults after wq reset. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- When clearing the group configurations, the driver fails to restore the default setting for DSA 1.x based devices. Add defaults in idxd_groups_clear_state() for traffic class configuration. Fixes: ade8a86b512c ("dmaengine: idxd: Set defaults for GRPCFG traffic class") Reported-by: Binuraj Ravindran Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/164304123369.824298.6952463420266592087.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 573ad8b86804..3061fe857d69 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -681,8 +681,13 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) group->use_rdbuf_limit = false; group->rdbufs_allowed = 0; group->rdbufs_reserved = 0; - group->tc_a = -1; - group->tc_b = -1; + if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) { + group->tc_a = 1; + group->tc_b = 1; + } else { + group->tc_a = -1; + group->tc_b = -1; + } } } -- Gitee From 1fd5aeaf2e2450be23e77c8b5f48cb96eef5b349 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 15 Jan 2022 08:24:20 +0100 Subject: [PATCH 132/173] dmaengine: idxd: Remove useless DMA-32 fallback configuration mainline inclusion from mainline-v5.18 commit b6f2f0352c0302076dcd0e0297592ea7d37f2242 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit b6f2f0352c03 dmaengine: idxd: Remove useless DMA-32 fallback configuration. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- As stated in [1], dma_set_mask() with a 64-bit mask never fails if dev->dma_mask is non-NULL. 
So, if it fails, the 32-bit case will also fail for the same reason. Simplify code and remove some dead code accordingly. [1]: https://lore.kernel.org/linux-kernel/YL3vSPK5DXTNvgdx@infradead.org/#t Signed-off-by: Christophe JAILLET Acked-by: Dave Jiang Link: https://lore.kernel.org/r/009c80294dba72858cd8a6ed2ed81041df1b1e82.1642231430.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 08a5f4310188..993a5dcca24f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -604,8 +604,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_dbg(dev, "Set DMA masks\n"); rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (rc) goto err; -- Gitee From 7ec85f99d96cbceff45592448d88c1eb2f13e7c3 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 5 Apr 2022 14:53:39 -0700 Subject: [PATCH 133/173] dmaengine: idxd: fix device cleanup on disable mainline inclusion from mainline-v5.18 commit 12e45e89556d7a532120f976081e9e7582addd2b category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 12e45e89556d dmaengine: idxd: fix device cleanup on disable. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- There are certain parts of the WQ that need to be cleaned up even after the WQ is disabled during device disable. Those are the unchangeable parts for a WQ when the device is still enabled. Move the cleanup outside of the WQ state check. Remove idxd_wq_disable_cleanup() inside idxd_wq_device_reset_cleanup() since only the unchangeable parts need to be cleared. Fixes: 0f225705cf65 ("dmaengine: idxd: fix wq settings post wq disable") Reported-by: Tony Zhu Tested-by: Tony Zhu Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/164919561905.1455025.13542366389944678346.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 3061fe857d69..5a0535a0f850 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -373,7 +373,6 @@ static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq) { lockdep_assert_held(&wq->wq_lock); - idxd_wq_disable_cleanup(wq); wq->size = 0; wq->group = NULL; } @@ -701,9 +700,9 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) if (wq->state == IDXD_WQ_ENABLED) { idxd_wq_disable_cleanup(wq); - idxd_wq_device_reset_cleanup(wq); wq->state = IDXD_WQ_DISABLED; } + idxd_wq_device_reset_cleanup(wq); } } -- Gitee From 26bfdd92f2a196a628f94c65db0efa680fc24650 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 18 Apr 2022 14:31:10 -0700 Subject: [PATCH 134/173] dmaengine: idxd: match type for retries var in idxd_enqcmds() mainline inclusion from mainline-v5.18 commit 5d9d16e5aa0cf023e600bf716239fd9caa2d4148 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 5d9d16e5aa0c dmaengine: idxd: match type for retries var in idxd_enqcmds(). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- wq->enqcmds_retries is defined as unsigned int. However, retries on the stack is defined as int.
Change retries to unsigned int so that the comparison is between the same type. Fixes: 7930d8553575 ("dmaengine: idxd: add knob for enqcmds retries") Suggested-by: Thiago Macieira Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/165031747059.3658198.6035308204505664375.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/submit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index e289fd48711a..554b0602d2e9 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -150,7 +150,8 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, */ int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) { - int rc, retries = 0; + unsigned int retries = 0; + int rc; do { rc = enqcmds(portal, desc); -- Gitee From 8765376fc162a459aa0dbf7ca7a3e6b3127443e9 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 18 Apr 2022 14:33:21 -0700 Subject: [PATCH 135/173] dmaengine: idxd: fix retry value to be constant for duration of function call mainline inclusion from mainline-v5.18 commit bc3452cdfc468a65965d0ac397c940acb787ea4d category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit bc3452cdfc46 dmaengine: idxd: fix retry value to be constant for duration of function call. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- When retries is compared to wq->enqcmds_retries each loop of idxd_enqcmds(), wq->enqcmds_retries can potentially be changed by the user. Assign the value of wq->enqcmds_retries to retries at initialization so it is the original value set when entering the function. Fixes: 7930d8553575 ("dmaengine: idxd: add knob for enqcmds retries") Suggested-by: Dave Hansen Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/165031760154.3658664.1983547716619266558.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/submit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 554b0602d2e9..c01db23e3333 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -150,7 +150,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, */ int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) { - unsigned int retries = 0; + unsigned int retries = wq->enqcmds_retries; int rc; do { @@ -158,7 +158,7 @@ int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) if (rc == 0) break; cpu_relax(); - } while (retries++ < wq->enqcmds_retries); + } while (retries--); return rc; } -- Gitee From e0db0f22cec82148e02a723f1c3228b0d38e6ade Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 11 Apr 2022 15:06:34 -0700 Subject: [PATCH 136/173] dmaengine: idxd: skip clearing device context when device is read-only mainline inclusion from mainline-v5.18 commit 1cd8e751d96c43ece3f6842ac2244a37d9332c3a category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 1cd8e751d96c dmaengine: idxd: skip clearing device context when device is read-only. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- If the device shows up as read-only configuration, skip the clearing of the state as the context must be preserved for device re-enable after being disabled.
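As a condensed illustration (the names come from the hunk below; this sketch is not itself part of the applied diff), the change amounts to an early return at the top of idxd_device_clear_state():

	/* sketch: a read-only configuration must keep its context */
	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
		return;

	idxd_groups_clear_state(idxd);
	idxd_engines_clear_state(idxd);
	idxd_device_wqs_clear_state(idxd);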
Fixes: 0dcfe41e9a4c ("dmanegine: idxd: cleanup all device related bits after disabling device") Reported-by: Tony Zhu Tested-by: Tony Zhu Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/164971479479.2200566.13980022473526292759.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5a0535a0f850..f652da6ab47d 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -708,6 +708,9 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) void idxd_device_clear_state(struct idxd_device *idxd) { + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return; + idxd_groups_clear_state(idxd); idxd_engines_clear_state(idxd); idxd_device_wqs_clear_state(idxd); -- Gitee From 5cdded804adfaea7748c045b6d4f4af08f41ec50 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 7 Apr 2022 11:28:28 -0700 Subject: [PATCH 137/173] dmaengine: idxd: don't load pasid config until needed mainline inclusion from mainline-v5.19 commit 3157dd0a366183adaea2f4d8721961637d562fee category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 3157dd0a3661 dmaengine: idxd: don't load pasid config until needed. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The driver currently programs the system pasid to the WQ preemptively when system pasid is enabled. Given that a dwq will reprogram the pasid and possibly a different pasid, the programming is not necessary. The pasid_en bit can be set for swq as it does not need pasid programming but needs the pasid_en bit. Remove system pasid programming on device config write. Add pasid programming for kernel wq type on wq driver enable. The char dev driver already reprograms the dwq on ->open() call so there's no change. 
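As an illustrative summary (condensed from the drv_enable_wq() hunk further below, not an additional change), the resulting pasid/priv policy at wq driver enable is:

	if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
		if (device_pasid_enabled(idxd) &&
		    (is_idxd_wq_kernel(wq) || wq_shared(wq))) {
			/* kernel dwq gets the system pasid; swq only needs pasid_en */
			u32 pasid = wq_dedicated(wq) ? idxd->pasid : 0;

			__idxd_wq_set_pasid_locked(wq, pasid);
		}

		/* priv = 1 only for kernel wqs (privileged access) */
		__idxd_wq_set_priv_locked(wq, is_idxd_wq_kernel(wq) ? 1 : 0);
	}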
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/164935607115.1660372.6734518676950372366.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 66 ++++++++++++++++++++++++++++-------- drivers/dma/idxd/registers.h | 1 + 2 files changed, 53 insertions(+), 14 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index f652da6ab47d..8ed460253fa5 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -299,24 +299,46 @@ void idxd_wqs_unmap_portal(struct idxd_device *idxd) } } -int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) +static void __idxd_wq_set_priv_locked(struct idxd_wq *wq, int priv) { struct idxd_device *idxd = wq->idxd; - int rc; union wqcfg wqcfg; unsigned int offset; - rc = idxd_wq_disable(wq, false); - if (rc < 0) - return rc; + offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PRIVL_IDX); + spin_lock(&idxd->dev_lock); + wqcfg.bits[WQCFG_PRIVL_IDX] = ioread32(idxd->reg_base + offset); + wqcfg.priv = priv; + wq->wqcfg->bits[WQCFG_PRIVL_IDX] = wqcfg.bits[WQCFG_PRIVL_IDX]; + iowrite32(wqcfg.bits[WQCFG_PRIVL_IDX], idxd->reg_base + offset); + spin_unlock(&idxd->dev_lock); +} + +static void __idxd_wq_set_pasid_locked(struct idxd_wq *wq, int pasid) +{ + struct idxd_device *idxd = wq->idxd; + union wqcfg wqcfg; + unsigned int offset; offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); spin_lock(&idxd->dev_lock); wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); wqcfg.pasid_en = 1; wqcfg.pasid = pasid; + wq->wqcfg->bits[WQCFG_PASID_IDX] = wqcfg.bits[WQCFG_PASID_IDX]; iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); spin_unlock(&idxd->dev_lock); +} + +int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) +{ + int rc; + + rc = idxd_wq_disable(wq, false); + if (rc < 0) + return rc; + + __idxd_wq_set_pasid_locked(wq, pasid); rc = idxd_wq_enable(wq); if (rc < 0) @@ -799,7 +821,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) */ for (i = 0; i < WQCFG_STRIDES(idxd); i++) { wq_offset = WQCFG_OFFSET(idxd, wq->id, i); - wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset); + wq->wqcfg->bits[i] |= ioread32(idxd->reg_base + wq_offset); } if (wq->size == 0 && wq->type != IDXD_WQT_NONE) @@ -815,14 +837,8 @@ static int idxd_wq_config_write(struct idxd_wq *wq) if (wq_dedicated(wq)) wq->wqcfg->mode = 1; - if (device_pasid_enabled(idxd)) { - wq->wqcfg->pasid_en = 1; - if (wq->type == IDXD_WQT_KERNEL && wq_dedicated(wq)) - wq->wqcfg->pasid = idxd->pasid; - } - /* - * Here the priv bit is set depending on the WQ type. priv = 1 if the + * The WQ priv bit is set depending on the WQ type. priv = 1 if the * WQ type is kernel to indicate privileged access. This setting only * matters for dedicated WQ. According to the DSA spec: * If the WQ is in dedicated mode, WQ PASID Enable is 1, and the @@ -832,7 +848,6 @@ static int idxd_wq_config_write(struct idxd_wq *wq) * In the case of a dedicated kernel WQ that is not able to support * the PASID cap, then the configuration will be rejected. */ - wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); if (wq_dedicated(wq) && wq->wqcfg->pasid_en && !idxd_device_pasid_priv_enabled(idxd) && wq->type == IDXD_WQT_KERNEL) { @@ -1265,6 +1280,29 @@ int __drv_enable_wq(struct idxd_wq *wq) } } + /* + * In the event that the WQ is configurable for pasid and priv bits. + * For kernel wq, the driver should setup the pasid, pasid_en, and priv bit. 
+ * However, for non-kernel wq, the driver should only set the pasid_en bit for + * shared wq. A dedicated wq that is not 'kernel' type will configure pasid and + * pasid_en later on so there is no need to setup. + */ + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + int priv = 0; + + if (device_pasid_enabled(idxd)) { + if (is_idxd_wq_kernel(wq) || wq_shared(wq)) { + u32 pasid = wq_dedicated(wq) ? idxd->pasid : 0; + + __idxd_wq_set_pasid_locked(wq, pasid); + } + } + + if (is_idxd_wq_kernel(wq)) + priv = 1; + __idxd_wq_set_priv_locked(wq, priv); + } + rc = 0; spin_lock(&idxd->dev_lock); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index aa642aecdc0b..02449aa9c454 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -353,6 +353,7 @@ union wqcfg { } __packed; #define WQCFG_PASID_IDX 2 +#define WQCFG_PRIVL_IDX 2 #define WQCFG_OCCUP_IDX 6 #define WQCFG_OCCUP_MASK 0xffff -- Gitee From 54a2e916b296b3bd77a09de76e8a083e21c055d4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 21 Mar 2022 13:40:51 -0700 Subject: [PATCH 138/173] dmaengine: idxd: remove trailing white space on input str for wq name mainline inclusion from mainline-v5.19 commit 81f5eb2b11ba748ee7e393e2faf32af773ae332d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 81f5eb2b11ba dmaengine: idxd: remove trailing white space on input str for wq name. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add string processing with strim() in order to remove trailing white spaces that may be input by user for the wq->name. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/164789525123.2799661.13795829125221129132.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/sysfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index dfd549685c46..3b25d9fbbf3b 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -832,6 +832,7 @@ static ssize_t wq_name_store(struct device *dev, size_t count) { struct idxd_wq *wq = confdev_to_wq(dev); + char *input, *pos; if (wq->state != IDXD_WQ_DISABLED) return -EPERM; @@ -846,9 +847,14 @@ static ssize_t wq_name_store(struct device *dev, if (wq->type == IDXD_WQT_KERNEL && device_pasid_enabled(wq->idxd)) return -EOPNOTSUPP; + input = kstrndup(buf, count, GFP_KERNEL); + if (!input) + return -ENOMEM; + + pos = strim(input); memset(wq->name, 0, WQ_NAME_SIZE + 1); - strncpy(wq->name, buf, WQ_NAME_SIZE); - strreplace(wq->name, '\n', '\0'); + sprintf(wq->name, "%s", pos); + kfree(input); return count; } -- Gitee From d7e1b60adb950df47eb3d66df22bfb293e0b50af Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 11 Mar 2022 16:23:22 -0700 Subject: [PATCH 139/173] dmaengine: idxd: update IAA definitions for user header mainline inclusion from mainline-v5.19 commit 2d7991fe867974a8e5065ee9691451a406b9320d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 2d7991fe8679 dmaengine: idxd: update IAA definitions for user header. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add additional structure definitions for Intel In-memory Analytics Accelerator (IAA/IAX). See specification (1) for more details. 
1: https://cdrdv2.intel.com/v1/dl/getContent/721858 Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/164704100212.1373038.18362680016033557757.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- include/uapi/linux/idxd.h | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index a8f0ff75c430..bce7c43657d5 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -53,6 +53,11 @@ enum idxd_scmd_stat { /* IAX */ #define IDXD_OP_FLAG_RD_SRC2_AECS 0x010000 +#define IDXD_OP_FLAG_RD_SRC2_2ND 0x020000 +#define IDXD_OP_FLAG_WR_SRC2_AECS_COMP 0x040000 +#define IDXD_OP_FLAG_WR_SRC2_AECS_OVFL 0x080000 +#define IDXD_OP_FLAG_SRC2_STS 0x100000 +#define IDXD_OP_FLAG_CRC_RFC3720 0x200000 /* Opcode */ enum dsa_opcode { @@ -81,6 +86,18 @@ enum iax_opcode { IAX_OPCODE_MEMMOVE, IAX_OPCODE_DECOMPRESS = 0x42, IAX_OPCODE_COMPRESS, + IAX_OPCODE_CRC64, + IAX_OPCODE_ZERO_DECOMP_32 = 0x48, + IAX_OPCODE_ZERO_DECOMP_16, + IAX_OPCODE_DECOMP_32 = 0x4c, + IAX_OPCODE_DECOMP_16, + IAX_OPCODE_SCAN = 0x50, + IAX_OPCODE_SET_MEMBER, + IAX_OPCODE_EXTRACT, + IAX_OPCODE_SELECT, + IAX_OPCODE_RLE_BURST, + IAX_OPCDE_FIND_UNIQUE, + IAX_OPCODE_EXPAND, }; /* Completion record status */ @@ -120,6 +137,7 @@ enum iax_completion_status { IAX_COMP_NONE = 0, IAX_COMP_SUCCESS, IAX_COMP_PAGE_FAULT_IR = 0x04, + IAX_COMP_ANALYTICS_ERROR = 0x0a, IAX_COMP_OUTBUF_OVERFLOW, IAX_COMP_BAD_OPCODE = 0x10, IAX_COMP_INVALID_FLAGS, @@ -140,7 +158,10 @@ enum iax_completion_status { IAX_COMP_WATCHDOG, IAX_COMP_INVALID_COMP_FLAG = 0x30, IAX_COMP_INVALID_FILTER_FLAG, - IAX_COMP_INVALID_NUM_ELEMS = 0x33, + IAX_COMP_INVALID_INPUT_SIZE, + IAX_COMP_INVALID_NUM_ELEMS, + IAX_COMP_INVALID_SRC1_WIDTH, + IAX_COMP_INVALID_INVERT_OUT, }; #define DSA_COMP_STATUS_MASK 0x7f @@ -319,8 +340,12 @@ struct iax_completion_record { uint32_t output_size; uint8_t output_bits; uint8_t rsvd3; - uint16_t rsvd4; - uint64_t rsvd5[4]; + uint16_t xor_csum; + uint32_t crc; + uint32_t min; + uint32_t max; + uint32_t sum; + uint64_t rsvd4[2]; } __attribute__((packed)); struct iax_raw_completion_record { -- Gitee From 225f3ae6c4d5360d9b0add76ffa792dc2909581d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 11 Apr 2022 15:11:16 -0700 Subject: [PATCH 140/173] dmaengine: idxd: set max_xfer and max_batch for RO device mainline inclusion from mainline-v5.19 commit 23084545dbb0ac0d1f0acad915bdeed7bd5f48cd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 23084545dbb0 dmaengine: idxd: set max_xfer and max_batch for RO device. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Load the max_xfer_size and max_batch_size values from the values read from registers to the shadow variables. This will allow the read-only device to display the correct values for the sysfs attributes. 
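To make the encoding concrete (the shift values here are hypothetical examples; the field names come from the hunk below): WQCFG stores log2 of each limit, so the shadow variables are rebuilt as powers of two.

	/* e.g. max_xfer_shift == 16 -> 64 KiB; max_batch_shift == 10 -> 1024 */
	wq->max_xfer_bytes = 1ULL << wq->wqcfg->max_xfer_shift;
	wq->max_batch_size = 1ULL << wq->wqcfg->max_batch_shift;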
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/164971507673.2201761.11244446608988838897.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 8ed460253fa5..bcc2cf606719 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1033,6 +1033,9 @@ static int idxd_wq_load_config(struct idxd_wq *wq) wq->priority = wq->wqcfg->priority; + wq->max_xfer_bytes = 1ULL << wq->wqcfg->max_xfer_shift; + wq->max_batch_size = 1ULL << wq->wqcfg->max_batch_shift; + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i); dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wqcfg_offset, wq->wqcfg->bits[i]); -- Gitee From 7a1a39260c6addbcd5152ed5c3a10fbffd9b876d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 4 Mar 2022 14:02:57 -0700 Subject: [PATCH 141/173] dmaengine: idxd: move wq irq enabling to after device enable mainline inclusion from mainline-v5.19 commit 439b5e765a00a546e8f6b6eedac69889d0b5a869 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 439b5e765a00 dmaengine: idxd: move wq irq enabling to after device enable. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Move the calling of request_irq() and other related irq setup code until after the WQ is successfully enabled. This reduces the amount of setup/teardown if the wq is not configured correctly and cannot be enabled. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/164642777730.179702.1880317757087484299.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/dma.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index f9ed55011775..c112b397f626 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -313,13 +313,6 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) mutex_lock(&wq->wq_lock); wq->type = IDXD_WQT_KERNEL; - rc = idxd_wq_request_irq(wq); - if (rc < 0) { - idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR; - dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc); - goto err_irq; - } - rc = __drv_enable_wq(wq); if (rc < 0) { dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc); @@ -327,6 +320,13 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) goto err; } + rc = idxd_wq_request_irq(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR; + dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc); + goto err_irq; + } + rc = idxd_wq_alloc_resources(wq); if (rc < 0) { idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR; @@ -358,10 +358,10 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) err_ref: idxd_wq_free_resources(wq); err_res_alloc: - __drv_disable_wq(wq); -err: idxd_wq_free_irq(wq); err_irq: + __drv_disable_wq(wq); +err: wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); return rc; -- Gitee From c0ef9efa23c28ce73be48ad5ecfe0aa11f966518 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 20 Apr 2022 09:43:36 -0700 Subject: [PATCH 142/173] dmaengine: idxd: refactor wq driver enable/disable operations mainline inclusion from mainline-v5.19 commit 63c14ae6c161dec8ff3be49277edc75a769e054a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA 
Intel-SIG: commit 63c14ae6c161 dmaengine: idxd: refactor wq driver enable/disable operations. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Move the core driver operations from wq driver to the drv_enable_wq() and drv_disable_wq() functions. The move should reduce the wq driver's knowledge of the core driver operations and prevent code confusion for future wq drivers. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/165047301643.3841827.11222723219862233060.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 6 ++-- drivers/dma/idxd/device.c | 58 +++++++++++++++++++++++++-------------- drivers/dma/idxd/dma.c | 38 ++----------------------- drivers/dma/idxd/idxd.h | 2 -- 4 files changed, 43 insertions(+), 61 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 033df43db0ce..bd44293804d1 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -314,7 +314,7 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) mutex_lock(&wq->wq_lock); wq->type = IDXD_WQT_USER; - rc = __drv_enable_wq(wq); + rc = drv_enable_wq(wq); if (rc < 0) goto err; @@ -329,7 +329,7 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) return 0; err_cdev: - __drv_disable_wq(wq); + drv_disable_wq(wq); err: wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); @@ -342,7 +342,7 @@ static void idxd_user_drv_remove(struct idxd_dev *idxd_dev) mutex_lock(&wq->wq_lock); idxd_wq_del_cdev(wq); - __drv_disable_wq(wq); + drv_disable_wq(wq); wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); } diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index bcc2cf606719..5363fb9218f2 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1198,6 +1198,9 @@ int idxd_wq_request_irq(struct idxd_wq *wq) struct idxd_irq_entry *ie; int rc; + if (wq->type != IDXD_WQT_KERNEL) + return 0; + ie = &wq->ie; ie->vector = pci_irq_vector(pdev, ie->id); ie->pasid = device_pasid_enabled(idxd) ? 
idxd->pasid : INVALID_IOASID; @@ -1229,7 +1232,7 @@ int idxd_wq_request_irq(struct idxd_wq *wq) return rc; } -int __drv_enable_wq(struct idxd_wq *wq) +int drv_enable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; @@ -1330,8 +1333,36 @@ int __drv_enable_wq(struct idxd_wq *wq) } wq->client_count = 0; + + rc = idxd_wq_request_irq(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR; + dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc); + goto err_irq; + } + + rc = idxd_wq_alloc_resources(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR; + dev_dbg(dev, "WQ resource alloc failed\n"); + goto err_res_alloc; + } + + rc = idxd_wq_init_percpu_ref(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_PERCPU_ERR; + dev_dbg(dev, "percpu_ref setup failed\n"); + goto err_ref; + } + return 0; +err_ref: + idxd_wq_free_resources(wq); +err_res_alloc: + idxd_wq_free_irq(wq); +err_irq: + idxd_wq_unmap_portal(wq); err_map_portal: rc = idxd_wq_disable(wq, false); if (rc < 0) @@ -1340,17 +1371,7 @@ int __drv_enable_wq(struct idxd_wq *wq) return rc; } -int drv_enable_wq(struct idxd_wq *wq) -{ - int rc; - - mutex_lock(&wq->wq_lock); - rc = __drv_enable_wq(wq); - mutex_unlock(&wq->wq_lock); - return rc; -} - -void __drv_disable_wq(struct idxd_wq *wq) +void drv_disable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; @@ -1361,21 +1382,16 @@ void __drv_disable_wq(struct idxd_wq *wq) dev_warn(dev, "Clients has claim on wq %d: %d\n", wq->id, idxd_wq_refcount(wq)); + idxd_wq_free_resources(wq); idxd_wq_unmap_portal(wq); - idxd_wq_drain(wq); idxd_wq_reset(wq); - + percpu_ref_exit(&wq->wq_active); + idxd_wq_free_irq(wq); + wq->type = IDXD_WQT_NONE; wq->client_count = 0; } -void drv_disable_wq(struct idxd_wq *wq) -{ - mutex_lock(&wq->wq_lock); - __drv_disable_wq(wq); - mutex_unlock(&wq->wq_lock); -} - int idxd_device_drv_probe(struct idxd_dev *idxd_dev) { struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev); diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index c112b397f626..d66cef5a918e 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -313,34 +313,13 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) mutex_lock(&wq->wq_lock); wq->type = IDXD_WQT_KERNEL; - rc = __drv_enable_wq(wq); + rc = drv_enable_wq(wq); if (rc < 0) { dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc); rc = -ENXIO; goto err; } - rc = idxd_wq_request_irq(wq); - if (rc < 0) { - idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR; - dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc); - goto err_irq; - } - - rc = idxd_wq_alloc_resources(wq); - if (rc < 0) { - idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR; - dev_dbg(dev, "WQ resource alloc failed\n"); - goto err_res_alloc; - } - - rc = idxd_wq_init_percpu_ref(wq); - if (rc < 0) { - idxd->cmd_status = IDXD_SCMD_PERCPU_ERR; - dev_dbg(dev, "percpu_ref setup failed\n"); - goto err_ref; - } - rc = idxd_register_dma_channel(wq); if (rc < 0) { idxd->cmd_status = IDXD_SCMD_DMA_CHAN_ERR; @@ -353,14 +332,7 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) return 0; err_dma: - __idxd_wq_quiesce(wq); - percpu_ref_exit(&wq->wq_active); -err_ref: - idxd_wq_free_resources(wq); -err_res_alloc: - idxd_wq_free_irq(wq); -err_irq: - __drv_disable_wq(wq); + drv_disable_wq(wq); err: wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); @@ -374,11 +346,7 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) 
mutex_lock(&wq->wq_lock); __idxd_wq_quiesce(wq); idxd_unregister_dma_channel(wq); - idxd_wq_free_resources(wq); - __drv_disable_wq(wq); - percpu_ref_exit(&wq->wq_active); - idxd_wq_free_irq(wq); - wq->type = IDXD_WQT_NONE; + drv_disable_wq(wq); mutex_unlock(&wq->wq_lock); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index da72eb15f610..8e03fb548d13 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -559,9 +559,7 @@ void idxd_unregister_idxd_drv(void); int idxd_device_drv_probe(struct idxd_dev *idxd_dev); void idxd_device_drv_remove(struct idxd_dev *idxd_dev); int drv_enable_wq(struct idxd_wq *wq); -int __drv_enable_wq(struct idxd_wq *wq); void drv_disable_wq(struct idxd_wq *wq); -void __drv_disable_wq(struct idxd_wq *wq); int idxd_device_init_reset(struct idxd_device *idxd); int idxd_device_enable(struct idxd_device *idxd); int idxd_device_disable(struct idxd_device *idxd); -- Gitee From 7210b42f19abe20ca56946406bd0e3d1ddb37784 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 May 2022 17:11:57 -0700 Subject: [PATCH 143/173] dmaengine: idxd: Separate user and kernel pasid enabling mainline inclusion from mainline-v5.19 commit 42a1b73852c4a176d233a192422b5e1d0ba67cbf category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 42a1b73852c4 dmaengine: idxd: Separate user and kernel pasid enabling. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- The idxd driver always gated the pasid enabling under a single knob and this assumption is incorrect. The pasid used for kernel operation can be independently toggled and has no dependency on the user pasid (and vice versa). Split the two so they are independent "enabled" flags. 
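Condensed from the init.c hunk below (illustration only), the probe path now toggles the two capabilities independently:

	if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA))
		dev_warn(dev, "Unable to turn on user SVA feature.\n");
	else
		set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);

	if (idxd_enable_system_pasid(idxd))
		dev_warn(dev, "No in-kernel DMA with PASID.\n");
	else
		set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);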
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/165231431746.986466.5666862038354800551.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 4 ++-- drivers/dma/idxd/device.c | 6 +++--- drivers/dma/idxd/idxd.h | 16 ++++++++++++++-- drivers/dma/idxd/init.c | 30 +++++++++++++++--------------- drivers/dma/idxd/sysfs.c | 2 +- 5 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index bd44293804d1..c2808fd081d6 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -99,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) ctx->wq = wq; filp->private_data = ctx; - if (device_pasid_enabled(idxd)) { + if (device_user_pasid_enabled(idxd)) { sva = iommu_sva_bind_device(dev, current->mm, NULL); if (IS_ERR(sva)) { rc = PTR_ERR(sva); @@ -152,7 +152,7 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) if (wq_shared(wq)) { idxd_device_drain_pasid(idxd, ctx->pasid); } else { - if (device_pasid_enabled(idxd)) { + if (device_user_pasid_enabled(idxd)) { /* The wq disable in the disable pasid function will drain the wq */ rc = idxd_wq_disable_pasid(wq); if (rc < 0) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5363fb9218f2..c4e1b2465d85 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -968,7 +968,7 @@ static int idxd_wqs_setup(struct idxd_device *idxd) if (!wq->group) continue; - if (wq_shared(wq) && !device_swq_supported(idxd)) { + if (wq_shared(wq) && !wq_shared_supported(wq)) { idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT; dev_warn(dev, "No shared wq support but configured.\n"); return -EINVAL; @@ -1266,7 +1266,7 @@ int drv_enable_wq(struct idxd_wq *wq) /* Shared WQ checks */ if (wq_shared(wq)) { - if (!device_swq_supported(idxd)) { + if (!wq_shared_supported(wq)) { idxd->cmd_status = IDXD_SCMD_WQ_NO_SVM; dev_dbg(dev, "PASID not enabled and shared wq.\n"); goto err; @@ -1296,7 +1296,7 @@ int drv_enable_wq(struct idxd_wq *wq) if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { int priv = 0; - if (device_pasid_enabled(idxd)) { + if (wq_pasid_enabled(wq)) { if (is_idxd_wq_kernel(wq) || wq_shared(wq)) { u32 pasid = wq_dedicated(wq) ? 
idxd->pasid : 0; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 8e03fb548d13..77d241a92bd1 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -239,6 +239,7 @@ enum idxd_device_flag { IDXD_FLAG_CONFIGURABLE = 0, IDXD_FLAG_CMD_RUNNING, IDXD_FLAG_PASID_ENABLED, + IDXD_FLAG_USER_PASID_ENABLED, }; struct idxd_dma_dev { @@ -469,9 +470,20 @@ static inline bool device_pasid_enabled(struct idxd_device *idxd) return test_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); } -static inline bool device_swq_supported(struct idxd_device *idxd) +static inline bool device_user_pasid_enabled(struct idxd_device *idxd) { - return (support_enqcmd && device_pasid_enabled(idxd)); + return test_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); +} + +static inline bool wq_pasid_enabled(struct idxd_wq *wq) +{ + return (is_idxd_wq_kernel(wq) && device_pasid_enabled(wq->idxd)) || + (is_idxd_wq_user(wq) && device_user_pasid_enabled(wq->idxd)); +} + +static inline bool wq_shared_supported(struct idxd_wq *wq) +{ + return (support_enqcmd && wq_pasid_enabled(wq)); } enum idxd_portal_prot { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 993a5dcca24f..355fb3ef4cbf 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -512,18 +512,15 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { - rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA); - if (rc == 0) { - rc = idxd_enable_system_pasid(idxd); - if (rc < 0) { - iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); - dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc); - } else { - set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); - } - } else { - dev_warn(dev, "Unable to turn on SVA feature.\n"); - } + if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) + dev_warn(dev, "Unable to turn on user SVA feature.\n"); + else + set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); + + if (idxd_enable_system_pasid(idxd)) + dev_warn(dev, "No in-kernel DMA with PASID.\n"); + else + set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); } else if (!sva) { dev_warn(dev, "User forced SVA off via module param.\n"); } @@ -561,7 +558,8 @@ static int idxd_probe(struct idxd_device *idxd) err: if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); - iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + if (device_user_pasid_enabled(idxd)) + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); return rc; } @@ -574,7 +572,8 @@ static void idxd_cleanup(struct idxd_device *idxd) idxd_cleanup_internals(idxd); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); - iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + if (device_user_pasid_enabled(idxd)) + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); } static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -691,7 +690,8 @@ static void idxd_remove(struct pci_dev *pdev) free_irq(irq_entry->vector, irq_entry); pci_free_irq_vectors(pdev); pci_iounmap(pdev, idxd->reg_base); - iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + if (device_user_pasid_enabled(idxd)) + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); pci_disable_device(pdev); destroy_workqueue(idxd->wq); perfmon_pmu_remove(idxd); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 3b25d9fbbf3b..3f262a57441b 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -588,7 +588,7 @@ static ssize_t wq_mode_store(struct device 
*dev, if (sysfs_streq(buf, "dedicated")) { set_bit(WQ_FLAG_DEDICATED, &wq->flags); wq->threshold = 0; - } else if (sysfs_streq(buf, "shared") && device_swq_supported(idxd)) { + } else if (sysfs_streq(buf, "shared")) { clear_bit(WQ_FLAG_DEDICATED, &wq->flags); } else { return -EINVAL; -- Gitee From 8ecc0c1818aef2a6491b7ca3ea21382a2470ab37 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 May 2022 17:00:44 -0700 Subject: [PATCH 144/173] dmaengine: idxd: fix lockdep warning on device driver removal mainline inclusion from mainline-v5.19 commit cf4ac3fef33883a14131d8925d7edfbdb7d69b68 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit cf4ac3fef338 dmaengine: idxd: fix lockdep warning on device driver removal. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Jacob reported that with lockdep debug turned on, idxd_device_driver removal causes kernel splat from lock assert warning for idxd_device_wqs_clear_state(). Make sure idxd_device_wqs_clear_state() holds the wq lock for each wq when cleaning the wq state. Move the call outside of the device spinlock. Reported-by: Jacob Pan Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/165231364426.986304.9294302800482492780.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index c4e1b2465d85..fe890d1ee3eb 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -577,19 +577,15 @@ int idxd_device_disable(struct idxd_device *idxd) return -ENXIO; } - spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); - idxd->state = IDXD_DEV_DISABLED; - spin_unlock(&idxd->dev_lock); return 0; } void idxd_device_reset(struct idxd_device *idxd) { idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL); - spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); - idxd->state = IDXD_DEV_DISABLED; + spin_lock(&idxd->dev_lock); idxd_unmask_error_interrupts(idxd); spin_unlock(&idxd->dev_lock); } @@ -716,13 +712,14 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) { int i; - lockdep_assert_held(&idxd->dev_lock); for (i = 0; i < idxd->max_wqs; i++) { struct idxd_wq *wq = idxd->wqs[i]; if (wq->state == IDXD_WQ_ENABLED) { + mutex_lock(&wq->wq_lock); idxd_wq_disable_cleanup(wq); wq->state = IDXD_WQ_DISABLED; + mutex_unlock(&wq->wq_lock); } idxd_wq_device_reset_cleanup(wq); } @@ -733,9 +730,12 @@ void idxd_device_clear_state(struct idxd_device *idxd) if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return; + idxd_device_wqs_clear_state(idxd); + spin_lock(&idxd->dev_lock); idxd_groups_clear_state(idxd); idxd_engines_clear_state(idxd); - idxd_device_wqs_clear_state(idxd); + idxd->state = IDXD_DEV_DISABLED; + spin_unlock(&idxd->dev_lock); } static void idxd_group_config_write(struct idxd_group *group) -- Gitee From e865202e1989058ef5d14f2b17bfe625d26718bc Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 May 2022 17:01:13 -0700 Subject: [PATCH 145/173] dmaengine: idxd: free irq before wq type is reset mainline inclusion from mainline-v5.19 commit 9120c879d28873829dfa2f511c68162d52540e6a category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 9120c879d288 dmaengine: idxd: free irq before wq type is reset. 
Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Call idxd_wq_free_irq() in the drv_disable_wq() function before idxd_wq_reset() is called. Otherwise the wq type is reset and the irq does not get freed. Fixes: 63c14ae6c161 ("dmaengine: idxd: refactor wq driver enable/disable operations") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/165231367316.986407.11001767338124941736.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index fe890d1ee3eb..77443759d5ec 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1385,9 +1385,9 @@ void drv_disable_wq(struct idxd_wq *wq) idxd_wq_free_resources(wq); idxd_wq_unmap_portal(wq); idxd_wq_drain(wq); + idxd_wq_free_irq(wq); idxd_wq_reset(wq); percpu_ref_exit(&wq->wq_active); - idxd_wq_free_irq(wq); wq->type = IDXD_WQT_NONE; wq->client_count = 0; } -- Gitee From 294619d25f33ca9f9d3f7a566bd86c78e7e76569 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 6 May 2022 15:23:52 -0700 Subject: [PATCH 146/173] dmaengine: idxd: make idxd_register/unregister_dma_channel() static mainline inclusion from mainline-v5.19 commit 8e6226f0f1a321de5f9ffdcb3fe920f94b45d38b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 8e6226f0f1a3 dmaengine: idxd: make idxd_register/unregister_dma_channel() static. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Since idxd_register/unregister_dma_channel() are only called locally, make them static. Reported-by: kernel test robot Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/165187583222.3287435.12882651040433040246.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/dma.c | 4 ++-- drivers/dma/idxd/idxd.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index d66cef5a918e..e0874cb4721c 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -250,7 +250,7 @@ void idxd_unregister_dma_device(struct idxd_device *idxd) dma_async_device_unregister(&idxd->idxd_dma->dma); } -int idxd_register_dma_channel(struct idxd_wq *wq) +static int idxd_register_dma_channel(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; struct dma_device *dma = &idxd->idxd_dma->dma; @@ -287,7 +287,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq) return 0; } -void idxd_unregister_dma_channel(struct idxd_wq *wq) +static void idxd_unregister_dma_channel(struct idxd_wq *wq) { struct idxd_dma_chan *idxd_chan = wq->idxd_chan; struct dma_chan *chan = &idxd_chan->chan; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 77d241a92bd1..fed0dfc1eaa8 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -612,8 +612,6 @@ int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc); /* dmaengine */ int idxd_register_dma_device(struct idxd_device *idxd); void idxd_unregister_dma_device(struct idxd_device *idxd); -int idxd_register_dma_channel(struct idxd_wq *wq); -void idxd_unregister_dma_channel(struct idxd_wq *wq); void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res); void idxd_dma_complete_txd(struct idxd_desc *desc, enum idxd_complete_type comp_type, bool free_desc); -- 
Gitee From 86a8ae01ff754ffc5555cf78134491b57aa6178b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 5 May 2022 08:05:07 -0700 Subject: [PATCH 147/173] dmaengine: idxd: skip irq free when wq type is not kernel mainline inclusion from mainline-v5.19 commit d0ad42388a396813771e9407614f40d128ad62db category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit d0ad42388a39 dmaengine: idxd: skip irq free when wq type is not kernel. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Skip wq irq resources freeing when wq type is not kernel since the driver skips the irq allocation during wq enable. Add a wq type check in idxd_wq_free_irq() to mirror idxd_wq_request_irq(). Fixes: 63c14ae6c161 ("dmaengine: idxd: refactor wq driver enable/disable operations") Reported-by: Tony Zu Tested-by: Tony Zu Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/165176310726.2112428.7474366910758522079.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 77443759d5ec..06cbca71cf26 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1179,6 +1179,9 @@ void idxd_wq_free_irq(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; struct idxd_irq_entry *ie = &wq->ie; + if (wq->type != IDXD_WQT_KERNEL) + return; + synchronize_irq(ie->vector); free_irq(ie->vector, ie); idxd_flush_pending_descs(ie); -- Gitee From b67dcc70afc32539de165a59d90487b534bf824c Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Mon, 16 May 2022 11:54:12 +0000 Subject: [PATCH 148/173] dmaengine: idxd: Remove unnecessary synchronize_irq() before free_irq() mainline inclusion from mainline-v5.19 commit 411dccf9d271e49f37471c73ebedb18719d6b608 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 411dccf9d271 dmaengine: idxd: Remove unnecessary synchronize_irq() before free_irq(). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Calling synchronize_irq() right before free_irq() is quite useless. On one hand the IRQ can easily fire again before free_irq() is entered, on the other hand free_irq() itself calls synchronize_irq() internally (in a race condition free way), before any state associated with the IRQ is freed.
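The call site before and after, condensed from the one-line removal below (illustration only):

	/* before: the extra quiesce buys nothing */
	synchronize_irq(ie->vector);
	free_irq(ie->vector, ie);	/* free_irq() itself waits for in-flight handlers */

	/* after */
	free_irq(ie->vector, ie);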
Signed-off-by: Minghao Chi Link: https://lore.kernel.org/r/20220516115412.1651772-1-chi.minghao@zte.com.cn Acked-by: Dave Jiang Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 06cbca71cf26..ab846880803c 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1182,7 +1182,6 @@ void idxd_wq_free_irq(struct idxd_wq *wq) if (wq->type != IDXD_WQT_KERNEL) return; - synchronize_irq(ie->vector); free_irq(ie->vector, ie); idxd_flush_pending_descs(ie); if (idxd->request_int_handles) -- Gitee From e3bfb96e5adeb32936824655edfd719a4ddce1d2 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 25 Apr 2022 11:03:29 -0700 Subject: [PATCH 149/173] dmaengine: idxd: make idxd_wq_enable() return 0 if wq is already enabled mainline inclusion from mainline-v5.19 commit d1a28597808268b87f156138aad3104aa255e62b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit d1a285978082 dmaengine: idxd: make idxd_wq_enable() return 0 if wq is already enabled. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- When idxd_wq_enable() is called and the wq is already enabled, the code should return 0 to indicate success instead of returning an error code and failing. This should also put idxd_wq_enable() in sync with idxd_wq_disable() where it returns 0 if wq is already disabled. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/165090980906.1378449.1939401700832432886.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index ab846880803c..d1c59109a127 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -184,7 +184,7 @@ int idxd_wq_enable(struct idxd_wq *wq) if (wq->state == IDXD_WQ_ENABLED) { dev_dbg(dev, "WQ %d already enabled\n", wq->id); - return -ENXIO; + return 0; } idxd_cmd_exec(idxd, IDXD_CMD_ENABLE_WQ, wq->id, &status); -- Gitee From eff323a8ce81682f31cea4e473d852933d29f885 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 28 Jun 2022 16:00:56 -0700 Subject: [PATCH 150/173] dmaengine: idxd: force wq context cleanup on device disable path mainline inclusion from mainline-v5.19 commit 44c4237cf3436bda2b185ff728123651ad133f69 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 44c4237cf343 dmaengine: idxd: force wq context cleanup on device disable path. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Testing has shown that when a wq is set up as dedicated and then torn down and reconfigured as shared, the wq ends up configured as dedicated anyway. The root cause is that when idxd_device_wqs_clear_state() gets called during idxd_driver removal, idxd_wq_disable_cleanup() does not get called, unlike when the wq driver is removed first. The check of the wq state being "enabled" causes the cleanup to be bypassed. However, idxd_driver->remove() releases all wq drivers, so the wqs go to "disabled" state and will never be "enabled". By that point, the driver has no idea if the wq was previously configured or clean. So always call idxd_wq_disable_cleanup() on all wqs to make sure everything gets cleaned up.
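Condensed from the hunk below (illustration only), the per-wq loop now cleans up unconditionally:

	for (i = 0; i < idxd->max_wqs; i++) {
		struct idxd_wq *wq = idxd->wqs[i];

		mutex_lock(&wq->wq_lock);
		idxd_wq_disable_cleanup(wq);	/* no longer gated on IDXD_WQ_ENABLED */
		idxd_wq_device_reset_cleanup(wq);
		mutex_unlock(&wq->wq_lock);
	}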
Reported-by: Tony Zhu Tested-by: Tony Zhu Fixes: 0dcfe41e9a4c ("dmanegine: idxd: cleanup all device related bits after disabling device") Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220628230056.2527816-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index d1c59109a127..5a8cc52c1abf 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -715,13 +715,10 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) for (i = 0; i < idxd->max_wqs; i++) { struct idxd_wq *wq = idxd->wqs[i]; - if (wq->state == IDXD_WQ_ENABLED) { - mutex_lock(&wq->wq_lock); - idxd_wq_disable_cleanup(wq); - wq->state = IDXD_WQ_DISABLED; - mutex_unlock(&wq->wq_lock); - } + mutex_lock(&wq->wq_lock); + idxd_wq_disable_cleanup(wq); idxd_wq_device_reset_cleanup(wq); + mutex_unlock(&wq->wq_lock); } } -- Gitee From b92ae8e4296a61c7b12f4ecfff02da8d0162c032 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Sat, 25 Jun 2022 22:16:48 -0700 Subject: [PATCH 151/173] dmaengine: idxd: Only call idxd_enable_system_pasid() if succeeded in enabling SVA feature mainline inclusion from mainline-v5.19 commit 8ffccd119a5908b240a26182be44c0ff3d1e3d85 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 8ffccd119a59 dmaengine: idxd: Only call idxd_enable_system_pasid() if succeeded in enabling SVA feature. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- On a Sapphire Rapids system, if booted without intel_iommu=on, the IDXD driver will crash during probe in iommu_sva_bind_device().
[ 21.423729] BUG: kernel NULL pointer dereference, address: 0000000000000038 [ 21.445108] #PF: supervisor read access in kernel mode [ 21.450912] #PF: error_code(0x0000) - not-present page [ 21.456706] PGD 0 [ 21.459047] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 21.464004] CPU: 0 PID: 1420 Comm: kworker/0:3 Not tainted 5.19.0-0.rc3.27.eln120.x86_64 #1 [ 21.464011] Hardware name: Intel Corporation EAGLESTREAM/EAGLESTREAM, BIOS EGSDCRB1.SYS.0067.D12.2110190954 10/19/2021 [ 21.464015] Workqueue: events work_for_cpu_fn [ 21.464030] RIP: 0010:iommu_sva_bind_device+0x1d/0xe0 [ 21.464046] Code: c3 cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 41 56 49 89 d6 41 55 41 54 55 53 48 83 ec 08 48 8b 87 d8 02 00 00 <48> 8b 40 38 48 8b 50 10 48 83 7a 70 00 48 89 14 24 0f 84 91 00 00 [ 21.464050] RSP: 0018:ff7245d9096b7db8 EFLAGS: 00010296 [ 21.464054] RAX: 0000000000000000 RBX: ff1eadeec8a51000 RCX: 0000000000000000 [ 21.464058] RDX: ff7245d9096b7e24 RSI: 0000000000000000 RDI: ff1eadeec8a510d0 [ 21.464060] RBP: ff1eadeec8a51000 R08: ffffffffb1a12300 R09: ff1eadffbfce25b4 [ 21.464062] R10: ffffffffffffffff R11: 0000000000000038 R12: ffffffffc09f8000 [ 21.464065] R13: ff1eadeec8a510d0 R14: ff7245d9096b7e24 R15: ff1eaddf54429000 [ 21.464067] FS: 0000000000000000(0000) GS:ff1eadee7f600000(0000) knlGS:0000000000000000 [ 21.464070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 21.464072] CR2: 0000000000000038 CR3: 00000008c0e10006 CR4: 0000000000771ef0 [ 21.464074] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 21.464076] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 [ 21.464078] PKRU: 55555554 [ 21.464079] Call Trace: [ 21.464083] [ 21.464092] idxd_pci_probe+0x259/0x1070 [idxd] [ 21.464121] local_pci_probe+0x3e/0x80 [ 21.464132] work_for_cpu_fn+0x13/0x20 [ 21.464136] process_one_work+0x1c4/0x380 [ 21.464143] worker_thread+0x1ab/0x380 [ 21.464147] ? _raw_spin_lock_irqsave+0x23/0x50 [ 21.464158] ? process_one_work+0x380/0x380 [ 21.464161] kthread+0xe6/0x110 [ 21.464168] ? kthread_complete_and_exit+0x20/0x20 [ 21.464172] ret_from_fork+0x1f/0x30 iommu_sva_bind_device() requires SVA has been enabled successfully on the IDXD device before it's called. Otherwise, iommu_sva_bind_device() will access a NULL pointer. If Intel IOMMU is disabled, SVA cannot be enabled and thus idxd_enable_system_pasid() and iommu_sva_bind_device() should not be called. 
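Condensed from the init.c hunk below (illustration only), the fix nests the system PASID attempt under a successful SVA enable:

	if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
		if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) {
			dev_warn(dev, "Unable to turn on user SVA feature.\n");
		} else {
			set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);

			/* only reached when SVA is known to be enabled */
			if (idxd_enable_system_pasid(idxd))
				dev_warn(dev, "No in-kernel DMA with PASID.\n");
			else
				set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
		}
	}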
Fixes: 42a1b73852c4 ("dmaengine: idxd: Separate user and kernel pasid enabling") Cc: Vinod Koul Cc: linux-kernel@vger.kernel.org Cc: Dave Jiang Cc: Fenghua Yu Link: https://lore.kernel.org/dmaengine/20220623170232.6whonfjuh3m5vcoy@cantor/ Signed-off-by: Jerry Snitselaar Acked-by: Fenghua Yu Link: https://lore.kernel.org/r/20220626051648.14249-1-jsnitsel@redhat.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/init.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 355fb3ef4cbf..aa3478257ddb 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -512,15 +512,16 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { - if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) + if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) { dev_warn(dev, "Unable to turn on user SVA feature.\n"); - else + } else { set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); - if (idxd_enable_system_pasid(idxd)) - dev_warn(dev, "No in-kernel DMA with PASID.\n"); - else - set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + if (idxd_enable_system_pasid(idxd)) + dev_warn(dev, "No in-kernel DMA with PASID.\n"); + else + set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } } else if (!sva) { dev_warn(dev, "User forced SVA off via module param.\n"); } -- Gitee From 06b4d8e200a0e8d80839a335a1bb2ddb34148a62 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 15 Jun 2022 16:26:51 -0700 Subject: [PATCH 152/173] MAINTAINERS: idxd driver maintainer update mainline inclusion from mainline-v6.0 commit 5dc8638872ef0deeae1371a9e2d551920870f76a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 5dc8638872ef MAINTAINERS: idxd driver maintainer update. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add Fenghua as maintainer of the idxd driver. Signed-off-by: Fenghua Yu Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/20220615232651.177098-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a8a9608823e2..bb290c569c73 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8949,7 +8949,8 @@ S: Supported Q: https://patchwork.kernel.org/project/linux-dmaengine/list/ F: drivers/dma/ioat* -INTEL IADX DRIVER +INTEL IDXD DRIVER +M: Fenghua Yu M: Dave Jiang L: dmaengine@vger.kernel.org S: Supported -- Gitee From 2f3a389cfae87d54336eb8bd41723d65cd46eab4 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 6 Jul 2022 17:20:52 -0700 Subject: [PATCH 153/173] dmaengine: idxd: Correct IAX operation code names mainline inclusion from mainline-v6.0 commit d0b55afa47694f6f61b40f578ede7bde1648fe48 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit d0b55afa4769 dmaengine: idxd: Correct IAX operation code names. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Some IAX operation code nomenclatures are misleading or don't match with others: 1. Operation code 0x4c is Zero Compress 32. IAX_OPCODE_DECOMP_32 is a misleading name. Change it to IAX_OPCODE_ZERO_COMP_32. 2. Operation code 0x4d is Zero Compress 16. IAX_OPCODE_DECOMP_16 is a misleading name. Change it to IAX_OPCODE_ZERO_COMP_16. 
3. IAX_OPCDE_FIND_UNIQUE is corrected to match the other nomenclatures. Co-developed-by: Li Zhang Signed-off-by: Li Zhang Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20220707002052.1546361-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- include/uapi/linux/idxd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index bce7c43657d5..095299c75828 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -89,14 +89,14 @@ enum iax_opcode { IAX_OPCODE_CRC64, IAX_OPCODE_ZERO_DECOMP_32 = 0x48, IAX_OPCODE_ZERO_DECOMP_16, - IAX_OPCODE_DECOMP_32 = 0x4c, - IAX_OPCODE_DECOMP_16, + IAX_OPCODE_ZERO_COMP_32 = 0x4c, + IAX_OPCODE_ZERO_COMP_16, IAX_OPCODE_SCAN = 0x50, IAX_OPCODE_SET_MEMBER, IAX_OPCODE_EXTRACT, IAX_OPCODE_SELECT, IAX_OPCODE_RLE_BURST, - IAX_OPCDE_FIND_UNIQUE, + IAX_OPCODE_FIND_UNIQUE, IAX_OPCODE_EXPAND, }; -- Gitee From 05bc8812bff91a43b86d8b4291efe1d8477ad81e Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Tue, 23 Aug 2022 09:37:09 -0700 Subject: [PATCH 154/173] dmaengine: idxd: avoid deadlock in process_misc_interrupts() mainline inclusion from mainline-v6.1 commit 407171717a4f4d2d80825584643374a2dfdb0540 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 407171717a4f dmaengine: idxd: avoid deadlock in process_misc_interrupts(). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- idxd_device_clear_state() now grabs the idxd->dev_lock itself, so don't grab the lock prior to calling it. This was seen in testing after a DMAR fault occurred on a system, resulting in lockup stack traces. Cc: Fenghua Yu Cc: Dave Jiang Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Fixes: cf4ac3fef338 ("dmaengine: idxd: fix lockdep warning on device driver removal") Signed-off-by: Jerry Snitselaar Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20220823163709.2102468-1-jsnitsel@redhat.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 743ead5ebc57..5b9921475be6 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -324,13 +324,11 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) idxd->state = IDXD_DEV_HALTED; idxd_wqs_quiesce(idxd); idxd_wqs_unmap_portal(idxd); - spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); dev_err(&idxd->pdev->dev, "idxd halted, need %s.\n", gensts.reset_type == IDXD_DEVICE_RESET_FLR ? "FLR" : "system reset"); - spin_unlock(&idxd->dev_lock); return -ENXIO; } } -- Gitee From b39ec204ad604eb2ecc43a5738505a9c35e021d2 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Wed, 28 Sep 2022 08:48:55 -0700 Subject: [PATCH 155/173] dmaengine: idxd: Set wq state to disabled in idxd_wq_disable_cleanup() mainline inclusion from mainline-v6.1 commit 8e527aac055557897d43a3d74d7970ef5cf6a8bb category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 8e527aac0555 dmaengine: idxd: Set wq state to disabled in idxd_wq_disable_cleanup(). Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- If we are calling idxd_wq_disable_cleanup(), the workqueue should be in a disabled state. So set the workqueue state to IDXD_WQ_DISABLED so that the state reflects that.
Currently, if there is a device failure and a software reset is attempted, the workqueues will not be re-enabled due to idxd_wq_enable() seeing that state as already being IDXD_WQ_ENABLED. Cc: Fenghua Yu Cc: Dave Jiang Cc: Vinod Koul Signed-off-by: Jerry Snitselaar Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20220928154856.623545-2-jsnitsel@redhat.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5a8cc52c1abf..31911e255ac1 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -258,7 +258,6 @@ void idxd_wq_reset(struct idxd_wq *wq) operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL); idxd_wq_disable_cleanup(wq); - wq->state = IDXD_WQ_DISABLED; } int idxd_wq_map_portal(struct idxd_wq *wq) @@ -378,6 +377,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; lockdep_assert_held(&wq->wq_lock); + wq->state = IDXD_WQ_DISABLED; memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; wq->threshold = 0; -- Gitee From c1dbe7f78a3acd3fb4114f95bb7285a633b4f5b0 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 30 Dec 2020 11:41:52 +0200 Subject: [PATCH 156/173] lib: bitmap: Introduce node-aware alloc API mainline inclusion from mainline-v5.16 commit 7529cc7fbd9c02eda6851f3260416cbe198a321d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7529cc7fbd9c lib: bitmap: Introduce node-aware alloc API. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Expose a new node-aware API for bitmap allocation: bitmap_alloc_node() / bitmap_zalloc_node(). Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Xiaochen Shen --- include/linux/bitmap.h | 2 ++ lib/bitmap.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index c4f6a9270c03..e98da9994cdd 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -122,6 +122,8 @@ struct device; */ extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); +extern unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node); +extern unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node); extern void bitmap_free(const unsigned long *bitmap); /* Managed variants of the above.
*/ diff --git a/lib/bitmap.c b/lib/bitmap.c index 27e08c0e547e..0aca7ac94456 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1258,6 +1258,19 @@ unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags) } EXPORT_SYMBOL(bitmap_zalloc); +unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node) +{ + return kmalloc_array_node(BITS_TO_LONGS(nbits), sizeof(unsigned long), + flags, node); +} +EXPORT_SYMBOL(bitmap_alloc_node); + +unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node) +{ + return bitmap_alloc_node(nbits, flags | __GFP_ZERO, node); +} +EXPORT_SYMBOL(bitmap_zalloc_node); + void bitmap_free(const unsigned long *bitmap) { kfree(bitmap); -- Gitee From f4783ea527d61e524658f1aae680b44cde0abaac Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Wed, 28 Sep 2022 08:48:56 -0700 Subject: [PATCH 157/173] dmaengine: idxd: track enabled workqueues in bitmap mainline inclusion from mainline-v6.1 commit de5819b994893197c71c86d21af10f85f50d6499 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit de5819b99489 dmaengine: idxd: track enabled workqueues in bitmap. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Now that idxd_wq_disable_cleanup() sets the workqueue state to IDXD_WQ_DISABLED, use a bitmap to track which workqueues have been enabled. This will then be used to determine which workqueues should be re-enabled when attempting a software reset to recover from a device halt state. Cc: Fenghua Yu Cc: Dave Jiang Cc: Vinod Koul Signed-off-by: Jerry Snitselaar Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20220928154856.623545-3-jsnitsel@redhat.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 2 ++ drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 6 ++++++ drivers/dma/idxd/irq.c | 5 +++-- drivers/dma/idxd/sysfs.c | 1 + 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 31911e255ac1..f0c7d6d348e3 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -196,6 +196,7 @@ int idxd_wq_enable(struct idxd_wq *wq) } wq->state = IDXD_WQ_ENABLED; + set_bit(wq->id, idxd->wq_enable_map); dev_dbg(dev, "WQ %d enabled\n", wq->id); return 0; } @@ -223,6 +224,7 @@ int idxd_wq_disable(struct idxd_wq *wq, bool reset_config) if (reset_config) idxd_wq_disable_cleanup(wq); + clear_bit(wq->id, idxd->wq_enable_map); wq->state = IDXD_WQ_DISABLED; dev_dbg(dev, "WQ %d disabled\n", wq->id); return 0; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index fed0dfc1eaa8..f527a7f88b92 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include "registers.h" @@ -299,6 +300,7 @@ struct idxd_device { int rdbuf_limit; int nr_rdbufs; /* non-reserved read buffers */ unsigned int wqcfg_size; + unsigned long *wq_enable_map; union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index aa3478257ddb..7e27e69ff741 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -151,6 +151,12 @@ static int idxd_setup_wqs(struct idxd_device *idxd) if (!idxd->wqs) return -ENOMEM; + idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev)); + if (!idxd->wq_enable_map) { + kfree(idxd->wqs); + return -ENOMEM; + } + for (i = 0; i < idxd->max_wqs; i++) { 
wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev)); if (!wq) { diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 5b9921475be6..5927d371493c 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -49,11 +49,12 @@ static void idxd_device_reinit(struct work_struct *work) goto out; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = idxd->wqs[i]; + if (test_bit(i, idxd->wq_enable_map)) { + struct idxd_wq *wq = idxd->wqs[i]; - if (wq->state == IDXD_WQ_ENABLED) { rc = idxd_wq_enable(wq); if (rc < 0) { + clear_bit(i, idxd->wq_enable_map); dev_warn(dev, "Unable to re-enable wq %s\n", dev_name(wq_confdev(wq))); } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 3f262a57441b..3325b16ed959 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1405,6 +1405,7 @@ static void idxd_conf_device_release(struct device *dev) struct idxd_device *idxd = confdev_to_idxd(dev); kfree(idxd->groups); + bitmap_free(idxd->wq_enable_map); kfree(idxd->wqs); kfree(idxd->engines); ida_free(&idxd_ida, idxd->id); -- Gitee From 52984b04c6ee2200c72f9f84d20fb428aef662dd Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 28 Sep 2022 01:47:47 +0000 Subject: [PATCH 158/173] dmaengine: idxd: Remove unused struct idxd_fault mainline inclusion from mainline-v6.1 commit d1083fd04302a95bc4dcf1c059537da87b39bd9a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit d1083fd04302 dmaengine: idxd: Remove unused struct idxd_fault. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Since fault processing code has been removed, struct idxd_fault is not used any more and can be removed as well. Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20220928014747.106808-1-yuancan@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/irq.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 5927d371493c..aa314ebec587 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -17,12 +17,6 @@ enum irq_work_type { IRQ_WORK_PROCESS_FAULT, }; -struct idxd_fault { - struct work_struct work; - u64 addr; - struct idxd_device *idxd; -}; - struct idxd_resubmit { struct work_struct work; struct idxd_desc *desc; -- Gitee From 6ccf7a59e184bb57c6c2709e9445d1729dba93a0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:18 -0700 Subject: [PATCH 159/173] dmaengine: idxd: convert ats_dis to a wq flag mainline inclusion from mainline-v6.1 commit 22bd0df846ca1388ce9f5d54fb6e9f597c932ba9 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 22bd0df846ca dmaengine: idxd: convert ats_dis to a wq flag. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Make wq attributes access consistent. Convert ats_dis to wq flag WQ_FLAG_ATS_DISABLE. 
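For context on the pattern being adopted, here is a minimal standalone C sketch of a bool field replaced by one bit in an unsigned long flags word. The struct, enum values, and helpers below are illustrative only; the driver itself uses the kernel's atomic set_bit()/clear_bit()/test_bit() helpers on wq->flags, as the diff below shows.

/* Illustrative sketch, not part of the patch. */
#include <stdbool.h>
#include <stdio.h>

enum wq_flag { WQ_FLAG_DEDICATED, WQ_FLAG_BLOCK_ON_FAULT, WQ_FLAG_ATS_DISABLE };

struct wq { unsigned long flags; };

static void wq_set_ats_disable(struct wq *wq, bool dis)
{
	if (dis)
		wq->flags |= 1UL << WQ_FLAG_ATS_DISABLE;	/* set_bit() */
	else
		wq->flags &= ~(1UL << WQ_FLAG_ATS_DISABLE);	/* clear_bit() */
}

int main(void)
{
	struct wq wq = { .flags = 0 };

	wq_set_ats_disable(&wq, true);
	/* test_bit() equivalent; prints ats_disable=1 */
	printf("ats_disable=%lu\n", (wq.flags >> WQ_FLAG_ATS_DISABLE) & 1UL);
	return 0;
}

Keeping all WQ knobs in one flags word lets disable_cleanup reset every boolean attribute with a handful of clear_bit() calls instead of touching scattered fields.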
Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-2-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 4 ++-- drivers/dma/idxd/idxd.h | 2 +- drivers/dma/idxd/sysfs.c | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index f0c7d6d348e3..88986db57743 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -384,10 +384,10 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->type = IDXD_WQT_NONE; wq->threshold = 0; wq->priority = 0; - wq->ats_dis = 0; wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; @@ -861,7 +861,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->bof = 1; if (idxd->hw.wq_cap.wq_ats_support) - wq->wqcfg->wq_ats_disable = wq->ats_dis; + wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); /* bytes 12-15 */ wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index f527a7f88b92..4e7e21264be7 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -133,6 +133,7 @@ enum idxd_wq_state { enum idxd_wq_flag { WQ_FLAG_DEDICATED = 0, WQ_FLAG_BLOCK_ON_FAULT, + WQ_FLAG_ATS_DISABLE, }; enum idxd_wq_type { @@ -209,7 +210,6 @@ struct idxd_wq { char name[WQ_NAME_SIZE + 1]; u64 max_xfer_bytes; u32 max_batch_size; - bool ats_dis; }; struct idxd_engine { diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 3325b16ed959..8ff599ea48a4 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -973,7 +973,7 @@ static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute * { struct idxd_wq *wq = confdev_to_wq(dev); - return sysfs_emit(buf, "%u\n", wq->ats_dis); + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags)); } static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr, @@ -994,7 +994,10 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute if (rc < 0) return rc; - wq->ats_dis = ats_dis; + if (ats_dis) + set_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); + else + clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); return count; } -- Gitee From b0f00c93a2e05838300cd6e08073401b3734363f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:19 -0700 Subject: [PATCH 160/173] dmanegine: idxd: reformat opcap output to match bitmap_parse() input mainline inclusion from mainline-v6.1 commit a8563a33a5e26064061f2fb34215c97f0e2995f4 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit a8563a33a5e2 dmanegine: idxd: reformat opcap output to match bitmap_parse() input. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- To make input and output consistent and prepping for the per WQ operation configuration support, change the output of opcap display to match the input that is expected by bitmap_parse() helper function. The output will be a bitmap with field width as the number of bits using the %*pb format specifier for printk() family. 
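The point of the change is a symmetric show/store pair. A hedged sketch of that symmetry follows: sysfs_emit(), bitmap_parse(), and the %*pb specifier are existing kernel facilities, while the wrapper functions and EXAMPLE_NBITS are illustrative rather than driver code.

#include <linux/bitmap.h>
#include <linux/sysfs.h>

#define EXAMPLE_NBITS 256

/* Show: "%*pb" emits the bitmap as comma-separated 32-bit hex words. */
static ssize_t example_show(char *buf, const unsigned long *bmap)
{
	return sysfs_emit(buf, "%*pb\n", EXAMPLE_NBITS, bmap);
}

/* Store: bitmap_parse() accepts exactly the format "%*pb" emits. */
static ssize_t example_store(const char *buf, size_t count, unsigned long *bmap)
{
	int rc = bitmap_parse(buf, count, bmap, EXAMPLE_NBITS);

	return rc < 0 ? rc : count;
}

With this round trip in place, whatever a user reads from op_cap can be written back unmodified to the per-WQ op_config attribute introduced in the next patch.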
Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-3-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 20 ++++++++++++++++++++ drivers/dma/idxd/registers.h | 2 ++ drivers/dma/idxd/sysfs.c | 9 ++------- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 4e7e21264be7..fd0642b8cadc 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -310,6 +310,8 @@ struct idxd_device { struct work_struct work; struct idxd_pmu *idxd_pmu; + + unsigned long *opcap_bmap; }; /* IDXD software descriptor */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7e27e69ff741..f6f0654a79b8 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -375,6 +375,19 @@ static void idxd_read_table_offsets(struct idxd_device *idxd) dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset); } +static void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count) +{ + int i, j, nr; + + for (i = 0, nr = 0; i < count; i++) { + for (j = 0; j < BITS_PER_LONG_LONG; j++) { + if (val[i] & BIT(j)) + set_bit(nr, bmap); + nr++; + } + } +} + static void idxd_read_caps(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -433,6 +446,7 @@ static void idxd_read_caps(struct idxd_device *idxd) IDXD_OPCAP_OFFSET + i * sizeof(u64)); dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]); } + multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4); } static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) @@ -454,6 +468,12 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d if (idxd->id < 0) return NULL; + idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev)); + if (!idxd->opcap_bmap) { + ida_free(&idxd_ida, idxd->id); + return NULL; + } + device_initialize(conf_dev); conf_dev->parent = dev; conf_dev->bus = &dsa_bus_type; diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 02449aa9c454..4c96ea85f843 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -90,6 +90,8 @@ struct opcap { u64 bits[4]; }; +#define IDXD_MAX_OPCAP_BITS 256U + #define IDXD_OPCAP_OFFSET 0x40 #define IDXD_TABLE_OFFSET 0x60 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 8ff599ea48a4..57aeeee835ab 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1180,14 +1180,8 @@ static ssize_t op_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_device *idxd = confdev_to_idxd(dev); - int i, rc = 0; - - for (i = 0; i < 4; i++) - rc += sysfs_emit_at(buf, rc, "%#llx ", idxd->hw.opcap.bits[i]); - rc--; - rc += sysfs_emit_at(buf, rc, "\n"); - return rc; + return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, idxd->opcap_bmap); } static DEVICE_ATTR_RO(op_cap); @@ -1412,6 +1406,7 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->wqs); kfree(idxd->engines); ida_free(&idxd_ida, idxd->id); + bitmap_free(idxd->opcap_bmap); kfree(idxd); } -- Gitee From 947d2b103a2df9a495a5170cd500bd86ff2714b0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:20 -0700 Subject: [PATCH 161/173] dmaengine: idxd: add WQ operation cap restriction support mainline inclusion from mainline-v6.1 commit 
b0325aefd398d8b536ba46ee2e5d24252c1b2258 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit b0325aefd398 dmaengine: idxd: add WQ operation cap restriction support. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- DSA 2.0 adds the capability of configuring DMA ops on a per-workqueue basis. This means that certain ops can be disabled by the system administrator for certain WQs. By default, all ops are available. A bitmap is used to store the ops because the total op size is 256 bits, and it is more convenient to use a range list to specify which bits are enabled. One usage this supports is VM migration between different iterations of devices. The newer ops are disabled in order to allow a guest to migrate to a host that only supports older ops. Another usage is to restrict the WQ to certain operations for performance QoS. An ops_config sysfs attribute is added per WQ. It is only usable when the ops_config bit is set in the WQ_CAP register. This means that this attribute will return -EOPNOTSUPP on DSA 1.x devices. The expected input is a range list for the bits per operation the WQ supports. Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-4-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- .../ABI/stable/sysfs-driver-dma-idxd | 11 +++ drivers/dma/idxd/device.c | 15 +++- drivers/dma/idxd/idxd.h | 2 + drivers/dma/idxd/init.c | 10 +++ drivers/dma/idxd/registers.h | 8 +- drivers/dma/idxd/sysfs.c | 85 +++++++++++++++++++ 6 files changed, 128 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index e1293bfa4187..5b9d59e83daa 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -227,6 +227,17 @@ Contact: dmaengine@vger.kernel.org Description: Indicate the number of retires for an enqcmds submission on a sharedwq. A max value to set attribute is capped at 64. +What: /sys/bus/dsa/devices/wq./op_config +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: Shows the operation capability bits displayed in bitmap format + presented by %*pb printk() output format specifier. + The attribute can be configured when the WQ is disabled in + order to configure the WQ to accept specific bits that + correlates to the operations allowed. It's visible only + on platforms that support the capability.
+ What: /sys/bus/dsa/devices/engine./group_id Date: Oct 25, 2019 KernelVersion: 5.6.0 diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 88986db57743..df1c108e4bb3 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -391,6 +391,8 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) memset(wq->name, 0, WQ_NAME_SIZE); wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; + if (wq->opcap_bmap) + bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq) @@ -809,7 +811,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; u32 wq_offset; - int i; + int i, n; if (!wq->group) return 0; @@ -867,6 +869,17 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size); + /* bytes 32-63 */ + if (idxd->hw.wq_cap.op_config && wq->opcap_bmap) { + memset(wq->wqcfg->op_config, 0, IDXD_MAX_OPCAP_BITS / 8); + for_each_set_bit(n, wq->opcap_bmap, IDXD_MAX_OPCAP_BITS) { + int pos = n % BITS_PER_LONG_LONG; + int idx = n / BITS_PER_LONG_LONG; + + wq->wqcfg->op_config[idx] |= BIT(pos); + } + } + dev_dbg(dev, "WQ %d CFGs\n", wq->id); for (i = 0; i < WQCFG_STRIDES(idxd); i++) { wq_offset = WQCFG_OFFSET(idxd, wq->id, i); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index fd0642b8cadc..ba6e94f0cd6e 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -196,6 +196,8 @@ struct idxd_wq { enum idxd_wq_state state; unsigned long flags; union wqcfg *wqcfg; + unsigned long *opcap_bmap; + struct dsa_hw_desc **hw_descs; int num_descs; union { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f6f0654a79b8..2b18d512cbfc 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -191,6 +191,16 @@ static int idxd_setup_wqs(struct idxd_device *idxd) rc = -ENOMEM; goto err; } + + if (idxd->hw.wq_cap.op_config) { + wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); + if (!wq->opcap_bmap) { + put_device(conf_dev); + rc = -ENOMEM; + goto err; + } + bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); + } idxd->wqs[i] = wq; } diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 4c96ea85f843..7b95be8f0f64 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -54,7 +54,8 @@ union wq_cap_reg { u64 priority:1; u64 occupancy:1; u64 occupancy_int:1; - u64 rsvd3:10; + u64 op_config:1; + u64 rsvd3:9; }; u64 bits; } __packed; @@ -350,8 +351,11 @@ union wqcfg { /* bytes 28-31 */ u32 rsvd8; + + /* bytes 32-63 */ + u64 op_config[4]; }; - u32 bits[8]; + u32 bits[16]; } __packed; #define WQCFG_PASID_IDX 2 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 57aeeee835ab..6dbdd74e5bae 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1058,6 +1058,68 @@ static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attrib static struct device_attribute dev_attr_wq_enqcmds_retries = __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store); +static ssize_t wq_op_config_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, wq->opcap_bmap); +} + +static int idxd_verify_supported_opcap(struct 
idxd_device *idxd, unsigned long *opmask) +{ + int bit; + + /* + * The OPCAP is defined as 256 bits that represents each operation the device + * supports per bit. Iterate through all the bits and check if the input mask + * is set for bits that are not set in the OPCAP for the device. If no OPCAP + * bit is set and input mask has the bit set, then return error. + */ + for_each_set_bit(bit, opmask, IDXD_MAX_OPCAP_BITS) { + if (!test_bit(bit, idxd->opcap_bmap)) + return -EINVAL; + } + + return 0; +} + +static ssize_t wq_op_config_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + unsigned long *opmask; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + opmask = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); + if (!opmask) + return -ENOMEM; + + rc = bitmap_parse(buf, count, opmask, IDXD_MAX_OPCAP_BITS); + if (rc < 0) + goto err; + + rc = idxd_verify_supported_opcap(idxd, opmask); + if (rc < 0) + goto err; + + bitmap_copy(wq->opcap_bmap, opmask, IDXD_MAX_OPCAP_BITS); + + bitmap_free(opmask); + return count; + +err: + bitmap_free(opmask); + return rc; +} + +static struct device_attribute dev_attr_wq_op_config = + __ATTR(op_config, 0644, wq_op_config_show, wq_op_config_store); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -1075,11 +1137,33 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_ats_disable.attr, &dev_attr_wq_occupancy.attr, &dev_attr_wq_enqcmds_retries.attr, + &dev_attr_wq_op_config.attr, NULL, }; +static bool idxd_wq_attr_op_config_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_wq_op_config.attr && + !idxd->hw.wq_cap.op_config; +} + +static umode_t idxd_wq_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + + if (idxd_wq_attr_op_config_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + static const struct attribute_group idxd_wq_attribute_group = { .attrs = idxd_wq_attributes, + .is_visible = idxd_wq_attr_visible, }; static const struct attribute_group *idxd_wq_attribute_groups[] = { @@ -1091,6 +1175,7 @@ static void idxd_conf_wq_release(struct device *dev) { struct idxd_wq *wq = confdev_to_wq(dev); + bitmap_free(wq->opcap_bmap); kfree(wq->wqcfg); kfree(wq); } -- Gitee From 961d522a99b1b602c517d5b700e4ad8018d99544 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:21 -0700 Subject: [PATCH 162/173] dmaengine: idxd: add configuration for concurrent work descriptor processing mainline inclusion from mainline-v6.1 commit 1f2737521af2b7d018971f1d873856fff02d2b33 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 1f2737521af2 dmaengine: idxd: add configuration for concurrent work descriptor processing. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add a sysfs knob to allow control of the number of work descriptors that can be concurrently processed by an engine in the group, as a fraction of the Maximum Work Descriptors in Progress value specified in the ENGCAP register. This control knob is part of the QoS controls.
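The knob's encoding is a power-of-two divisor. A small hedged sketch of the semantics follows; the helper name is illustrative, while the 0-3 range and the fractions are spelled out in the ABI text added by the diff below.

/* Sketch only: value 0 keeps the ENGCAP maximum, 1 halves it,
 * 2 quarters it, and 3 takes one eighth. The store path rejects
 * anything outside GENMASK(1, 0), i.e. outside 0..3. */
static unsigned int effective_desc_limit(unsigned int engcap_max, unsigned int knob)
{
	return engcap_max >> knob;
}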
Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-5-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- .../ABI/stable/sysfs-driver-dma-idxd | 12 +++++ drivers/dma/idxd/device.c | 13 +++-- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/registers.h | 23 ++++---- drivers/dma/idxd/sysfs.c | 53 +++++++++++++++++++ 5 files changed, 87 insertions(+), 15 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 5b9d59e83daa..d0b2a218de61 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -266,3 +266,15 @@ Contact: dmaengine@vger.kernel.org Description: Indicates the number of Read Buffers reserved for the use of engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers Reserved. + +What: /sys/bus/dsa/devices/group./desc_progress_limit +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: Allows control of the number of work descriptors that can be + concurrently processed by an engine in the group as a fraction + of the Maximum Work Descriptors in Progress value specified in + the ENGCAP register. The acceptable values are 0 (default), + 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of + the max value). It's visible only on platforms that support + the capability. diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index df1c108e4bb3..05a982e143fe 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -709,6 +709,7 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) group->tc_a = -1; group->tc_b = -1; } + group->desc_progress_limit = 0; } } @@ -765,10 +766,10 @@ static void idxd_group_config_write(struct idxd_group *group) /* setup GRPFLAGS */ grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); - iowrite32(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset); - dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", + iowrite64(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n", group->id, grpcfg_offset, - ioread32(idxd->reg_base + grpcfg_offset)); + ioread64(idxd->reg_base + grpcfg_offset)); } static int idxd_groups_config_write(struct idxd_device *idxd) @@ -929,6 +930,8 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed; else group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs; + + group->grpcfg.flags.desc_progress_limit = group->desc_progress_limit; } } @@ -1111,8 +1114,8 @@ static void idxd_group_load_config(struct idxd_group *group) } grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); - group->grpcfg.flags.bits = ioread32(idxd->reg_base + grpcfg_offset); - dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", + group->grpcfg.flags.bits = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n", group->id, grpcfg_offset, group->grpcfg.flags.bits); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index ba6e94f0cd6e..7ee870e5ca67 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -96,6 +96,7 @@ struct idxd_group { u8 rdbufs_reserved; int tc_a; int tc_b; + int desc_progress_limit; }; struct idxd_pmu { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 7b95be8f0f64..2cc2543edd58 100644 --- a/drivers/dma/idxd/registers.h +++ 
b/drivers/dma/idxd/registers.h @@ -68,7 +68,8 @@ union group_cap_reg { u64 total_rdbufs:8; /* formerly total_tokens */ u64 rdbuf_ctrl:1; /* formerly token_en */ u64 rdbuf_limit:1; /* formerly token_limit */ - u64 rsvd:46; + u64 progress_limit:1; /* descriptor and batch descriptor */ + u64 rsvd:45; }; u64 bits; } __packed; @@ -288,16 +289,18 @@ union msix_perm { union group_flags { struct { - u32 tc_a:3; - u32 tc_b:3; - u32 rsvd:1; - u32 use_rdbuf_limit:1; - u32 rdbufs_reserved:8; - u32 rsvd2:4; - u32 rdbufs_allowed:8; - u32 rsvd3:4; + u64 tc_a:3; + u64 tc_b:3; + u64 rsvd:1; + u64 use_rdbuf_limit:1; + u64 rdbufs_reserved:8; + u64 rsvd2:4; + u64 rdbufs_allowed:8; + u64 rsvd3:4; + u64 desc_progress_limit:2; + u64 rsvd4:30; }; - u32 bits; + u64 bits; } __packed; struct grpcfg { diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 6dbdd74e5bae..3624bdeb71f6 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -443,6 +443,37 @@ static struct device_attribute dev_attr_group_traffic_class_b = __ATTR(traffic_class_b, 0644, group_traffic_class_b_show, group_traffic_class_b_store); +static ssize_t group_desc_progress_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->desc_progress_limit); +} + +static ssize_t group_desc_progress_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + int val, rc; + + rc = kstrtoint(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (val & ~GENMASK(1, 0)) + return -EINVAL; + + group->desc_progress_limit = val; + return count; +} + +static struct device_attribute dev_attr_group_desc_progress_limit = + __ATTR(desc_progress_limit, 0644, group_desc_progress_limit_show, + group_desc_progress_limit_store); + static struct attribute *idxd_group_attributes[] = { &dev_attr_group_work_queues.attr, &dev_attr_group_engines.attr, @@ -454,11 +485,33 @@ static struct attribute *idxd_group_attributes[] = { &dev_attr_group_read_buffers_reserved.attr, &dev_attr_group_traffic_class_a.attr, &dev_attr_group_traffic_class_b.attr, + &dev_attr_group_desc_progress_limit.attr, NULL, }; +static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_group_desc_progress_limit.attr && + !idxd->hw.group_cap.progress_limit; +} + +static umode_t idxd_group_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_group *group = confdev_to_group(dev); + struct idxd_device *idxd = group->idxd; + + if (idxd_group_attr_progress_limit_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + static const struct attribute_group idxd_group_attribute_group = { .attrs = idxd_group_attributes, + .is_visible = idxd_group_attr_visible, }; static const struct attribute_group *idxd_group_attribute_groups[] = { -- Gitee From ab94741944c222cbe38c67a112ad727451acc31e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:22 -0700 Subject: [PATCH 163/173] dmaengine: idxd: add configuration for concurrent batch descriptor processing mainline inclusion from mainline-v6.1 commit 7ca68fa3c8ab83dfa539f16c5b4b1aec2e33320d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 7ca68fa3c8ab dmaengine: idxd: add configuration for 
concurrent batch descriptor processing. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- Add a sysfs knob to allow control of the number of batch descriptors that can be concurrently processed by an engine in the group, as a fraction of the Maximum Batch Descriptors in Progress value specified in the ENGCAP register. This control knob is part of the QoS controls. Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-6-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- .../ABI/stable/sysfs-driver-dma-idxd | 12 +++++++ drivers/dma/idxd/device.c | 2 ++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/registers.h | 4 ++- drivers/dma/idxd/sysfs.c | 36 +++++++++++++++++-- 5 files changed, 52 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index d0b2a218de61..76774249cf51 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -278,3 +278,15 @@ Description: Allows control of the number of work descriptors that can be 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of the max value). It's visible only on platforms that support the capability. + +What: /sys/bus/dsa/devices/group./batch_progress_limit +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: Allows control of the number of batch descriptors that can be + concurrently processed by an engine in the group as a fraction + of the Maximum Batch Descriptors in Progress value specified in + the ENGCAP register. The acceptable values are 0 (default), + 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of + the max value). It's visible only on platforms that support + the capability.
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 05a982e143fe..2c1e6f6daa62 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -710,6 +710,7 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) group->tc_b = -1; } group->desc_progress_limit = 0; + group->batch_progress_limit = 0; } } @@ -932,6 +933,7 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs; group->grpcfg.flags.desc_progress_limit = group->desc_progress_limit; + group->grpcfg.flags.batch_progress_limit = group->batch_progress_limit; } } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 7ee870e5ca67..1196ab342f01 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -97,6 +97,7 @@ struct idxd_group { int tc_a; int tc_b; int desc_progress_limit; + int batch_progress_limit; }; struct idxd_pmu { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 2cc2543edd58..fe3b8d04f9db 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -298,7 +298,9 @@ union group_flags { u64 rdbufs_allowed:8; u64 rsvd3:4; u64 desc_progress_limit:2; - u64 rsvd4:30; + u64 rsvd4:2; + u64 batch_progress_limit:2; + u64 rsvd5:26; }; u64 bits; } __packed; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 3624bdeb71f6..bdaccf9e0436 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -474,6 +474,36 @@ static struct device_attribute dev_attr_group_desc_progress_limit = __ATTR(desc_progress_limit, 0644, group_desc_progress_limit_show, group_desc_progress_limit_store); +static ssize_t group_batch_progress_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->batch_progress_limit); +} + +static ssize_t group_batch_progress_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + int val, rc; + + rc = kstrtoint(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (val & ~GENMASK(1, 0)) + return -EINVAL; + + group->batch_progress_limit = val; + return count; +} + +static struct device_attribute dev_attr_group_batch_progress_limit = + __ATTR(batch_progress_limit, 0644, group_batch_progress_limit_show, + group_batch_progress_limit_store); static struct attribute *idxd_group_attributes[] = { &dev_attr_group_work_queues.attr, &dev_attr_group_engines.attr, @@ -486,14 +516,16 @@ static struct attribute *idxd_group_attributes[] = { &dev_attr_group_traffic_class_a.attr, &dev_attr_group_traffic_class_b.attr, &dev_attr_group_desc_progress_limit.attr, + &dev_attr_group_batch_progress_limit.attr, NULL, }; static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr, struct idxd_device *idxd) { - return attr == &dev_attr_group_desc_progress_limit.attr && - !idxd->hw.group_cap.progress_limit; + return (attr == &dev_attr_group_desc_progress_limit.attr || + attr == &dev_attr_group_batch_progress_limit.attr) && + !idxd->hw.group_cap.progress_limit; } static umode_t idxd_group_attr_visible(struct kobject *kobj, -- Gitee From 863ffba93ac1d3e5cc91c9366e4de82a91cf96bf Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 14 Oct 2022 15:25:41 -0700 Subject: [PATCH 164/173] dmaengine: idxd: Do not enable user type Work Queue without Shared Virtual Addressing mainline inclusion from mainline-v6.1 commit 
0ec8ce07394442d722806fe61b901a5b2b17249d category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 0ec8ce073944 dmaengine: idxd: Do not enable user type Work Queue without Shared Virtual Addressing. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- When the idxd_user_drv driver is bound to a Work Queue (WQ) device without an IOMMU, or with IOMMU Passthrough without Shared Virtual Addressing (SVA), the application gains direct access to physical memory via the device by programming a physical address into a submitted descriptor. This allows direct userspace read and write access to arbitrary physical memory. This is inconsistent with the security goals of a good kernel API. Unlike the vfio_pci driver, the IDXD char device driver does not provide any way to pin user pages and translate the address from user VA to IOVA or PA without IOMMU SVA. Therefore the application has no way to instruct the device to perform DMA operations. This makes the char device not usable for normal application usage. Since a user type WQ without SVA cannot be used for normal application usage and presents a security issue, bind the idxd_user_drv driver and enable the user type WQ only when SVA is enabled (i.e. user PASID is enabled). Fixes: 448c3de8ac83 ("dmaengine: idxd: create user driver for wq 'device'") Cc: stable@vger.kernel.org Suggested-by: Arjan Van De Ven Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20221014222541.3912195-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/cdev.c | 18 ++++++++++++++++++ include/uapi/linux/idxd.h | 1 + 2 files changed, 19 insertions(+) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index c2808fd081d6..a9b96b18772f 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -312,6 +312,24 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) if (idxd->state != IDXD_DEV_ENABLED) return -ENXIO; + /* + * User type WQ is enabled only when SVA is enabled for two reasons: + * - If no IOMMU or IOMMU Passthrough without SVA, userspace + * can directly access physical address through the WQ. + * - The IDXD cdev driver does not provide any ways to pin + * user pages and translate the address from user VA to IOVA or + * PA without IOMMU SVA. Therefore the application has no way + * to instruct the device to perform DMA function. This makes + * the cdev not usable for normal application usage.
+ */ + if (!device_user_pasid_enabled(idxd)) { + idxd->cmd_status = IDXD_SCMD_WQ_USER_NO_IOMMU; + dev_dbg(&idxd->pdev->dev, + "User type WQ cannot be enabled without SVA.\n"); + + return -EOPNOTSUPP; + } + mutex_lock(&wq->wq_lock); wq->type = IDXD_WQT_USER; rc = drv_enable_wq(wq); diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 095299c75828..2b9e7feba3f3 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -29,6 +29,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, + IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 -- Gitee From a31b6e15cc9af631fb90bbf76bec646d5994fb54 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 1 Oct 2022 04:15:27 +0800 Subject: [PATCH 165/173] dmaengine: idxd: Fix max batch size for Intel IAA mainline inclusion from mainline-v6.1 commit e8dbd6445dd6b38c4c50410a86f13158486ee99a category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit e8dbd6445dd6 dmaengine: idxd: Fix max batch size for Intel IAA. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- From the Intel IAA spec [1], Intel IAA does not support batch processing. Two batch-related default values for IAA are incorrect in the current code: (1) The max batch size of the device is set during device initialization, which indicates that batch is supported. It should always be 0 on IAA. (2) The max batch size of the work queue is set to WQ_DEFAULT_MAX_BATCH (32) as the default value, regardless of whether the device is Intel DSA or IAA, during work queue setup and cleanup. It should always be 0 on IAA. Fix the issues by setting the max batch size of the device and the max batch size of the work queue to 0 on IAA devices, which means batch is not supported.
[1]: https://cdrdv2.intel.com/v1/dl/getContent/721858 Fixes: 23084545dbb0 ("dmaengine: idxd: set max_xfer and max_batch for RO device") Fixes: 92452a72ebdf ("dmaengine: idxd: set defaults for wq configs") Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Signed-off-by: Xiaochen Shen Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20220930201528.18621-2-xiaochen.shen@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 6 +++--- drivers/dma/idxd/idxd.h | 32 ++++++++++++++++++++++++++++++++ drivers/dma/idxd/init.c | 4 ++-- drivers/dma/idxd/sysfs.c | 2 +- 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 2c1e6f6daa62..670b71927db0 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -390,7 +390,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; - wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; + idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH); if (wq->opcap_bmap) bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } @@ -869,7 +869,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) /* bytes 12-15 */ wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); - wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size); + idxd_wqcfg_set_max_batch_shift(idxd->data->type, wq->wqcfg, ilog2(wq->max_batch_size)); /* bytes 32-63 */ if (idxd->hw.wq_cap.op_config && wq->opcap_bmap) { @@ -1051,7 +1051,7 @@ static int idxd_wq_load_config(struct idxd_wq *wq) wq->priority = wq->wqcfg->priority; wq->max_xfer_bytes = 1ULL << wq->wqcfg->max_xfer_shift; - wq->max_batch_size = 1ULL << wq->wqcfg->max_batch_shift; + idxd_wq_set_max_batch_size(idxd->data->type, wq, 1U << wq->wqcfg->max_batch_shift); for (i = 0; i < WQCFG_STRIDES(idxd); i++) { wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 1196ab342f01..7ced8d283d98 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -548,6 +548,38 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq) return wq->client_count; }; +/* + * Intel IAA does not support batch processing. + * The max batch size of device, max batch size of wq and + * max batch shift of wqcfg should be always 0 on IAA. 
+ */ +static inline void idxd_set_max_batch_size(int idxd_type, struct idxd_device *idxd, + u32 max_batch_size) +{ + if (idxd_type == IDXD_TYPE_IAX) + idxd->max_batch_size = 0; + else + idxd->max_batch_size = max_batch_size; +} + +static inline void idxd_wq_set_max_batch_size(int idxd_type, struct idxd_wq *wq, + u32 max_batch_size) +{ + if (idxd_type == IDXD_TYPE_IAX) + wq->max_batch_size = 0; + else + wq->max_batch_size = max_batch_size; +} + +static inline void idxd_wqcfg_set_max_batch_shift(int idxd_type, union wqcfg *wqcfg, + u32 max_batch_shift) +{ + if (idxd_type == IDXD_TYPE_IAX) + wqcfg->max_batch_shift = 0; + else + wqcfg->max_batch_shift = max_batch_shift; +} + int __must_check __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *module, const char *mod_name); #define idxd_driver_register(driver) \ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 2b18d512cbfc..09cbf0c179ba 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -183,7 +183,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd) init_completion(&wq->wq_dead); init_completion(&wq->wq_resurrect); wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; - wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; + idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH); wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { @@ -418,7 +418,7 @@ static void idxd_read_caps(struct idxd_device *idxd) idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift; dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes); - idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift; + idxd_set_max_batch_size(idxd->data->type, idxd, 1U << idxd->hw.gen_cap.max_batch_shift); dev_dbg(dev, "max batch size: %u\n", idxd->max_batch_size); if (idxd->hw.gen_cap.config_en) set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index bdaccf9e0436..7269bd54554f 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1046,7 +1046,7 @@ static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribu if (batch_size > idxd->max_batch_size) return -EINVAL; - wq->max_batch_size = (u32)batch_size; + idxd_wq_set_max_batch_size(idxd->data->type, wq, (u32)batch_size); return count; } -- Gitee From 91c6ee7d59360e5156e7da05636959d9a4957401 Mon Sep 17 00:00:00 2001 From: Fengqian Gao Date: Fri, 30 Sep 2022 11:28:35 +0800 Subject: [PATCH 166/173] dmaengine: idxd: fix RO device state error after been disabled/reset mainline inclusion from mainline-v6.1 commit 0b8c97a1d8c1bb6a853b8bb1778e8fef17b86fc9 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 0b8c97a1d8c1 dmaengine: idxd: fix RO device state error after been disabled/reset. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- When IDXD is not configurable, its WQ, engine, and group configurations cannot be changed. But it can still be disabled, and its state should be set as disabled regardless of whether it is configurable. Fix this by setting the device state to IDXD_DEV_DISABLED for read-only devices as well in idxd_device_clear_state().
Fixes: cf4ac3fef338 ("dmaengine: idxd: fix lockdep warning on device driver removal") Signed-off-by: Fengqian Gao Reviewed-by: Xiaochen Shen Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20220930032835.2290-1-fengqian.gao@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- drivers/dma/idxd/device.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 670b71927db0..6f44fa8f78a5 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -730,13 +730,21 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) void idxd_device_clear_state(struct idxd_device *idxd) { - if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) - return; + /* IDXD is always disabled. Other states are cleared only when IDXD is configurable. */ + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + /* + * Clearing wq state is protected by wq lock. + * So no need to be protected by device lock. + */ + idxd_device_wqs_clear_state(idxd); + + spin_lock(&idxd->dev_lock); + idxd_groups_clear_state(idxd); + idxd_engines_clear_state(idxd); + } else { + spin_lock(&idxd->dev_lock); + } - idxd_device_wqs_clear_state(idxd); - spin_lock(&idxd->dev_lock); - idxd_groups_clear_state(idxd); - idxd_engines_clear_state(idxd); idxd->state = IDXD_DEV_DISABLED; spin_unlock(&idxd->dev_lock); } -- Gitee From 22e414907aa2099a349e6cd6907d07f39f06c5c2 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 1 Oct 2022 04:15:28 +0800 Subject: [PATCH 167/173] dmaengine: idxd: Make max batch size attributes in sysfs invisible for Intel IAA mainline inclusion from mainline-v6.2 commit 91123b37e8a99cc489d5bdcfebd1c25f29382504 category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO CVE: NA Intel-SIG: commit 91123b37e8a9 dmaengine: idxd: Make max batch size attributes in sysfs invisible for Intel IAA. Incremental backporting patches for DSA/IAA on Intel Xeon platform. -------------------------------- In the current code, the dev.max_batch_size and wq.max_batch_size attributes in sysfs are exposed to the user to show or update the values. From the Intel IAA spec [1], Intel IAA does not support batch processing. So these sysfs attributes should not be supported on IAA devices. Fix this issue by making the max_batch_size attributes invisible in sysfs through the is_visible() filter when the device is IAA. Add a description in the ABI documentation to mention that the attributes are not visible when the device does not support batch.
[1]: https://cdrdv2.intel.com/v1/dl/getContent/721858 Fixes: e7184b159dd3 ("dmaengine: idxd: add support for configurable max wq batch size") Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Xiaochen Shen Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20220930201528.18621-3-xiaochen.shen@intel.com Signed-off-by: Vinod Koul Signed-off-by: Xiaochen Shen --- .../ABI/stable/sysfs-driver-dma-idxd | 2 ++ drivers/dma/idxd/sysfs.c | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 76774249cf51..e4cbf5ce6eb8 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -22,6 +22,7 @@ Date: Oct 25, 2019 KernelVersion: 5.6.0 Contact: dmaengine@vger.kernel.org Description: The largest number of work descriptors in a batch. + It's not visible when the device does not support batch. What: /sys/bus/dsa/devices/dsa/max_work_queues_size Date: Oct 25, 2019 @@ -205,6 +206,7 @@ KernelVersion: 5.10.0 Contact: dmaengine@vger.kernel.org Description: The max batch size for this workqueue. Cannot exceed device max batch size. Configurable parameter. + It's not visible when the device does not support batch. What: /sys/bus/dsa/devices/wq./ats_disable Date: Nov 13, 2020 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 7269bd54554f..7909767e9836 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1233,6 +1233,14 @@ static bool idxd_wq_attr_op_config_invisible(struct attribute *attr, !idxd->hw.wq_cap.op_config; } +static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* Intel IAA does not support batch processing, make it invisible */ + return attr == &dev_attr_wq_max_batch_size.attr && + idxd->data->type == IDXD_TYPE_IAX; +} + static umode_t idxd_wq_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1243,6 +1251,9 @@ static umode_t idxd_wq_attr_visible(struct kobject *kobj, if (idxd_wq_attr_op_config_invisible(attr, idxd)) return 0; + if (idxd_wq_attr_max_batch_size_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1533,6 +1544,26 @@ static ssize_t cmd_status_store(struct device *dev, struct device_attribute *att } static DEVICE_ATTR_RW(cmd_status); +static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* Intel IAA does not support batch processing, make it invisible */ + return attr == &dev_attr_max_batch_size.attr && + idxd->data->type == IDXD_TYPE_IAX; +} + +static umode_t idxd_device_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (idxd_device_attr_max_batch_size_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + static struct attribute *idxd_device_attributes[] = { &dev_attr_version.attr, &dev_attr_max_groups.attr, @@ -1560,6 +1591,7 @@ static struct attribute *idxd_device_attributes[] = { static const struct attribute_group idxd_device_attribute_group = { .attrs = idxd_device_attributes, + .is_visible = idxd_device_attr_visible, }; static const struct attribute_group *idxd_attribute_groups[] = { -- Gitee From b800fe68097b49f6ffdf0454cac6f9fef05e3424 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 22 Oct 2022 
Subject: [PATCH 168/173] dmaengine: idxd: Make read buffer sysfs attributes
 invisible for Intel IAA

mainline inclusion
from mainline-v6.2
commit 9a8ddb35a9d5d3ad76784a012459b256a9d7de7e
category: bugfix
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO
CVE: NA

Intel-SIG: commit 9a8ddb35a9d5 dmaengine: idxd: Make read buffer sysfs
attributes invisible for Intel IAA.
Incremental backporting patches for DSA/IAA on Intel Xeon platform.

--------------------------------

In the current code, the following sysfs attributes are exposed to the
user to show or update the values:

    max_read_buffers (max_tokens)
    read_buffer_limit (token_limit)
    group/read_buffers_allowed (group/tokens_allowed)
    group/read_buffers_reserved (group/tokens_reserved)
    group/use_read_buffer_limit (group/use_token_limit)

From the Intel IAA spec [1], Intel IAA does not support Read Buffer
allocation control, so these sysfs attributes should not be supported
on an IAA device.

Fix this issue by making these sysfs attributes invisible through the
is_visible() filter when the device is IAA.

Add a description to the ABI documentation to mention that these
attributes are not visible when the device does not support Read
Buffer allocation control.

[1]: https://cdrdv2.intel.com/v1/dl/getContent/721858

Fixes: fde212e44f45 ("dmaengine: idxd: deprecate token sysfs attributes for read buffers")
Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver")
Signed-off-by: Xiaochen Shen
Reviewed-by: Dave Jiang
Link: https://lore.kernel.org/r/20221022074949.11719-1-xiaochen.shen@intel.com
Signed-off-by: Vinod Koul
Signed-off-by: Xiaochen Shen
---
 .../ABI/stable/sysfs-driver-dma-idxd | 10 ++++++
 drivers/dma/idxd/sysfs.c             | 36 +++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index e4cbf5ce6eb8..bbe6b8bf9203 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -50,6 +50,8 @@ Description:	The total number of read buffers supported by this
 		device. The read buffers represent resources within the DSA
 		implementation, and these resources are allocated by engines
 		to support operations. See DSA spec v1.2 9.2.4 Total Read
 		Buffers.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:		/sys/bus/dsa/devices/dsa<m>/max_transfer_size
 Date:		Oct 25, 2019
@@ -123,6 +125,8 @@ Contact:	dmaengine@vger.kernel.org
 Description:	The maximum number of read buffers that may be in use at one
 		time by operations that access low bandwidth memory in the
 		device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer
 		Limit.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:		/sys/bus/dsa/devices/dsa<m>/cmd_status
 Date:		Aug 28, 2020
@@ -252,6 +256,8 @@ KernelVersion:	5.17.0
 Contact:	dmaengine@vger.kernel.org
 Description:	Enable the use of global read buffer limit for the group. See
 		DSA spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:		/sys/bus/dsa/devices/group<m>.<n>/read_buffers_allowed
 Date:		Dec 10, 2021
@@ -260,6 +266,8 @@ Contact:	dmaengine@vger.kernel.org
 Description:	Indicates max number of read buffers that may be in use at
 		one time by all engines in the group. See DSA spec v1.2
 		9.2.18 GRPCFG Read Buffers Allowed.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:		/sys/bus/dsa/devices/group<m>.<n>/read_buffers_reserved
 Date:		Dec 10, 2021
@@ -268,6 +276,8 @@ Contact:	dmaengine@vger.kernel.org
 Description:	Indicates the number of Read Buffers reserved for the use of
 		engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read
 		Buffers Reserved.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:		/sys/bus/dsa/devices/group<m>.<n>/desc_progress_limit
 Date:		Sept 14, 2022
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 7909767e9836..3229dfc78650 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -528,6 +528,22 @@ static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr,
 	       !idxd->hw.group_cap.progress_limit;
 }
 
+static bool idxd_group_attr_read_buffers_invisible(struct attribute *attr,
+						   struct idxd_device *idxd)
+{
+	/*
+	 * Intel IAA does not support Read Buffer allocation control,
+	 * make these attributes invisible.
+	 */
+	return (attr == &dev_attr_group_use_token_limit.attr ||
+		attr == &dev_attr_group_use_read_buffer_limit.attr ||
+		attr == &dev_attr_group_tokens_allowed.attr ||
+		attr == &dev_attr_group_read_buffers_allowed.attr ||
+		attr == &dev_attr_group_tokens_reserved.attr ||
+		attr == &dev_attr_group_read_buffers_reserved.attr) &&
+		idxd->data->type == IDXD_TYPE_IAX;
+}
+
 static umode_t idxd_group_attr_visible(struct kobject *kobj,
 				       struct attribute *attr, int n)
 {
@@ -538,6 +554,9 @@ static umode_t idxd_group_attr_visible(struct kobject *kobj,
 	if (idxd_group_attr_progress_limit_invisible(attr, idxd))
 		return 0;
 
+	if (idxd_group_attr_read_buffers_invisible(attr, idxd))
+		return 0;
+
 	return attr->mode;
 }
 
@@ -1552,6 +1571,20 @@ static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr,
 	       idxd->data->type == IDXD_TYPE_IAX;
 }
 
+static bool idxd_device_attr_read_buffers_invisible(struct attribute *attr,
+						    struct idxd_device *idxd)
+{
+	/*
+	 * Intel IAA does not support Read Buffer allocation control,
+	 * make these attributes invisible.
+	 */
+	return (attr == &dev_attr_max_tokens.attr ||
+		attr == &dev_attr_max_read_buffers.attr ||
+		attr == &dev_attr_token_limit.attr ||
+		attr == &dev_attr_read_buffer_limit.attr) &&
+		idxd->data->type == IDXD_TYPE_IAX;
+}
+
 static umode_t idxd_device_attr_visible(struct kobject *kobj,
 					struct attribute *attr, int n)
 {
@@ -1561,6 +1594,9 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj,
 	if (idxd_device_attr_max_batch_size_invisible(attr, idxd))
 		return 0;
 
+	if (idxd_device_attr_read_buffers_invisible(attr, idxd))
+		return 0;
+
 	return attr->mode;
 }
--
Gitee

From 476c56abba935c872dc7fb19ef43af44015d7315 Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Thu, 10 Nov 2022 17:27:15 -0800
Subject: [PATCH 169/173] dmaengine: idxd: Fix crc_val field for completion
 record

mainline inclusion
from mainline-v6.2
commit dc901d98b1fe6e52ab81cd3e0879379168e06daa
category: bugfix
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO
CVE: NA

Intel-SIG: commit dc901d98b1fe dmaengine: idxd: Fix crc_val field for
completion record.
Incremental backporting patches for DSA/IAA on Intel Xeon platform.

--------------------------------

The crc_val in the completion record should be 64 bits and not 32 bits.
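A small standalone illustration of why the field width matters for a
hardware-written record: with a 32-bit crc_val the structure no longer
matches what the device writes (different size, and later fields land
at different offsets). The trimmed-down structs here are hypothetical,
not the full uapi layout:

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical trimmed-down layouts, for illustration only. */
struct crc_rec_wrong {
	uint32_t delta_rec_size;
	uint32_t crc_val;	/* too narrow: the device writes 64 bits */
};

struct crc_rec_fixed {
	uint32_t delta_rec_size;
	uint64_t crc_val;	/* 64 bits, as the completion record defines */
};

int main(void)
{
	/* The corrected field changes both the offset and the total size. */
	printf("wrong: size=%zu, crc_val@%zu\n",
	       sizeof(struct crc_rec_wrong),
	       offsetof(struct crc_rec_wrong, crc_val));
	printf("fixed: size=%zu, crc_val@%zu\n",
	       sizeof(struct crc_rec_fixed),
	       offsetof(struct crc_rec_fixed, crc_val));
	return 0;
}
```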
Fixes: 4ac823e9cd85 ("dmaengine: idxd: fix delta_rec and crc size field for completion record")
Reported-by: Nirav N Shah
Signed-off-by: Fenghua Yu
Reviewed-by: Dave Jiang
Link: https://lore.kernel.org/r/20221111012715.2031481-1-fenghua.yu@intel.com
Signed-off-by: Vinod Koul
Signed-off-by: Xiaochen Shen
---
 include/uapi/linux/idxd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index 2b9e7feba3f3..1d553bedbdb5 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -295,7 +295,7 @@ struct dsa_completion_record {
 	};
 
 	uint32_t	delta_rec_size;
-	uint32_t	crc_val;
+	uint64_t	crc_val;
 
 	/* DIF check & strip */
 	struct {
--
Gitee

From 1ac6171ecb62ebc857980b67ea685d1b4e951910 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Sun, 13 Nov 2022 21:34:02 +0100
Subject: [PATCH 170/173] dmaengine: idxd: Remove linux/msi.h include

mainline inclusion
from mainline-v6.2
commit 444eef7d5695393f214d83180f3e4bb99621cd07
category: feature
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO
CVE: NA

Intel-SIG: commit 444eef7d5695 dmaengine: idxd: Remove linux/msi.h
include.
Incremental backporting patches for DSA/IAA on Intel Xeon platform.

--------------------------------

Nothing in this file needs anything from linux/msi.h.

Signed-off-by: Thomas Gleixner
Cc: Fenghua Yu
Cc: Dave Jiang
Cc: Vinod Koul
Cc: dmaengine@vger.kernel.org
Link: https://lore.kernel.org/r/20221113202428.573536003@linutronix.de
Signed-off-by: Vinod Koul
Signed-off-by: Xiaochen Shen
---
 drivers/dma/idxd/device.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 6f44fa8f78a5..06f5d3783d77 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -7,7 +7,6 @@
 #include 
 #include 
 #include 
-#include <linux/msi.h>
 #include 
 #include "../dmaengine.h"
 #include "idxd.h"
--
Gitee

From 7b61c8bfe11ce709f9936d3faeb04c2a07c06ab7 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen
Date: Mon, 21 Nov 2022 14:46:45 +0800
Subject: [PATCH 171/173] dmaengine: idxd: Fix kABI for IDXD perfmon cpu hot
 plug state

category: bugfix
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO
CVE: NA

Intel-SIG: dmaengine: idxd: Fix kABI for IDXD perfmon cpu hot plug
state.

-------------------------------------------------

In commit ("dmaengine: idxd: Add IDXD performance monitor support"),
the introduction of the Intel IDXD performance monitor feature adds the
new CPU hot plug state 'CPUHP_AP_PERF_X86_IDXD_ONLINE' in the middle of
the data structure 'enum cpuhp_state', which causes kABI breakage.

Fix it by reusing another arch's entry for CPUHP_AP_PERF_X86_IDXD_ONLINE
to avoid kABI breakage.
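A standalone sketch (hypothetical enum, not the kernel's cpuhp_state)
of the breakage being avoided: inserting an enumerator mid-enum shifts
every later value, so a module compiled against the old layout passes
stale numbers to the new kernel:

```c
#include <stdio.h>

/* "Before": the layout an out-of-tree module was compiled against. */
enum state_v1 { STATE_A, STATE_B, STATE_C };	/* STATE_C == 2 */

/* "After": one entry inserted in the middle shifts everything below it. */
enum state_v2 { V2_A, V2_NEW, V2_B, V2_C };	/* V2_C == 3 */

int main(void)
{
	/* An old module still passes 2, which the new kernel reads as V2_B. */
	printf("old STATE_C=%d, new V2_C=%d\n", STATE_C, V2_C);
	return 0;
}
```

Aliasing the new name to an existing enumerator's value via a #define,
as the patch does with CPUHP_AP_PERF_ARM_L2X0_ONLINE, keeps every
existing value stable.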
Signed-off-by: Xiaochen Shen
---
 include/linux/cpuhotplug.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 3c7e775daece..820939794b79 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -3,6 +3,7 @@
 #define __CPUHOTPLUG_H
 
 #include 
+#include 
 
 /*
  * CPU-up			CPU-down
@@ -169,7 +170,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_X86_RAPL_ONLINE,
 	CPUHP_AP_PERF_X86_CQM_ONLINE,
 	CPUHP_AP_PERF_X86_CSTATE_ONLINE,
-	CPUHP_AP_PERF_X86_IDXD_ONLINE,
+	/* kABI: CPUHP_AP_PERF_X86_IDXD_ONLINE, */
 	CPUHP_AP_PERF_S390_CF_ONLINE,
 	CPUHP_AP_PERF_S390_SF_ONLINE,
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
@@ -202,6 +203,9 @@ enum cpuhp_state {
 	CPUHP_ONLINE,
 };
 
+/* Reuse another arch's entry to avoid kABI breakage */
+#define CPUHP_AP_PERF_X86_IDXD_ONLINE CPUHP_AP_PERF_ARM_L2X0_ONLINE
+
 int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke,
 			int (*startup)(unsigned int cpu),
 			int (*teardown)(unsigned int cpu), bool multi_instance);
--
Gitee

From 3922e76213a46ffb0772c63300ec197d9674f676 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen
Date: Mon, 21 Nov 2022 14:48:37 +0800
Subject: [PATCH 172/173] sched: Fix kABI for task->pasid_activated

category: bugfix
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO
CVE: NA

Intel-SIG: sched: Fix kABI for task->pasid_activated.

-------------------------------------------------

In commit ("sched: Define and initialize a flag to identify valid
PASID in the task"), a new single-bit field 'pasid_activated' is added
in the middle of 'struct task_struct', which causes kABI breakage.

Fix it by using the KABI_FILL_HOLE() API for task->pasid_activated.

Signed-off-by: Xiaochen Shen
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50b7414946d3..88c942975028 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -873,7 +873,7 @@ struct task_struct {
 	unsigned			in_memstall:1;
 #endif
 #ifdef CONFIG_IOMMU_SVA
-	unsigned			pasid_activated:1;
+	KABI_FILL_HOLE(unsigned pasid_activated:1)
 #endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
--
Gitee

From 927622294e51369ab669110f7962cd698994f889 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen
Date: Mon, 21 Nov 2022 14:49:12 +0800
Subject: [PATCH 173/173] openeuler_defconfig: Enable configs for Intel IDXD
 driver

category: feature
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596WO
CVE: NA

Intel-SIG: openeuler_defconfig: Enable configs for Intel IDXD driver.

--------------------------------------------

Enable the necessary kernel configs for the Intel IDXD driver in
openeuler_defconfig for the Intel DSA/IAA features.
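Referring back to the task->pasid_activated change above, here is a
standalone sketch (hypothetical structs, not the kernel's task_struct)
of why a one-bit field placed in an existing bitfield storage unit
leaves the structure size and later offsets untouched, which is the
property the KABI_FILL_HOLE() helper is meant to assert:

```c
#include <stddef.h>
#include <stdio.h>

/* Hypothetical "before" layout: one bit used, the rest of the unit is a hole. */
struct task_before {
	unsigned in_memstall:1;		/* occupies part of a 4-byte unit */
	unsigned long atomic_flags;	/* starts at the next 8-byte boundary */
};

/* "After": the new bit fits in the same storage unit, nothing moves. */
struct task_after {
	unsigned in_memstall:1;
	unsigned pasid_activated:1;	/* fills part of the existing hole */
	unsigned long atomic_flags;	/* offset unchanged */
};

int main(void)
{
	printf("before: size=%zu, atomic_flags@%zu\n",
	       sizeof(struct task_before),
	       offsetof(struct task_before, atomic_flags));
	printf("after:  size=%zu, atomic_flags@%zu\n",
	       sizeof(struct task_after),
	       offsetof(struct task_after, atomic_flags));
	return 0;
}
```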
Signed-off-by: Xiaochen Shen
---
 arch/x86/configs/openeuler_defconfig | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index 300444226d6a..a064726f4597 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -6357,7 +6357,11 @@ CONFIG_DMA_VIRTUAL_CHANNELS=y
 CONFIG_DMA_ACPI=y
 # CONFIG_ALTERA_MSGDMA is not set
 CONFIG_INTEL_IDMA64=m
+CONFIG_INTEL_IDXD_BUS=m
 CONFIG_INTEL_IDXD=m
+# CONFIG_INTEL_IDXD_COMPAT is not set
+CONFIG_INTEL_IDXD_SVM=y
+CONFIG_INTEL_IDXD_PERFMON=y
 CONFIG_INTEL_IOATDMA=m
 # CONFIG_PLX_DMA is not set
 # CONFIG_QCOM_HIDMA_MGMT is not set
@@ -6606,11 +6610,12 @@ CONFIG_IOMMU_SUPPORT=y
 # CONFIG_IOMMU_DEBUGFS is not set
 CONFIG_IOMMU_DEFAULT_PASSTHROUGH=y
 CONFIG_IOMMU_DMA=y
+CONFIG_IOMMU_SVA=y
 CONFIG_AMD_IOMMU=y
 CONFIG_AMD_IOMMU_V2=m
 CONFIG_DMAR_TABLE=y
 CONFIG_INTEL_IOMMU=y
-# CONFIG_INTEL_IOMMU_SVM is not set
+CONFIG_INTEL_IOMMU_SVM=y
 # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
 CONFIG_INTEL_IOMMU_FLOPPY_WA=y
 # CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set
--
Gitee
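As a userspace-side illustration of the visibility filtering earlier in
this series: a device that lacks a capability simply has no
corresponding attribute file, so a plain existence check suffices. The
device names below (dsa0, iax1) are hypothetical and vary per system:

```c
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical sysfs paths; adjust dsa0/iax1 to the local system. */
	const char *paths[] = {
		"/sys/bus/dsa/devices/dsa0/max_batch_size",
		"/sys/bus/dsa/devices/iax1/max_batch_size",
	};

	for (int i = 0; i < 2; i++)
		printf("%s: %s\n", paths[i],
		       access(paths[i], F_OK) == 0 ? "visible" : "not visible");
	return 0;
}
```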