From a25806da033e9b0d95f72901af8105e94f51f474 Mon Sep 17 00:00:00 2001 From: Jiakun Shuai Date: Tue, 9 Sep 2025 17:26:19 +0800 Subject: [PATCH 1/6] pswiotlb: Move pswiotlb dma functions behind dma_map_ops To reduce the difficulty of maintenance, pswiotlb_dma* functions are moved behind the struct dma_map_ops. Signed-off-by: Cui Chao Signed-off-by: Jiakun Shuai --- arch/arm64/mm/dma-mapping.c | 8 + drivers/pci/pci.c | 9 - include/linux/device.h | 18 +- include/linux/pswiotlb.h | 1 + kernel/dma/mapping.c | 59 +---- kernel/dma/phytium/pswiotlb-dma.h | 74 +++++- kernel/dma/phytium/pswiotlb-iommu.c | 12 +- kernel/dma/phytium/pswiotlb-mapping.c | 337 ++++++++++++++++++++++++-- 8 files changed, 430 insertions(+), 88 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3cb101e8cb29..17ebc794a775 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -13,6 +13,10 @@ #include #include +#ifdef CONFIG_PSWIOTLB +#include +#endif + void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { @@ -61,5 +65,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, if (iommu) iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); +#ifdef CONFIG_PSWIOTLB + pswiotlb_setup_dma_ops(dev); +#endif + xen_setup_dma_ops(dev); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1a9593042506..3136b66efcfb 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4546,15 +4546,6 @@ void __weak pcibios_set_master(struct pci_dev *dev) */ void pci_set_master(struct pci_dev *dev) { -#ifdef CONFIG_PSWIOTLB - if ((pswiotlb_force_disable != true) && - is_phytium_ps_socs()) { - dev->dev.can_use_pswiotlb = pswiotlb_is_dev_in_passthroughlist(dev); - dev_info(&dev->dev, "The device %s use pswiotlb because vendor 0x%04x %s in pswiotlb passthroughlist\n", - dev->dev.can_use_pswiotlb ? "would" : "would NOT", - dev->vendor, dev->dev.can_use_pswiotlb ? 
"is NOT" : "is"); - } -#endif __pci_set_master(dev, true); pcibios_set_master(dev); } diff --git a/include/linux/device.h b/include/linux/device.h index 79b04c5ca2c8..485ee4ce633d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -658,7 +658,10 @@ struct device_physical_location { * @dma_io_tlb_lock: Protects changes to the list of active pools. * @dma_uses_io_tlb: %true if device has used the software IO TLB. * @dma_p_io_tlb_mem: Phytium Software IO TLB allocator. Not for driver use. + * @orig_dma_ops: Original DMA mapping operations for this device. + * @local_node: NUMA node this device is really belong to. * @dma_uses_p_io_tlb: %true if device has used the Phytium software IO TLB. + * @can_use_pswiotlb: %true if device can use the Phytium software IO TLB. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -767,8 +770,16 @@ struct device { #endif #ifdef CONFIG_PSWIOTLB struct p_io_tlb_mem *dma_p_io_tlb_mem; - bool dma_uses_p_io_tlb; - bool can_use_pswiotlb; +#ifdef CONFIG_DMA_OPS + const struct dma_map_ops *orig_dma_ops; +#endif + struct { +#ifdef CONFIG_NUMA + int local_node; +#endif + bool dma_uses_p_io_tlb; + bool can_use_pswiotlb; + }; #endif #ifdef CONFIG_SWIOTLB_DYNAMIC struct list_head dma_io_tlb_pools; @@ -783,9 +794,6 @@ struct device { #ifdef CONFIG_NUMA int numa_node; /* NUMA node this device is close to */ -#ifdef CONFIG_PSWIOTLB - int local_node; /* NUMA node this device is really belong to */ -#endif #endif dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ diff --git a/include/linux/pswiotlb.h b/include/linux/pswiotlb.h index 26b2a9d7f5a1..510cff3ad342 100644 --- a/include/linux/pswiotlb.h +++ b/include/linux/pswiotlb.h @@ -79,6 +79,7 @@ dma_addr_t pswiotlb_map(struct device *dev, int nid, phys_addr_t phys, void pswiotlb_store_local_node(struct pci_dev *dev, struct pci_bus *bus); void 
iommu_dma_unmap_sg_pswiotlb(struct device *dev, struct scatterlist *sg, unsigned long iova, size_t mapped, int nents, enum dma_data_direction dir, unsigned long attrs); +void pswiotlb_setup_dma_ops(struct device *dev); #ifdef CONFIG_PSWIOTLB struct pswiotlb_passthroughlist { struct list_head node; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 1743f5d88ead..c64f03c59195 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -159,13 +159,6 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev) && - !pswiotlb_bypass_is_needed(dev, 0, dir)) { - addr = pswiotlb_dma_map_page_distribute(dev, page, offset, size, dir, attrs); - return addr; - } -#endif if (dma_map_direct(dev, ops) || arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); @@ -184,12 +177,7 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_unmap_page_attrs_distribute(dev, addr, size, dir, attrs); - return; - } -#endif + if (dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); @@ -210,13 +198,6 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, if (WARN_ON_ONCE(!dev->dma_mask)) return 0; -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev) && - !pswiotlb_bypass_is_needed(dev, nents, dir)) { - ents = pswiotlb_dma_map_sg_attrs_distribute(dev, sg, nents, dir, attrs); - return ents; - } -#endif if (dma_map_direct(dev, ops) || arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); @@ -311,12 +292,7 @@ void 
dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_unmap_sg_attrs_distribute(dev, sg, nents, dir, attrs); - return; - } -#endif + if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); @@ -364,12 +340,7 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_single_for_cpu_distribute(dev, addr, size, dir); - return; - } -#endif + if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) @@ -384,12 +355,7 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_single_for_device_distribute(dev, addr, size, dir); - return; - } -#endif + if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_device(dev, addr, size, dir); else if (ops->sync_single_for_device) @@ -404,12 +370,7 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_sg_for_cpu_distribute(dev, sg, nelems, dir); - return; - } -#endif + if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); else if (ops->sync_sg_for_cpu) @@ -424,12 +385,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); -#ifdef 
CONFIG_PSWIOTLB - if (check_if_pswiotlb_is_applicable(dev)) { - pswiotlb_dma_sync_sg_for_device_distribute(dev, sg, nelems, dir); - return; - } -#endif + if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_device(dev, sg, nelems, dir); else if (ops->sync_sg_for_device) @@ -563,9 +519,6 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, if (WARN_ON_ONCE(flag & __GFP_COMP)) return NULL; -#ifdef CONFIG_PSWIOTLB - check_if_pswiotlb_is_applicable(dev); -#endif if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; diff --git a/kernel/dma/phytium/pswiotlb-dma.h b/kernel/dma/phytium/pswiotlb-dma.h index 0f159a389174..8d355e8baa3a 100644 --- a/kernel/dma/phytium/pswiotlb-dma.h +++ b/kernel/dma/phytium/pswiotlb-dma.h @@ -13,6 +13,69 @@ #include extern bool pswiotlb_force_disable; +struct pswiotlb_dma_map_ops { + unsigned int flags; + + void *(*alloc)(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs); + void (*free)(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs); + struct page *(*alloc_pages)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); + struct sg_table *(*alloc_noncontiguous)(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, + unsigned long attrs); + void (*free_noncontiguous)(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); + int (*mmap)(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); + + int (*get_sgtable)(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); + + dma_addr_t (*map_page)(struct device *dev, struct page *page, + unsigned long 
offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + /* + * map_sg should return a negative error code on error. See + * dma_map_sgtable() for a list of appropriate error codes + * and their meanings. + */ + int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); + dma_addr_t (*map_resource)(struct device *dev, phys_addr_t phys_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*unmap_resource)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*sync_single_for_cpu)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); + void (*sync_single_for_device)(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir); + void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir); + void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir); + void (*cache_sync)(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction); + int (*dma_supported)(struct device *dev, u64 mask); + u64 (*get_required_mask)(struct device *dev); + size_t (*max_mapping_size)(struct device *dev); + size_t (*opt_mapping_size)(void); + unsigned long (*get_merge_boundary)(struct device *dev); +}; #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_PSWIOTLB) void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev, @@ -46,7 +109,7 @@ static inline void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev, #ifdef CONFIG_PSWIOTLB int 
pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); -dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page, +dma_addr_t pswiotlb_dma_map_page_attrs_distribute(struct device *dev, struct page *page, size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs); void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr, @@ -80,6 +143,8 @@ void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir); void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir); +void pswiotlb_iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs); static inline bool check_if_pswiotlb_is_applicable(struct device *dev) { @@ -212,7 +277,7 @@ static inline int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterl return 0; } -static inline dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, +static inline dma_addr_t pswiotlb_dma_map_page_attrs_distribute(struct device *dev, struct page *page, size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { @@ -304,6 +369,11 @@ static inline void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev, { } +static inline void pswiotlb_iommu_dma_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, unsigned long attrs) +{ +} + static inline bool check_if_pswiotlb_is_applicable(struct device *dev) { return false; diff --git a/kernel/dma/phytium/pswiotlb-iommu.c b/kernel/dma/phytium/pswiotlb-iommu.c index bc8954afff89..e19ae0c2df6b 100644 --- a/kernel/dma/phytium/pswiotlb-iommu.c +++ b/kernel/dma/phytium/pswiotlb-iommu.c @@ -116,16 +116,24 @@ struct iova_fq { * The following functions are ported from * ./drivers/iommu/dma-iommu.c * ./drivers/iommu/iommu.c + 
* static inline bool fq_full(struct iova_fq *fq); + * static void fq_flush_iotlb(struct iommu_dma_cookie *cookie); + * static inline unsigned int fq_ring_add(struct iova_fq *fq); + * static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq); * static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, * phys_addr_t paddr, size_t size, size_t *count); * static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, - * phys_addr_t paddr, size_t size, int prot, - * gfp_t gfp, size_t *mapped); + * phys_addr_t paddr, size_t size, int prot, gfp_t gfp, size_t *mapped); * static int __iommu_map(struct iommu_domain *domain, unsigned long iova, * phys_addr_t paddr, size_t size, int prot, gfp_t gfp); + * static bool dev_use_swiotlb(struct device *dev, size_t size, + * enum dma_data_direction dir); * static bool dev_is_untrusted(struct device *dev); * static int dma_info_to_prot(enum dma_data_direction dir, bool coherent, * unsigned long attrs); + * static void queue_iova(struct iommu_dma_cookie *cookie, + * unsigned long pfn, unsigned long pages, + * struct list_head *freelist); * static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, * size_t size, u64 dma_limit, struct device *dev); * static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, diff --git a/kernel/dma/phytium/pswiotlb-mapping.c b/kernel/dma/phytium/pswiotlb-mapping.c index 65674b7bdeab..7d518b5646c7 100644 --- a/kernel/dma/phytium/pswiotlb-mapping.c +++ b/kernel/dma/phytium/pswiotlb-mapping.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "../debug.h" #include "../direct.h" #include "pswiotlb-dma.h" @@ -22,8 +24,18 @@ * ./drivers/dma/mapping.c * static bool dma_go_direct(struct device *dev, dma_addr_t mask, * const struct dma_map_ops *ops); + * static inline bool dma_alloc_direct(struct device *dev, + * const struct dma_map_ops *ops); * static inline bool dma_map_direct(struct device *dev, * const struct 
dma_map_ops *ops); + * static struct page *__dma_alloc_pages(struct device *dev, size_t size, + * dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); + * static struct sg_table *alloc_single_sgt(struct device *dev, size_t size, + * enum dma_data_direction dir, gfp_t gfp); + * static void __dma_free_pages(struct device *dev, size_t size, struct page *page, + * dma_addr_t dma_handle, enum dma_data_direction dir); + * static void free_single_sgt(struct device *dev, size_t size, + * struct sg_table *sgt, enum dma_data_direction dir); */ static bool dma_go_direct(struct device *dev, dma_addr_t mask, @@ -39,23 +51,38 @@ static bool dma_go_direct(struct device *dev, dma_addr_t mask, return false; } +static inline bool dma_alloc_direct(struct device *dev, + const struct dma_map_ops *ops) +{ + return dma_go_direct(dev, dev->coherent_dma_mask, ops); +} + static inline bool dma_map_direct(struct device *dev, const struct dma_map_ops *ops) { return dma_go_direct(dev, *dev->dma_mask, ops); } -dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page, + +dma_addr_t pswiotlb_dma_map_page_attrs_distribute(struct device *dev, struct page *page, size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { - const struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = dev->orig_dma_ops; dma_addr_t addr; if (dma_map_direct(dev, ops) || - arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) - addr = pswiotlb_dma_direct_map_page(dev, page, offset, size, dir, attrs); - else - addr = pswiotlb_iommu_dma_map_page(dev, page, offset, size, dir, attrs); + arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) { + if (!pswiotlb_bypass_is_needed(dev, 0, dir)) + addr = pswiotlb_dma_direct_map_page(dev, page, offset, size, dir, attrs); + else + addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + } else { + if (!pswiotlb_bypass_is_needed(dev, 0, dir)) + addr = 
pswiotlb_iommu_dma_map_page(dev, page, offset, size, dir, attrs); + else + addr = ops->map_page(dev, page, offset, size, dir, attrs); + } + kmsan_handle_dma(page, offset, size, dir); debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); return addr; @@ -64,7 +91,7 @@ dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *pag void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - const struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = dev->orig_dma_ops; if (dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) @@ -77,14 +104,21 @@ void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t add int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - const struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = dev->orig_dma_ops; int ents; if (dma_map_direct(dev, ops) || - arch_dma_map_sg_direct(dev, sg, nents)) - ents = pswiotlb_dma_direct_map_sg(dev, sg, nents, dir, attrs); - else - ents = pswiotlb_iommu_dma_map_sg(dev, sg, nents, dir, attrs); + arch_dma_map_sg_direct(dev, sg, nents)) { + if (!pswiotlb_bypass_is_needed(dev, nents, dir)) + ents = pswiotlb_dma_direct_map_sg(dev, sg, nents, dir, attrs); + else + ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); + } else { + if (!pswiotlb_bypass_is_needed(dev, nents, dir)) + ents = pswiotlb_iommu_dma_map_sg(dev, sg, nents, dir, attrs); + else + ents = ops->map_sg(dev, sg, nents, dir, attrs); + } if (ents > 0) debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); @@ -99,7 +133,7 @@ void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterli int nents, enum dma_data_direction dir, unsigned long attrs) { - const struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = 
dev->orig_dma_ops; if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) @@ -111,7 +145,7 @@ void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterli void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - const struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = dev->orig_dma_ops; if (dma_map_direct(dev, ops)) pswiotlb_dma_direct_sync_single_for_cpu(dev, addr, size, dir); @@ -123,7 +157,7 @@ void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - const struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = dev->orig_dma_ops; if (dma_map_direct(dev, ops)) pswiotlb_dma_direct_sync_single_for_device(dev, addr, size, dir); @@ -135,7 +169,7 @@ void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, dma_addr void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { - const struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = dev->orig_dma_ops; if (dma_map_direct(dev, ops)) pswiotlb_dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); @@ -147,7 +181,7 @@ void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterl void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { - const struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = dev->orig_dma_ops; if (dma_map_direct(dev, ops)) pswiotlb_dma_direct_sync_sg_for_device(dev, sg, nelems, dir); @@ -155,3 +189,272 @@ void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatt pswiotlb_iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); 
debug_dma_sync_sg_for_device(dev, sg, nelems, dir); } + +static dma_addr_t pswiotlb_dma_map_resource_distribute(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return dma_direct_map_resource(dev, phys, size, dir, attrs); +} + +static struct page *__dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (WARN_ON_ONCE(!dev->coherent_dma_mask)) + return NULL; + if (WARN_ON_ONCE(gfp & (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM))) + return NULL; + if (WARN_ON_ONCE(gfp & __GFP_COMP)) + return NULL; + + size = PAGE_ALIGN(size); + if (dma_alloc_direct(dev, ops)) + return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); + if (!ops->alloc_pages) + return NULL; + return ops->alloc_pages(dev, size, dma_handle, dir, gfp); +} + +static struct sg_table *alloc_single_sgt(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp) +{ + struct sg_table *sgt; + struct page *page; + + sgt = kmalloc(sizeof(*sgt), gfp); + if (!sgt) + return NULL; + if (sg_alloc_table(sgt, 1, gfp)) + goto out_free_sgt; + page = __dma_alloc_pages(dev, size, &sgt->sgl->dma_address, dir, gfp); + if (!page) + goto out_free_table; + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + sg_dma_len(sgt->sgl) = sgt->sgl->length; + return sgt; +out_free_table: + sg_free_table(sgt); +out_free_sgt: + kfree(sgt); + return NULL; +} + +static struct sg_table *pswiotlb_dma_alloc_noncontiguous_distribute(struct device *dev, + size_t size, enum dma_data_direction dir, gfp_t gfp, + unsigned long attrs) +{ + return alloc_single_sgt(dev, size, dir, gfp); +} + +static void __dma_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + size = PAGE_ALIGN(size); + if (dma_alloc_direct(dev, ops)) + 
dma_direct_free_pages(dev, size, page, dma_handle, dir); + else if (ops->free_pages) + ops->free_pages(dev, size, page, dma_handle, dir); +} + +static void free_single_sgt(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ + __dma_free_pages(dev, size, sg_page(sgt->sgl), sgt->sgl->dma_address, + dir); + sg_free_table(sgt); + kfree(sgt); +} + +static void pswiotlb_dma_free_noncontiguous_distribute(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ + free_single_sgt(dev, size, sgt, dir); +} + +static int pswiotlb_dma_get_sgtable_distribute(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr, + size, attrs); +} + +static int pswiotlb_dma_mmap_distribute(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size, + attrs); +} + +static u64 pswiotlb_dma_get_required_mask_distribute(struct device *dev) +{ + return dma_direct_get_required_mask(dev); +} + +static void *pswiotlb_dma_alloc_distribute(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +{ + const struct dma_map_ops *ops = dev->orig_dma_ops; + void *cpu_addr; + + check_if_pswiotlb_is_applicable(dev); + + if (dma_alloc_direct(dev, ops)) + cpu_addr = dma_direct_alloc(dev, size, handle, gfp, attrs); + else + cpu_addr = ops->alloc(dev, size, handle, gfp, attrs); + + return cpu_addr; +} + +static void pswiotlb_dma_free_distribute(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, unsigned long attrs) +{ + const struct dma_map_ops *ops = dev->orig_dma_ops; + + if (dma_alloc_direct(dev, ops)) + dma_direct_free(dev, size, cpu_addr, handle, attrs); + else + ops->free(dev, size, cpu_addr, handle, attrs); +} + +static struct page 
*pswiotlb_dma_common_alloc_pages_distribute(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); +} + +static void pswiotlb_dma_common_free_pages_distribute(struct device *dev, size_t size, + struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir) +{ + dma_direct_free_pages(dev, size, page, dma_handle, dir); +} + +static int pswiotlb_dma_supported_distribute(struct device *dev, u64 mask) +{ + return dma_direct_supported(dev, mask); +} + +static size_t pswiotlb_dma_max_mapping_size_distribute(struct device *dev) +{ + const struct dma_map_ops *ops = dev->orig_dma_ops; + + if (dma_map_direct(dev, ops)) + return dma_direct_max_mapping_size(dev); + else + return SIZE_MAX; +} + +static size_t pswiotlb_iommu_dma_opt_mapping_size(void) +{ + if (iommu_default_passthrough()) + return SIZE_MAX; + else + return iova_rcache_range(); +} + +static size_t pswiotlb_dma_opt_mapping_size_distribute(void) +{ + size_t size; + + size = pswiotlb_iommu_dma_opt_mapping_size(); + + return min(SIZE_MAX, size); +} + +static unsigned long pswiotlb_dma_get_merge_boundary_distribute(struct device *dev) +{ + return 0; /* can't merge */ +} + +static const struct dma_map_ops pswiotlb_noiommu_dma_ops = { + .flags = DMA_F_PCI_P2PDMA_SUPPORTED, + .alloc = pswiotlb_dma_alloc_distribute, + .free = pswiotlb_dma_free_distribute, + .alloc_pages = pswiotlb_dma_common_alloc_pages_distribute, + .free_pages = pswiotlb_dma_common_free_pages_distribute, + .alloc_noncontiguous = pswiotlb_dma_alloc_noncontiguous_distribute, + .free_noncontiguous = pswiotlb_dma_free_noncontiguous_distribute, + .mmap = pswiotlb_dma_mmap_distribute, + .get_sgtable = pswiotlb_dma_get_sgtable_distribute, + .map_page = pswiotlb_dma_map_page_attrs_distribute, + .unmap_page = pswiotlb_dma_unmap_page_attrs_distribute, + .map_sg = pswiotlb_dma_map_sg_attrs_distribute, + .unmap_sg = 
pswiotlb_dma_unmap_sg_attrs_distribute, + .sync_single_for_cpu = pswiotlb_dma_sync_single_for_cpu_distribute, + .sync_single_for_device = pswiotlb_dma_sync_single_for_device_distribute, + .sync_sg_for_cpu = pswiotlb_dma_sync_sg_for_cpu_distribute, + .sync_sg_for_device = pswiotlb_dma_sync_sg_for_device_distribute, + .map_resource = pswiotlb_dma_map_resource_distribute, + .unmap_resource = NULL, + .get_merge_boundary = pswiotlb_dma_get_merge_boundary_distribute, + .get_required_mask = pswiotlb_dma_get_required_mask_distribute, + .dma_supported = pswiotlb_dma_supported_distribute, + .max_mapping_size = pswiotlb_dma_max_mapping_size_distribute, + .opt_mapping_size = pswiotlb_dma_opt_mapping_size_distribute, +}; +struct pswiotlb_dma_map_ops *pswiotlb_clone_orig_dma_ops(struct device *dev, + const struct dma_map_ops *ops) +{ + struct pswiotlb_dma_map_ops *new_dma_ops = kmalloc(sizeof(struct pswiotlb_dma_map_ops), + GFP_KERNEL); + if (!new_dma_ops) + return NULL; + + memcpy(new_dma_ops, ops, sizeof(struct pswiotlb_dma_map_ops)); + + return new_dma_ops; +} + +void pswiotlb_setup_dma_ops(struct device *dev) +{ + const struct dma_map_ops *orig_ops = get_dma_ops(dev); + struct pswiotlb_dma_map_ops *new_ops; + struct pci_dev *pdev; + + if (dev && dev_is_pci(dev) && (pswiotlb_force_disable != true) && + is_phytium_ps_socs()) { + pdev = to_pci_dev(dev); + pdev->dev.can_use_pswiotlb = pswiotlb_is_dev_in_passthroughlist(pdev); + dev_info(&pdev->dev, "The device %s use pswiotlb because vendor 0x%04x %s in pswiotlb passthroughlist\n", + pdev->dev.can_use_pswiotlb ? "would" : "would NOT", + pdev->vendor, pdev->dev.can_use_pswiotlb ? 
"is NOT" : "is"); + } + + if (check_if_pswiotlb_is_applicable(dev)) { + if (!orig_ops) + set_dma_ops(dev, &pswiotlb_noiommu_dma_ops); + else { + new_ops = pswiotlb_clone_orig_dma_ops(dev, orig_ops); + if (!new_ops) { + dev_warn(dev, "Failed to clone dma ops, pswiotlb is NOT applicable\n"); + return; + } + + dev->orig_dma_ops = get_dma_ops(dev); + new_ops->alloc = pswiotlb_dma_alloc_distribute; + new_ops->map_page = pswiotlb_dma_map_page_attrs_distribute; + new_ops->unmap_page = pswiotlb_dma_unmap_page_attrs_distribute; + new_ops->map_sg = pswiotlb_dma_map_sg_attrs_distribute; + new_ops->unmap_sg = pswiotlb_dma_unmap_sg_attrs_distribute; + new_ops->sync_single_for_cpu = + pswiotlb_dma_sync_single_for_cpu_distribute; + new_ops->sync_single_for_device = + pswiotlb_dma_sync_single_for_device_distribute; + new_ops->sync_sg_for_cpu = + pswiotlb_dma_sync_sg_for_cpu_distribute; + new_ops->sync_sg_for_device = + pswiotlb_dma_sync_sg_for_device_distribute; + new_ops->max_mapping_size = + pswiotlb_dma_max_mapping_size_distribute; + new_ops->opt_mapping_size = + pswiotlb_dma_opt_mapping_size_distribute; + + set_dma_ops(dev, (const struct dma_map_ops *)new_ops); + } + } +} -- Gitee From a588472d007ba5cb8c5d0848da8ee4f430e1bb4c Mon Sep 17 00:00:00 2001 From: Jiakun Shuai Date: Tue, 9 Sep 2025 17:31:13 +0800 Subject: [PATCH 2/6] pswiotlb: Optimized variable types and removed unused header files Optimized variable types that read frequency is much greater than write frequency or only read once during the initialization. 
Signed-off-by: Cui Chao Signed-off-by: Jiakun Shuai --- drivers/pci/pci.c | 3 --- include/linux/pswiotlb.h | 2 +- kernel/dma/contiguous.c | 6 +++--- kernel/dma/mapping.c | 3 --- kernel/dma/phytium/pswiotlb.c | 2 +- 5 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3136b66efcfb..048a927531b7 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -36,9 +36,6 @@ #include #endif #include "pci.h" -#ifdef CONFIG_PSWIOTLB -#include -#endif DEFINE_MUTEX(pci_slot_mutex); diff --git a/include/linux/pswiotlb.h b/include/linux/pswiotlb.h index 510cff3ad342..1c61ac685a6d 100644 --- a/include/linux/pswiotlb.h +++ b/include/linux/pswiotlb.h @@ -26,7 +26,7 @@ struct p_io_tlb_pool; #define PSWIOTLB_FORCEOFF (1 << 1) /* force phytium bounce buffering off*/ #define PSWIOTLB_ANY (1 << 2) /* allow any memory for the buffer */ #define PSWIOTLB_FREE_THRESHOLD 30 -static bool is_ps_socs; +static bool __read_mostly is_ps_socs; /* * Maximum allowable number of contiguous slabs to map, diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 6a78ea7a8303..77ec526971c3 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -362,13 +362,13 @@ static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp) */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { +#ifdef CONFIG_DMA_NUMA_CMA + int nid = dev_to_node(dev); +#endif #ifdef CONFIG_PSWIOTLB if (check_if_pswiotlb_is_applicable(dev)) return NULL; #endif -#ifdef CONFIG_DMA_NUMA_CMA - int nid = dev_to_node(dev); -#endif /* CMA can be used only in the context which permits sleeping */ if (!gfpflags_allow_blocking(gfp)) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index c64f03c59195..98c4ea854a2f 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -16,9 +16,6 @@ #include #include "debug.h" #include "direct.h" -#ifdef CONFIG_PSWIOTLB -#include "./phytium/pswiotlb-dma.h" -#endif #if 
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ diff --git a/kernel/dma/phytium/pswiotlb.c b/kernel/dma/phytium/pswiotlb.c index 061a5c5a28ab..786a63d6f561 100644 --- a/kernel/dma/phytium/pswiotlb.c +++ b/kernel/dma/phytium/pswiotlb.c @@ -77,7 +77,7 @@ struct p_io_tlb_slot { unsigned int list; }; -bool pswiotlb_force_disable; +bool __ro_after_init pswiotlb_force_disable; static struct page *alloc_dma_pages(int nid, gfp_t gfp, size_t bytes); -- Gitee From cbae724e5028ce5b8288e209ca32f128479598db Mon Sep 17 00:00:00 2001 From: Jiakun Shuai Date: Tue, 9 Sep 2025 17:33:56 +0800 Subject: [PATCH 3/6] pswiotlb: Move PCI-related changes to the PCI quirks Storing local node and setting dma segment boundary are moved to PCI quirks. Signed-off-by: Cui Chao Signed-off-by: Jiakun Shuai --- drivers/pci/probe.c | 12 ++---------- drivers/pci/quirks.c | 14 ++++++++++++++ include/linux/pci.h | 3 +++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 18fe32880862..9b1a32d41d15 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -20,9 +20,6 @@ #include #include #include "pci.h" -#ifdef CONFIG_PSWIOTLB -#include -#endif #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ #define CARDBUS_RESERVE_BUSNR 3 @@ -2579,13 +2576,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dma_set_max_seg_size(&dev->dev, 65536); dma_set_seg_boundary(&dev->dev, 0xffffffff); -#ifdef CONFIG_PSWIOTLB - if ((pswiotlb_force_disable != true) && - is_phytium_ps_socs()) { - pswiotlb_store_local_node(dev, bus); - dma_set_seg_boundary(&dev->dev, 0xffffffffffff); - } -#endif + + pci_configure_pswiotlb(dev, bus); pcie_failed_link_retrain(dev); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 9ae9829ba2c9..de2762348fcc 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -32,6 +32,9 @@ #include #include #include "pci.h" +#ifdef CONFIG_PSWIOTLB 
+#include +#endif /* * Retrain the link of a downstream PCIe port by hand if necessary. @@ -6350,6 +6353,17 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node); +void pci_configure_pswiotlb(struct pci_dev *dev, struct pci_bus *bus) +{ +#ifdef CONFIG_PSWIOTLB + if ((pswiotlb_force_disable != true) && + is_phytium_ps_socs()) { + pswiotlb_store_local_node(dev, bus); + dma_set_seg_boundary(&dev->dev, 0xffffffffffff); + } +#endif +} + /* * Devices known to require a longer delay before first config space access * after reset recovery or resume from D3cold: diff --git a/include/linux/pci.h b/include/linux/pci.h index 1a8fe61349e0..15e789c0c89f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2309,9 +2309,12 @@ enum pci_fixup_pass { #ifdef CONFIG_PCI_QUIRKS void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); +void pci_configure_pswiotlb(struct pci_dev *dev, struct pci_bus *bus); #else static inline void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) { } +static inline void pci_configure_pswiotlb(struct pci_dev *dev, + struct pci_bus *bus) { } #endif void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); -- Gitee From 5d3c6566649cd3c3a5032883ec115cfde8957479 Mon Sep 17 00:00:00 2001 From: Jiakun Shuai Date: Tue, 9 Sep 2025 17:40:00 +0800 Subject: [PATCH 4/6] pswiotlb: Move platform-identification to cpu errata The judgment process whether the CPU can use the PSWIOTLB is placed in the initialization phase of cpu features by cpu errata. 
Signed-off-by: Cui Chao Signed-off-by: Jiakun Shuai --- arch/arm64/Kconfig | 10 ++++++++++ arch/arm64/include/asm/cputype.h | 3 +++ arch/arm64/kernel/cpu_errata.c | 30 ++++++++++++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + include/linux/pswiotlb.h | 18 ++---------------- kernel/dma/phytium/pswiotlb.c | 5 +++-- 6 files changed, 49 insertions(+), 18 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index aff99b41981d..d82f03eb1502 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1309,6 +1309,16 @@ config SOCIONEXT_SYNQUACER_PREITS If unsure, say Y. +config PHYTIUM_ERRATUM_FT3386 + bool "FT3386: enable Phytium FT3386 pswiotlb can improve dma performance" + depends on PSWIOTLB + default y + help + Phytium FT3386 pswiotlb can improve D2H dma performance and + should be enabled by default. + + If unsure, say Y. + endmenu # "ARM errata workarounds via the alternatives framework" choice diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index a48d40cf1dfc..66662d3dad40 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -64,6 +64,7 @@ #define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_IMP_AMPERE 0xC0 #define ARM_CPU_IMP_MICROSOFT 0x6D +#define ARM_CPU_IMP_PHYTIUM 0x70 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -163,6 +164,8 @@ #define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ +#define MIDR_FT_FTC862 MIDR_CPU_MODEL(ARM_CPU_IMP_PHYTIUM, PHYTIUM_CPU_PART_FTC862) + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 463b48d0f925..e7928fa5a8e6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -483,6 +483,28 @@ 
static const struct midr_range erratum_ac03_cpu_38_list[] = { }; #endif +#ifdef CONFIG_PHYTIUM_ERRATUM_FT3386 +#define SOC_ID_PS23064 0x8 +#define SOC_ID_PS24080 0x6 +#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) +bool __read_mostly is_ps_socs; +static bool +should_enable_phytium_ft3386_pswiotlb(const struct arm64_cpu_capabilities *entry, int unused) +{ + u32 model; + u32 soc_id; + + soc_id = read_sysreg_s(SYS_AIDR_EL1); + model = read_cpuid_id(); + if ((soc_id == SOC_ID_PS23064 || soc_id == SOC_ID_PS24080) + && model == entry->midr_range.model) { + is_ps_socs = true; + return true; + } else + return false; +} +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -796,6 +818,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list), }, #endif +#ifdef CONFIG_PHYTIUM_ERRATUM_FT3386 + { + .desc = "Phytium erratum FT3386", + .capability = ARM64_WORKAROUND_PHYTIUM_FT3386, + ERRATA_MIDR_ALL_VERSIONS(MIDR_FT_FTC862), + .matches = should_enable_phytium_ft3386_pswiotlb, + }, +#endif #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 { .desc = "AmpereOne erratum AC03_CPU_38", diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index c251ef3caae5..676cc51f0d99 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -101,3 +101,4 @@ WORKAROUND_REPEAT_TLBI WORKAROUND_SPECULATIVE_AT WORKAROUND_SPECULATIVE_SSBS WORKAROUND_SPECULATIVE_UNPRIV_LOAD +WORKAROUND_PHYTIUM_FT3386 diff --git a/include/linux/pswiotlb.h b/include/linux/pswiotlb.h index 1c61ac685a6d..718db1ace085 100644 --- a/include/linux/pswiotlb.h +++ b/include/linux/pswiotlb.h @@ -18,15 +18,11 @@ struct scatterlist; extern bool pswiotlb_force_disable; struct p_io_tlb_pool; -#define SOC_ID_PS23064 0x8 -#define SOC_ID_PS24080 0x6 -#define MIDR_PS 0x700F8620 -#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) #define PSWIOTLB_VERBOSE (1 << 0) /* verbose initialization */ #define PSWIOTLB_FORCEOFF (1 
<< 1) /* force phytium bounce buffering off*/ #define PSWIOTLB_ANY (1 << 2) /* allow any memory for the buffer */ #define PSWIOTLB_FREE_THRESHOLD 30 -static bool __read_mostly is_ps_socs; +extern bool __read_mostly is_ps_socs; /* * Maximum allowable number of contiguous slabs to map, @@ -212,19 +208,9 @@ struct p_io_tlb_pool *pswiotlb_find_pool(struct device *dev, int nid, phys_addr_ static inline bool is_phytium_ps_socs(void) { - unsigned int soc_id; - unsigned int midr; - if (likely(is_ps_socs)) return true; - - soc_id = read_sysreg_s(SYS_AIDR_EL1); - midr = read_cpuid_id(); - if ((soc_id == SOC_ID_PS23064 || soc_id == SOC_ID_PS24080) - && midr == MIDR_PS) { - is_ps_socs = true; - return true; - } else + else return false; } diff --git a/kernel/dma/phytium/pswiotlb.c b/kernel/dma/phytium/pswiotlb.c index 786a63d6f561..fe428658bdc7 100644 --- a/kernel/dma/phytium/pswiotlb.c +++ b/kernel/dma/phytium/pswiotlb.c @@ -1723,7 +1723,8 @@ static int __init pswiotlb_create_default_debugfs(void) { char name[20] = ""; - if (!pswiotlb_mtimer_alive && !pswiotlb_force_disable) { + if (!pswiotlb_mtimer_alive && !pswiotlb_force_disable + && is_phytium_ps_socs()) { pr_info("setup pswiotlb monitor timer service\n"); timer_setup(&service_timer, pswiotlb_monitor_service, 0); pswiotlb_mtimer_alive = true; @@ -1732,7 +1733,7 @@ static int __init pswiotlb_create_default_debugfs(void) mod_timer(&service_timer, jiffies + 2 * HZ); } - if (!pswiotlb_force_disable) { + if (!pswiotlb_force_disable && is_phytium_ps_socs()) { sprintf(name, "%s", "pswiotlb"); pswiotlb_create_pswiotlb_debugfs_files(name); } -- Gitee From 567eb87183d61d5800e4e94537bacdaf5d6df986 Mon Sep 17 00:00:00 2001 From: Cui Chao Date: Tue, 19 Aug 2025 16:31:31 +0800 Subject: [PATCH 5/6] pswiotlb: Adjust the location of the platform-identification process Move platform-identification process in front of the reference of struct device. 
Signed-off-by: Cui Chao Signed-off-by: Jiakun Shuai --- kernel/dma/phytium/pswiotlb-dma.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/dma/phytium/pswiotlb-dma.h b/kernel/dma/phytium/pswiotlb-dma.h index 8d355e8baa3a..bd5fac8e79a2 100644 --- a/kernel/dma/phytium/pswiotlb-dma.h +++ b/kernel/dma/phytium/pswiotlb-dma.h @@ -148,8 +148,8 @@ void pswiotlb_iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, static inline bool check_if_pswiotlb_is_applicable(struct device *dev) { - if (dev && dev->can_use_pswiotlb && is_phytium_ps_socs() - && !pswiotlb_force_disable) { + if (!pswiotlb_force_disable && is_phytium_ps_socs() + && dev && dev->can_use_pswiotlb) { if (dev->numa_node == NUMA_NO_NODE || dev->numa_node != dev->local_node) dev->numa_node = dev->local_node; -- Gitee From c7eee1b19c662d67996ce5d663c02dfa3b5dca13 Mon Sep 17 00:00:00 2001 From: Cui Chao Date: Thu, 21 Aug 2025 18:04:04 +0800 Subject: [PATCH 6/6] pswiotlb: Fix an issue of member variable force not being initialized Change the name of the member variable from force to forceoff and initialize it. Signed-off-by: Cui Chao Signed-off-by: Jiakun Shuai --- include/trace/events/pswiotlb.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/trace/events/pswiotlb.h b/include/trace/events/pswiotlb.h index ed26c41a4046..b65b35949953 100644 --- a/include/trace/events/pswiotlb.h +++ b/include/trace/events/pswiotlb.h @@ -20,7 +20,7 @@ TRACE_EVENT(pswiotlb_bounced, __field(u64, dma_mask) __field(dma_addr_t, dev_addr) __field(size_t, size) - __field(bool, force) + __field(bool, forceoff) ), TP_fast_assign( @@ -28,6 +28,7 @@ TRACE_EVENT(pswiotlb_bounced, __entry->dma_mask = (dev->dma_mask ? 
*dev->dma_mask : 0); __entry->dev_addr = dev_addr; __entry->size = size; + __entry->forceoff = pswiotlb_force_disable; ), TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx size=%zu %s", @@ -35,7 +36,7 @@ TRACE_EVENT(pswiotlb_bounced, __entry->dma_mask, (unsigned long long)__entry->dev_addr, __entry->size, - __entry->force ? "NORMAL" : "FORCEOFF") + __entry->forceoff ? "FORCEOFF" : "NORMAL") ); #endif /* _TRACE_PSWIOTLB_H */ -- Gitee